From 7b96ed6422a0e752ad17b978a2f9741eb9f1ce78 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Tue, 16 May 2023 06:05:48 +0000 Subject: [PATCH] import qemu-kvm-6.2.0-32.module+el8.8.0+18361+9f407f6e --- .gitignore | 1 + .qemu-kvm.metadata | 1 + ...at-Adding-slirp-to-the-exploded-tree.patch | 17931 ++++++++++++++++ SOURCES/0005-Initial-redhat-build.patch | 351 + ...0006-Enable-disable-devices-for-RHEL.patch | 795 + ...Machine-type-related-general-changes.patch | 1071 + SOURCES/0008-Add-aarch64-machine-types.patch | 405 + SOURCES/0009-Add-ppc64-machine-types.patch | 714 + SOURCES/0010-Add-s390x-machine-types.patch | 165 + SOURCES/0011-Add-x86_64-machine-types.patch | 1276 ++ SOURCES/0012-Enable-make-check.patch | 407 + ...mber-of-devices-that-can-be-assigned.patch | 110 + ...Add-support-statement-to-help-output.patch | 55 + ...lly-limit-the-maximum-number-of-CPUs.patch | 65 + ...documentation-instead-of-qemu-system.patch | 126 + ...ct-scsi-cd-if-data-plane-enabled-RHE.patch | 66 + ...e-at-least-64kiB-pages-for-downstrea.patch | 60 + ...019-compat-Update-hw_compat_rhel_8_5.patch | 53 + ...pdate-pseries-rhel8.5.0-machine-type.patch | 43 + ...8.5.0-Update-machine-type-compatibil.patch | 51 + ...22-Fix-virtio-net-pci-vectors-compat.patch | 45 + ...machine-types-Add-pc_rhel_8_5_compat.patch | 73 + ...-types-Wire-compat-into-q35-and-i440.patch | 54 + ...-machine-type-compatibility-handling.patch | 58 + SOURCES/81-kvm-rhel.rules | 1 + SOURCES/85-kvm.preset | 5 + SOURCES/95-kvm-memlock.conf | 10 + SOURCES/99-qemu-guest-agent.rules | 2 + SOURCES/README.tests | 39 + SOURCES/bridge.conf | 1 + SOURCES/ksm.service | 13 + SOURCES/ksm.sysconfig | 4 + SOURCES/ksmctl.c | 77 + SOURCES/ksmtuned | 139 + SOURCES/ksmtuned.conf | 21 + SOURCES/ksmtuned.service | 12 + ...sync-missed-zero-copy-migration-stat.patch | 87 + SOURCES/kvm-Enable-SGX-RH-Only.patch | 28 + ...kvm-KVM-keep-track-of-running-ioctls.patch | 82 + ...nd-invalid-CPUID-0xD-9-info-on-some-.patch | 109 + ...lags-on-io_writev-and-introduce-io_f.patch | 420 + ...et-Add-support-for-MSG_ZEROCOPY-IPV6.patch | 56 + ...-Fix-zero-copy-flush-returning-code-.patch | 65 + ...-Fix-zero-copy-send-so-socket-flush-.patch | 58 + ...-Implement-io_writev-zero-copy-flag-.patch | 249 + ...-Introduce-assert-and-reduce-ifdefs-.patch | 82 + ...packet-for-vhost-vsock-device-in-rhe.patch | 107 + ...d-hw_compat_4_2_extra-and-apply-to-u.patch | 93 + ...d-some-devices-for-exporting-upstrea.patch | 128 + ...able-FDC-device-for-upstream-machine.patch | 53 + ...pose-upstream-machines-pc-4.2-and-pc.patch | 191 + ...kvm-Update-linux-headers-to-v6.0-rc4.patch | 171 + ...el-introduce-accelerator-blocker-API.patch | 349 + ...acpi-fix-OEM-ID-OEM-Table-ID-padding.patch | 78 + ...U-crash-when-started-with-SLIC-table.patch | 108 + ...pcie-set-power-on-cap-on-parent-slot.patch | 140 + ...-validate-hotplug-selector-on-access.patch | 51 + ...aio-wait-switch-to-smp_mb__after_rmw.patch | 50 + ...wait_kick-add-missing-memory-barrier.patch | 86 + ...sage-of-barriers-in-the-polling-case.patch | 66 + ...documentation-of-the-memory-barriers.patch | 111 + ...-Fix-support-of-memory-backend-memfd.patch | 71 + ...ntext-for-drain_end-in-blockdev-reop.patch | 63 + ...ke-bdrv_refresh_limits-non-recursive.patch | 78 + ...event-dangling-BDS-pointers-across-a.patch | 129 + ...Update-BSC-only-if-want_zero-is-true.patch | 56 + ...mirror-Do-not-wait-for-active-writes.patch | 153 + ...r-Drop-mirror_wait_for_any_operation.patch | 76 + ...rror-Fix-NULL-s-job-in-active-writes.patch | 75 + ...sert-there-are-no-timers-when-closed.patch | 52 + ...lete-reconnect-delay-timer-when-done.patch | 54 + ...-nbd-Move-s-ioc-on-AioContext-change.patch | 107 + ...ndling-of-holes-in-.bdrv_co_block_st.patch | 59 + ...-rbd-workaround-for-ceph-issue-53784.patch | 103 + ...er-fix-race-condition-in-qxl_cursor-.patch | 58 + ...ent-dma_blk_cb-vs-dma_aio_cancel-rac.patch | 127 + ...kvm-doc-Add-the-SGX-numa-description.patch | 77 + ...x-Document-the-loadparm-machine-prop.patch | 70 + ...cture-section-and-section-string-tab.patch | 356 + .../kvm-dump-Add-more-offset-variables.patch | 138 + ...p-Cleanup-dump_begin-write-functions.patch | 94 + ...m-dump-Consolidate-elf-note-function.patch | 67 + ...vm-dump-Consolidate-phdr-note-writes.patch | 169 + ...roduce-dump_is_64bit-helper-function.patch | 118 + ...duce-shdr_num-to-decrease-complexity.patch | 136 + ...mp_iterate-and-introduce-dump_filter.patch | 142 + ...uce-memory_offset-and-section_offset.patch | 45 + ...section-if-when-calculating-the-memo.patch | 70 + ...kvm-dump-Remove-the-sh_info-variable.patch | 176 + ...e_elf-_phdr_note-to-prepare_elf-_phd.patch | 69 + ...te_elf_loads-to-write_elf_phdr_loads.patch | 57 + .../kvm-dump-Reorder-struct-DumpState.patch | 68 + ...que-DumpState-pointer-with-a-typed-o.patch | 467 + ...-Rework-dump_calculate_size-function.patch | 73 + ...vm-dump-Rework-filter-area-variables.patch | 187 + SOURCES/kvm-dump-Rework-get_start_block.patch | 102 + ...eader-functions-into-prepare-and-wri.patch | 173 + SOURCES/kvm-dump-Use-ERRP_GUARD.patch | 420 + ...fer-for-ELF-section-data-and-headers.patch | 150 + ...ection-headers-right-after-ELF-heade.patch | 104 + ...dump-to-work-over-non-aligned-blocks.patch | 173 + ...p-simplify-a-bit-kdump-get_next_page.patch | 75 + SOURCES/kvm-edu-add-smp_mb__after_rmw.patch | 61 + ..._status-hook-implementation-for-acpi.patch | 81 + ...kvm-hw-arm-virt-Add-8.6-machine-type.patch | 57 + ...k-no_tcg_its-and-minor-style-changes.patch | 86 + ...t-Register-iommu-as-a-class-property.patch | 78 + ...irt-Register-its-as-a-class-property.patch | 57 + ...virt-Rename-default_bus_bypass_iommu.patch | 46 + ...vent-end-of-track-overrun-CVE-2021-3.patch | 97 + ...ssert-memory-slot-fits-in-preallocat.patch | 52 + ...void-buffer-overrun-in-qxl_phys2virt.patch | 130 + ...w-display-qxl-Document-qxl_phys2virt.patch | 70 + ...ave-qxl_log_command-Return-early-if-.patch | 74 + ...ass-requested-buffer-size-to-qxl_phy.patch | 234 + ...3-Check-for-MEMTX_OK-instead-of-MEMT.patch | 75 + ...ix-leak-of-host-notifier-memory-regi.patch | 66 + ...-Server-v6-CPU-model-with-5-level-EP.patch | 59 + ...msr_feature_control-first-thing-when.patch | 67 + ...eset-KVM-nested-state-upon-CPU-reset.patch | 94 + ...ent-BB-in-flight-counter-for-TRIM-BH.patch | 92 + ...m-include-elf.h-add-s390x-note-types.patch | 43 + ...port-for-MSG_PEEK-for-socket-channel.patch | 367 + ...08-Fix-when-missing-user_allow_other.patch | 52 + ...-Test-new-refcount-rebuild-algorithm.patch | 445 + ...Let-NBD-connection-yield-in-iothread.patch | 108 + ...vm-iotests-281-Test-lingering-timers.patch | 174 + ...iotests-Allow-using-QMP-with-the-QSD.patch | 99 + ...ckdev-reopen-with-iothreads-and-thro.patch | 106 + ...-iotests-block-status-cache-New-test.patch | 197 + ...ests-graph-changes-while-io-New-test.patch | 153 + ...tests-stream-error-on-reset-New-test.patch | 198 + ...tests.py-Add-QemuStorageDaemon-class.patch | 92 + SOURCES/kvm-kvm-Atomic-memslot-updates.patch | 290 + ...n-why-max-batch-is-checked-in-laio_i.patch | 49 + ...balanced-plugged-counter-in-laio_io_.patch | 56 + ...-headers-Update-headers-to-v5.17-rc1.patch | 1227 ++ ...rs-include-missing-changes-from-5.17.patch | 58 + ...kvm-linux-headers-update-to-5.16-rc1.patch | 725 + ...migration-Add-migrate_use_tls-helper.patch | 106 + ...migration_incoming_transport_cleanup.patch | 102 + ...ro-copy-send-parameter-for-QMP-HMP-f.patch | 250 + ...gration-All-this-fields-are-unsigned.patch | 329 + ...migrate-recover-to-run-multiple-time.patch | 98 + ...false-positive-on-non-supported-scen.patch | 93 + ...-zero_copy_send-from-migration-param.patch | 289 + ...ration-Introduce-ram_transferred_add.patch | 122 + ...ver-call-twice-qemu_target_page_size.patch | 116 + SOURCES/kvm-migration-Read-state-once.patch | 76 + ...pre-copy-downtime-and-post-copy-byte.patch | 122 + ...maining-params-has_-true-in-migratio.patch | 62 + ...magic-value-for-deciding-the-mapping.patch | 296 + ...d-Report-to-user-when-zerocopy-not-w.patch | 83 + ...vm-multifd-Add-missing-documentation.patch | 82 + ...-Fill-offset-and-block-for-reception.patch | 50 + ...t-zero-copy-write-in-multifd-migrati.patch | 182 + ...zlib-compression-method-not-use-iovs.patch | 98 + SOURCES/kvm-multifd-Make-zlib-use-iov-s.patch | 53 + ...zstd-compression-method-not-use-iovs.patch | 94 + SOURCES/kvm-multifd-Make-zstd-use-iov-s.patch | 53 + ...ultifd-Move-iov-from-pages-to-params.patch | 190 + ...kvm-multifd-Remove-send_write-method.patch | 160 + ...kvm-multifd-Rename-used-field-to-num.patch | 177 + ...der-packet-without-flags-if-zero-cop.patch | 102 + ...ariable-is-only-used-inside-the-loop.patch | 48 + ...Use-a-single-writev-on-the-send-side.patch | 80 + ...-normal-pages-array-on-the-send-side.patch | 261 + ...send_sync_main-now-returns-negative-.patch | 163 + ...sed-parameter-from-send_prepare-meth.patch | 135 + ...sed-parameter-from-send_recv_pages-m.patch | 149 + ...uma-Enable-numa-for-SGX-EPC-sections.patch | 287 + ...-numa-in-the-monitor-and-Libvirt-int.patch | 210 + ...-Fix-booting-with-logical-block-size.patch | 63 + ...-Split-virtio-scsi-code-from-virtio_.patch | 180 + ...-bootmap-Improve-the-guessing-logic-.patch | 102 + ...-virtio-Beautify-the-code-for-readin.patch | 56 + ...-virtio-Introduce-a-macro-for-the-DA.patch | 63 + ...-virtio-Read-device-config-after-fea.patch | 67 + ...-virtio-Set-missing-status-bits-whil.patch | 93 + ...-virtio-blkdev-Remove-virtio_assume_.patch | 101 + ...-virtio-blkdev-Request-the-right-fea.patch | 63 + ...-virtio-blkdev-Simplify-fix-virtio_i.patch | 124 + ...-expose-TYPE_XIO3130_DOWNSTREAM-name.patch | 83 + ...m-physmem-add-missing-memory-barrier.patch | 55 + ...-related-comments-and-restore-sectio.patch | 214 + ...qatomic-add-smp_mb__before-after_rmw.patch | 177 + ...d-errp-to-rebuild_refcount_structure.patch | 162 + ...tical-corruption-in-store_bitmap-err.patch | 67 + ...mprove-refcount-structure-rebuilding.patch | 465 + ...coroutine-lock-add-smp_mb__after_rmw.patch | 75 + ...Report-errors-while-closing-the-imag.patch | 70 + ...Report-errors-while-closing-the-imag.patch | 67 + ...t-qemu-img-bitmap-commit-exit-code-o.patch | 166 + ...posix-cleanup-fix-document-QemuEvent.patch | 146 + ...win32-cleanup-fix-document-QemuEvent.patch | 162 + ...Add-rhel8.6.0-machine-type-for-s390x.patch | 69 + ...efine-pseries-rhel8.6.0-machine-type.patch | 76 + ...nux-headers-linux-kvm.h-to-v5.18-rc6.patch | 111 + ...machine-types-x86-set-prefer_sockets.patch | 52 + .../kvm-s390x-Add-KVM-PV-dump-interface.patch | 124 + .../kvm-s390x-Add-protected-dump-cap.patch | 113 + ...m-s390x-Introduce-PV-query-interface.patch | 174 + ...YPE_S390_CCW_MACHINE-properties-as-c.patch | 209 + .../kvm-s390x-css-fix-PMCW-invalid-mask.patch | 58 + ...rt-extended-kernel-command-line-size.patch | 97 + ...-second-pass-when-mappings-exhausted.patch | 114 + ...utine-to-get-host-function-handle-fr.patch | 178 + ...pported-DT-information-to-clp-respon.patch | 99 + ...-s390x-pci-coalesce-unmap-operations.patch | 125 + ...fence-interpreted-devices-without-MS.patch | 60 + ...use-hard-coded-dma-range-in-reg_ioat.patch | 77 + ...-adapter-event-notification-for-inte.patch | 265 + ...enable-for-load-store-interpretation.patch | 319 + ...tercept-devices-have-separate-PCI-gr.patch | 192 + ...t-proper-maxstbl-for-groups-of-inter.patch | 52 + ...ISM-passthrough-devices-on-shutdown-.patch | 147 + ...-DMA-aperture-to-be-bound-by-vfio-DM.patch | 91 + ...reserved-ID-for-the-default-PCI-grou.patch | 49 + ...e-passthrough-measurement-update-int.patch | 59 + SOURCES/kvm-s390x-pv-Add-dump-support.patch | 445 + ...o-ccw-Switch-off-zPCI-enhancements-o.patch | 55 + ...n-t-save-restore-the-TOD-in-PV-guest.patch | 70 + SOURCES/kvm-s390x.conf | 19 + ...c-Fix-emulated-block-limits-VPD-page.patch | 97 + ...otect-req-aiocb-with-AioContext-lock.patch | 176 + SOURCES/kvm-setup | 49 + SOURCES/kvm-setup.service | 14 + ...ce-deletion-events-with-device-JSON-.patch | 131 + ...Introduce-MemTxAttrs-memory-field-an.patch | 175 + ...Simplify-flatview_write-and-address_.patch | 80 + ...etry-KVM_CREATE_VM-call-if-it-fails-.patch | 65 + ...do-not-access-uninitialized-variable.patch | 73 + ...get-i386-properly-reset-TSC-on-reset.patch | 83 + ...h_dump-Fix-memory-corruption-in-s390.patch | 50 + ...-Honor-storage-keys-during-emulation.patch | 106 + ...ests-acpi-SLIC-update-expected-blobs.patch | 47 + .../kvm-tests-acpi-add-SLIC-table-test.patch | 76 + ...lly-pad-OEM_ID-OEM_TABLE_ID-for-test.patch | 84 + ...short-OEM_ID-OEM_TABLE_ID-values-in-.patch | 77 + ...kvm-tests-acpi-update-expected-blobs.patch | 58 + ...list-expected-blobs-before-changing-.patch | 47 + ...list-nvdimm-s-SSDT-and-FACP.slic-exp.patch | 57 + ...test-Add-a-regression-test-for-CVE-2.patch | 120 + ...teger-overflow-in-cursor_alloc-CVE-2.patch | 105 + ...-fix-integer-underflow-in-vnc_client.patch | 80 + .../kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch | 90 + ...-improper-cleanup-in-vhost_net_start.patch | 56 + ...backend-feature-should-set-only-once.patch | 58 + ...e-name-and-polarity-for-vhost_vdpa_o.patch | 126 + ...mproper-cleanup-in-net_init_vhost_vd.patch | 48 + ...ch-the-virqueue-element-in-case-of-e.patch | 76 + ...ture-negotiation-for-ACCESS_PLATFORM.patch | 102 + ...ondition-for-iommu_platform-not-supp.patch | 115 + ...m-virtio-gpu-do-not-byteswap-padding.patch | 48 + ...-ctrl_vq-index-for-non-mq-guest-for-.patch | 143 + ...-handle-mq-request-in-userspace-hand.patch | 109 + ...-map-leaking-on-error-during-receive.patch | 60 + ...-vhost_dev-and-notifiers-for-cvq-onl.patch | 52 + ...t-SCSI-devices-from-main-loop-thread.patch | 337 + ...embership-of-all-supplementary-group.patch | 110 + ...eakage-due-to-fuse_init_in-size-chan.patch | 63 + ...date_time_get_microsecond-to-get-sub.patch | 65 + SOURCES/kvm-vmxcap-Add-5-level-EPT-bit.patch | 48 + .../kvm-x86-Add-AMX-CPUIDs-enumeration.patch | 135 + ...MX-XTILECFG-and-XTILEDATA-components.patch | 112 + ...FD-faulting-bit-for-state-components.patch | 62 + ...-x86-Add-q35-RHEL-8.6.0-machine-type.patch | 68 + ...yte-boundary-enumeration-for-extende.patch | 88 + ...m-x86-Grant-AMX-permission-for-guest.patch | 215 + ...ort-XFD-and-AMX-xsave-data-migration.patch | 178 + ...for-KVM_CAP_XSAVE2-and-AMX-state-mig.patch | 182 + SOURCES/kvm-x86.conf | 12 + SOURCES/kvm.conf | 3 + SOURCES/qemu-ga.sysconfig | 19 + SOURCES/qemu-guest-agent.service | 20 + SOURCES/qemu-pr-helper.service | 15 + SOURCES/qemu-pr-helper.socket | 9 + SOURCES/tests_data_acpi_pc_SSDT.dimmpxm | Bin 0 -> 734 bytes SOURCES/tests_data_acpi_q35_FACP.slic | Bin 0 -> 244 bytes SOURCES/tests_data_acpi_q35_SSDT.dimmpxm | Bin 0 -> 734 bytes SOURCES/tests_data_acpi_virt_SSDT.memhp | Bin 0 -> 736 bytes SOURCES/udev-kvm-check.c | 155 + SOURCES/vhost.conf | 3 + SPECS/qemu-kvm.spec | 4868 +++++ 284 files changed, 59108 insertions(+) create mode 100644 .gitignore create mode 100644 .qemu-kvm.metadata create mode 100644 SOURCES/0001-redhat-Adding-slirp-to-the-exploded-tree.patch create mode 100644 SOURCES/0005-Initial-redhat-build.patch create mode 100644 SOURCES/0006-Enable-disable-devices-for-RHEL.patch create mode 100644 SOURCES/0007-Machine-type-related-general-changes.patch create mode 100644 SOURCES/0008-Add-aarch64-machine-types.patch create mode 100644 SOURCES/0009-Add-ppc64-machine-types.patch create mode 100644 SOURCES/0010-Add-s390x-machine-types.patch create mode 100644 SOURCES/0011-Add-x86_64-machine-types.patch create mode 100644 SOURCES/0012-Enable-make-check.patch create mode 100644 SOURCES/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch create mode 100644 SOURCES/0014-Add-support-statement-to-help-output.patch create mode 100644 SOURCES/0015-globally-limit-the-maximum-number-of-CPUs.patch create mode 100644 SOURCES/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch create mode 100644 SOURCES/0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch create mode 100644 SOURCES/0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch create mode 100644 SOURCES/0019-compat-Update-hw_compat_rhel_8_5.patch create mode 100644 SOURCES/0020-redhat-Update-pseries-rhel8.5.0-machine-type.patch create mode 100644 SOURCES/0021-redhat-virt-rhel8.5.0-Update-machine-type-compatibil.patch create mode 100644 SOURCES/0022-Fix-virtio-net-pci-vectors-compat.patch create mode 100644 SOURCES/0023-x86-rhel-machine-types-Add-pc_rhel_8_5_compat.patch create mode 100644 SOURCES/0024-x86-rhel-machine-types-Wire-compat-into-q35-and-i440.patch create mode 100644 SOURCES/0025-redhat-Add-s390x-machine-type-compatibility-handling.patch create mode 100644 SOURCES/81-kvm-rhel.rules create mode 100644 SOURCES/85-kvm.preset create mode 100644 SOURCES/95-kvm-memlock.conf create mode 100644 SOURCES/99-qemu-guest-agent.rules create mode 100644 SOURCES/README.tests create mode 100644 SOURCES/bridge.conf create mode 100644 SOURCES/ksm.service create mode 100644 SOURCES/ksm.sysconfig create mode 100644 SOURCES/ksmctl.c create mode 100644 SOURCES/ksmtuned create mode 100644 SOURCES/ksmtuned.conf create mode 100644 SOURCES/ksmtuned.service create mode 100644 SOURCES/kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch create mode 100644 SOURCES/kvm-Enable-SGX-RH-Only.patch create mode 100644 SOURCES/kvm-KVM-keep-track-of-running-ioctls.patch create mode 100644 SOURCES/kvm-KVM-x86-workaround-invalid-CPUID-0xD-9-info-on-some-.patch create mode 100644 SOURCES/kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch create mode 100644 SOURCES/kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch create mode 100644 SOURCES/kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch create mode 100644 SOURCES/kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch create mode 100644 SOURCES/kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch create mode 100644 SOURCES/kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch create mode 100644 SOURCES/kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch create mode 100644 SOURCES/kvm-Revert-redhat-Add-hw_compat_4_2_extra-and-apply-to-u.patch create mode 100644 SOURCES/kvm-Revert-redhat-Add-some-devices-for-exporting-upstrea.patch create mode 100644 SOURCES/kvm-Revert-redhat-Enable-FDC-device-for-upstream-machine.patch create mode 100644 SOURCES/kvm-Revert-redhat-Expose-upstream-machines-pc-4.2-and-pc.patch create mode 100644 SOURCES/kvm-Update-linux-headers-to-v6.0-rc4.patch create mode 100644 SOURCES/kvm-accel-introduce-accelerator-blocker-API.patch create mode 100644 SOURCES/kvm-acpi-fix-OEM-ID-OEM-Table-ID-padding.patch create mode 100644 SOURCES/kvm-acpi-fix-QEMU-crash-when-started-with-SLIC-table.patch create mode 100644 SOURCES/kvm-acpi-pcihp-pcie-set-power-on-cap-on-parent-slot.patch create mode 100644 SOURCES/kvm-acpi-validate-hotplug-selector-on-access.patch create mode 100644 SOURCES/kvm-aio-wait-switch-to-smp_mb__after_rmw.patch create mode 100644 SOURCES/kvm-aio_wait_kick-add-missing-memory-barrier.patch create mode 100644 SOURCES/kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch create mode 100644 SOURCES/kvm-async-update-documentation-of-the-memory-barriers.patch create mode 100644 SOURCES/kvm-backends-hostmem-Fix-support-of-memory-backend-memfd.patch create mode 100644 SOURCES/kvm-block-Lock-AioContext-for-drain_end-in-blockdev-reop.patch create mode 100644 SOURCES/kvm-block-Make-bdrv_refresh_limits-non-recursive.patch create mode 100644 SOURCES/kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch create mode 100644 SOURCES/kvm-block-io-Update-BSC-only-if-want_zero-is-true.patch create mode 100644 SOURCES/kvm-block-mirror-Do-not-wait-for-active-writes.patch create mode 100644 SOURCES/kvm-block-mirror-Drop-mirror_wait_for_any_operation.patch create mode 100644 SOURCES/kvm-block-mirror-Fix-NULL-s-job-in-active-writes.patch create mode 100644 SOURCES/kvm-block-nbd-Assert-there-are-no-timers-when-closed.patch create mode 100644 SOURCES/kvm-block-nbd-Delete-reconnect-delay-timer-when-done.patch create mode 100644 SOURCES/kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch create mode 100644 SOURCES/kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch create mode 100644 SOURCES/kvm-block-rbd-workaround-for-ceph-issue-53784.patch create mode 100644 SOURCES/kvm-display-qxl-render-fix-race-condition-in-qxl_cursor-.patch create mode 100644 SOURCES/kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch create mode 100644 SOURCES/kvm-doc-Add-the-SGX-numa-description.patch create mode 100644 SOURCES/kvm-docs-system-s390x-Document-the-loadparm-machine-prop.patch create mode 100644 SOURCES/kvm-dump-Add-architecture-section-and-section-string-tab.patch create mode 100644 SOURCES/kvm-dump-Add-more-offset-variables.patch create mode 100644 SOURCES/kvm-dump-Cleanup-dump_begin-write-functions.patch create mode 100644 SOURCES/kvm-dump-Consolidate-elf-note-function.patch create mode 100644 SOURCES/kvm-dump-Consolidate-phdr-note-writes.patch create mode 100644 SOURCES/kvm-dump-Introduce-dump_is_64bit-helper-function.patch create mode 100644 SOURCES/kvm-dump-Introduce-shdr_num-to-decrease-complexity.patch create mode 100644 SOURCES/kvm-dump-Refactor-dump_iterate-and-introduce-dump_filter.patch create mode 100644 SOURCES/kvm-dump-Reintroduce-memory_offset-and-section_offset.patch create mode 100644 SOURCES/kvm-dump-Remove-the-section-if-when-calculating-the-memo.patch create mode 100644 SOURCES/kvm-dump-Remove-the-sh_info-variable.patch create mode 100644 SOURCES/kvm-dump-Rename-write_elf-_phdr_note-to-prepare_elf-_phd.patch create mode 100644 SOURCES/kvm-dump-Rename-write_elf_loads-to-write_elf_phdr_loads.patch create mode 100644 SOURCES/kvm-dump-Reorder-struct-DumpState.patch create mode 100644 SOURCES/kvm-dump-Replace-opaque-DumpState-pointer-with-a-typed-o.patch create mode 100644 SOURCES/kvm-dump-Rework-dump_calculate_size-function.patch create mode 100644 SOURCES/kvm-dump-Rework-filter-area-variables.patch create mode 100644 SOURCES/kvm-dump-Rework-get_start_block.patch create mode 100644 SOURCES/kvm-dump-Split-elf-header-functions-into-prepare-and-wri.patch create mode 100644 SOURCES/kvm-dump-Use-ERRP_GUARD.patch create mode 100644 SOURCES/kvm-dump-Use-a-buffer-for-ELF-section-data-and-headers.patch create mode 100644 SOURCES/kvm-dump-Write-ELF-section-headers-right-after-ELF-heade.patch create mode 100644 SOURCES/kvm-dump-fix-kdump-to-work-over-non-aligned-blocks.patch create mode 100644 SOURCES/kvm-dump-simplify-a-bit-kdump-get_next_page.patch create mode 100644 SOURCES/kvm-edu-add-smp_mb__after_rmw.patch create mode 100644 SOURCES/kvm-hw-acpi-Add-ospm_status-hook-implementation-for-acpi.patch create mode 100644 SOURCES/kvm-hw-arm-virt-Add-8.6-machine-type.patch create mode 100644 SOURCES/kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch create mode 100644 SOURCES/kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch create mode 100644 SOURCES/kvm-hw-arm-virt-Register-its-as-a-class-property.patch create mode 100644 SOURCES/kvm-hw-arm-virt-Rename-default_bus_bypass_iommu.patch create mode 100644 SOURCES/kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch create mode 100644 SOURCES/kvm-hw-display-qxl-Assert-memory-slot-fits-in-preallocat.patch create mode 100644 SOURCES/kvm-hw-display-qxl-Avoid-buffer-overrun-in-qxl_phys2virt.patch create mode 100644 SOURCES/kvm-hw-display-qxl-Document-qxl_phys2virt.patch create mode 100644 SOURCES/kvm-hw-display-qxl-Have-qxl_log_command-Return-early-if-.patch create mode 100644 SOURCES/kvm-hw-display-qxl-Pass-requested-buffer-size-to-qxl_phy.patch create mode 100644 SOURCES/kvm-hw-intc-arm_gicv3-Check-for-MEMTX_OK-instead-of-MEMT.patch create mode 100644 SOURCES/kvm-hw-virtio-vdpa-Fix-leak-of-host-notifier-memory-regi.patch create mode 100644 SOURCES/kvm-i386-Add-Icelake-Server-v6-CPU-model-with-5-level-EP.patch create mode 100644 SOURCES/kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch create mode 100644 SOURCES/kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch create mode 100644 SOURCES/kvm-ide-Increment-BB-in-flight-counter-for-TRIM-BH.patch create mode 100644 SOURCES/kvm-include-elf.h-add-s390x-note-types.patch create mode 100644 SOURCES/kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch create mode 100644 SOURCES/kvm-iotests-108-Fix-when-missing-user_allow_other.patch create mode 100644 SOURCES/kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch create mode 100644 SOURCES/kvm-iotests-281-Let-NBD-connection-yield-in-iothread.patch create mode 100644 SOURCES/kvm-iotests-281-Test-lingering-timers.patch create mode 100644 SOURCES/kvm-iotests-Allow-using-QMP-with-the-QSD.patch create mode 100644 SOURCES/kvm-iotests-Test-blockdev-reopen-with-iothreads-and-thro.patch create mode 100644 SOURCES/kvm-iotests-block-status-cache-New-test.patch create mode 100644 SOURCES/kvm-iotests-graph-changes-while-io-New-test.patch create mode 100644 SOURCES/kvm-iotests-stream-error-on-reset-New-test.patch create mode 100644 SOURCES/kvm-iotests.py-Add-QemuStorageDaemon-class.patch create mode 100644 SOURCES/kvm-kvm-Atomic-memslot-updates.patch create mode 100644 SOURCES/kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch create mode 100644 SOURCES/kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch create mode 100644 SOURCES/kvm-linux-headers-Update-headers-to-v5.17-rc1.patch create mode 100644 SOURCES/kvm-linux-headers-include-missing-changes-from-5.17.patch create mode 100644 SOURCES/kvm-linux-headers-update-to-5.16-rc1.patch create mode 100644 SOURCES/kvm-migration-Add-migrate_use_tls-helper.patch create mode 100644 SOURCES/kvm-migration-Add-migration_incoming_transport_cleanup.patch create mode 100644 SOURCES/kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch create mode 100644 SOURCES/kvm-migration-All-this-fields-are-unsigned.patch create mode 100644 SOURCES/kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch create mode 100644 SOURCES/kvm-migration-Avoid-false-positive-on-non-supported-scen.patch create mode 100644 SOURCES/kvm-migration-Change-zero_copy_send-from-migration-param.patch create mode 100644 SOURCES/kvm-migration-Introduce-ram_transferred_add.patch create mode 100644 SOURCES/kvm-migration-Never-call-twice-qemu_target_page_size.patch create mode 100644 SOURCES/kvm-migration-Read-state-once.patch create mode 100644 SOURCES/kvm-migration-Tally-pre-copy-downtime-and-post-copy-byte.patch create mode 100644 SOURCES/kvm-migration-add-remaining-params-has_-true-in-migratio.patch create mode 100644 SOURCES/kvm-migration-check-magic-value-for-deciding-the-mapping.patch create mode 100644 SOURCES/kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch create mode 100644 SOURCES/kvm-multifd-Add-missing-documentation.patch create mode 100644 SOURCES/kvm-multifd-Fill-offset-and-block-for-reception.patch create mode 100644 SOURCES/kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch create mode 100644 SOURCES/kvm-multifd-Make-zlib-compression-method-not-use-iovs.patch create mode 100644 SOURCES/kvm-multifd-Make-zlib-use-iov-s.patch create mode 100644 SOURCES/kvm-multifd-Make-zstd-compression-method-not-use-iovs.patch create mode 100644 SOURCES/kvm-multifd-Make-zstd-use-iov-s.patch create mode 100644 SOURCES/kvm-multifd-Move-iov-from-pages-to-params.patch create mode 100644 SOURCES/kvm-multifd-Remove-send_write-method.patch create mode 100644 SOURCES/kvm-multifd-Rename-used-field-to-num.patch create mode 100644 SOURCES/kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch create mode 100644 SOURCES/kvm-multifd-The-variable-is-only-used-inside-the-loop.patch create mode 100644 SOURCES/kvm-multifd-Use-a-single-writev-on-the-send-side.patch create mode 100644 SOURCES/kvm-multifd-Use-normal-pages-array-on-the-send-side.patch create mode 100644 SOURCES/kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch create mode 100644 SOURCES/kvm-multifd-remove-used-parameter-from-send_prepare-meth.patch create mode 100644 SOURCES/kvm-multifd-remove-used-parameter-from-send_recv_pages-m.patch create mode 100644 SOURCES/kvm-numa-Enable-numa-for-SGX-EPC-sections.patch create mode 100644 SOURCES/kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch create mode 100644 SOURCES/kvm-pc-bios-s390-ccw-Fix-booting-with-logical-block-size.patch create mode 100644 SOURCES/kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch create mode 100644 SOURCES/kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch create mode 100644 SOURCES/kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch create mode 100644 SOURCES/kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch create mode 100644 SOURCES/kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch create mode 100644 SOURCES/kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch create mode 100644 SOURCES/kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch create mode 100644 SOURCES/kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch create mode 100644 SOURCES/kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch create mode 100644 SOURCES/kvm-pci-expose-TYPE_XIO3130_DOWNSTREAM-name.patch create mode 100644 SOURCES/kvm-physmem-add-missing-memory-barrier.patch create mode 100644 SOURCES/kvm-qapi-Cleanup-SGX-related-comments-and-restore-sectio.patch create mode 100644 SOURCES/kvm-qatomic-add-smp_mb__before-after_rmw.patch create mode 100644 SOURCES/kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch create mode 100644 SOURCES/kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch create mode 100644 SOURCES/kvm-qcow2-Improve-refcount-structure-rebuilding.patch create mode 100644 SOURCES/kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch create mode 100644 SOURCES/kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch create mode 100644 SOURCES/kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch create mode 100644 SOURCES/kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch create mode 100644 SOURCES/kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch create mode 100644 SOURCES/kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch create mode 100644 SOURCES/kvm-redhat-Add-rhel8.6.0-machine-type-for-s390x.patch create mode 100644 SOURCES/kvm-redhat-Define-pseries-rhel8.6.0-machine-type.patch create mode 100644 SOURCES/kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch create mode 100644 SOURCES/kvm-rhel-machine-types-x86-set-prefer_sockets.patch create mode 100644 SOURCES/kvm-s390x-Add-KVM-PV-dump-interface.patch create mode 100644 SOURCES/kvm-s390x-Add-protected-dump-cap.patch create mode 100644 SOURCES/kvm-s390x-Introduce-PV-query-interface.patch create mode 100644 SOURCES/kvm-s390x-Register-TYPE_S390_CCW_MACHINE-properties-as-c.patch create mode 100644 SOURCES/kvm-s390x-css-fix-PMCW-invalid-mask.patch create mode 100644 SOURCES/kvm-s390x-ipl-support-extended-kernel-command-line-size.patch create mode 100644 SOURCES/kvm-s390x-pci-RPCIT-second-pass-when-mappings-exhausted.patch create mode 100644 SOURCES/kvm-s390x-pci-add-routine-to-get-host-function-handle-fr.patch create mode 100644 SOURCES/kvm-s390x-pci-add-supported-DT-information-to-clp-respon.patch create mode 100644 SOURCES/kvm-s390x-pci-coalesce-unmap-operations.patch create mode 100644 SOURCES/kvm-s390x-pci-don-t-fence-interpreted-devices-without-MS.patch create mode 100644 SOURCES/kvm-s390x-pci-don-t-use-hard-coded-dma-range-in-reg_ioat.patch create mode 100644 SOURCES/kvm-s390x-pci-enable-adapter-event-notification-for-inte.patch create mode 100644 SOURCES/kvm-s390x-pci-enable-for-load-store-interpretation.patch create mode 100644 SOURCES/kvm-s390x-pci-let-intercept-devices-have-separate-PCI-gr.patch create mode 100644 SOURCES/kvm-s390x-pci-reflect-proper-maxstbl-for-groups-of-inter.patch create mode 100644 SOURCES/kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch create mode 100644 SOURCES/kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch create mode 100644 SOURCES/kvm-s390x-pci-use-a-reserved-ID-for-the-default-PCI-grou.patch create mode 100644 SOURCES/kvm-s390x-pci-use-the-passthrough-measurement-update-int.patch create mode 100644 SOURCES/kvm-s390x-pv-Add-dump-support.patch create mode 100644 SOURCES/kvm-s390x-s390-virtio-ccw-Switch-off-zPCI-enhancements-o.patch create mode 100644 SOURCES/kvm-s390x-tod-kvm-don-t-save-restore-the-TOD-in-PV-guest.patch create mode 100644 SOURCES/kvm-s390x.conf create mode 100644 SOURCES/kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch create mode 100644 SOURCES/kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch create mode 100644 SOURCES/kvm-setup create mode 100644 SOURCES/kvm-setup.service create mode 100644 SOURCES/kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch create mode 100644 SOURCES/kvm-softmmu-physmem-Introduce-MemTxAttrs-memory-field-an.patch create mode 100644 SOURCES/kvm-softmmu-physmem-Simplify-flatview_write-and-address_.patch create mode 100644 SOURCES/kvm-target-arm-kvm-Retry-KVM_CREATE_VM-call-if-it-fails-.patch create mode 100644 SOURCES/kvm-target-i386-kvm-do-not-access-uninitialized-variable.patch create mode 100644 SOURCES/kvm-target-i386-properly-reset-TSC-on-reset.patch create mode 100644 SOURCES/kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch create mode 100644 SOURCES/kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch create mode 100644 SOURCES/kvm-tests-acpi-SLIC-update-expected-blobs.patch create mode 100644 SOURCES/kvm-tests-acpi-add-SLIC-table-test.patch create mode 100644 SOURCES/kvm-tests-acpi-manually-pad-OEM_ID-OEM_TABLE_ID-for-test.patch create mode 100644 SOURCES/kvm-tests-acpi-test-short-OEM_ID-OEM_TABLE_ID-values-in-.patch create mode 100644 SOURCES/kvm-tests-acpi-update-expected-blobs.patch create mode 100644 SOURCES/kvm-tests-acpi-whitelist-expected-blobs-before-changing-.patch create mode 100644 SOURCES/kvm-tests-acpi-whitelist-nvdimm-s-SSDT-and-FACP.slic-exp.patch create mode 100644 SOURCES/kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch create mode 100644 SOURCES/kvm-ui-cursor-fix-integer-overflow-in-cursor_alloc-CVE-2.patch create mode 100644 SOURCES/kvm-ui-vnc-clipboard-fix-integer-underflow-in-vnc_client.patch create mode 100644 SOURCES/kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch create mode 100644 SOURCES/kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch create mode 100644 SOURCES/kvm-vhost-vdpa-backend-feature-should-set-only-once.patch create mode 100644 SOURCES/kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch create mode 100644 SOURCES/kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch create mode 100644 SOURCES/kvm-vhost-vsock-detach-the-virqueue-element-in-case-of-e.patch create mode 100644 SOURCES/kvm-virtio-fix-feature-negotiation-for-ACCESS_PLATFORM.patch create mode 100644 SOURCES/kvm-virtio-fix-the-condition-for-iommu_platform-not-supp.patch create mode 100644 SOURCES/kvm-virtio-gpu-do-not-byteswap-padding.patch create mode 100644 SOURCES/kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch create mode 100644 SOURCES/kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch create mode 100644 SOURCES/kvm-virtio-net-fix-map-leaking-on-error-during-receive.patch create mode 100644 SOURCES/kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch create mode 100644 SOURCES/kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch create mode 100644 SOURCES/kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch create mode 100644 SOURCES/kvm-virtiofsd-Fix-breakage-due-to-fuse_init_in-size-chan.patch create mode 100644 SOURCES/kvm-virtiofsd-use-g_date_time_get_microsecond-to-get-sub.patch create mode 100644 SOURCES/kvm-vmxcap-Add-5-level-EPT-bit.patch create mode 100644 SOURCES/kvm-x86-Add-AMX-CPUIDs-enumeration.patch create mode 100644 SOURCES/kvm-x86-Add-AMX-XTILECFG-and-XTILEDATA-components.patch create mode 100644 SOURCES/kvm-x86-Add-XFD-faulting-bit-for-state-components.patch create mode 100644 SOURCES/kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch create mode 100644 SOURCES/kvm-x86-Fix-the-64-byte-boundary-enumeration-for-extende.patch create mode 100644 SOURCES/kvm-x86-Grant-AMX-permission-for-guest.patch create mode 100644 SOURCES/kvm-x86-Support-XFD-and-AMX-xsave-data-migration.patch create mode 100644 SOURCES/kvm-x86-add-support-for-KVM_CAP_XSAVE2-and-AMX-state-mig.patch create mode 100644 SOURCES/kvm-x86.conf create mode 100644 SOURCES/kvm.conf create mode 100644 SOURCES/qemu-ga.sysconfig create mode 100644 SOURCES/qemu-guest-agent.service create mode 100644 SOURCES/qemu-pr-helper.service create mode 100644 SOURCES/qemu-pr-helper.socket create mode 100644 SOURCES/tests_data_acpi_pc_SSDT.dimmpxm create mode 100644 SOURCES/tests_data_acpi_q35_FACP.slic create mode 100644 SOURCES/tests_data_acpi_q35_SSDT.dimmpxm create mode 100644 SOURCES/tests_data_acpi_virt_SSDT.memhp create mode 100644 SOURCES/udev-kvm-check.c create mode 100644 SOURCES/vhost.conf create mode 100644 SPECS/qemu-kvm.spec diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f5dae2e --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +SOURCES/qemu-6.2.0.tar.xz diff --git a/.qemu-kvm.metadata b/.qemu-kvm.metadata new file mode 100644 index 0000000..6f39e05 --- /dev/null +++ b/.qemu-kvm.metadata @@ -0,0 +1 @@ +68cd61a466170115b88817e2d52db2cd7a92f43a SOURCES/qemu-6.2.0.tar.xz diff --git a/SOURCES/0001-redhat-Adding-slirp-to-the-exploded-tree.patch b/SOURCES/0001-redhat-Adding-slirp-to-the-exploded-tree.patch new file mode 100644 index 0000000..43fbac3 --- /dev/null +++ b/SOURCES/0001-redhat-Adding-slirp-to-the-exploded-tree.patch @@ -0,0 +1,17931 @@ +From 0a17d5f6abf800e88069738904e3fcd8427ab28a Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Thu, 5 Aug 2021 01:07:55 -0400 +Subject: redhat: Adding slirp to the exploded tree + +RH-Author: Danilo de Paula +Message-id: <20190907020756.8619-1-ddepaula@redhat.com> +Patchwork-id: 90309 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/1] redhat: Adding slirp to the exploded tree +Bugzilla: +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Wainer dos Santos Moschetta + +Until qemu-kvm-3.1 slirp used to live as a regular folder in qemu-kvm. +After that it got moved into its own submodule. Which means it's not +part of the qemu-kvm git tree anymore. + +This passed unoticed for RHEL-AV-8.0.1 and 8.1.0 because qemu still ships +the code in the tarball. That's why scratch builds still works (it's based in +the tarball content). + +As we're receiving some CVE's against slirp, we need a way to patch +slirp in RHEL-8.1.0 without handling as a separate package (as we do for +firmwares). + +The simplest solution is to copy the slirp folder from the tarball into the +exploded tree. + +To be able to do that, I had to make some changes: + +slirp needs to be removed from .gitmodules, otherwise git complains +about files on it. + +Since "make -C redhat rh-brew" uses the tarball and apply all the +patches on top of it, we need to remove the folder from the tarball before applying +the patch (because we are actually re-applying them). + +We also need to use --ignore-submodule while generating the patches for +scratch-build, otherwise it will include some weird definition of the +slirp folder in the patch, something that /usr/bin/patch gets mad with. + +After that I compared the patch list, after and before this change, and +saw no major differences. + +This is an exploded-tree-only change and shouldn't be applied to dist-git. + +Signed-off-by: Danilo C. L. de Paula + +Rebase notes (weekly-210217): + - Upstream slirp updated to 8f43a99191afb47ca3f3c6972f6306209f367ece + +Rebase notes (6.1.0-rc2): +- Upstream slirp updated to a88d9ace234a24ce1c17189642ef9104799425e0 + +Merged commits (weekly-210203): + - a3f5f082f Drop bogus IPv6 messagesa + +Merged commits (weekly-210714): +- ce9ddeef04 Add mtod_check() +- 0609398e76 bootp: limit vendor-specific area to input packet memory buffer +- 377f755273 bootp: check bootp_input buffer size +- 4101e41f0d upd6: check udp6_input buffer size +- 7a663c9667 tftp: check tftp_input buffer size +- 76f81fc22c tftp: introduce a header structure +- 6903e9ba25 udp: check upd_input buffer size +- 8aa4fe0b6d Fix "DHCP broken in libslirp v4.6.0" +--- + .gitmodules | 3 - + slirp/.clang-format | 58 ++ + slirp/.gitignore | 11 + + slirp/.gitlab-ci.yml | 43 + + slirp/.gitpublish | 3 + + slirp/CHANGELOG.md | 184 ++++ + slirp/COPYRIGHT | 62 ++ + slirp/README.md | 60 ++ + slirp/build-aux/git-version-gen | 158 ++++ + slirp/meson.build | 162 ++++ + slirp/meson_options.txt | 2 + + slirp/src/arp_table.c | 94 ++ + slirp/src/bootp.c | 375 ++++++++ + slirp/src/bootp.h | 129 +++ + slirp/src/cksum.c | 179 ++++ + slirp/src/debug.h | 59 ++ + slirp/src/dhcpv6.c | 224 +++++ + slirp/src/dhcpv6.h | 68 ++ + slirp/src/dnssearch.c | 306 ++++++ + slirp/src/if.c | 215 +++++ + slirp/src/if.h | 25 + + slirp/src/ip.h | 242 +++++ + slirp/src/ip6.h | 214 +++++ + slirp/src/ip6_icmp.c | 444 +++++++++ + slirp/src/ip6_icmp.h | 220 +++++ + slirp/src/ip6_input.c | 88 ++ + slirp/src/ip6_output.c | 45 + + slirp/src/ip_icmp.c | 524 +++++++++++ + slirp/src/ip_icmp.h | 168 ++++ + slirp/src/ip_input.c | 463 +++++++++ + slirp/src/ip_output.c | 171 ++++ + slirp/src/libslirp-version.h.in | 24 + + slirp/src/libslirp.h | 236 +++++ + slirp/src/libslirp.map | 36 + + slirp/src/main.h | 16 + + slirp/src/mbuf.c | 281 ++++++ + slirp/src/mbuf.h | 192 ++++ + slirp/src/misc.c | 440 +++++++++ + slirp/src/misc.h | 72 ++ + slirp/src/ncsi-pkt.h | 445 +++++++++ + slirp/src/ncsi.c | 197 ++++ + slirp/src/ndp_table.c | 98 ++ + slirp/src/sbuf.c | 168 ++++ + slirp/src/sbuf.h | 27 + + slirp/src/slirp.c | 1387 +++++++++++++++++++++++++++ + slirp/src/slirp.h | 289 ++++++ + slirp/src/socket.c | 1104 ++++++++++++++++++++++ + slirp/src/socket.h | 186 ++++ + slirp/src/state.c | 379 ++++++++ + slirp/src/stream.c | 120 +++ + slirp/src/stream.h | 35 + + slirp/src/tcp.h | 169 ++++ + slirp/src/tcp_input.c | 1552 +++++++++++++++++++++++++++++++ + slirp/src/tcp_output.c | 516 ++++++++++ + slirp/src/tcp_subr.c | 1011 ++++++++++++++++++++ + slirp/src/tcp_timer.c | 286 ++++++ + slirp/src/tcp_timer.h | 130 +++ + slirp/src/tcp_var.h | 161 ++++ + slirp/src/tcpip.h | 104 +++ + slirp/src/tftp.c | 470 ++++++++++ + slirp/src/tftp.h | 58 ++ + slirp/src/udp.c | 425 +++++++++ + slirp/src/udp.h | 96 ++ + slirp/src/udp6.c | 196 ++++ + slirp/src/util.c | 441 +++++++++ + slirp/src/util.h | 203 ++++ + slirp/src/version.c | 8 + + slirp/src/vmstate.c | 444 +++++++++ + slirp/src/vmstate.h | 391 ++++++++ + 69 files changed, 17389 insertions(+), 3 deletions(-) + create mode 100644 slirp/.clang-format + create mode 100644 slirp/.gitignore + create mode 100644 slirp/.gitlab-ci.yml + create mode 100644 slirp/.gitpublish + create mode 100644 slirp/CHANGELOG.md + create mode 100644 slirp/COPYRIGHT + create mode 100644 slirp/README.md + create mode 100755 slirp/build-aux/git-version-gen + create mode 100644 slirp/meson.build + create mode 100644 slirp/meson_options.txt + create mode 100644 slirp/src/arp_table.c + create mode 100644 slirp/src/bootp.c + create mode 100644 slirp/src/bootp.h + create mode 100644 slirp/src/cksum.c + create mode 100644 slirp/src/debug.h + create mode 100644 slirp/src/dhcpv6.c + create mode 100644 slirp/src/dhcpv6.h + create mode 100644 slirp/src/dnssearch.c + create mode 100644 slirp/src/if.c + create mode 100644 slirp/src/if.h + create mode 100644 slirp/src/ip.h + create mode 100644 slirp/src/ip6.h + create mode 100644 slirp/src/ip6_icmp.c + create mode 100644 slirp/src/ip6_icmp.h + create mode 100644 slirp/src/ip6_input.c + create mode 100644 slirp/src/ip6_output.c + create mode 100644 slirp/src/ip_icmp.c + create mode 100644 slirp/src/ip_icmp.h + create mode 100644 slirp/src/ip_input.c + create mode 100644 slirp/src/ip_output.c + create mode 100644 slirp/src/libslirp-version.h.in + create mode 100644 slirp/src/libslirp.h + create mode 100644 slirp/src/libslirp.map + create mode 100644 slirp/src/main.h + create mode 100644 slirp/src/mbuf.c + create mode 100644 slirp/src/mbuf.h + create mode 100644 slirp/src/misc.c + create mode 100644 slirp/src/misc.h + create mode 100644 slirp/src/ncsi-pkt.h + create mode 100644 slirp/src/ncsi.c + create mode 100644 slirp/src/ndp_table.c + create mode 100644 slirp/src/sbuf.c + create mode 100644 slirp/src/sbuf.h + create mode 100644 slirp/src/slirp.c + create mode 100644 slirp/src/slirp.h + create mode 100644 slirp/src/socket.c + create mode 100644 slirp/src/socket.h + create mode 100644 slirp/src/state.c + create mode 100644 slirp/src/stream.c + create mode 100644 slirp/src/stream.h + create mode 100644 slirp/src/tcp.h + create mode 100644 slirp/src/tcp_input.c + create mode 100644 slirp/src/tcp_output.c + create mode 100644 slirp/src/tcp_subr.c + create mode 100644 slirp/src/tcp_timer.c + create mode 100644 slirp/src/tcp_timer.h + create mode 100644 slirp/src/tcp_var.h + create mode 100644 slirp/src/tcpip.h + create mode 100644 slirp/src/tftp.c + create mode 100644 slirp/src/tftp.h + create mode 100644 slirp/src/udp.c + create mode 100644 slirp/src/udp.h + create mode 100644 slirp/src/udp6.c + create mode 100644 slirp/src/util.c + create mode 100644 slirp/src/util.h + create mode 100644 slirp/src/version.c + create mode 100644 slirp/src/vmstate.c + create mode 100644 slirp/src/vmstate.h + +diff --git a/slirp/.clang-format b/slirp/.clang-format +new file mode 100644 +index 0000000000..17fb49fe65 +--- /dev/null ++++ b/slirp/.clang-format +@@ -0,0 +1,58 @@ ++# https://clang.llvm.org/docs/ClangFormat.html ++# https://clang.llvm.org/docs/ClangFormatStyleOptions.html ++--- ++Language: Cpp ++AlignAfterOpenBracket: Align ++AlignConsecutiveAssignments: false # although we like it, it creates churn ++AlignConsecutiveDeclarations: false ++AlignEscapedNewlinesLeft: true ++AlignOperands: true ++AlignTrailingComments: false # churn ++AllowAllParametersOfDeclarationOnNextLine: true ++AllowShortBlocksOnASingleLine: false ++AllowShortCaseLabelsOnASingleLine: false ++AllowShortFunctionsOnASingleLine: None ++AllowShortIfStatementsOnASingleLine: false ++AllowShortLoopsOnASingleLine: false ++AlwaysBreakAfterReturnType: None # AlwaysBreakAfterDefinitionReturnType is taken into account ++AlwaysBreakBeforeMultilineStrings: false ++BinPackArguments: true ++BinPackParameters: true ++BraceWrapping: ++ AfterControlStatement: false ++ AfterEnum: false ++ AfterFunction: true ++ AfterStruct: false ++ AfterUnion: false ++ BeforeElse: false ++ IndentBraces: false ++BreakBeforeBinaryOperators: None ++BreakBeforeBraces: Custom ++BreakBeforeTernaryOperators: false ++BreakStringLiterals: true ++ColumnLimit: 80 ++ContinuationIndentWidth: 4 ++Cpp11BracedListStyle: false ++DerivePointerAlignment: false ++DisableFormat: false ++IndentCaseLabels: false ++IndentWidth: 4 ++IndentWrappedFunctionNames: false ++KeepEmptyLinesAtTheStartOfBlocks: false ++MacroBlockBegin: '.*_BEGIN$' # only PREC_BEGIN ? ++MacroBlockEnd: '.*_END$' ++MaxEmptyLinesToKeep: 2 ++PointerAlignment: Right ++ReflowComments: true ++SortIncludes: false ++SpaceAfterCStyleCast: false ++SpaceBeforeAssignmentOperators: true ++SpaceBeforeParens: ControlStatements ++SpaceInEmptyParentheses: false ++SpacesBeforeTrailingComments: 1 ++SpacesInContainerLiterals: true ++SpacesInParentheses: false ++SpacesInSquareBrackets: false ++Standard: Auto ++UseTab: Never ++... +diff --git a/slirp/CHANGELOG.md b/slirp/CHANGELOG.md +new file mode 100644 +index 0000000000..bd4845ca29 +--- /dev/null ++++ b/slirp/CHANGELOG.md +@@ -0,0 +1,184 @@ ++# Changelog ++ ++All notable changes to this project will be documented in this file. ++ ++The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ++and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ++ ++## [4.6.1] - 2021-06-18 ++ ++### Fixed ++ ++ - Fix DHCP regression introduced in 4.6.0. !95 ++ ++## [4.6.0] - 2021-06-14 ++ ++### Added ++ ++ - mbuf: Add debugging helpers for allocation. !90 ++ ++### Changed ++ ++ - Revert "Set macOS deployment target to macOS 10.4". !93 ++ ++### Fixed ++ ++ - mtod()-related buffer overflows (CVE-2021-3592 #44, CVE-2021-3593 #45, ++ CVE-2021-3594 #47, CVE-2021-3595 #46). ++ - poll_fd: add missing fd registration for UDP and ICMP ++ - ncsi: make ncsi_calculate_checksum work with unaligned data. !89 ++ - Various typos and doc fixes. !88 ++ ++## [4.5.0] - 2021-05-18 ++ ++### Added ++ ++ - IPv6 forwarding. !62 !75 !77 ++ - slirp_neighbor_info() to dump the ARP/NDP tables. !71 ++ ++### Changed ++ ++ - Lazy guest address resolution for IPv6. !81 ++ - Improve signal handling when spawning a child. !61 ++ - Set macOS deployment target to macOS 10.4. !72 ++ - slirp_add_hostfwd: Ensure all error paths set errno. !80 ++ - More API documentation. ++ ++### Fixed ++ ++ - Assertion failure on unspecified IPv6 address. !86 ++ - Disable polling for PRI on MacOS, fixing some closing streams issues. !73 ++ - Various memory leak fixes on fastq/batchq. !68 ++ - Memory leak on IPv6 fast-send. !67 ++ - Slow socket response on Windows. !64 ++ - Misc build and code cleanups. !60 !63 !76 !79 !84 ++ ++## [4.4.0] - 2020-12-02 ++ ++### Added ++ ++ - udp, udp6, icmp: handle TTL value. !48 ++ - Enable forwarding ICMP errors. !49 ++ - Add DNS resolving for iOS. !54 ++ ++### Changed ++ ++ - Improve meson subproject() support. !53 ++ - Removed Makefile-based build system. !56 ++ ++### Fixed ++ ++ - socket: consume empty packets. !55 ++ - check pkt_len before reading protocol header (CVE-2020-29129). !57 ++ - ip_stripoptions use memmove (fixes undefined behaviour). !47 ++ - various Coverity-related changes/fixes. ++ ++## [4.3.1] - 2020-07-08 ++ ++### Changed ++ ++ - A silent truncation could occur in `slirp_fmt()`, which will now print a ++ critical message. See also #22. ++ ++### Fixed ++ ++ - CVE-2020-10756 - Drop bogus IPv6 messages that could lead to data leakage. ++ See !44 and !42. ++ - Fix win32 builds by using the SLIRP_PACKED definition. ++ - Various coverity scan errors fixed. !41 ++ - Fix new GCC warnings. !43 ++ ++## [4.3.0] - 2020-04-22 ++ ++### Added ++ ++ - `SLIRP_VERSION_STRING` macro, with the git sha suffix when building from git ++ - `SlirpConfig.disable_dns`, to disable DNS redirection #16 ++ ++### Changed ++ ++ - `slirp_version_string()` now has the git sha suffix when building form git ++ - Limit DNS redirection to port 53 #16 ++ ++### Fixed ++ ++ - Fix build regression with mingw & NetBSD ++ - Fix use-afte-free in `ip_reass()` (CVE-2020-1983) ++ ++## [4.2.0] - 2020-03-17 ++ ++### Added ++ ++ - New API function `slirp_add_unix`: add a forward rule to a Unix socket. ++ - New API function `slirp_remove_guestfwd`: remove a forward rule previously ++ added by `slirp_add_exec`, `slirp_add_unix` or `slirp_add_guestfwd` ++ - New `SlirpConfig.outbound_addr{,6}` fields to bind output socket to a ++ specific address ++ ++### Changed ++ ++ - socket: do not fallback on host loopback if `get_dns_addr()` failed ++ or the address is in slirp network ++ ++### Fixed ++ ++ - ncsi: fix checksum OOB memory access ++ - `tcp_emu()`: fix OOB accesses ++ - tftp: restrict relative path access ++ - state: fix loading of guestfwd state ++ ++## [4.1.0] - 2019-12-02 ++ ++### Added ++ ++ - The `slirp_new()` API, simpler and more extensible than `slirp_init()`. ++ - Allow custom MTU configuration. ++ - Option to disable host loopback connections. ++ - CI now runs scan-build too. ++ ++### Changed ++ ++ - Disable `tcp_emu()` by default. `tcp_emu()` is known to have caused ++ several CVEs, and not useful today in most cases. The feature can ++ be still enabled by setting `SlirpConfig.enable_emu` to true. ++ - meson build system is now `subproject()` friendly. ++ - Replace remaining `malloc()`/`free()` with glib (which aborts on OOM) ++ - Various code cleanups. ++ ++### Deprecated ++ ++ - The `slirp_init()` API. ++ ++### Fixed ++ ++ - `getpeername()` error after `shutdown(SHUT_WR)`. ++ - Exec forward: correctly parse command lines that contain spaces. ++ - Allow 0.0.0.0 destination address. ++ - Make host receive broadcast packets. ++ - Various memory related fixes (heap overflow, leaks, NULL ++ dereference). ++ - Compilation warnings, dead code. ++ ++## [4.0.0] - 2019-05-24 ++ ++### Added ++ ++ - Installable as a shared library. ++ - meson build system ++ (& make build system for in-tree QEMU integration) ++ ++### Changed ++ ++ - Standalone project, removing any QEMU dependency. ++ - License clarifications. ++ ++[Unreleased]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.6.1...master ++[4.6.1]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.6.0...v4.6.1 ++[4.6.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.5.0...v4.6.0 ++[4.5.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.4.0...v4.5.0 ++[4.4.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.3.1...v4.4.0 ++[4.3.1]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.3.0...v4.3.1 ++[4.3.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.2.0...v4.3.0 ++[4.2.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.1.0...v4.2.0 ++[4.1.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.0.0...v4.1.0 ++[4.0.0]: https://gitlab.freedesktop.org/slirp/libslirp/commits/v4.0.0 +diff --git a/slirp/COPYRIGHT b/slirp/COPYRIGHT +new file mode 100644 +index 0000000000..ed49512dbc +--- /dev/null ++++ b/slirp/COPYRIGHT +@@ -0,0 +1,62 @@ ++Slirp was written by Danny Gasparovski. ++Copyright (c), 1995,1996 All Rights Reserved. ++ ++Slirp is free software; "free" as in you don't have to pay for it, and you ++are free to do whatever you want with it. I do not accept any donations, ++monetary or otherwise, for Slirp. Instead, I would ask you to pass this ++potential donation to your favorite charity. In fact, I encourage ++*everyone* who finds Slirp useful to make a small donation to their ++favorite charity (for example, GreenPeace). This is not a requirement, but ++a suggestion from someone who highly values the service they provide. ++ ++The copyright terms and conditions: ++ ++---BEGIN--- ++ ++ Copyright (c) 1995,1996 Danny Gasparovski. All rights reserved. ++ ++ Redistribution and use in source and binary forms, with or without ++ modification, are permitted provided that the following conditions ++ are met: ++ 1. Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ 2. Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. ++ 3. Neither the name of the copyright holder nor the names of its ++ contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. ++ ++ THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, ++ INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY ++ AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ++ DANNY GASPAROVSKI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, ++ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT ++ NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF ++ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++---END--- ++ ++This basically means you can do anything you want with the software, except ++1) call it your own, and 2) claim warranty on it. There is no warranty for ++this software. None. Nada. If you lose a million dollars while using ++Slirp, that's your loss not mine. So, ***USE AT YOUR OWN RISK!***. ++ ++If these conditions cannot be met due to legal restrictions (E.g. where it ++is against the law to give out Software without warranty), you must cease ++using the software and delete all copies you have. ++ ++Slirp uses code that is copyrighted by the following people/organizations: ++ ++Juha Pirkola. ++Gregory M. Christy. ++The Regents of the University of California. ++Carnegie Mellon University. ++The Australian National University. ++RSA Data Security, Inc. ++ ++Please read the top of each source file for the details on the various ++copyrights. +diff --git a/slirp/README.md b/slirp/README.md +new file mode 100644 +index 0000000000..9f9c1b14f6 +--- /dev/null ++++ b/slirp/README.md +@@ -0,0 +1,60 @@ ++# libslirp ++ ++libslirp is a user-mode networking library used by virtual machines, ++containers or various tools. ++ ++## Getting Started ++ ++### Prerequisites ++ ++A C compiler, meson and glib2 development libraries. ++ ++(see also [.gitlab-ci.yml](.gitlab-ci.yml) DEPS variable for the list ++of dependencies on Fedora) ++ ++### Building ++ ++You may build and install the shared library with meson: ++ ++``` sh ++meson build ++ninja -C build install ++``` ++And configure QEMU with --enable-slirp=system to link against it. ++ ++(QEMU may build with the submodule static library using --enable-slirp=git) ++ ++### Testing ++ ++Unfortunately, there are no automated tests available. ++ ++You may run QEMU ``-net user`` linked with your development version. ++ ++## Contributing ++ ++Feel free to open issues on the [project ++issues](https://gitlab.freedesktop.org/slirp/libslirp/issues) page. ++ ++You may clone the [gitlab ++project](https://gitlab.freedesktop.org/slirp/libslirp) and create a ++merge request. ++ ++Contributing with gitlab allows gitlab workflow, tracking issues, ++running CI etc. ++ ++Alternatively, you may send patches to slirp@lists.freedesktop.org ++mailing list. ++ ++## Versioning ++ ++We intend to use [libtool's ++versioning](https://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html) ++for the shared libraries and use [SemVer](http://semver.org/) for ++project versions. ++ ++For the versions available, see the [tags on this ++repository](https://gitlab.freedesktop.org/slirp/libslirp/releases). ++ ++## License ++ ++See the [COPYRIGHT](COPYRIGHT) file for details. +diff --git a/slirp/build-aux/git-version-gen b/slirp/build-aux/git-version-gen +new file mode 100755 +index 0000000000..5617eb8d4e +--- /dev/null ++++ b/slirp/build-aux/git-version-gen +@@ -0,0 +1,158 @@ ++#!/bin/sh ++# Print a version string. ++scriptversion=2010-06-14.19; # UTC ++ ++# Copyright (C) 2007-2010 Free Software Foundation, Inc. ++# ++# This program is free software: you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++ ++# This script is derived from GIT-VERSION-GEN from GIT: http://git.or.cz/. ++# It may be run two ways: ++# - from a git repository in which the "git describe" command below ++# produces useful output (thus requiring at least one signed tag) ++# - from a non-git-repo directory containing a .tarball-version file, which ++# presumes this script is invoked like "./git-version-gen .tarball-version". ++ ++# In order to use intra-version strings in your project, you will need two ++# separate generated version string files: ++# ++# .tarball-version - present only in a distribution tarball, and not in ++# a checked-out repository. Created with contents that were learned at ++# the last time autoconf was run, and used by git-version-gen. Must not ++# be present in either $(srcdir) or $(builddir) for git-version-gen to ++# give accurate answers during normal development with a checked out tree, ++# but must be present in a tarball when there is no version control system. ++# Therefore, it cannot be used in any dependencies. GNUmakefile has ++# hooks to force a reconfigure at distribution time to get the value ++# correct, without penalizing normal development with extra reconfigures. ++# ++# .version - present in a checked-out repository and in a distribution ++# tarball. Usable in dependencies, particularly for files that don't ++# want to depend on config.h but do want to track version changes. ++# Delete this file prior to any autoconf run where you want to rebuild ++# files to pick up a version string change; and leave it stale to ++# minimize rebuild time after unrelated changes to configure sources. ++# ++# It is probably wise to add these two files to .gitignore, so that you ++# don't accidentally commit either generated file. ++# ++# Use the following line in your configure.ac, so that $(VERSION) will ++# automatically be up-to-date each time configure is run (and note that ++# since configure.ac no longer includes a version string, Makefile rules ++# should not depend on configure.ac for version updates). ++# ++# AC_INIT([GNU project], ++# m4_esyscmd([build-aux/git-version-gen .tarball-version]), ++# [bug-project@example]) ++# ++# Then use the following lines in your Makefile.am, so that .version ++# will be present for dependencies, and so that .tarball-version will ++# exist in distribution tarballs. ++# ++# BUILT_SOURCES = $(top_srcdir)/.version ++# $(top_srcdir)/.version: ++# echo $(VERSION) > $@-t && mv $@-t $@ ++# dist-hook: ++# echo $(VERSION) > $(distdir)/.tarball-version ++ ++case $# in ++ 1|2) ;; ++ *) echo 1>&2 "Usage: $0 \$srcdir/.tarball-version" \ ++ '[TAG-NORMALIZATION-SED-SCRIPT]' ++ exit 1;; ++esac ++ ++tarball_version_file=$1 ++tag_sed_script="${2:-s/x/x/}" ++nl=' ++' ++ ++# Avoid meddling by environment variable of the same name. ++v= ++ ++# First see if there is a tarball-only version file. ++# then try "git describe", then default. ++if test -f $tarball_version_file ++then ++ v=`cat $tarball_version_file` || exit 1 ++ case $v in ++ *$nl*) v= ;; # reject multi-line output ++ [0-9]*) ;; ++ *) v= ;; ++ esac ++ test -z "$v" \ ++ && echo "$0: WARNING: $tarball_version_file seems to be damaged" 1>&2 ++fi ++ ++if test -n "$v" ++then ++ : # use $v ++elif test -d .git \ ++ && v=`git describe --abbrev=4 --match='v*' HEAD 2>/dev/null \ ++ || git describe --abbrev=4 HEAD 2>/dev/null` \ ++ && v=`printf '%s\n' "$v" | sed "$tag_sed_script"` \ ++ && case $v in ++ v[0-9]*) ;; ++ *) (exit 1) ;; ++ esac ++then ++ # Is this a new git that lists number of commits since the last ++ # tag or the previous older version that did not? ++ # Newer: v6.10-77-g0f8faeb ++ # Older: v6.10-g0f8faeb ++ case $v in ++ *-*-*) : git describe is okay three part flavor ;; ++ *-*) ++ : git describe is older two part flavor ++ # Recreate the number of commits and rewrite such that the ++ # result is the same as if we were using the newer version ++ # of git describe. ++ vtag=`echo "$v" | sed 's/-.*//'` ++ numcommits=`git rev-list "$vtag"..HEAD | wc -l` ++ v=`echo "$v" | sed "s/\(.*\)-\(.*\)/\1-$numcommits-\2/"`; ++ ;; ++ esac ++ ++ # Change the first '-' to a '.', so version-comparing tools work properly. ++ # Remove the "g" in git describe's output string, to save a byte. ++ v=`echo "$v" | sed 's/-/./;s/\(.*\)-g/\1-/'`; ++else ++ v=UNKNOWN ++fi ++ ++v=`echo "$v" |sed 's/^v//'` ++ ++# Don't declare a version "dirty" merely because a time stamp has changed. ++git update-index --refresh > /dev/null 2>&1 ++ ++dirty=`sh -c 'git diff-index --name-only HEAD' 2>/dev/null` || dirty= ++case "$dirty" in ++ '') ;; ++ *) # Append the suffix only if there isn't one already. ++ case $v in ++ *-dirty) ;; ++ *) v="$v-dirty" ;; ++ esac ;; ++esac ++ ++# Omit the trailing newline, so that m4_esyscmd can use the result directly. ++echo "$v" | tr -d "$nl" ++ ++# Local variables: ++# eval: (add-hook 'write-file-hooks 'time-stamp) ++# time-stamp-start: "scriptversion=" ++# time-stamp-format: "%:y-%02m-%02d.%02H" ++# time-stamp-time-zone: "UTC" ++# time-stamp-end: "; # UTC" ++# End: +diff --git a/slirp/meson.build b/slirp/meson.build +new file mode 100644 +index 0000000000..cb1396ad59 +--- /dev/null ++++ b/slirp/meson.build +@@ -0,0 +1,162 @@ ++project('libslirp', 'c', ++ version : '4.6.1', ++ license : 'BSD-3-Clause', ++ default_options : ['warning_level=1', 'c_std=gnu99'], ++ meson_version : '>= 0.50', ++) ++ ++version = meson.project_version() ++varr = version.split('.') ++major_version = varr[0] ++minor_version = varr[1] ++micro_version = varr[2] ++ ++conf = configuration_data() ++conf.set('SLIRP_MAJOR_VERSION', major_version) ++conf.set('SLIRP_MINOR_VERSION', minor_version) ++conf.set('SLIRP_MICRO_VERSION', micro_version) ++ ++full_version = run_command('build-aux/git-version-gen', ++ '@0@/.tarball-version'.format(meson.current_source_dir()), ++ check : true).stdout().strip() ++if full_version.startswith('UNKNOWN') ++ full_version = meson.project_version() ++elif not full_version.startswith(meson.project_version()) ++ error('meson.build project version @0@ does not match git-describe output @1@' ++ .format(meson.project_version(), full_version)) ++endif ++conf.set_quoted('SLIRP_VERSION_STRING', full_version + get_option('version_suffix')) ++ ++# libtool versioning - this applies to libslirp ++# ++# See http://sources.redhat.com/autobook/autobook/autobook_91.html#SEC91 for details ++# ++# - If interfaces have been changed or added, but binary compatibility ++# has been preserved, change: ++# CURRENT += 1 ++# REVISION = 0 ++# AGE += 1 ++# - If binary compatibility has been broken (eg removed or changed ++# interfaces), change: ++# CURRENT += 1 ++# REVISION = 0 ++# AGE = 0 ++# - If the interface is the same as the previous version, but bugs are ++# fixed, change: ++# REVISION += 1 ++lt_current = 3 ++lt_revision = 1 ++lt_age = 3 ++lt_version = '@0@.@1@.@2@'.format(lt_current - lt_age, lt_age, lt_revision) ++ ++host_system = host_machine.system() ++ ++glib_dep = dependency('glib-2.0') ++ ++cc = meson.get_compiler('c') ++ ++platform_deps = [] ++ ++if host_system == 'windows' ++ platform_deps += [ ++ cc.find_library('ws2_32'), ++ cc.find_library('iphlpapi') ++ ] ++elif host_system == 'darwin' ++ platform_deps += [ ++ cc.find_library('resolv') ++ ] ++endif ++ ++cargs = [ ++ '-DG_LOG_DOMAIN="Slirp"', ++] ++ ++if cc.check_header('valgrind/valgrind.h') ++ cargs += [ '-DHAVE_VALGRIND=1' ] ++endif ++ ++sources = [ ++ 'src/arp_table.c', ++ 'src/bootp.c', ++ 'src/cksum.c', ++ 'src/dhcpv6.c', ++ 'src/dnssearch.c', ++ 'src/if.c', ++ 'src/ip6_icmp.c', ++ 'src/ip6_input.c', ++ 'src/ip6_output.c', ++ 'src/ip_icmp.c', ++ 'src/ip_input.c', ++ 'src/ip_output.c', ++ 'src/mbuf.c', ++ 'src/misc.c', ++ 'src/ncsi.c', ++ 'src/ndp_table.c', ++ 'src/sbuf.c', ++ 'src/slirp.c', ++ 'src/socket.c', ++ 'src/state.c', ++ 'src/stream.c', ++ 'src/tcp_input.c', ++ 'src/tcp_output.c', ++ 'src/tcp_subr.c', ++ 'src/tcp_timer.c', ++ 'src/tftp.c', ++ 'src/udp.c', ++ 'src/udp6.c', ++ 'src/util.c', ++ 'src/version.c', ++ 'src/vmstate.c', ++] ++ ++mapfile = 'src/libslirp.map' ++vflag = [] ++vflag_test = '-Wl,--version-script,@0@/@1@'.format(meson.current_source_dir(), mapfile) ++if cc.has_link_argument(vflag_test) ++ vflag += vflag_test ++endif ++ ++install_devel = not meson.is_subproject() ++ ++configure_file( ++ input : 'src/libslirp-version.h.in', ++ output : 'libslirp-version.h', ++ install : install_devel, ++ install_dir : join_paths(get_option('includedir'), 'slirp'), ++ configuration : conf ++) ++ ++lib = library('slirp', sources, ++ version : lt_version, ++ c_args : cargs, ++ link_args : vflag, ++ link_depends : mapfile, ++ dependencies : [glib_dep, platform_deps], ++ install : install_devel or get_option('default_library') == 'shared', ++) ++ ++if install_devel ++ install_headers(['src/libslirp.h'], subdir : 'slirp') ++ ++ pkg = import('pkgconfig') ++ ++ pkg.generate( ++ version : version, ++ libraries : lib, ++ requires : [ ++ 'glib-2.0', ++ ], ++ name : 'slirp', ++ description : 'User-space network stack', ++ filebase : 'slirp', ++ subdirs : 'slirp', ++ ) ++else ++ if get_option('default_library') == 'both' ++ lib = lib.get_static_lib() ++ endif ++ libslirp_dep = declare_dependency( ++ include_directories: include_directories('.', 'src'), ++ link_with: lib) ++endif +diff --git a/slirp/meson_options.txt b/slirp/meson_options.txt +new file mode 100644 +index 0000000000..27e7c8059c +--- /dev/null ++++ b/slirp/meson_options.txt +@@ -0,0 +1,2 @@ ++option('version_suffix', type: 'string', value: '', ++ description: 'Suffix to append to SLIRP_VERSION_STRING') +diff --git a/slirp/src/arp_table.c b/slirp/src/arp_table.c +new file mode 100644 +index 0000000000..ba8c8a4eee +--- /dev/null ++++ b/slirp/src/arp_table.c +@@ -0,0 +1,94 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * ARP table ++ * ++ * Copyright (c) 2011 AdaCore ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. ++ */ ++ ++#include "slirp.h" ++ ++#include ++ ++void arp_table_add(Slirp *slirp, uint32_t ip_addr, ++ const uint8_t ethaddr[ETH_ALEN]) ++{ ++ const uint32_t broadcast_addr = ++ ~slirp->vnetwork_mask.s_addr | slirp->vnetwork_addr.s_addr; ++ ArpTable *arptbl = &slirp->arp_table; ++ int i; ++ char ethaddr_str[ETH_ADDRSTRLEN]; ++ ++ DEBUG_CALL("arp_table_add"); ++ DEBUG_ARG("ip = %s", inet_ntoa((struct in_addr){ .s_addr = ip_addr })); ++ DEBUG_ARG("hw addr = %s", slirp_ether_ntoa(ethaddr, ethaddr_str, ++ sizeof(ethaddr_str))); ++ ++ if (ip_addr == 0 || ip_addr == 0xffffffff || ip_addr == broadcast_addr) { ++ /* Do not register broadcast addresses */ ++ return; ++ } ++ ++ /* Search for an entry */ ++ for (i = 0; i < ARP_TABLE_SIZE; i++) { ++ if (arptbl->table[i].ar_sip == ip_addr) { ++ /* Update the entry */ ++ memcpy(arptbl->table[i].ar_sha, ethaddr, ETH_ALEN); ++ return; ++ } ++ } ++ ++ /* No entry found, create a new one */ ++ arptbl->table[arptbl->next_victim].ar_sip = ip_addr; ++ memcpy(arptbl->table[arptbl->next_victim].ar_sha, ethaddr, ETH_ALEN); ++ arptbl->next_victim = (arptbl->next_victim + 1) % ARP_TABLE_SIZE; ++} ++ ++bool arp_table_search(Slirp *slirp, uint32_t ip_addr, ++ uint8_t out_ethaddr[ETH_ALEN]) ++{ ++ const uint32_t broadcast_addr = ++ ~slirp->vnetwork_mask.s_addr | slirp->vnetwork_addr.s_addr; ++ ArpTable *arptbl = &slirp->arp_table; ++ int i; ++ char ethaddr_str[ETH_ADDRSTRLEN]; ++ ++ DEBUG_CALL("arp_table_search"); ++ DEBUG_ARG("ip = %s", inet_ntoa((struct in_addr){ .s_addr = ip_addr })); ++ ++ /* If broadcast address */ ++ if (ip_addr == 0 || ip_addr == 0xffffffff || ip_addr == broadcast_addr) { ++ /* return Ethernet broadcast address */ ++ memset(out_ethaddr, 0xff, ETH_ALEN); ++ return 1; ++ } ++ ++ for (i = 0; i < ARP_TABLE_SIZE; i++) { ++ if (arptbl->table[i].ar_sip == ip_addr) { ++ memcpy(out_ethaddr, arptbl->table[i].ar_sha, ETH_ALEN); ++ DEBUG_ARG("found hw addr = %s", ++ slirp_ether_ntoa(out_ethaddr, ethaddr_str, ++ sizeof(ethaddr_str))); ++ return 1; ++ } ++ } ++ ++ return 0; ++} +diff --git a/slirp/src/bootp.c b/slirp/src/bootp.c +new file mode 100644 +index 0000000000..d78d61b44c +--- /dev/null ++++ b/slirp/src/bootp.c +@@ -0,0 +1,375 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * QEMU BOOTP/DHCP server ++ * ++ * Copyright (c) 2004 Fabrice Bellard ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. ++ */ ++#include "slirp.h" ++ ++#if defined(_WIN32) ++/* Windows ntohl() returns an u_long value. ++ * Add a type cast to match the format strings. */ ++#define ntohl(n) ((uint32_t)ntohl(n)) ++#endif ++ ++/* XXX: only DHCP is supported */ ++ ++#define LEASE_TIME (24 * 3600) ++ ++static const uint8_t rfc1533_cookie[] = { RFC1533_COOKIE }; ++ ++#define DPRINTF(fmt, ...) DEBUG_CALL(fmt, ##__VA_ARGS__) ++ ++static BOOTPClient *get_new_addr(Slirp *slirp, struct in_addr *paddr, ++ const uint8_t *macaddr) ++{ ++ BOOTPClient *bc; ++ int i; ++ ++ for (i = 0; i < NB_BOOTP_CLIENTS; i++) { ++ bc = &slirp->bootp_clients[i]; ++ if (!bc->allocated || !memcmp(macaddr, bc->macaddr, 6)) ++ goto found; ++ } ++ return NULL; ++found: ++ bc = &slirp->bootp_clients[i]; ++ bc->allocated = 1; ++ paddr->s_addr = slirp->vdhcp_startaddr.s_addr + htonl(i); ++ return bc; ++} ++ ++static BOOTPClient *request_addr(Slirp *slirp, const struct in_addr *paddr, ++ const uint8_t *macaddr) ++{ ++ uint32_t req_addr = ntohl(paddr->s_addr); ++ uint32_t dhcp_addr = ntohl(slirp->vdhcp_startaddr.s_addr); ++ BOOTPClient *bc; ++ ++ if (req_addr >= dhcp_addr && req_addr < (dhcp_addr + NB_BOOTP_CLIENTS)) { ++ bc = &slirp->bootp_clients[req_addr - dhcp_addr]; ++ if (!bc->allocated || !memcmp(macaddr, bc->macaddr, 6)) { ++ bc->allocated = 1; ++ return bc; ++ } ++ } ++ return NULL; ++} ++ ++static BOOTPClient *find_addr(Slirp *slirp, struct in_addr *paddr, ++ const uint8_t *macaddr) ++{ ++ BOOTPClient *bc; ++ int i; ++ ++ for (i = 0; i < NB_BOOTP_CLIENTS; i++) { ++ if (!memcmp(macaddr, slirp->bootp_clients[i].macaddr, 6)) ++ goto found; ++ } ++ return NULL; ++found: ++ bc = &slirp->bootp_clients[i]; ++ bc->allocated = 1; ++ paddr->s_addr = slirp->vdhcp_startaddr.s_addr + htonl(i); ++ return bc; ++} ++ ++static void dhcp_decode(const struct bootp_t *bp, ++ const uint8_t *bp_end, ++ int *pmsg_type, ++ struct in_addr *preq_addr) ++{ ++ const uint8_t *p; ++ int len, tag; ++ ++ *pmsg_type = 0; ++ preq_addr->s_addr = htonl(0L); ++ ++ p = bp->bp_vend; ++ if (memcmp(p, rfc1533_cookie, 4) != 0) ++ return; ++ p += 4; ++ while (p < bp_end) { ++ tag = p[0]; ++ if (tag == RFC1533_PAD) { ++ p++; ++ } else if (tag == RFC1533_END) { ++ break; ++ } else { ++ p++; ++ if (p >= bp_end) ++ break; ++ len = *p++; ++ if (p + len > bp_end) { ++ break; ++ } ++ DPRINTF("dhcp: tag=%d len=%d\n", tag, len); ++ ++ switch (tag) { ++ case RFC2132_MSG_TYPE: ++ if (len >= 1) ++ *pmsg_type = p[0]; ++ break; ++ case RFC2132_REQ_ADDR: ++ if (len >= 4) { ++ memcpy(&(preq_addr->s_addr), p, 4); ++ } ++ break; ++ default: ++ break; ++ } ++ p += len; ++ } ++ } ++ if (*pmsg_type == DHCPREQUEST && preq_addr->s_addr == htonl(0L) && ++ bp->bp_ciaddr.s_addr) { ++ memcpy(&(preq_addr->s_addr), &bp->bp_ciaddr, 4); ++ } ++} ++ ++static void bootp_reply(Slirp *slirp, ++ const struct bootp_t *bp, ++ const uint8_t *bp_end) ++{ ++ BOOTPClient *bc = NULL; ++ struct mbuf *m; ++ struct bootp_t *rbp; ++ struct sockaddr_in saddr, daddr; ++ struct in_addr preq_addr; ++ int dhcp_msg_type, val; ++ uint8_t *q; ++ uint8_t *end; ++ uint8_t client_ethaddr[ETH_ALEN]; ++ ++ /* extract exact DHCP msg type */ ++ dhcp_decode(bp, bp_end, &dhcp_msg_type, &preq_addr); ++ DPRINTF("bootp packet op=%d msgtype=%d", bp->bp_op, dhcp_msg_type); ++ if (preq_addr.s_addr != htonl(0L)) ++ DPRINTF(" req_addr=%08" PRIx32 "\n", ntohl(preq_addr.s_addr)); ++ else { ++ DPRINTF("\n"); ++ } ++ ++ if (dhcp_msg_type == 0) ++ dhcp_msg_type = DHCPREQUEST; /* Force reply for old BOOTP clients */ ++ ++ if (dhcp_msg_type != DHCPDISCOVER && dhcp_msg_type != DHCPREQUEST) ++ return; ++ ++ /* Get client's hardware address from bootp request */ ++ memcpy(client_ethaddr, bp->bp_hwaddr, ETH_ALEN); ++ ++ m = m_get(slirp); ++ if (!m) { ++ return; ++ } ++ m->m_data += IF_MAXLINKHDR; ++ m_inc(m, sizeof(struct bootp_t) + DHCP_OPT_LEN); ++ rbp = (struct bootp_t *)m->m_data; ++ m->m_data += sizeof(struct udpiphdr); ++ memset(rbp, 0, sizeof(struct bootp_t) + DHCP_OPT_LEN); ++ ++ if (dhcp_msg_type == DHCPDISCOVER) { ++ if (preq_addr.s_addr != htonl(0L)) { ++ bc = request_addr(slirp, &preq_addr, client_ethaddr); ++ if (bc) { ++ daddr.sin_addr = preq_addr; ++ } ++ } ++ if (!bc) { ++ new_addr: ++ bc = get_new_addr(slirp, &daddr.sin_addr, client_ethaddr); ++ if (!bc) { ++ DPRINTF("no address left\n"); ++ return; ++ } ++ } ++ memcpy(bc->macaddr, client_ethaddr, ETH_ALEN); ++ } else if (preq_addr.s_addr != htonl(0L)) { ++ bc = request_addr(slirp, &preq_addr, client_ethaddr); ++ if (bc) { ++ daddr.sin_addr = preq_addr; ++ memcpy(bc->macaddr, client_ethaddr, ETH_ALEN); ++ } else { ++ /* DHCPNAKs should be sent to broadcast */ ++ daddr.sin_addr.s_addr = 0xffffffff; ++ } ++ } else { ++ bc = find_addr(slirp, &daddr.sin_addr, bp->bp_hwaddr); ++ if (!bc) { ++ /* if never assigned, behaves as if it was already ++ assigned (windows fix because it remembers its address) */ ++ goto new_addr; ++ } ++ } ++ ++ /* Update ARP table for this IP address */ ++ arp_table_add(slirp, daddr.sin_addr.s_addr, client_ethaddr); ++ ++ saddr.sin_addr = slirp->vhost_addr; ++ saddr.sin_port = htons(BOOTP_SERVER); ++ ++ daddr.sin_port = htons(BOOTP_CLIENT); ++ ++ rbp->bp_op = BOOTP_REPLY; ++ rbp->bp_xid = bp->bp_xid; ++ rbp->bp_htype = 1; ++ rbp->bp_hlen = 6; ++ memcpy(rbp->bp_hwaddr, bp->bp_hwaddr, ETH_ALEN); ++ ++ rbp->bp_yiaddr = daddr.sin_addr; /* Client IP address */ ++ rbp->bp_siaddr = saddr.sin_addr; /* Server IP address */ ++ ++ q = rbp->bp_vend; ++ end = rbp->bp_vend + DHCP_OPT_LEN; ++ memcpy(q, rfc1533_cookie, 4); ++ q += 4; ++ ++ if (bc) { ++ DPRINTF("%s addr=%08" PRIx32 "\n", ++ (dhcp_msg_type == DHCPDISCOVER) ? "offered" : "ack'ed", ++ ntohl(daddr.sin_addr.s_addr)); ++ ++ if (dhcp_msg_type == DHCPDISCOVER) { ++ *q++ = RFC2132_MSG_TYPE; ++ *q++ = 1; ++ *q++ = DHCPOFFER; ++ } else /* DHCPREQUEST */ { ++ *q++ = RFC2132_MSG_TYPE; ++ *q++ = 1; ++ *q++ = DHCPACK; ++ } ++ ++ if (slirp->bootp_filename) { ++ g_assert(strlen(slirp->bootp_filename) < sizeof(rbp->bp_file)); ++ strcpy(rbp->bp_file, slirp->bootp_filename); ++ } ++ ++ *q++ = RFC2132_SRV_ID; ++ *q++ = 4; ++ memcpy(q, &saddr.sin_addr, 4); ++ q += 4; ++ ++ *q++ = RFC1533_NETMASK; ++ *q++ = 4; ++ memcpy(q, &slirp->vnetwork_mask, 4); ++ q += 4; ++ ++ if (!slirp->restricted) { ++ *q++ = RFC1533_GATEWAY; ++ *q++ = 4; ++ memcpy(q, &saddr.sin_addr, 4); ++ q += 4; ++ ++ *q++ = RFC1533_DNS; ++ *q++ = 4; ++ memcpy(q, &slirp->vnameserver_addr, 4); ++ q += 4; ++ } ++ ++ *q++ = RFC2132_LEASE_TIME; ++ *q++ = 4; ++ val = htonl(LEASE_TIME); ++ memcpy(q, &val, 4); ++ q += 4; ++ ++ if (*slirp->client_hostname) { ++ val = strlen(slirp->client_hostname); ++ if (q + val + 2 >= end) { ++ g_warning("DHCP packet size exceeded, " ++ "omitting host name option."); ++ } else { ++ *q++ = RFC1533_HOSTNAME; ++ *q++ = val; ++ memcpy(q, slirp->client_hostname, val); ++ q += val; ++ } ++ } ++ ++ if (slirp->vdomainname) { ++ val = strlen(slirp->vdomainname); ++ if (q + val + 2 >= end) { ++ g_warning("DHCP packet size exceeded, " ++ "omitting domain name option."); ++ } else { ++ *q++ = RFC1533_DOMAINNAME; ++ *q++ = val; ++ memcpy(q, slirp->vdomainname, val); ++ q += val; ++ } ++ } ++ ++ if (slirp->tftp_server_name) { ++ val = strlen(slirp->tftp_server_name); ++ if (q + val + 2 >= end) { ++ g_warning("DHCP packet size exceeded, " ++ "omitting tftp-server-name option."); ++ } else { ++ *q++ = RFC2132_TFTP_SERVER_NAME; ++ *q++ = val; ++ memcpy(q, slirp->tftp_server_name, val); ++ q += val; ++ } ++ } ++ ++ if (slirp->vdnssearch) { ++ val = slirp->vdnssearch_len; ++ if (q + val >= end) { ++ g_warning("DHCP packet size exceeded, " ++ "omitting domain-search option."); ++ } else { ++ memcpy(q, slirp->vdnssearch, val); ++ q += val; ++ } ++ } ++ } else { ++ static const char nak_msg[] = "requested address not available"; ++ ++ DPRINTF("nak'ed addr=%08" PRIx32 "\n", ntohl(preq_addr.s_addr)); ++ ++ *q++ = RFC2132_MSG_TYPE; ++ *q++ = 1; ++ *q++ = DHCPNAK; ++ ++ *q++ = RFC2132_MESSAGE; ++ *q++ = sizeof(nak_msg) - 1; ++ memcpy(q, nak_msg, sizeof(nak_msg) - 1); ++ q += sizeof(nak_msg) - 1; ++ } ++ assert(q < end); ++ *q++ = RFC1533_END; ++ ++ daddr.sin_addr.s_addr = 0xffffffffu; ++ ++ assert(q <= end); ++ ++ m->m_len = sizeof(struct bootp_t) + (end - rbp->bp_vend) - sizeof(struct ip) - sizeof(struct udphdr); ++ udp_output(NULL, m, &saddr, &daddr, IPTOS_LOWDELAY); ++} ++ ++void bootp_input(struct mbuf *m) ++{ ++ struct bootp_t *bp = mtod_check(m, sizeof(struct bootp_t)); ++ ++ if (bp && bp->bp_op == BOOTP_REQUEST) { ++ bootp_reply(m->slirp, bp, m_end(m)); ++ } ++} +diff --git a/slirp/src/bootp.h b/slirp/src/bootp.h +new file mode 100644 +index 0000000000..31ce5fd33f +--- /dev/null ++++ b/slirp/src/bootp.h +@@ -0,0 +1,129 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* bootp/dhcp defines */ ++ ++#ifndef SLIRP_BOOTP_H ++#define SLIRP_BOOTP_H ++ ++#define BOOTP_SERVER 67 ++#define BOOTP_CLIENT 68 ++ ++#define BOOTP_REQUEST 1 ++#define BOOTP_REPLY 2 ++ ++#define RFC1533_COOKIE 99, 130, 83, 99 ++#define RFC1533_PAD 0 ++#define RFC1533_NETMASK 1 ++#define RFC1533_TIMEOFFSET 2 ++#define RFC1533_GATEWAY 3 ++#define RFC1533_TIMESERVER 4 ++#define RFC1533_IEN116NS 5 ++#define RFC1533_DNS 6 ++#define RFC1533_LOGSERVER 7 ++#define RFC1533_COOKIESERVER 8 ++#define RFC1533_LPRSERVER 9 ++#define RFC1533_IMPRESSSERVER 10 ++#define RFC1533_RESOURCESERVER 11 ++#define RFC1533_HOSTNAME 12 ++#define RFC1533_BOOTFILESIZE 13 ++#define RFC1533_MERITDUMPFILE 14 ++#define RFC1533_DOMAINNAME 15 ++#define RFC1533_SWAPSERVER 16 ++#define RFC1533_ROOTPATH 17 ++#define RFC1533_EXTENSIONPATH 18 ++#define RFC1533_IPFORWARDING 19 ++#define RFC1533_IPSOURCEROUTING 20 ++#define RFC1533_IPPOLICYFILTER 21 ++#define RFC1533_IPMAXREASSEMBLY 22 ++#define RFC1533_IPTTL 23 ++#define RFC1533_IPMTU 24 ++#define RFC1533_IPMTUPLATEAU 25 ++#define RFC1533_INTMTU 26 ++#define RFC1533_INTLOCALSUBNETS 27 ++#define RFC1533_INTBROADCAST 28 ++#define RFC1533_INTICMPDISCOVER 29 ++#define RFC1533_INTICMPRESPOND 30 ++#define RFC1533_INTROUTEDISCOVER 31 ++#define RFC1533_INTROUTESOLICIT 32 ++#define RFC1533_INTSTATICROUTES 33 ++#define RFC1533_LLTRAILERENCAP 34 ++#define RFC1533_LLARPCACHETMO 35 ++#define RFC1533_LLETHERNETENCAP 36 ++#define RFC1533_TCPTTL 37 ++#define RFC1533_TCPKEEPALIVETMO 38 ++#define RFC1533_TCPKEEPALIVEGB 39 ++#define RFC1533_NISDOMAIN 40 ++#define RFC1533_NISSERVER 41 ++#define RFC1533_NTPSERVER 42 ++#define RFC1533_VENDOR 43 ++#define RFC1533_NBNS 44 ++#define RFC1533_NBDD 45 ++#define RFC1533_NBNT 46 ++#define RFC1533_NBSCOPE 47 ++#define RFC1533_XFS 48 ++#define RFC1533_XDM 49 ++ ++#define RFC2132_REQ_ADDR 50 ++#define RFC2132_LEASE_TIME 51 ++#define RFC2132_MSG_TYPE 53 ++#define RFC2132_SRV_ID 54 ++#define RFC2132_PARAM_LIST 55 ++#define RFC2132_MESSAGE 56 ++#define RFC2132_MAX_SIZE 57 ++#define RFC2132_RENEWAL_TIME 58 ++#define RFC2132_REBIND_TIME 59 ++#define RFC2132_TFTP_SERVER_NAME 66 ++ ++#define DHCPDISCOVER 1 ++#define DHCPOFFER 2 ++#define DHCPREQUEST 3 ++#define DHCPACK 5 ++#define DHCPNAK 6 ++ ++#define RFC1533_VENDOR_MAJOR 0 ++#define RFC1533_VENDOR_MINOR 0 ++ ++#define RFC1533_VENDOR_MAGIC 128 ++#define RFC1533_VENDOR_ADDPARM 129 ++#define RFC1533_VENDOR_ETHDEV 130 ++#define RFC1533_VENDOR_HOWTO 132 ++#define RFC1533_VENDOR_MNUOPTS 160 ++#define RFC1533_VENDOR_SELECTION 176 ++#define RFC1533_VENDOR_MOTD 184 ++#define RFC1533_VENDOR_NUMOFMOTD 8 ++#define RFC1533_VENDOR_IMG 192 ++#define RFC1533_VENDOR_NUMOFIMG 16 ++ ++#define RFC1533_END 255 ++#define BOOTP_VENDOR_LEN 64 ++#define DHCP_OPT_LEN 312 ++ ++struct bootp_t { ++ struct ip ip; ++ struct udphdr udp; ++ uint8_t bp_op; ++ uint8_t bp_htype; ++ uint8_t bp_hlen; ++ uint8_t bp_hops; ++ uint32_t bp_xid; ++ uint16_t bp_secs; ++ uint16_t unused; ++ struct in_addr bp_ciaddr; ++ struct in_addr bp_yiaddr; ++ struct in_addr bp_siaddr; ++ struct in_addr bp_giaddr; ++ uint8_t bp_hwaddr[16]; ++ uint8_t bp_sname[64]; ++ char bp_file[128]; ++ uint8_t bp_vend[]; ++}; ++ ++typedef struct { ++ uint16_t allocated; ++ uint8_t macaddr[6]; ++} BOOTPClient; ++ ++#define NB_BOOTP_CLIENTS 16 ++ ++void bootp_input(struct mbuf *m); ++ ++#endif +diff --git a/slirp/src/cksum.c b/slirp/src/cksum.c +new file mode 100644 +index 0000000000..b1cb97b7e1 +--- /dev/null ++++ b/slirp/src/cksum.c +@@ -0,0 +1,179 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1988, 1992, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93 ++ * in_cksum.c,v 1.2 1994/08/02 07:48:16 davidg Exp ++ */ ++ ++#include "slirp.h" ++ ++/* ++ * Checksum routine for Internet Protocol family headers (Portable Version). ++ * ++ * This routine is very heavily used in the network ++ * code and should be modified for each CPU to be as fast as possible. ++ * ++ * XXX Since we will never span more than 1 mbuf, we can optimise this ++ */ ++ ++#define ADDCARRY(x) (x > 65535 ? x -= 65535 : x) ++#define REDUCE \ ++ { \ ++ l_util.l = sum; \ ++ sum = l_util.s[0] + l_util.s[1]; \ ++ ADDCARRY(sum); \ ++ } ++ ++int cksum(struct mbuf *m, int len) ++{ ++ register uint16_t *w; ++ register int sum = 0; ++ register int mlen = 0; ++ int byte_swapped = 0; ++ ++ union { ++ uint8_t c[2]; ++ uint16_t s; ++ } s_util; ++ union { ++ uint16_t s[2]; ++ uint32_t l; ++ } l_util; ++ ++ if (m->m_len == 0) ++ goto cont; ++ w = mtod(m, uint16_t *); ++ ++ mlen = m->m_len; ++ ++ if (len < mlen) ++ mlen = len; ++ len -= mlen; ++ /* ++ * Force to even boundary. ++ */ ++ if ((1 & (uintptr_t)w) && (mlen > 0)) { ++ REDUCE; ++ sum <<= 8; ++ s_util.c[0] = *(uint8_t *)w; ++ w = (uint16_t *)((int8_t *)w + 1); ++ mlen--; ++ byte_swapped = 1; ++ } ++ /* ++ * Unroll the loop to make overhead from ++ * branches &c small. ++ */ ++ while ((mlen -= 32) >= 0) { ++ sum += w[0]; ++ sum += w[1]; ++ sum += w[2]; ++ sum += w[3]; ++ sum += w[4]; ++ sum += w[5]; ++ sum += w[6]; ++ sum += w[7]; ++ sum += w[8]; ++ sum += w[9]; ++ sum += w[10]; ++ sum += w[11]; ++ sum += w[12]; ++ sum += w[13]; ++ sum += w[14]; ++ sum += w[15]; ++ w += 16; ++ } ++ mlen += 32; ++ while ((mlen -= 8) >= 0) { ++ sum += w[0]; ++ sum += w[1]; ++ sum += w[2]; ++ sum += w[3]; ++ w += 4; ++ } ++ mlen += 8; ++ if (mlen == 0 && byte_swapped == 0) ++ goto cont; ++ REDUCE; ++ while ((mlen -= 2) >= 0) { ++ sum += *w++; ++ } ++ ++ if (byte_swapped) { ++ REDUCE; ++ sum <<= 8; ++ if (mlen == -1) { ++ s_util.c[1] = *(uint8_t *)w; ++ sum += s_util.s; ++ mlen = 0; ++ } else ++ ++ mlen = -1; ++ } else if (mlen == -1) ++ s_util.c[0] = *(uint8_t *)w; ++ ++cont: ++ if (len) { ++ DEBUG_ERROR("cksum: out of data"); ++ DEBUG_ERROR(" len = %d", len); ++ } ++ if (mlen == -1) { ++ /* The last mbuf has odd # of bytes. Follow the ++ standard (the odd byte may be shifted left by 8 bits ++ or not as determined by endian-ness of the machine) */ ++ s_util.c[1] = 0; ++ sum += s_util.s; ++ } ++ REDUCE; ++ return (~sum & 0xffff); ++} ++ ++int ip6_cksum(struct mbuf *m) ++{ ++ /* TODO: Optimize this by being able to pass the ip6_pseudohdr to cksum ++ * separately from the mbuf */ ++ struct ip6 save_ip, *ip = mtod(m, struct ip6 *); ++ struct ip6_pseudohdr *ih = mtod(m, struct ip6_pseudohdr *); ++ int sum; ++ ++ save_ip = *ip; ++ ++ ih->ih_src = save_ip.ip_src; ++ ih->ih_dst = save_ip.ip_dst; ++ ih->ih_pl = htonl((uint32_t)ntohs(save_ip.ip_pl)); ++ ih->ih_zero_hi = 0; ++ ih->ih_zero_lo = 0; ++ ih->ih_nh = save_ip.ip_nh; ++ ++ sum = cksum(m, ((int)sizeof(struct ip6_pseudohdr)) + ntohl(ih->ih_pl)); ++ ++ *ip = save_ip; ++ ++ return sum; ++} +diff --git a/slirp/src/debug.h b/slirp/src/debug.h +new file mode 100644 +index 0000000000..0f9f3eff3f +--- /dev/null ++++ b/slirp/src/debug.h +@@ -0,0 +1,59 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#ifndef DEBUG_H_ ++#define DEBUG_H_ ++ ++#define DBG_CALL (1 << 0) ++#define DBG_MISC (1 << 1) ++#define DBG_ERROR (1 << 2) ++#define DBG_TFTP (1 << 3) ++#define DBG_VERBOSE_CALL (1 << 4) ++ ++extern int slirp_debug; ++ ++#define DEBUG_CALL(fmt, ...) \ ++ do { \ ++ if (G_UNLIKELY(slirp_debug & DBG_CALL)) { \ ++ g_debug(fmt "...", ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#define DEBUG_VERBOSE_CALL(fmt, ...) \ ++ do { \ ++ if (G_UNLIKELY(slirp_debug & DBG_VERBOSE_CALL)) { \ ++ g_debug(fmt "...", ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#define DEBUG_ARG(fmt, ...) \ ++ do { \ ++ if (G_UNLIKELY(slirp_debug & DBG_CALL)) { \ ++ g_debug(" " fmt, ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#define DEBUG_MISC(fmt, ...) \ ++ do { \ ++ if (G_UNLIKELY(slirp_debug & DBG_MISC)) { \ ++ g_debug(fmt, ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#define DEBUG_ERROR(fmt, ...) \ ++ do { \ ++ if (G_UNLIKELY(slirp_debug & DBG_ERROR)) { \ ++ g_debug(fmt, ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#define DEBUG_TFTP(fmt, ...) \ ++ do { \ ++ if (G_UNLIKELY(slirp_debug & DBG_TFTP)) { \ ++ g_debug(fmt, ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#endif /* DEBUG_H_ */ +diff --git a/slirp/src/dhcpv6.c b/slirp/src/dhcpv6.c +new file mode 100644 +index 0000000000..77b451b910 +--- /dev/null ++++ b/slirp/src/dhcpv6.c +@@ -0,0 +1,224 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * SLIRP stateless DHCPv6 ++ * ++ * We only support stateless DHCPv6, e.g. for network booting. ++ * See RFC 3315, RFC 3736, RFC 3646 and RFC 5970 for details. ++ * ++ * Copyright 2016 Thomas Huth, Red Hat Inc. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++ ++#include "slirp.h" ++#include "dhcpv6.h" ++ ++/* DHCPv6 message types */ ++#define MSGTYPE_REPLY 7 ++#define MSGTYPE_INFO_REQUEST 11 ++ ++/* DHCPv6 option types */ ++#define OPTION_CLIENTID 1 ++#define OPTION_IAADDR 5 ++#define OPTION_ORO 6 ++#define OPTION_DNS_SERVERS 23 ++#define OPTION_BOOTFILE_URL 59 ++ ++struct requested_infos { ++ uint8_t *client_id; ++ int client_id_len; ++ bool want_dns; ++ bool want_boot_url; ++}; ++ ++/** ++ * Analyze the info request message sent by the client to see what data it ++ * provided and what it wants to have. The information is gathered in the ++ * "requested_infos" struct. Note that client_id (if provided) points into ++ * the odata region, thus the caller must keep odata valid as long as it ++ * needs to access the requested_infos struct. ++ */ ++static int dhcpv6_parse_info_request(Slirp *slirp, uint8_t *odata, int olen, ++ struct requested_infos *ri) ++{ ++ int i, req_opt; ++ ++ while (olen > 4) { ++ /* Parse one option */ ++ int option = odata[0] << 8 | odata[1]; ++ int len = odata[2] << 8 | odata[3]; ++ ++ if (len + 4 > olen) { ++ slirp->cb->guest_error("Guest sent bad DHCPv6 packet!", ++ slirp->opaque); ++ return -E2BIG; ++ } ++ ++ switch (option) { ++ case OPTION_IAADDR: ++ /* According to RFC3315, we must discard requests with IA option */ ++ return -EINVAL; ++ case OPTION_CLIENTID: ++ if (len > 256) { ++ /* Avoid very long IDs which could cause problems later */ ++ return -E2BIG; ++ } ++ ri->client_id = odata + 4; ++ ri->client_id_len = len; ++ break; ++ case OPTION_ORO: /* Option request option */ ++ if (len & 1) { ++ return -EINVAL; ++ } ++ /* Check which options the client wants to have */ ++ for (i = 0; i < len; i += 2) { ++ req_opt = odata[4 + i] << 8 | odata[4 + i + 1]; ++ switch (req_opt) { ++ case OPTION_DNS_SERVERS: ++ ri->want_dns = true; ++ break; ++ case OPTION_BOOTFILE_URL: ++ ri->want_boot_url = true; ++ break; ++ default: ++ DEBUG_MISC("dhcpv6: Unsupported option request %d", ++ req_opt); ++ } ++ } ++ break; ++ default: ++ DEBUG_MISC("dhcpv6 info req: Unsupported option %d, len=%d", option, ++ len); ++ } ++ ++ odata += len + 4; ++ olen -= len + 4; ++ } ++ ++ return 0; ++} ++ ++ ++/** ++ * Handle information request messages ++ */ ++static void dhcpv6_info_request(Slirp *slirp, struct sockaddr_in6 *srcsas, ++ uint32_t xid, uint8_t *odata, int olen) ++{ ++ struct requested_infos ri = { NULL }; ++ struct sockaddr_in6 sa6, da6; ++ struct mbuf *m; ++ uint8_t *resp; ++ ++ if (dhcpv6_parse_info_request(slirp, odata, olen, &ri) < 0) { ++ return; ++ } ++ ++ m = m_get(slirp); ++ if (!m) { ++ return; ++ } ++ memset(m->m_data, 0, m->m_size); ++ m->m_data += IF_MAXLINKHDR; ++ resp = (uint8_t *)m->m_data + sizeof(struct ip6) + sizeof(struct udphdr); ++ ++ /* Fill in response */ ++ *resp++ = MSGTYPE_REPLY; ++ *resp++ = (uint8_t)(xid >> 16); ++ *resp++ = (uint8_t)(xid >> 8); ++ *resp++ = (uint8_t)xid; ++ ++ if (ri.client_id) { ++ *resp++ = OPTION_CLIENTID >> 8; /* option-code high byte */ ++ *resp++ = OPTION_CLIENTID; /* option-code low byte */ ++ *resp++ = ri.client_id_len >> 8; /* option-len high byte */ ++ *resp++ = ri.client_id_len; /* option-len low byte */ ++ memcpy(resp, ri.client_id, ri.client_id_len); ++ resp += ri.client_id_len; ++ } ++ if (ri.want_dns) { ++ *resp++ = OPTION_DNS_SERVERS >> 8; /* option-code high byte */ ++ *resp++ = OPTION_DNS_SERVERS; /* option-code low byte */ ++ *resp++ = 0; /* option-len high byte */ ++ *resp++ = 16; /* option-len low byte */ ++ memcpy(resp, &slirp->vnameserver_addr6, 16); ++ resp += 16; ++ } ++ if (ri.want_boot_url) { ++ uint8_t *sa = slirp->vhost_addr6.s6_addr; ++ int slen, smaxlen; ++ ++ *resp++ = OPTION_BOOTFILE_URL >> 8; /* option-code high byte */ ++ *resp++ = OPTION_BOOTFILE_URL; /* option-code low byte */ ++ smaxlen = (uint8_t *)m->m_data + slirp->if_mtu - (resp + 2); ++ slen = slirp_fmt((char *)resp + 2, smaxlen, ++ "tftp://[%02x%02x:%02x%02x:%02x%02x:%02x%02x:" ++ "%02x%02x:%02x%02x:%02x%02x:%02x%02x]/%s", ++ sa[0], sa[1], sa[2], sa[3], sa[4], sa[5], sa[6], sa[7], ++ sa[8], sa[9], sa[10], sa[11], sa[12], sa[13], sa[14], ++ sa[15], slirp->bootp_filename); ++ *resp++ = slen >> 8; /* option-len high byte */ ++ *resp++ = slen; /* option-len low byte */ ++ resp += slen; ++ } ++ ++ sa6.sin6_addr = slirp->vhost_addr6; ++ sa6.sin6_port = DHCPV6_SERVER_PORT; ++ da6.sin6_addr = srcsas->sin6_addr; ++ da6.sin6_port = srcsas->sin6_port; ++ m->m_data += sizeof(struct ip6) + sizeof(struct udphdr); ++ m->m_len = resp - (uint8_t *)m->m_data; ++ udp6_output(NULL, m, &sa6, &da6); ++} ++ ++/** ++ * Handle DHCPv6 messages sent by the client ++ */ ++void dhcpv6_input(struct sockaddr_in6 *srcsas, struct mbuf *m) ++{ ++ uint8_t *data = (uint8_t *)m->m_data + sizeof(struct udphdr); ++ int data_len = m->m_len - sizeof(struct udphdr); ++ uint32_t xid; ++ ++ if (data_len < 4) { ++ return; ++ } ++ ++ xid = ntohl(*(uint32_t *)data) & 0xffffff; ++ ++ switch (data[0]) { ++ case MSGTYPE_INFO_REQUEST: ++ dhcpv6_info_request(m->slirp, srcsas, xid, &data[4], data_len - 4); ++ break; ++ default: ++ DEBUG_MISC("dhcpv6_input: Unsupported message type 0x%x", data[0]); ++ } ++} +diff --git a/slirp/src/dhcpv6.h b/slirp/src/dhcpv6.h +new file mode 100644 +index 0000000000..d12c49b36c +--- /dev/null ++++ b/slirp/src/dhcpv6.h +@@ -0,0 +1,68 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Definitions and prototypes for SLIRP stateless DHCPv6 ++ * ++ * Copyright 2016 Thomas Huth, Red Hat Inc. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++#ifndef SLIRP_DHCPV6_H ++#define SLIRP_DHCPV6_H ++ ++#define DHCPV6_SERVER_PORT 547 ++ ++#define ALLDHCP_MULTICAST \ ++ { \ ++ .s6_addr = { \ ++ 0xff, \ ++ 0x02, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x01, \ ++ 0x00, \ ++ 0x02 \ ++ } \ ++ } ++ ++#define in6_dhcp_multicast(a) in6_equal(a, &(struct in6_addr)ALLDHCP_MULTICAST) ++ ++void dhcpv6_input(struct sockaddr_in6 *srcsas, struct mbuf *m); ++ ++#endif +diff --git a/slirp/src/dnssearch.c b/slirp/src/dnssearch.c +new file mode 100644 +index 0000000000..55497e860e +--- /dev/null ++++ b/slirp/src/dnssearch.c +@@ -0,0 +1,306 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * Domain search option for DHCP (RFC 3397) ++ * ++ * Copyright (c) 2012 Klaus Stengel ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. ++ */ ++ ++#include "slirp.h" ++ ++static const uint8_t RFC3397_OPT_DOMAIN_SEARCH = 119; ++static const uint8_t MAX_OPT_LEN = 255; ++static const uint8_t OPT_HEADER_LEN = 2; ++static const uint8_t REFERENCE_LEN = 2; ++ ++struct compact_domain; ++ ++typedef struct compact_domain { ++ struct compact_domain *self; ++ struct compact_domain *refdom; ++ uint8_t *labels; ++ size_t len; ++ size_t common_octets; ++} CompactDomain; ++ ++static size_t domain_suffix_diffoff(const CompactDomain *a, ++ const CompactDomain *b) ++{ ++ size_t la = a->len, lb = b->len; ++ uint8_t *da = a->labels + la, *db = b->labels + lb; ++ size_t i, lm = (la < lb) ? la : lb; ++ ++ for (i = 0; i < lm; i++) { ++ da--; ++ db--; ++ if (*da != *db) { ++ break; ++ } ++ } ++ return i; ++} ++ ++static int domain_suffix_ord(const void *cva, const void *cvb) ++{ ++ const CompactDomain *a = cva, *b = cvb; ++ size_t la = a->len, lb = b->len; ++ size_t doff = domain_suffix_diffoff(a, b); ++ uint8_t ca = a->labels[la - doff]; ++ uint8_t cb = b->labels[lb - doff]; ++ ++ if (ca < cb) { ++ return -1; ++ } ++ if (ca > cb) { ++ return 1; ++ } ++ if (la < lb) { ++ return -1; ++ } ++ if (la > lb) { ++ return 1; ++ } ++ return 0; ++} ++ ++static size_t domain_common_label(CompactDomain *a, CompactDomain *b) ++{ ++ size_t res, doff = domain_suffix_diffoff(a, b); ++ uint8_t *first_eq_pos = a->labels + (a->len - doff); ++ uint8_t *label = a->labels; ++ ++ while (*label && label < first_eq_pos) { ++ label += *label + 1; ++ } ++ res = a->len - (label - a->labels); ++ /* only report if it can help to reduce the packet size */ ++ return (res > REFERENCE_LEN) ? res : 0; ++} ++ ++static void domain_fixup_order(CompactDomain *cd, size_t n) ++{ ++ size_t i; ++ ++ for (i = 0; i < n; i++) { ++ CompactDomain *cur = cd + i, *next = cd[i].self; ++ ++ while (!cur->common_octets) { ++ CompactDomain *tmp = next->self; /* backup target value */ ++ ++ next->self = cur; ++ cur->common_octets++; ++ ++ cur = next; ++ next = tmp; ++ } ++ } ++} ++ ++static void domain_mklabels(CompactDomain *cd, const char *input) ++{ ++ uint8_t *len_marker = cd->labels; ++ uint8_t *output = len_marker; /* pre-incremented */ ++ const char *in = input; ++ char cur_chr; ++ size_t len = 0; ++ ++ if (cd->len == 0) { ++ goto fail; ++ } ++ cd->len++; ++ ++ do { ++ cur_chr = *in++; ++ if (cur_chr == '.' || cur_chr == '\0') { ++ len = output - len_marker; ++ if ((len == 0 && cur_chr == '.') || len >= 64) { ++ goto fail; ++ } ++ *len_marker = len; ++ ++ output++; ++ len_marker = output; ++ } else { ++ output++; ++ *output = cur_chr; ++ } ++ } while (cur_chr != '\0'); ++ ++ /* ensure proper zero-termination */ ++ if (len != 0) { ++ *len_marker = 0; ++ cd->len++; ++ } ++ return; ++ ++fail: ++ g_warning("failed to parse domain name '%s'\n", input); ++ cd->len = 0; ++} ++ ++static void domain_mkxrefs(CompactDomain *doms, CompactDomain *last, ++ size_t depth) ++{ ++ CompactDomain *i = doms, *target = doms; ++ ++ do { ++ if (i->labels < target->labels) { ++ target = i; ++ } ++ } while (i++ != last); ++ ++ for (i = doms; i != last; i++) { ++ CompactDomain *group_last; ++ size_t next_depth; ++ ++ if (i->common_octets == depth) { ++ continue; ++ } ++ ++ next_depth = -1; ++ for (group_last = i; group_last != last; group_last++) { ++ size_t co = group_last->common_octets; ++ if (co <= depth) { ++ break; ++ } ++ if (co < next_depth) { ++ next_depth = co; ++ } ++ } ++ domain_mkxrefs(i, group_last, next_depth); ++ ++ i = group_last; ++ if (i == last) { ++ break; ++ } ++ } ++ ++ if (depth == 0) { ++ return; ++ } ++ ++ i = doms; ++ do { ++ if (i != target && i->refdom == NULL) { ++ i->refdom = target; ++ i->common_octets = depth; ++ } ++ } while (i++ != last); ++} ++ ++static size_t domain_compactify(CompactDomain *domains, size_t n) ++{ ++ uint8_t *start = domains->self->labels, *outptr = start; ++ size_t i; ++ ++ for (i = 0; i < n; i++) { ++ CompactDomain *cd = domains[i].self; ++ CompactDomain *rd = cd->refdom; ++ ++ if (rd != NULL) { ++ size_t moff = (rd->labels - start) + (rd->len - cd->common_octets); ++ if (moff < 0x3FFFu) { ++ cd->len -= cd->common_octets - 2; ++ cd->labels[cd->len - 1] = moff & 0xFFu; ++ cd->labels[cd->len - 2] = 0xC0u | (moff >> 8); ++ } ++ } ++ ++ if (cd->labels != outptr) { ++ memmove(outptr, cd->labels, cd->len); ++ cd->labels = outptr; ++ } ++ outptr += cd->len; ++ } ++ return outptr - start; ++} ++ ++int translate_dnssearch(Slirp *s, const char **names) ++{ ++ size_t blocks, bsrc_start, bsrc_end, bdst_start; ++ size_t i, num_domains, memreq = 0; ++ uint8_t *result = NULL, *outptr; ++ CompactDomain *domains = NULL; ++ ++ num_domains = g_strv_length((GStrv)(void *)names); ++ if (num_domains == 0) { ++ return -2; ++ } ++ ++ domains = g_malloc(num_domains * sizeof(*domains)); ++ ++ for (i = 0; i < num_domains; i++) { ++ size_t nlen = strlen(names[i]); ++ memreq += nlen + 2; /* 1 zero octet + 1 label length octet */ ++ domains[i].self = domains + i; ++ domains[i].len = nlen; ++ domains[i].common_octets = 0; ++ domains[i].refdom = NULL; ++ } ++ ++ /* reserve extra 2 header bytes for each 255 bytes of output */ ++ memreq += DIV_ROUND_UP(memreq, MAX_OPT_LEN) * OPT_HEADER_LEN; ++ result = g_malloc(memreq * sizeof(*result)); ++ ++ outptr = result; ++ for (i = 0; i < num_domains; i++) { ++ domains[i].labels = outptr; ++ domain_mklabels(domains + i, names[i]); ++ outptr += domains[i].len; ++ } ++ ++ if (outptr == result) { ++ g_free(domains); ++ g_free(result); ++ return -1; ++ } ++ ++ qsort(domains, num_domains, sizeof(*domains), domain_suffix_ord); ++ domain_fixup_order(domains, num_domains); ++ ++ for (i = 1; i < num_domains; i++) { ++ size_t cl = domain_common_label(domains + i - 1, domains + i); ++ domains[i - 1].common_octets = cl; ++ } ++ ++ domain_mkxrefs(domains, domains + num_domains - 1, 0); ++ memreq = domain_compactify(domains, num_domains); ++ ++ blocks = DIV_ROUND_UP(memreq, MAX_OPT_LEN); ++ bsrc_end = memreq; ++ bsrc_start = (blocks - 1) * MAX_OPT_LEN; ++ bdst_start = bsrc_start + blocks * OPT_HEADER_LEN; ++ memreq += blocks * OPT_HEADER_LEN; ++ ++ while (blocks--) { ++ size_t len = bsrc_end - bsrc_start; ++ memmove(result + bdst_start, result + bsrc_start, len); ++ result[bdst_start - 2] = RFC3397_OPT_DOMAIN_SEARCH; ++ result[bdst_start - 1] = len; ++ bsrc_end = bsrc_start; ++ bsrc_start -= MAX_OPT_LEN; ++ bdst_start -= MAX_OPT_LEN + OPT_HEADER_LEN; ++ } ++ ++ g_free(domains); ++ s->vdnssearch = result; ++ s->vdnssearch_len = memreq; ++ return 0; ++} +diff --git a/slirp/src/if.c b/slirp/src/if.c +new file mode 100644 +index 0000000000..9a1eec97b8 +--- /dev/null ++++ b/slirp/src/if.c +@@ -0,0 +1,215 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++ ++static void ifs_insque(struct mbuf *ifm, struct mbuf *ifmhead) ++{ ++ ifm->ifs_next = ifmhead->ifs_next; ++ ifmhead->ifs_next = ifm; ++ ifm->ifs_prev = ifmhead; ++ ifm->ifs_next->ifs_prev = ifm; ++} ++ ++static void ifs_remque(struct mbuf *ifm) ++{ ++ ifm->ifs_prev->ifs_next = ifm->ifs_next; ++ ifm->ifs_next->ifs_prev = ifm->ifs_prev; ++} ++ ++void if_init(Slirp *slirp) ++{ ++ slirp->if_fastq.qh_link = slirp->if_fastq.qh_rlink = &slirp->if_fastq; ++ slirp->if_batchq.qh_link = slirp->if_batchq.qh_rlink = &slirp->if_batchq; ++} ++ ++/* ++ * if_output: Queue packet into an output queue. ++ * There are 2 output queue's, if_fastq and if_batchq. ++ * Each output queue is a doubly linked list of double linked lists ++ * of mbufs, each list belonging to one "session" (socket). This ++ * way, we can output packets fairly by sending one packet from each ++ * session, instead of all the packets from one session, then all packets ++ * from the next session, etc. Packets on the if_fastq get absolute ++ * priority, but if one session hogs the link, it gets "downgraded" ++ * to the batchq until it runs out of packets, then it'll return ++ * to the fastq (eg. if the user does an ls -alR in a telnet session, ++ * it'll temporarily get downgraded to the batchq) ++ */ ++void if_output(struct socket *so, struct mbuf *ifm) ++{ ++ Slirp *slirp = ifm->slirp; ++ M_DUP_DEBUG(slirp, ifm, 0, 0); ++ ++ struct mbuf *ifq; ++ int on_fastq = 1; ++ ++ DEBUG_CALL("if_output"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("ifm = %p", ifm); ++ ++ /* ++ * First remove the mbuf from m_usedlist, ++ * since we're gonna use m_next and m_prev ourselves ++ * XXX Shouldn't need this, gotta change dtom() etc. ++ */ ++ if (ifm->m_flags & M_USEDLIST) { ++ remque(ifm); ++ ifm->m_flags &= ~M_USEDLIST; ++ } ++ ++ /* ++ * See if there's already a batchq list for this session. ++ * This can include an interactive session, which should go on fastq, ++ * but gets too greedy... hence it'll be downgraded from fastq to batchq. ++ * We mustn't put this packet back on the fastq (or we'll send it out of ++ * order) ++ * XXX add cache here? ++ */ ++ if (so) { ++ for (ifq = (struct mbuf *)slirp->if_batchq.qh_rlink; ++ (struct quehead *)ifq != &slirp->if_batchq; ifq = ifq->ifq_prev) { ++ if (so == ifq->ifq_so) { ++ /* A match! */ ++ ifm->ifq_so = so; ++ ifs_insque(ifm, ifq->ifs_prev); ++ goto diddit; ++ } ++ } ++ } ++ ++ /* No match, check which queue to put it on */ ++ if (so && (so->so_iptos & IPTOS_LOWDELAY)) { ++ ifq = (struct mbuf *)slirp->if_fastq.qh_rlink; ++ on_fastq = 1; ++ /* ++ * Check if this packet is a part of the last ++ * packet's session ++ */ ++ if (ifq->ifq_so == so) { ++ ifm->ifq_so = so; ++ ifs_insque(ifm, ifq->ifs_prev); ++ goto diddit; ++ } ++ } else { ++ ifq = (struct mbuf *)slirp->if_batchq.qh_rlink; ++ } ++ ++ /* Create a new doubly linked list for this session */ ++ ifm->ifq_so = so; ++ ifs_init(ifm); ++ insque(ifm, ifq); ++ ++diddit: ++ if (so) { ++ /* Update *_queued */ ++ so->so_queued++; ++ so->so_nqueued++; ++ /* ++ * Check if the interactive session should be downgraded to ++ * the batchq. A session is downgraded if it has queued 6 ++ * packets without pausing, and at least 3 of those packets ++ * have been sent over the link ++ * (XXX These are arbitrary numbers, probably not optimal..) ++ */ ++ if (on_fastq && ++ ((so->so_nqueued >= 6) && (so->so_nqueued - so->so_queued) >= 3)) { ++ /* Remove from current queue... */ ++ remque(ifm->ifs_next); ++ ++ /* ...And insert in the new. That'll teach ya! */ ++ insque(ifm->ifs_next, &slirp->if_batchq); ++ } ++ } ++ ++ /* ++ * This prevents us from malloc()ing too many mbufs ++ */ ++ if_start(ifm->slirp); ++} ++ ++/* ++ * Send one packet from each session. ++ * If there are packets on the fastq, they are sent FIFO, before ++ * everything else. Then we choose the first packet from each ++ * batchq session (socket) and send it. ++ * For example, if there are 3 ftp sessions fighting for bandwidth, ++ * one packet will be sent from the first session, then one packet ++ * from the second session, then one packet from the third. ++ */ ++void if_start(Slirp *slirp) ++{ ++ uint64_t now = slirp->cb->clock_get_ns(slirp->opaque); ++ bool from_batchq = false; ++ struct mbuf *ifm, *ifm_next, *ifqt; ++ ++ DEBUG_VERBOSE_CALL("if_start"); ++ ++ if (slirp->if_start_busy) { ++ return; ++ } ++ slirp->if_start_busy = true; ++ ++ struct mbuf *batch_head = NULL; ++ if (slirp->if_batchq.qh_link != &slirp->if_batchq) { ++ batch_head = (struct mbuf *)slirp->if_batchq.qh_link; ++ } ++ ++ if (slirp->if_fastq.qh_link != &slirp->if_fastq) { ++ ifm_next = (struct mbuf *)slirp->if_fastq.qh_link; ++ } else if (batch_head) { ++ /* Nothing on fastq, pick up from batchq */ ++ ifm_next = batch_head; ++ from_batchq = true; ++ } else { ++ ifm_next = NULL; ++ } ++ ++ while (ifm_next) { ++ ifm = ifm_next; ++ ++ ifm_next = ifm->ifq_next; ++ if ((struct quehead *)ifm_next == &slirp->if_fastq) { ++ /* No more packets in fastq, switch to batchq */ ++ ifm_next = batch_head; ++ from_batchq = true; ++ } ++ if ((struct quehead *)ifm_next == &slirp->if_batchq) { ++ /* end of batchq */ ++ ifm_next = NULL; ++ } ++ ++ /* Try to send packet unless it already expired */ ++ if (ifm->expiration_date >= now && !if_encap(slirp, ifm)) { ++ /* Packet is delayed due to pending ARP or NDP resolution */ ++ continue; ++ } ++ ++ /* Remove it from the queue */ ++ ifqt = ifm->ifq_prev; ++ remque(ifm); ++ ++ /* If there are more packets for this session, re-queue them */ ++ if (ifm->ifs_next != ifm) { ++ struct mbuf *next = ifm->ifs_next; ++ ++ insque(next, ifqt); ++ ifs_remque(ifm); ++ if (!from_batchq) { ++ ifm_next = next; ++ } ++ } ++ ++ /* Update so_queued */ ++ if (ifm->ifq_so && --ifm->ifq_so->so_queued == 0) { ++ /* If there's no more queued, reset nqueued */ ++ ifm->ifq_so->so_nqueued = 0; ++ } ++ ++ m_free(ifm); ++ } ++ ++ slirp->if_start_busy = false; ++} +diff --git a/slirp/src/if.h b/slirp/src/if.h +new file mode 100644 +index 0000000000..7cf9d2750e +--- /dev/null ++++ b/slirp/src/if.h +@@ -0,0 +1,25 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#ifndef IF_H ++#define IF_H ++ ++#define IF_COMPRESS 0x01 /* We want compression */ ++#define IF_NOCOMPRESS 0x02 /* Do not do compression */ ++#define IF_AUTOCOMP 0x04 /* Autodetect (default) */ ++#define IF_NOCIDCOMP 0x08 /* CID compression */ ++ ++#define IF_MTU_DEFAULT 1500 ++#define IF_MTU_MIN 68 ++#define IF_MTU_MAX 65521 ++#define IF_MRU_DEFAULT 1500 ++#define IF_MRU_MIN 68 ++#define IF_MRU_MAX 65521 ++#define IF_COMP IF_AUTOCOMP /* Flags for compression */ ++ ++/* 2 for alignment, 14 for ethernet */ ++#define IF_MAXLINKHDR (2 + ETH_HLEN) ++ ++#endif +diff --git a/slirp/src/ip.h b/slirp/src/ip.h +new file mode 100644 +index 0000000000..e5d4aa8a6d +--- /dev/null ++++ b/slirp/src/ip.h +@@ -0,0 +1,242 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)ip.h 8.1 (Berkeley) 6/10/93 ++ * ip.h,v 1.3 1994/08/21 05:27:30 paul Exp ++ */ ++ ++#ifndef IP_H ++#define IP_H ++ ++#include ++ ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++#undef NTOHL ++#undef NTOHS ++#undef HTONL ++#undef HTONS ++#define NTOHL(d) ++#define NTOHS(d) ++#define HTONL(d) ++#define HTONS(d) ++#else ++#ifndef NTOHL ++#define NTOHL(d) ((d) = ntohl((d))) ++#endif ++#ifndef NTOHS ++#define NTOHS(d) ((d) = ntohs((uint16_t)(d))) ++#endif ++#ifndef HTONL ++#define HTONL(d) ((d) = htonl((d))) ++#endif ++#ifndef HTONS ++#define HTONS(d) ((d) = htons((uint16_t)(d))) ++#endif ++#endif ++ ++typedef uint32_t n_long; /* long as received from the net */ ++ ++/* ++ * Definitions for internet protocol version 4. ++ * Per RFC 791, September 1981. ++ */ ++#define IPVERSION 4 ++ ++/* ++ * Structure of an internet header, naked of options. ++ */ ++struct ip { ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint8_t ip_v : 4, /* version */ ++ ip_hl : 4; /* header length */ ++#else ++ uint8_t ip_hl : 4, /* header length */ ++ ip_v : 4; /* version */ ++#endif ++ uint8_t ip_tos; /* type of service */ ++ uint16_t ip_len; /* total length */ ++ uint16_t ip_id; /* identification */ ++ uint16_t ip_off; /* fragment offset field */ ++#define IP_DF 0x4000 /* don't fragment flag */ ++#define IP_MF 0x2000 /* more fragments flag */ ++#define IP_OFFMASK 0x1fff /* mask for fragmenting bits */ ++ uint8_t ip_ttl; /* time to live */ ++ uint8_t ip_p; /* protocol */ ++ uint16_t ip_sum; /* checksum */ ++ struct in_addr ip_src, ip_dst; /* source and dest address */ ++} SLIRP_PACKED; ++ ++#define IP_MAXPACKET 65535 /* maximum packet size */ ++ ++/* ++ * Definitions for IP type of service (ip_tos) ++ */ ++#define IPTOS_LOWDELAY 0x10 ++#define IPTOS_THROUGHPUT 0x08 ++#define IPTOS_RELIABILITY 0x04 ++ ++/* ++ * Definitions for options. ++ */ ++#define IPOPT_COPIED(o) ((o)&0x80) ++#define IPOPT_CLASS(o) ((o)&0x60) ++#define IPOPT_NUMBER(o) ((o)&0x1f) ++ ++#define IPOPT_CONTROL 0x00 ++#define IPOPT_RESERVED1 0x20 ++#define IPOPT_DEBMEAS 0x40 ++#define IPOPT_RESERVED2 0x60 ++ ++#define IPOPT_EOL 0 /* end of option list */ ++#define IPOPT_NOP 1 /* no operation */ ++ ++#define IPOPT_RR 7 /* record packet route */ ++#define IPOPT_TS 68 /* timestamp */ ++#define IPOPT_SECURITY 130 /* provide s,c,h,tcc */ ++#define IPOPT_LSRR 131 /* loose source route */ ++#define IPOPT_SATID 136 /* satnet id */ ++#define IPOPT_SSRR 137 /* strict source route */ ++ ++/* ++ * Offsets to fields in options other than EOL and NOP. ++ */ ++#define IPOPT_OPTVAL 0 /* option ID */ ++#define IPOPT_OLEN 1 /* option length */ ++#define IPOPT_OFFSET 2 /* offset within option */ ++#define IPOPT_MINOFF 4 /* min value of above */ ++ ++/* ++ * Time stamp option structure. ++ */ ++struct ip_timestamp { ++ uint8_t ipt_code; /* IPOPT_TS */ ++ uint8_t ipt_len; /* size of structure (variable) */ ++ uint8_t ipt_ptr; /* index of current entry */ ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint8_t ipt_oflw : 4, /* overflow counter */ ++ ipt_flg : 4; /* flags, see below */ ++#else ++ uint8_t ipt_flg : 4, /* flags, see below */ ++ ipt_oflw : 4; /* overflow counter */ ++#endif ++ union ipt_timestamp { ++ n_long ipt_time[1]; ++ struct ipt_ta { ++ struct in_addr ipt_addr; ++ n_long ipt_time; ++ } ipt_ta[1]; ++ } ipt_timestamp; ++} SLIRP_PACKED; ++ ++/* flag bits for ipt_flg */ ++#define IPOPT_TS_TSONLY 0 /* timestamps only */ ++#define IPOPT_TS_TSANDADDR 1 /* timestamps and addresses */ ++#define IPOPT_TS_PRESPEC 3 /* specified modules only */ ++ ++/* bits for security (not byte swapped) */ ++#define IPOPT_SECUR_UNCLASS 0x0000 ++#define IPOPT_SECUR_CONFID 0xf135 ++#define IPOPT_SECUR_EFTO 0x789a ++#define IPOPT_SECUR_MMMM 0xbc4d ++#define IPOPT_SECUR_RESTR 0xaf13 ++#define IPOPT_SECUR_SECRET 0xd788 ++#define IPOPT_SECUR_TOPSECRET 0x6bc5 ++ ++/* ++ * Internet implementation parameters. ++ */ ++#define MAXTTL 255 /* maximum time to live (seconds) */ ++#define IPDEFTTL 64 /* default ttl, from RFC 1340 */ ++#define IPFRAGTTL 60 /* time to live for frags, slowhz */ ++#define IPTTLDEC 1 /* subtracted when forwarding */ ++ ++#define IP_MSS 576 /* default maximum segment size */ ++ ++#if GLIB_SIZEOF_VOID_P == 4 ++struct mbuf_ptr { ++ struct mbuf *mptr; ++ uint32_t dummy; ++} SLIRP_PACKED; ++#else ++struct mbuf_ptr { ++ struct mbuf *mptr; ++} SLIRP_PACKED; ++#endif ++struct qlink { ++ void *next, *prev; ++}; ++ ++/* ++ * Overlay for ip header used by other protocols (tcp, udp). ++ */ ++struct ipovly { ++ struct mbuf_ptr ih_mbuf; /* backpointer to mbuf */ ++ uint8_t ih_x1; /* (unused) */ ++ uint8_t ih_pr; /* protocol */ ++ uint16_t ih_len; /* protocol length */ ++ struct in_addr ih_src; /* source internet address */ ++ struct in_addr ih_dst; /* destination internet address */ ++} SLIRP_PACKED; ++ ++/* ++ * Ip reassembly queue structure. Each fragment ++ * being reassembled is attached to one of these structures. ++ * They are timed out after ipq_ttl drops to 0, and may also ++ * be reclaimed if memory becomes tight. ++ * size 28 bytes ++ */ ++struct ipq { ++ struct qlink frag_link; /* to ip headers of fragments */ ++ struct qlink ip_link; /* to other reass headers */ ++ uint8_t ipq_ttl; /* time for reass q to live */ ++ uint8_t ipq_p; /* protocol of this fragment */ ++ uint16_t ipq_id; /* sequence id for reassembly */ ++ struct in_addr ipq_src, ipq_dst; ++}; ++ ++/* ++ * Ip header, when holding a fragment. ++ * ++ * Note: ipf_link must be at same offset as frag_link above ++ */ ++struct ipasfrag { ++ struct qlink ipf_link; ++ struct ip ipf_ip; ++}; ++ ++G_STATIC_ASSERT(offsetof(struct ipq, frag_link) == ++ offsetof(struct ipasfrag, ipf_link)); ++ ++#define ipf_off ipf_ip.ip_off ++#define ipf_tos ipf_ip.ip_tos ++#define ipf_len ipf_ip.ip_len ++#define ipf_next ipf_link.next ++#define ipf_prev ipf_link.prev ++ ++#endif +diff --git a/slirp/src/ip6.h b/slirp/src/ip6.h +new file mode 100644 +index 0000000000..0630309d29 +--- /dev/null ++++ b/slirp/src/ip6.h +@@ -0,0 +1,214 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. ++ */ ++ ++#ifndef SLIRP_IP6_H ++#define SLIRP_IP6_H ++ ++#include ++#include ++ ++#define ALLNODES_MULTICAST \ ++ { \ ++ .s6_addr = { \ ++ 0xff, \ ++ 0x02, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x01 \ ++ } \ ++ } ++ ++#define SOLICITED_NODE_PREFIX \ ++ { \ ++ .s6_addr = { \ ++ 0xff, \ ++ 0x02, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x01, \ ++ 0xff, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00 \ ++ } \ ++ } ++ ++#define LINKLOCAL_ADDR \ ++ { \ ++ .s6_addr = { \ ++ 0xfe, \ ++ 0x80, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x02 \ ++ } \ ++ } ++ ++#define ZERO_ADDR \ ++ { \ ++ .s6_addr = { \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00 \ ++ } \ ++ } ++ ++static inline bool in6_equal(const struct in6_addr *a, const struct in6_addr *b) ++{ ++ return memcmp(a, b, sizeof(*a)) == 0; ++} ++ ++static inline bool in6_equal_net(const struct in6_addr *a, ++ const struct in6_addr *b, int prefix_len) ++{ ++ if (memcmp(a, b, prefix_len / 8) != 0) { ++ return 0; ++ } ++ ++ if (prefix_len % 8 == 0) { ++ return 1; ++ } ++ ++ return a->s6_addr[prefix_len / 8] >> (8 - (prefix_len % 8)) == ++ b->s6_addr[prefix_len / 8] >> (8 - (prefix_len % 8)); ++} ++ ++static inline bool in6_equal_mach(const struct in6_addr *a, ++ const struct in6_addr *b, int prefix_len) ++{ ++ if (memcmp(&(a->s6_addr[DIV_ROUND_UP(prefix_len, 8)]), ++ &(b->s6_addr[DIV_ROUND_UP(prefix_len, 8)]), ++ 16 - DIV_ROUND_UP(prefix_len, 8)) != 0) { ++ return 0; ++ } ++ ++ if (prefix_len % 8 == 0) { ++ return 1; ++ } ++ ++ return (a->s6_addr[prefix_len / 8] & ++ ((1U << (8 - (prefix_len % 8))) - 1)) == ++ (b->s6_addr[prefix_len / 8] & ((1U << (8 - (prefix_len % 8))) - 1)); ++} ++ ++ ++#define in6_equal_router(a) \ ++ ((in6_equal_net(a, &slirp->vprefix_addr6, slirp->vprefix_len) && \ ++ in6_equal_mach(a, &slirp->vhost_addr6, slirp->vprefix_len)) || \ ++ (in6_equal_net(a, &(struct in6_addr)LINKLOCAL_ADDR, 64) && \ ++ in6_equal_mach(a, &slirp->vhost_addr6, 64))) ++ ++#define in6_equal_dns(a) \ ++ ((in6_equal_net(a, &slirp->vprefix_addr6, slirp->vprefix_len) && \ ++ in6_equal_mach(a, &slirp->vnameserver_addr6, slirp->vprefix_len)) || \ ++ (in6_equal_net(a, &(struct in6_addr)LINKLOCAL_ADDR, 64) && \ ++ in6_equal_mach(a, &slirp->vnameserver_addr6, 64))) ++ ++#define in6_equal_host(a) (in6_equal_router(a) || in6_equal_dns(a)) ++ ++#define in6_solicitednode_multicast(a) \ ++ (in6_equal_net(a, &(struct in6_addr)SOLICITED_NODE_PREFIX, 104)) ++ ++#define in6_zero(a) (in6_equal(a, &(struct in6_addr)ZERO_ADDR)) ++ ++/* Compute emulated host MAC address from its ipv6 address */ ++static inline void in6_compute_ethaddr(struct in6_addr ip, ++ uint8_t eth[ETH_ALEN]) ++{ ++ eth[0] = 0x52; ++ eth[1] = 0x56; ++ memcpy(ð[2], &ip.s6_addr[16 - (ETH_ALEN - 2)], ETH_ALEN - 2); ++} ++ ++/* ++ * Definitions for internet protocol version 6. ++ * Per RFC 2460, December 1998. ++ */ ++#define IP6VERSION 6 ++#define IP6_HOP_LIMIT 255 ++ ++/* ++ * Structure of an internet header, naked of options. ++ */ ++struct ip6 { ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint32_t ip_v : 4, /* version */ ++ ip_tc_hi : 4, /* traffic class */ ++ ip_tc_lo : 4, ip_fl_hi : 4, /* flow label */ ++ ip_fl_lo : 16; ++#else ++ uint32_t ip_tc_hi : 4, ip_v : 4, ip_fl_hi : 4, ip_tc_lo : 4, ip_fl_lo : 16; ++#endif ++ uint16_t ip_pl; /* payload length */ ++ uint8_t ip_nh; /* next header */ ++ uint8_t ip_hl; /* hop limit */ ++ struct in6_addr ip_src, ip_dst; /* source and dest address */ ++}; ++ ++/* ++ * IPv6 pseudo-header used by upper-layer protocols ++ */ ++struct ip6_pseudohdr { ++ struct in6_addr ih_src; /* source internet address */ ++ struct in6_addr ih_dst; /* destination internet address */ ++ uint32_t ih_pl; /* upper-layer packet length */ ++ uint16_t ih_zero_hi; /* zero */ ++ uint8_t ih_zero_lo; /* zero */ ++ uint8_t ih_nh; /* next header */ ++}; ++ ++/* ++ * We don't want to mark these ip6 structs as packed as they are naturally ++ * correctly aligned; instead assert that there is no stray padding. ++ * If we marked the struct as packed then we would be unable to take ++ * the address of any of the fields in it. ++ */ ++G_STATIC_ASSERT(sizeof(struct ip6) == 40); ++G_STATIC_ASSERT(sizeof(struct ip6_pseudohdr) == 40); ++ ++#endif +diff --git a/slirp/src/ip6_icmp.c b/slirp/src/ip6_icmp.c +new file mode 100644 +index 0000000000..738b40f725 +--- /dev/null ++++ b/slirp/src/ip6_icmp.c +@@ -0,0 +1,444 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. ++ */ ++ ++#include "slirp.h" ++#include "ip6_icmp.h" ++ ++#define NDP_Interval \ ++ g_rand_int_range(slirp->grand, NDP_MinRtrAdvInterval, NDP_MaxRtrAdvInterval) ++ ++static void ra_timer_handler(void *opaque) ++{ ++ Slirp *slirp = opaque; ++ ++ slirp->cb->timer_mod(slirp->ra_timer, ++ slirp->cb->clock_get_ns(slirp->opaque) / SCALE_MS + ++ NDP_Interval, ++ slirp->opaque); ++ ndp_send_ra(slirp); ++} ++ ++void icmp6_init(Slirp *slirp) ++{ ++ if (!slirp->in6_enabled) { ++ return; ++ } ++ ++ slirp->ra_timer = ++ slirp->cb->timer_new(ra_timer_handler, slirp, slirp->opaque); ++ slirp->cb->timer_mod(slirp->ra_timer, ++ slirp->cb->clock_get_ns(slirp->opaque) / SCALE_MS + ++ NDP_Interval, ++ slirp->opaque); ++} ++ ++void icmp6_cleanup(Slirp *slirp) ++{ ++ if (!slirp->in6_enabled) { ++ return; ++ } ++ ++ slirp->cb->timer_free(slirp->ra_timer, slirp->opaque); ++} ++ ++static void icmp6_send_echoreply(struct mbuf *m, Slirp *slirp, struct ip6 *ip, ++ struct icmp6 *icmp) ++{ ++ struct mbuf *t = m_get(slirp); ++ t->m_len = sizeof(struct ip6) + ntohs(ip->ip_pl); ++ memcpy(t->m_data, m->m_data, t->m_len); ++ ++ /* IPv6 Packet */ ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ rip->ip_dst = ip->ip_src; ++ rip->ip_src = ip->ip_dst; ++ ++ /* ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ ricmp->icmp6_type = ICMP6_ECHO_REPLY; ++ ricmp->icmp6_cksum = 0; ++ ++ /* Checksum */ ++ t->m_data -= sizeof(struct ip6); ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++ ip6_output(NULL, t, 0); ++} ++ ++void icmp6_forward_error(struct mbuf *m, uint8_t type, uint8_t code, struct in6_addr *src) ++{ ++ Slirp *slirp = m->slirp; ++ struct mbuf *t; ++ struct ip6 *ip = mtod(m, struct ip6 *); ++ char addrstr[INET6_ADDRSTRLEN]; ++ ++ DEBUG_CALL("icmp6_send_error"); ++ DEBUG_ARG("type = %d, code = %d", type, code); ++ ++ if (IN6_IS_ADDR_MULTICAST(&ip->ip_src) || in6_zero(&ip->ip_src)) { ++ /* TODO icmp error? */ ++ return; ++ } ++ ++ t = m_get(slirp); ++ ++ /* IPv6 packet */ ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ rip->ip_src = *src; ++ rip->ip_dst = ip->ip_src; ++ inet_ntop(AF_INET6, &rip->ip_dst, addrstr, INET6_ADDRSTRLEN); ++ DEBUG_ARG("target = %s", addrstr); ++ ++ rip->ip_nh = IPPROTO_ICMPV6; ++ const int error_data_len = MIN( ++ m->m_len, slirp->if_mtu - (sizeof(struct ip6) + ICMP6_ERROR_MINLEN)); ++ rip->ip_pl = htons(ICMP6_ERROR_MINLEN + error_data_len); ++ t->m_len = sizeof(struct ip6) + ntohs(rip->ip_pl); ++ ++ /* ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ ricmp->icmp6_type = type; ++ ricmp->icmp6_code = code; ++ ricmp->icmp6_cksum = 0; ++ ++ switch (type) { ++ case ICMP6_UNREACH: ++ case ICMP6_TIMXCEED: ++ ricmp->icmp6_err.unused = 0; ++ break; ++ case ICMP6_TOOBIG: ++ ricmp->icmp6_err.mtu = htonl(slirp->if_mtu); ++ break; ++ case ICMP6_PARAMPROB: ++ /* TODO: Handle this case */ ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ t->m_data += ICMP6_ERROR_MINLEN; ++ memcpy(t->m_data, m->m_data, error_data_len); ++ ++ /* Checksum */ ++ t->m_data -= ICMP6_ERROR_MINLEN; ++ t->m_data -= sizeof(struct ip6); ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++ ip6_output(NULL, t, 0); ++} ++ ++void icmp6_send_error(struct mbuf *m, uint8_t type, uint8_t code) ++{ ++ struct in6_addr src = LINKLOCAL_ADDR; ++ icmp6_forward_error(m, type, code, &src); ++} ++ ++/* ++ * Send NDP Router Advertisement ++ */ ++void ndp_send_ra(Slirp *slirp) ++{ ++ DEBUG_CALL("ndp_send_ra"); ++ ++ /* Build IPv6 packet */ ++ struct mbuf *t = m_get(slirp); ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ size_t pl_size = 0; ++ struct in6_addr addr; ++ uint32_t scope_id; ++ ++ rip->ip_src = (struct in6_addr)LINKLOCAL_ADDR; ++ rip->ip_dst = (struct in6_addr)ALLNODES_MULTICAST; ++ rip->ip_nh = IPPROTO_ICMPV6; ++ ++ /* Build ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ ricmp->icmp6_type = ICMP6_NDP_RA; ++ ricmp->icmp6_code = 0; ++ ricmp->icmp6_cksum = 0; ++ ++ /* NDP */ ++ ricmp->icmp6_nra.chl = NDP_AdvCurHopLimit; ++ ricmp->icmp6_nra.M = NDP_AdvManagedFlag; ++ ricmp->icmp6_nra.O = NDP_AdvOtherConfigFlag; ++ ricmp->icmp6_nra.reserved = 0; ++ ricmp->icmp6_nra.lifetime = htons(NDP_AdvDefaultLifetime); ++ ricmp->icmp6_nra.reach_time = htonl(NDP_AdvReachableTime); ++ ricmp->icmp6_nra.retrans_time = htonl(NDP_AdvRetransTime); ++ t->m_data += ICMP6_NDP_RA_MINLEN; ++ pl_size += ICMP6_NDP_RA_MINLEN; ++ ++ /* Source link-layer address (NDP option) */ ++ struct ndpopt *opt = mtod(t, struct ndpopt *); ++ opt->ndpopt_type = NDPOPT_LINKLAYER_SOURCE; ++ opt->ndpopt_len = NDPOPT_LINKLAYER_LEN / 8; ++ in6_compute_ethaddr(rip->ip_src, opt->ndpopt_linklayer); ++ t->m_data += NDPOPT_LINKLAYER_LEN; ++ pl_size += NDPOPT_LINKLAYER_LEN; ++ ++ /* Prefix information (NDP option) */ ++ struct ndpopt *opt2 = mtod(t, struct ndpopt *); ++ opt2->ndpopt_type = NDPOPT_PREFIX_INFO; ++ opt2->ndpopt_len = NDPOPT_PREFIXINFO_LEN / 8; ++ opt2->ndpopt_prefixinfo.prefix_length = slirp->vprefix_len; ++ opt2->ndpopt_prefixinfo.L = 1; ++ opt2->ndpopt_prefixinfo.A = 1; ++ opt2->ndpopt_prefixinfo.reserved1 = 0; ++ opt2->ndpopt_prefixinfo.valid_lt = htonl(NDP_AdvValidLifetime); ++ opt2->ndpopt_prefixinfo.pref_lt = htonl(NDP_AdvPrefLifetime); ++ opt2->ndpopt_prefixinfo.reserved2 = 0; ++ opt2->ndpopt_prefixinfo.prefix = slirp->vprefix_addr6; ++ t->m_data += NDPOPT_PREFIXINFO_LEN; ++ pl_size += NDPOPT_PREFIXINFO_LEN; ++ ++ /* Prefix information (NDP option) */ ++ if (get_dns6_addr(&addr, &scope_id) >= 0) { ++ /* Host system does have an IPv6 DNS server, announce our proxy. */ ++ struct ndpopt *opt3 = mtod(t, struct ndpopt *); ++ opt3->ndpopt_type = NDPOPT_RDNSS; ++ opt3->ndpopt_len = NDPOPT_RDNSS_LEN / 8; ++ opt3->ndpopt_rdnss.reserved = 0; ++ opt3->ndpopt_rdnss.lifetime = htonl(2 * NDP_MaxRtrAdvInterval); ++ opt3->ndpopt_rdnss.addr = slirp->vnameserver_addr6; ++ t->m_data += NDPOPT_RDNSS_LEN; ++ pl_size += NDPOPT_RDNSS_LEN; ++ } ++ ++ rip->ip_pl = htons(pl_size); ++ t->m_data -= sizeof(struct ip6) + pl_size; ++ t->m_len = sizeof(struct ip6) + pl_size; ++ ++ /* ICMPv6 Checksum */ ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++ ip6_output(NULL, t, 0); ++} ++ ++/* ++ * Send NDP Neighbor Solitication ++ */ ++void ndp_send_ns(Slirp *slirp, struct in6_addr addr) ++{ ++ char addrstr[INET6_ADDRSTRLEN]; ++ ++ inet_ntop(AF_INET6, &addr, addrstr, INET6_ADDRSTRLEN); ++ ++ DEBUG_CALL("ndp_send_ns"); ++ DEBUG_ARG("target = %s", addrstr); ++ ++ /* Build IPv6 packet */ ++ struct mbuf *t = m_get(slirp); ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ rip->ip_src = slirp->vhost_addr6; ++ rip->ip_dst = (struct in6_addr)SOLICITED_NODE_PREFIX; ++ memcpy(&rip->ip_dst.s6_addr[13], &addr.s6_addr[13], 3); ++ rip->ip_nh = IPPROTO_ICMPV6; ++ rip->ip_pl = htons(ICMP6_NDP_NS_MINLEN + NDPOPT_LINKLAYER_LEN); ++ t->m_len = sizeof(struct ip6) + ntohs(rip->ip_pl); ++ ++ /* Build ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ ricmp->icmp6_type = ICMP6_NDP_NS; ++ ricmp->icmp6_code = 0; ++ ricmp->icmp6_cksum = 0; ++ ++ /* NDP */ ++ ricmp->icmp6_nns.reserved = 0; ++ ricmp->icmp6_nns.target = addr; ++ ++ /* Build NDP option */ ++ t->m_data += ICMP6_NDP_NS_MINLEN; ++ struct ndpopt *opt = mtod(t, struct ndpopt *); ++ opt->ndpopt_type = NDPOPT_LINKLAYER_SOURCE; ++ opt->ndpopt_len = NDPOPT_LINKLAYER_LEN / 8; ++ in6_compute_ethaddr(slirp->vhost_addr6, opt->ndpopt_linklayer); ++ ++ /* ICMPv6 Checksum */ ++ t->m_data -= ICMP6_NDP_NA_MINLEN; ++ t->m_data -= sizeof(struct ip6); ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++ ip6_output(NULL, t, 1); ++} ++ ++/* ++ * Send NDP Neighbor Advertisement ++ */ ++static void ndp_send_na(Slirp *slirp, struct ip6 *ip, struct icmp6 *icmp) ++{ ++ /* Build IPv6 packet */ ++ struct mbuf *t = m_get(slirp); ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ rip->ip_src = icmp->icmp6_nns.target; ++ if (in6_zero(&ip->ip_src)) { ++ rip->ip_dst = (struct in6_addr)ALLNODES_MULTICAST; ++ } else { ++ rip->ip_dst = ip->ip_src; ++ } ++ rip->ip_nh = IPPROTO_ICMPV6; ++ rip->ip_pl = htons(ICMP6_NDP_NA_MINLEN + NDPOPT_LINKLAYER_LEN); ++ t->m_len = sizeof(struct ip6) + ntohs(rip->ip_pl); ++ ++ /* Build ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ ricmp->icmp6_type = ICMP6_NDP_NA; ++ ricmp->icmp6_code = 0; ++ ricmp->icmp6_cksum = 0; ++ ++ /* NDP */ ++ ricmp->icmp6_nna.R = NDP_IsRouter; ++ ricmp->icmp6_nna.S = !IN6_IS_ADDR_MULTICAST(&rip->ip_dst); ++ ricmp->icmp6_nna.O = 1; ++ ricmp->icmp6_nna.reserved_hi = 0; ++ ricmp->icmp6_nna.reserved_lo = 0; ++ ricmp->icmp6_nna.target = icmp->icmp6_nns.target; ++ ++ /* Build NDP option */ ++ t->m_data += ICMP6_NDP_NA_MINLEN; ++ struct ndpopt *opt = mtod(t, struct ndpopt *); ++ opt->ndpopt_type = NDPOPT_LINKLAYER_TARGET; ++ opt->ndpopt_len = NDPOPT_LINKLAYER_LEN / 8; ++ in6_compute_ethaddr(ricmp->icmp6_nna.target, opt->ndpopt_linklayer); ++ ++ /* ICMPv6 Checksum */ ++ t->m_data -= ICMP6_NDP_NA_MINLEN; ++ t->m_data -= sizeof(struct ip6); ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++ ip6_output(NULL, t, 0); ++} ++ ++/* ++ * Process a NDP message ++ */ ++static void ndp_input(struct mbuf *m, Slirp *slirp, struct ip6 *ip, ++ struct icmp6 *icmp) ++{ ++ g_assert(M_ROOMBEFORE(m) >= ETH_HLEN); ++ ++ m->m_len += ETH_HLEN; ++ m->m_data -= ETH_HLEN; ++ struct ethhdr *eth = mtod(m, struct ethhdr *); ++ m->m_len -= ETH_HLEN; ++ m->m_data += ETH_HLEN; ++ ++ switch (icmp->icmp6_type) { ++ case ICMP6_NDP_RS: ++ DEBUG_CALL(" type = Router Solicitation"); ++ if (ip->ip_hl == 255 && icmp->icmp6_code == 0 && ++ ntohs(ip->ip_pl) >= ICMP6_NDP_RS_MINLEN) { ++ /* Gratuitous NDP */ ++ ndp_table_add(slirp, ip->ip_src, eth->h_source); ++ ++ ndp_send_ra(slirp); ++ } ++ break; ++ ++ case ICMP6_NDP_RA: ++ DEBUG_CALL(" type = Router Advertisement"); ++ slirp->cb->guest_error("Warning: guest sent NDP RA, but shouldn't", ++ slirp->opaque); ++ break; ++ ++ case ICMP6_NDP_NS: ++ DEBUG_CALL(" type = Neighbor Solicitation"); ++ if (ip->ip_hl == 255 && icmp->icmp6_code == 0 && ++ !IN6_IS_ADDR_MULTICAST(&icmp->icmp6_nns.target) && ++ ntohs(ip->ip_pl) >= ICMP6_NDP_NS_MINLEN && ++ (!in6_zero(&ip->ip_src) || ++ in6_solicitednode_multicast(&ip->ip_dst))) { ++ if (in6_equal_host(&icmp->icmp6_nns.target)) { ++ /* Gratuitous NDP */ ++ ndp_table_add(slirp, ip->ip_src, eth->h_source); ++ ndp_send_na(slirp, ip, icmp); ++ } ++ } ++ break; ++ ++ case ICMP6_NDP_NA: ++ DEBUG_CALL(" type = Neighbor Advertisement"); ++ if (ip->ip_hl == 255 && icmp->icmp6_code == 0 && ++ ntohs(ip->ip_pl) >= ICMP6_NDP_NA_MINLEN && ++ !IN6_IS_ADDR_MULTICAST(&icmp->icmp6_nna.target) && ++ (!IN6_IS_ADDR_MULTICAST(&ip->ip_dst) || icmp->icmp6_nna.S == 0)) { ++ ndp_table_add(slirp, ip->ip_src, eth->h_source); ++ } ++ break; ++ ++ case ICMP6_NDP_REDIRECT: ++ DEBUG_CALL(" type = Redirect"); ++ slirp->cb->guest_error( ++ "Warning: guest sent NDP REDIRECT, but shouldn't", slirp->opaque); ++ break; ++ } ++} ++ ++/* ++ * Process a received ICMPv6 message. ++ */ ++void icmp6_input(struct mbuf *m) ++{ ++ Slirp *slirp = m->slirp; ++ /* NDP reads the ethernet header for gratuitous NDP */ ++ M_DUP_DEBUG(slirp, m, 1, ETH_HLEN); ++ ++ struct icmp6 *icmp; ++ struct ip6 *ip = mtod(m, struct ip6 *); ++ int hlen = sizeof(struct ip6); ++ ++ DEBUG_CALL("icmp6_input"); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("m_len = %d", m->m_len); ++ ++ if (ntohs(ip->ip_pl) < ICMP6_MINLEN) { ++ goto end; ++ } ++ ++ if (ip6_cksum(m)) { ++ goto end; ++ } ++ ++ m->m_len -= hlen; ++ m->m_data += hlen; ++ icmp = mtod(m, struct icmp6 *); ++ m->m_len += hlen; ++ m->m_data -= hlen; ++ ++ DEBUG_ARG("icmp6_type = %d", icmp->icmp6_type); ++ switch (icmp->icmp6_type) { ++ case ICMP6_ECHO_REQUEST: ++ if (in6_equal_host(&ip->ip_dst)) { ++ icmp6_send_echoreply(m, slirp, ip, icmp); ++ } else { ++ /* TODO */ ++ g_critical("external icmpv6 not supported yet"); ++ } ++ break; ++ ++ case ICMP6_NDP_RS: ++ case ICMP6_NDP_RA: ++ case ICMP6_NDP_NS: ++ case ICMP6_NDP_NA: ++ case ICMP6_NDP_REDIRECT: ++ ndp_input(m, slirp, ip, icmp); ++ break; ++ ++ case ICMP6_UNREACH: ++ case ICMP6_TOOBIG: ++ case ICMP6_TIMXCEED: ++ case ICMP6_PARAMPROB: ++ /* XXX? report error? close socket? */ ++ default: ++ break; ++ } ++ ++end: ++ m_free(m); ++} +diff --git a/slirp/src/ip6_icmp.h b/slirp/src/ip6_icmp.h +new file mode 100644 +index 0000000000..9070999cfc +--- /dev/null ++++ b/slirp/src/ip6_icmp.h +@@ -0,0 +1,220 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. ++ */ ++ ++#ifndef SLIRP_IP6_ICMP_H ++#define SLIRP_IP6_ICMP_H ++ ++/* ++ * Interface Control Message Protocol version 6 Definitions. ++ * Per RFC 4443, March 2006. ++ * ++ * Network Discover Protocol Definitions. ++ * Per RFC 4861, September 2007. ++ */ ++ ++struct icmp6_echo { /* Echo Messages */ ++ uint16_t id; ++ uint16_t seq_num; ++}; ++ ++union icmp6_error_body { ++ uint32_t unused; ++ uint32_t pointer; ++ uint32_t mtu; ++}; ++ ++/* ++ * NDP Messages ++ */ ++struct ndp_rs { /* Router Solicitation Message */ ++ uint32_t reserved; ++}; ++ ++struct ndp_ra { /* Router Advertisement Message */ ++ uint8_t chl; /* Cur Hop Limit */ ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint8_t M : 1, O : 1, reserved : 6; ++#else ++ uint8_t reserved : 6, O : 1, M : 1; ++#endif ++ uint16_t lifetime; /* Router Lifetime */ ++ uint32_t reach_time; /* Reachable Time */ ++ uint32_t retrans_time; /* Retrans Timer */ ++}; ++ ++G_STATIC_ASSERT(sizeof(struct ndp_ra) == 12); ++ ++struct ndp_ns { /* Neighbor Solicitation Message */ ++ uint32_t reserved; ++ struct in6_addr target; /* Target Address */ ++}; ++ ++G_STATIC_ASSERT(sizeof(struct ndp_ns) == 20); ++ ++struct ndp_na { /* Neighbor Advertisement Message */ ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint32_t R : 1, /* Router Flag */ ++ S : 1, /* Solicited Flag */ ++ O : 1, /* Override Flag */ ++ reserved_hi : 5, reserved_lo : 24; ++#else ++ uint32_t reserved_hi : 5, O : 1, S : 1, R : 1, reserved_lo : 24; ++#endif ++ struct in6_addr target; /* Target Address */ ++}; ++ ++G_STATIC_ASSERT(sizeof(struct ndp_na) == 20); ++ ++struct ndp_redirect { ++ uint32_t reserved; ++ struct in6_addr target; /* Target Address */ ++ struct in6_addr dest; /* Destination Address */ ++}; ++ ++G_STATIC_ASSERT(sizeof(struct ndp_redirect) == 36); ++ ++/* ++ * Structure of an icmpv6 header. ++ */ ++struct icmp6 { ++ uint8_t icmp6_type; /* type of message, see below */ ++ uint8_t icmp6_code; /* type sub code */ ++ uint16_t icmp6_cksum; /* ones complement cksum of struct */ ++ union { ++ union icmp6_error_body error_body; ++ struct icmp6_echo echo; ++ struct ndp_rs ndp_rs; ++ struct ndp_ra ndp_ra; ++ struct ndp_ns ndp_ns; ++ struct ndp_na ndp_na; ++ struct ndp_redirect ndp_redirect; ++ } icmp6_body; ++#define icmp6_err icmp6_body.error_body ++#define icmp6_echo icmp6_body.echo ++#define icmp6_nrs icmp6_body.ndp_rs ++#define icmp6_nra icmp6_body.ndp_ra ++#define icmp6_nns icmp6_body.ndp_ns ++#define icmp6_nna icmp6_body.ndp_na ++#define icmp6_redirect icmp6_body.ndp_redirect ++}; ++ ++G_STATIC_ASSERT(sizeof(struct icmp6) == 40); ++ ++#define ICMP6_MINLEN 4 ++#define ICMP6_ERROR_MINLEN 8 ++#define ICMP6_ECHO_MINLEN 8 ++#define ICMP6_NDP_RS_MINLEN 8 ++#define ICMP6_NDP_RA_MINLEN 16 ++#define ICMP6_NDP_NS_MINLEN 24 ++#define ICMP6_NDP_NA_MINLEN 24 ++#define ICMP6_NDP_REDIRECT_MINLEN 40 ++ ++/* ++ * NDP Options ++ */ ++struct ndpopt { ++ uint8_t ndpopt_type; /* Option type */ ++ uint8_t ndpopt_len; /* /!\ In units of 8 octets */ ++ union { ++ unsigned char linklayer_addr[6]; /* Source/Target Link-layer */ ++#define ndpopt_linklayer ndpopt_body.linklayer_addr ++ struct prefixinfo { /* Prefix Information */ ++ uint8_t prefix_length; ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint8_t L : 1, A : 1, reserved1 : 6; ++#else ++ uint8_t reserved1 : 6, A : 1, L : 1; ++#endif ++ uint32_t valid_lt; /* Valid Lifetime */ ++ uint32_t pref_lt; /* Preferred Lifetime */ ++ uint32_t reserved2; ++ struct in6_addr prefix; ++ } SLIRP_PACKED prefixinfo; ++#define ndpopt_prefixinfo ndpopt_body.prefixinfo ++ struct rdnss { ++ uint16_t reserved; ++ uint32_t lifetime; ++ struct in6_addr addr; ++ } SLIRP_PACKED rdnss; ++#define ndpopt_rdnss ndpopt_body.rdnss ++ } ndpopt_body; ++} SLIRP_PACKED; ++ ++/* NDP options type */ ++#define NDPOPT_LINKLAYER_SOURCE 1 /* Source Link-Layer Address */ ++#define NDPOPT_LINKLAYER_TARGET 2 /* Target Link-Layer Address */ ++#define NDPOPT_PREFIX_INFO 3 /* Prefix Information */ ++#define NDPOPT_RDNSS 25 /* Recursive DNS Server Address */ ++ ++/* NDP options size, in octets. */ ++#define NDPOPT_LINKLAYER_LEN 8 ++#define NDPOPT_PREFIXINFO_LEN 32 ++#define NDPOPT_RDNSS_LEN 24 ++ ++/* ++ * Definition of type and code field values. ++ * Per https://www.iana.org/assignments/icmpv6-parameters/icmpv6-parameters.xml ++ * Last Updated 2012-11-12 ++ */ ++ ++/* Errors */ ++#define ICMP6_UNREACH 1 /* Destination Unreachable */ ++#define ICMP6_UNREACH_NO_ROUTE 0 /* no route to dest */ ++#define ICMP6_UNREACH_DEST_PROHIB 1 /* com with dest prohibited */ ++#define ICMP6_UNREACH_SCOPE 2 /* beyond scope of src addr */ ++#define ICMP6_UNREACH_ADDRESS 3 /* address unreachable */ ++#define ICMP6_UNREACH_PORT 4 /* port unreachable */ ++#define ICMP6_UNREACH_SRC_FAIL 5 /* src addr failed */ ++#define ICMP6_UNREACH_REJECT_ROUTE 6 /* reject route to dest */ ++#define ICMP6_UNREACH_SRC_HDR_ERROR 7 /* error in src routing header */ ++#define ICMP6_TOOBIG 2 /* Packet Too Big */ ++#define ICMP6_TIMXCEED 3 /* Time Exceeded */ ++#define ICMP6_TIMXCEED_INTRANS 0 /* hop limit exceeded in transit */ ++#define ICMP6_TIMXCEED_REASS 1 /* ttl=0 in reass */ ++#define ICMP6_PARAMPROB 4 /* Parameter Problem */ ++#define ICMP6_PARAMPROB_HDR_FIELD 0 /* err header field */ ++#define ICMP6_PARAMPROB_NXTHDR_TYPE 1 /* unrecognized Next Header type */ ++#define ICMP6_PARAMPROB_IPV6_OPT 2 /* unrecognized IPv6 option */ ++ ++/* Informational Messages */ ++#define ICMP6_ECHO_REQUEST 128 /* Echo Request */ ++#define ICMP6_ECHO_REPLY 129 /* Echo Reply */ ++#define ICMP6_NDP_RS 133 /* Router Solicitation (NDP) */ ++#define ICMP6_NDP_RA 134 /* Router Advertisement (NDP) */ ++#define ICMP6_NDP_NS 135 /* Neighbor Solicitation (NDP) */ ++#define ICMP6_NDP_NA 136 /* Neighbor Advertisement (NDP) */ ++#define ICMP6_NDP_REDIRECT 137 /* Redirect Message (NDP) */ ++ ++/* ++ * Router Configuration Variables (rfc4861#section-6) ++ */ ++#define NDP_IsRouter 1 ++#define NDP_AdvSendAdvertisements 1 ++#define NDP_MaxRtrAdvInterval 600000 ++#define NDP_MinRtrAdvInterval \ ++ ((NDP_MaxRtrAdvInterval >= 9) ? NDP_MaxRtrAdvInterval / 3 : \ ++ NDP_MaxRtrAdvInterval) ++#define NDP_AdvManagedFlag 0 ++#define NDP_AdvOtherConfigFlag 0 ++#define NDP_AdvLinkMTU 0 ++#define NDP_AdvReachableTime 0 ++#define NDP_AdvRetransTime 0 ++#define NDP_AdvCurHopLimit 64 ++#define NDP_AdvDefaultLifetime ((3 * NDP_MaxRtrAdvInterval) / 1000) ++#define NDP_AdvValidLifetime 86400 ++#define NDP_AdvOnLinkFlag 1 ++#define NDP_AdvPrefLifetime 14400 ++#define NDP_AdvAutonomousFlag 1 ++ ++void icmp6_init(Slirp *slirp); ++void icmp6_cleanup(Slirp *slirp); ++void icmp6_input(struct mbuf *); ++void icmp6_forward_error(struct mbuf *m, uint8_t type, uint8_t code, struct in6_addr *src); ++void icmp6_send_error(struct mbuf *m, uint8_t type, uint8_t code); ++void ndp_send_ra(Slirp *slirp); ++void ndp_send_ns(Slirp *slirp, struct in6_addr addr); ++ ++#endif +diff --git a/slirp/src/ip6_input.c b/slirp/src/ip6_input.c +new file mode 100644 +index 0000000000..b3d98653df +--- /dev/null ++++ b/slirp/src/ip6_input.c +@@ -0,0 +1,88 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. ++ */ ++ ++#include "slirp.h" ++#include "ip6_icmp.h" ++ ++/* ++ * IP initialization: fill in IP protocol switch table. ++ * All protocols not implemented in kernel go to raw IP protocol handler. ++ */ ++void ip6_init(Slirp *slirp) ++{ ++ icmp6_init(slirp); ++} ++ ++void ip6_cleanup(Slirp *slirp) ++{ ++ icmp6_cleanup(slirp); ++} ++ ++void ip6_input(struct mbuf *m) ++{ ++ Slirp *slirp = m->slirp; ++ /* NDP reads the ethernet header for gratuitous NDP */ ++ M_DUP_DEBUG(slirp, m, 1, TCPIPHDR_DELTA + 2 + ETH_HLEN); ++ ++ struct ip6 *ip6; ++ ++ if (!slirp->in6_enabled) { ++ goto bad; ++ } ++ ++ DEBUG_CALL("ip6_input"); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("m_len = %d", m->m_len); ++ ++ if (m->m_len < sizeof(struct ip6)) { ++ goto bad; ++ } ++ ++ ip6 = mtod(m, struct ip6 *); ++ ++ if (ip6->ip_v != IP6VERSION) { ++ goto bad; ++ } ++ ++ if (ntohs(ip6->ip_pl) + sizeof(struct ip6) > slirp->if_mtu) { ++ icmp6_send_error(m, ICMP6_TOOBIG, 0); ++ goto bad; ++ } ++ ++ // Check if the message size is big enough to hold what's ++ // set in the payload length header. If not this is an invalid ++ // packet ++ if (m->m_len < ntohs(ip6->ip_pl) + sizeof(struct ip6)) { ++ goto bad; ++ } ++ ++ /* check ip_ttl for a correct ICMP reply */ ++ if (ip6->ip_hl == 0) { ++ icmp6_send_error(m, ICMP6_TIMXCEED, ICMP6_TIMXCEED_INTRANS); ++ goto bad; ++ } ++ ++ /* ++ * Switch out to protocol's input routine. ++ */ ++ switch (ip6->ip_nh) { ++ case IPPROTO_TCP: ++ NTOHS(ip6->ip_pl); ++ tcp_input(m, sizeof(struct ip6), (struct socket *)NULL, AF_INET6); ++ break; ++ case IPPROTO_UDP: ++ udp6_input(m); ++ break; ++ case IPPROTO_ICMPV6: ++ icmp6_input(m); ++ break; ++ default: ++ m_free(m); ++ } ++ return; ++bad: ++ m_free(m); ++} +diff --git a/slirp/src/ip6_output.c b/slirp/src/ip6_output.c +new file mode 100644 +index 0000000000..834f1c0a32 +--- /dev/null ++++ b/slirp/src/ip6_output.c +@@ -0,0 +1,45 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. ++ */ ++ ++#include "slirp.h" ++ ++/* Number of packets queued before we start sending ++ * (to prevent allocing too many mbufs) */ ++#define IF6_THRESH 10 ++ ++/* ++ * IPv6 output. The packet in mbuf chain m contains a IP header ++ */ ++int ip6_output(struct socket *so, struct mbuf *m, int fast) ++{ ++ Slirp *slirp = m->slirp; ++ M_DUP_DEBUG(slirp, m, 0, 0); ++ ++ struct ip6 *ip = mtod(m, struct ip6 *); ++ ++ DEBUG_CALL("ip6_output"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ ++ /* Fill IPv6 header */ ++ ip->ip_v = IP6VERSION; ++ ip->ip_hl = IP6_HOP_LIMIT; ++ ip->ip_tc_hi = 0; ++ ip->ip_tc_lo = 0; ++ ip->ip_fl_hi = 0; ++ ip->ip_fl_lo = 0; ++ ++ if (fast) { ++ /* We cannot fast-send non-multicast, we'd need a NDP NS */ ++ assert(IN6_IS_ADDR_MULTICAST(&ip->ip_dst)); ++ if_encap(m->slirp, m); ++ m_free(m); ++ } else { ++ if_output(so, m); ++ } ++ ++ return 0; ++} +diff --git a/slirp/src/ip_icmp.c b/slirp/src/ip_icmp.c +new file mode 100644 +index 0000000000..9fba653a46 +--- /dev/null ++++ b/slirp/src/ip_icmp.c +@@ -0,0 +1,524 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 ++ * ip_icmp.c,v 1.7 1995/05/30 08:09:42 rgrimes Exp ++ */ ++ ++#include "slirp.h" ++#include "ip_icmp.h" ++ ++#ifndef WITH_ICMP_ERROR_MSG ++#define WITH_ICMP_ERROR_MSG 0 ++#endif ++ ++/* The message sent when emulating PING */ ++/* Be nice and tell them it's just a pseudo-ping packet */ ++static const char icmp_ping_msg[] = ++ "This is a pseudo-PING packet used by Slirp to emulate ICMP ECHO-REQUEST " ++ "packets.\n"; ++ ++/* list of actions for icmp_send_error() on RX of an icmp message */ ++static const int icmp_flush[19] = { ++ /* ECHO REPLY (0) */ 0, ++ 1, ++ 1, ++ /* DEST UNREACH (3) */ 1, ++ /* SOURCE QUENCH (4)*/ 1, ++ /* REDIRECT (5) */ 1, ++ 1, ++ 1, ++ /* ECHO (8) */ 0, ++ /* ROUTERADVERT (9) */ 1, ++ /* ROUTERSOLICIT (10) */ 1, ++ /* TIME EXCEEDED (11) */ 1, ++ /* PARAMETER PROBLEM (12) */ 1, ++ /* TIMESTAMP (13) */ 0, ++ /* TIMESTAMP REPLY (14) */ 0, ++ /* INFO (15) */ 0, ++ /* INFO REPLY (16) */ 0, ++ /* ADDR MASK (17) */ 0, ++ /* ADDR MASK REPLY (18) */ 0 ++}; ++ ++void icmp_init(Slirp *slirp) ++{ ++ slirp->icmp.so_next = slirp->icmp.so_prev = &slirp->icmp; ++ slirp->icmp_last_so = &slirp->icmp; ++} ++ ++void icmp_cleanup(Slirp *slirp) ++{ ++ struct socket *so, *so_next; ++ ++ for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so_next) { ++ so_next = so->so_next; ++ icmp_detach(so); ++ } ++} ++ ++static int icmp_send(struct socket *so, struct mbuf *m, int hlen) ++{ ++ Slirp *slirp = m->slirp; ++ M_DUP_DEBUG(slirp, m, 0, 0); ++ ++ struct ip *ip = mtod(m, struct ip *); ++ struct sockaddr_in addr; ++ ++ so->s = slirp_socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP); ++ if (so->s == -1) { ++ return -1; ++ } ++ so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); ++ ++ if (slirp_bind_outbound(so, AF_INET) != 0) { ++ // bind failed - close socket ++ closesocket(so->s); ++ so->s = -1; ++ return -1; ++ } ++ ++ so->so_m = m; ++ so->so_faddr = ip->ip_dst; ++ so->so_laddr = ip->ip_src; ++ so->so_iptos = ip->ip_tos; ++ so->so_type = IPPROTO_ICMP; ++ so->so_state = SS_ISFCONNECTED; ++ so->so_expire = curtime + SO_EXPIRE; ++ ++ addr.sin_family = AF_INET; ++ addr.sin_addr = so->so_faddr; ++ ++ insque(so, &so->slirp->icmp); ++ ++ if (sendto(so->s, m->m_data + hlen, m->m_len - hlen, 0, ++ (struct sockaddr *)&addr, sizeof(addr)) == -1) { ++ DEBUG_MISC("icmp_input icmp sendto tx errno = %d-%s", errno, ++ strerror(errno)); ++ icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, strerror(errno)); ++ icmp_detach(so); ++ } ++ ++ return 0; ++} ++ ++void icmp_detach(struct socket *so) ++{ ++ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); ++ closesocket(so->s); ++ sofree(so); ++} ++ ++/* ++ * Process a received ICMP message. ++ */ ++void icmp_input(struct mbuf *m, int hlen) ++{ ++ Slirp *slirp = m->slirp; ++ M_DUP_DEBUG(slirp, m, 0, 0); ++ ++ register struct icmp *icp; ++ register struct ip *ip = mtod(m, struct ip *); ++ int icmplen = ip->ip_len; ++ ++ DEBUG_CALL("icmp_input"); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("m_len = %d", m->m_len); ++ ++ /* ++ * Locate icmp structure in mbuf, and check ++ * that its not corrupted and of at least minimum length. ++ */ ++ if (icmplen < ICMP_MINLEN) { /* min 8 bytes payload */ ++ freeit: ++ m_free(m); ++ goto end_error; ++ } ++ ++ m->m_len -= hlen; ++ m->m_data += hlen; ++ icp = mtod(m, struct icmp *); ++ if (cksum(m, icmplen)) { ++ goto freeit; ++ } ++ m->m_len += hlen; ++ m->m_data -= hlen; ++ ++ DEBUG_ARG("icmp_type = %d", icp->icmp_type); ++ switch (icp->icmp_type) { ++ case ICMP_ECHO: ++ ip->ip_len += hlen; /* since ip_input subtracts this */ ++ if (ip->ip_dst.s_addr == slirp->vhost_addr.s_addr || ++ ip->ip_dst.s_addr == slirp->vnameserver_addr.s_addr) { ++ icmp_reflect(m); ++ } else if (slirp->restricted) { ++ goto freeit; ++ } else { ++ struct socket *so; ++ struct sockaddr_storage addr; ++ int ttl; ++ ++ so = socreate(slirp); ++ if (icmp_send(so, m, hlen) == 0) { ++ /* We could send this as ICMP, good! */ ++ return; ++ } ++ ++ /* We could not send this as ICMP, try to send it on UDP echo ++ * service (7), wishfully hoping that it is open there. */ ++ ++ if (udp_attach(so, AF_INET) == -1) { ++ DEBUG_MISC("icmp_input udp_attach errno = %d-%s", errno, ++ strerror(errno)); ++ sofree(so); ++ m_free(m); ++ goto end_error; ++ } ++ so->so_m = m; ++ so->so_ffamily = AF_INET; ++ so->so_faddr = ip->ip_dst; ++ so->so_fport = htons(7); ++ so->so_lfamily = AF_INET; ++ so->so_laddr = ip->ip_src; ++ so->so_lport = htons(9); ++ so->so_iptos = ip->ip_tos; ++ so->so_type = IPPROTO_ICMP; ++ so->so_state = SS_ISFCONNECTED; ++ ++ /* Send the packet */ ++ addr = so->fhost.ss; ++ if (sotranslate_out(so, &addr) < 0) { ++ icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, ++ strerror(errno)); ++ udp_detach(so); ++ return; ++ } ++ ++ /* ++ * Check for TTL ++ */ ++ ttl = ip->ip_ttl-1; ++ if (ttl <= 0) { ++ DEBUG_MISC("udp ttl exceeded"); ++ icmp_send_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, ++ NULL); ++ udp_detach(so); ++ break; ++ } ++ setsockopt(so->s, IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)); ++ ++ if (sendto(so->s, icmp_ping_msg, strlen(icmp_ping_msg), 0, ++ (struct sockaddr *)&addr, sockaddr_size(&addr)) == -1) { ++ DEBUG_MISC("icmp_input udp sendto tx errno = %d-%s", errno, ++ strerror(errno)); ++ icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, ++ strerror(errno)); ++ udp_detach(so); ++ } ++ } /* if ip->ip_dst.s_addr == alias_addr.s_addr */ ++ break; ++ case ICMP_UNREACH: ++ /* XXX? report error? close socket? */ ++ case ICMP_TIMXCEED: ++ case ICMP_PARAMPROB: ++ case ICMP_SOURCEQUENCH: ++ case ICMP_TSTAMP: ++ case ICMP_MASKREQ: ++ case ICMP_REDIRECT: ++ m_free(m); ++ break; ++ ++ default: ++ m_free(m); ++ } /* switch */ ++ ++end_error: ++ /* m is m_free()'d xor put in a socket xor or given to ip_send */ ++ return; ++} ++ ++ ++/* ++ * Send an ICMP message in response to a situation ++ * ++ * RFC 1122: 3.2.2 MUST send at least the IP header and 8 bytes of header. ++ *MAY send more (we do). MUST NOT change this header information. MUST NOT reply ++ *to a multicast/broadcast IP address. MUST NOT reply to a multicast/broadcast ++ *MAC address. MUST reply to only the first fragment. ++ */ ++/* ++ * Send ICMP_UNREACH back to the source regarding msrc. ++ * mbuf *msrc is used as a template, but is NOT m_free()'d. ++ * It is reported as the bad ip packet. The header should ++ * be fully correct and in host byte order. ++ * ICMP fragmentation is illegal. All machines must accept 576 bytes in one ++ * packet. The maximum payload is 576-20(ip hdr)-8(icmp hdr)=548 ++ */ ++ ++#define ICMP_MAXDATALEN (IP_MSS - 28) ++void icmp_forward_error(struct mbuf *msrc, uint8_t type, uint8_t code, int minsize, ++ const char *message, struct in_addr *src) ++{ ++ unsigned hlen, shlen, s_ip_len; ++ register struct ip *ip; ++ register struct icmp *icp; ++ register struct mbuf *m; ++ ++ DEBUG_CALL("icmp_send_error"); ++ DEBUG_ARG("msrc = %p", msrc); ++ DEBUG_ARG("msrc_len = %d", msrc->m_len); ++ ++ if (type != ICMP_UNREACH && type != ICMP_TIMXCEED) ++ goto end_error; ++ ++ /* check msrc */ ++ if (!msrc) ++ goto end_error; ++ ip = mtod(msrc, struct ip *); ++ if (slirp_debug & DBG_MISC) { ++ char bufa[20], bufb[20]; ++ slirp_pstrcpy(bufa, sizeof(bufa), inet_ntoa(ip->ip_src)); ++ slirp_pstrcpy(bufb, sizeof(bufb), inet_ntoa(ip->ip_dst)); ++ DEBUG_MISC(" %.16s to %.16s", bufa, bufb); ++ } ++ if (ip->ip_off & IP_OFFMASK) ++ goto end_error; /* Only reply to fragment 0 */ ++ ++ /* Do not reply to source-only IPs */ ++ if ((ip->ip_src.s_addr & htonl(~(0xf << 28))) == 0) { ++ goto end_error; ++ } ++ ++ shlen = ip->ip_hl << 2; ++ s_ip_len = ip->ip_len; ++ if (ip->ip_p == IPPROTO_ICMP) { ++ icp = (struct icmp *)((char *)ip + shlen); ++ /* ++ * Assume any unknown ICMP type is an error. This isn't ++ * specified by the RFC, but think about it.. ++ */ ++ if (icp->icmp_type > 18 || icmp_flush[icp->icmp_type]) ++ goto end_error; ++ } ++ ++ /* make a copy */ ++ m = m_get(msrc->slirp); ++ if (!m) { ++ goto end_error; ++ } ++ ++ { ++ int new_m_size; ++ new_m_size = ++ sizeof(struct ip) + ICMP_MINLEN + msrc->m_len + ICMP_MAXDATALEN; ++ if (new_m_size > m->m_size) ++ m_inc(m, new_m_size); ++ } ++ memcpy(m->m_data, msrc->m_data, msrc->m_len); ++ m->m_len = msrc->m_len; /* copy msrc to m */ ++ ++ /* make the header of the reply packet */ ++ ip = mtod(m, struct ip *); ++ hlen = sizeof(struct ip); /* no options in reply */ ++ ++ /* fill in icmp */ ++ m->m_data += hlen; ++ m->m_len -= hlen; ++ ++ icp = mtod(m, struct icmp *); ++ ++ if (minsize) ++ s_ip_len = shlen + ICMP_MINLEN; /* return header+8b only */ ++ else if (s_ip_len > ICMP_MAXDATALEN) /* maximum size */ ++ s_ip_len = ICMP_MAXDATALEN; ++ ++ m->m_len = ICMP_MINLEN + s_ip_len; /* 8 bytes ICMP header */ ++ ++ /* min. size = 8+sizeof(struct ip)+8 */ ++ ++ icp->icmp_type = type; ++ icp->icmp_code = code; ++ icp->icmp_id = 0; ++ icp->icmp_seq = 0; ++ ++ memcpy(&icp->icmp_ip, msrc->m_data, s_ip_len); /* report the ip packet */ ++ HTONS(icp->icmp_ip.ip_len); ++ HTONS(icp->icmp_ip.ip_id); ++ HTONS(icp->icmp_ip.ip_off); ++ ++ if (message && WITH_ICMP_ERROR_MSG) { /* append message to ICMP packet */ ++ int message_len; ++ char *cpnt; ++ message_len = strlen(message); ++ if (message_len > ICMP_MAXDATALEN) ++ message_len = ICMP_MAXDATALEN; ++ cpnt = (char *)m->m_data + m->m_len; ++ memcpy(cpnt, message, message_len); ++ m->m_len += message_len; ++ } ++ ++ icp->icmp_cksum = 0; ++ icp->icmp_cksum = cksum(m, m->m_len); ++ ++ m->m_data -= hlen; ++ m->m_len += hlen; ++ ++ /* fill in ip */ ++ ip->ip_hl = hlen >> 2; ++ ip->ip_len = m->m_len; ++ ++ ip->ip_tos = ((ip->ip_tos & 0x1E) | 0xC0); /* high priority for errors */ ++ ++ ip->ip_ttl = MAXTTL; ++ ip->ip_p = IPPROTO_ICMP; ++ ip->ip_dst = ip->ip_src; /* ip addresses */ ++ ip->ip_src = *src; ++ ++ ip_output((struct socket *)NULL, m); ++ ++end_error: ++ return; ++} ++#undef ICMP_MAXDATALEN ++ ++void icmp_send_error(struct mbuf *msrc, uint8_t type, uint8_t code, int minsize, ++ const char *message) ++{ ++ icmp_forward_error(msrc, type, code, minsize, message, &msrc->slirp->vhost_addr); ++} ++ ++/* ++ * Reflect the ip packet back to the source ++ */ ++void icmp_reflect(struct mbuf *m) ++{ ++ register struct ip *ip = mtod(m, struct ip *); ++ int hlen = ip->ip_hl << 2; ++ int optlen = hlen - sizeof(struct ip); ++ register struct icmp *icp; ++ ++ /* ++ * Send an icmp packet back to the ip level, ++ * after supplying a checksum. ++ */ ++ m->m_data += hlen; ++ m->m_len -= hlen; ++ icp = mtod(m, struct icmp *); ++ ++ icp->icmp_type = ICMP_ECHOREPLY; ++ icp->icmp_cksum = 0; ++ icp->icmp_cksum = cksum(m, ip->ip_len - hlen); ++ ++ m->m_data -= hlen; ++ m->m_len += hlen; ++ ++ /* fill in ip */ ++ if (optlen > 0) { ++ /* ++ * Strip out original options by copying rest of first ++ * mbuf's data back, and adjust the IP length. ++ */ ++ memmove((char *)(ip + 1), (char *)ip + hlen, ++ (unsigned)(m->m_len - hlen)); ++ hlen -= optlen; ++ ip->ip_hl = hlen >> 2; ++ ip->ip_len -= optlen; ++ m->m_len -= optlen; ++ } ++ ++ ip->ip_ttl = MAXTTL; ++ { /* swap */ ++ struct in_addr icmp_dst; ++ icmp_dst = ip->ip_dst; ++ ip->ip_dst = ip->ip_src; ++ ip->ip_src = icmp_dst; ++ } ++ ++ ip_output((struct socket *)NULL, m); ++} ++ ++void icmp_receive(struct socket *so) ++{ ++ struct mbuf *m = so->so_m; ++ struct ip *ip = mtod(m, struct ip *); ++ int hlen = ip->ip_hl << 2; ++ uint8_t error_code; ++ struct icmp *icp; ++ int id, len; ++ ++ m->m_data += hlen; ++ m->m_len -= hlen; ++ icp = mtod(m, struct icmp *); ++ ++ id = icp->icmp_id; ++ len = recv(so->s, icp, M_ROOM(m), 0); ++ /* ++ * The behavior of reading SOCK_DGRAM+IPPROTO_ICMP sockets is inconsistent ++ * between host OSes. On Linux, only the ICMP header and payload is ++ * included. On macOS/Darwin, the socket acts like a raw socket and ++ * includes the IP header as well. On other BSDs, SOCK_DGRAM+IPPROTO_ICMP ++ * sockets aren't supported at all, so we treat them like raw sockets. It ++ * isn't possible to detect this difference at runtime, so we must use an ++ * #ifdef to determine if we need to remove the IP header. ++ */ ++#ifdef CONFIG_BSD ++ if (len >= sizeof(struct ip)) { ++ struct ip *inner_ip = mtod(m, struct ip *); ++ int inner_hlen = inner_ip->ip_hl << 2; ++ if (inner_hlen > len) { ++ len = -1; ++ errno = -EINVAL; ++ } else { ++ len -= inner_hlen; ++ memmove(icp, (unsigned char *)icp + inner_hlen, len); ++ } ++ } else { ++ len = -1; ++ errno = -EINVAL; ++ } ++#endif ++ icp->icmp_id = id; ++ ++ m->m_data -= hlen; ++ m->m_len += hlen; ++ ++ if (len == -1 || len == 0) { ++ if (errno == ENETUNREACH) { ++ error_code = ICMP_UNREACH_NET; ++ } else { ++ error_code = ICMP_UNREACH_HOST; ++ } ++ DEBUG_MISC(" udp icmp rx errno = %d-%s", errno, strerror(errno)); ++ icmp_send_error(so->so_m, ICMP_UNREACH, error_code, 0, strerror(errno)); ++ } else { ++ icmp_reflect(so->so_m); ++ so->so_m = NULL; /* Don't m_free() it again! */ ++ } ++ icmp_detach(so); ++} +diff --git a/slirp/src/ip_icmp.h b/slirp/src/ip_icmp.h +new file mode 100644 +index 0000000000..569a083061 +--- /dev/null ++++ b/slirp/src/ip_icmp.h +@@ -0,0 +1,168 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)ip_icmp.h 8.1 (Berkeley) 6/10/93 ++ * ip_icmp.h,v 1.4 1995/05/30 08:09:43 rgrimes Exp ++ */ ++ ++#ifndef NETINET_IP_ICMP_H ++#define NETINET_IP_ICMP_H ++ ++/* ++ * Interface Control Message Protocol Definitions. ++ * Per RFC 792, September 1981. ++ */ ++ ++typedef uint32_t n_time; ++ ++/* ++ * Structure of an icmp header. ++ */ ++struct icmp { ++ uint8_t icmp_type; /* type of message, see below */ ++ uint8_t icmp_code; /* type sub code */ ++ uint16_t icmp_cksum; /* ones complement cksum of struct */ ++ union { ++ uint8_t ih_pptr; /* ICMP_PARAMPROB */ ++ struct in_addr ih_gwaddr; /* ICMP_REDIRECT */ ++ struct ih_idseq { ++ uint16_t icd_id; ++ uint16_t icd_seq; ++ } ih_idseq; ++ int ih_void; ++ ++ /* ICMP_UNREACH_NEEDFRAG -- Path MTU Discovery (RFC1191) */ ++ struct ih_pmtu { ++ uint16_t ipm_void; ++ uint16_t ipm_nextmtu; ++ } ih_pmtu; ++ } icmp_hun; ++#define icmp_pptr icmp_hun.ih_pptr ++#define icmp_gwaddr icmp_hun.ih_gwaddr ++#define icmp_id icmp_hun.ih_idseq.icd_id ++#define icmp_seq icmp_hun.ih_idseq.icd_seq ++#define icmp_void icmp_hun.ih_void ++#define icmp_pmvoid icmp_hun.ih_pmtu.ipm_void ++#define icmp_nextmtu icmp_hun.ih_pmtu.ipm_nextmtu ++ union { ++ struct id_ts { ++ n_time its_otime; ++ n_time its_rtime; ++ n_time its_ttime; ++ } id_ts; ++ struct id_ip { ++ struct ip idi_ip; ++ /* options and then 64 bits of data */ ++ } id_ip; ++ uint32_t id_mask; ++ char id_data[1]; ++ } icmp_dun; ++#define icmp_otime icmp_dun.id_ts.its_otime ++#define icmp_rtime icmp_dun.id_ts.its_rtime ++#define icmp_ttime icmp_dun.id_ts.its_ttime ++#define icmp_ip icmp_dun.id_ip.idi_ip ++#define icmp_mask icmp_dun.id_mask ++#define icmp_data icmp_dun.id_data ++}; ++ ++/* ++ * Lower bounds on packet lengths for various types. ++ * For the error advice packets must first ensure that the ++ * packet is large enough to contain the returned ip header. ++ * Only then can we do the check to see if 64 bits of packet ++ * data have been returned, since we need to check the returned ++ * ip header length. ++ */ ++#define ICMP_MINLEN 8 /* abs minimum */ ++#define ICMP_TSLEN (8 + 3 * sizeof(n_time)) /* timestamp */ ++#define ICMP_MASKLEN 12 /* address mask */ ++#define ICMP_ADVLENMIN (8 + sizeof(struct ip) + 8) /* min */ ++#define ICMP_ADVLEN(p) (8 + ((p)->icmp_ip.ip_hl << 2) + 8) ++/* N.B.: must separately check that ip_hl >= 5 */ ++ ++/* ++ * Definition of type and code field values. ++ */ ++#define ICMP_ECHOREPLY 0 /* echo reply */ ++#define ICMP_UNREACH 3 /* dest unreachable, codes: */ ++#define ICMP_UNREACH_NET 0 /* bad net */ ++#define ICMP_UNREACH_HOST 1 /* bad host */ ++#define ICMP_UNREACH_PROTOCOL 2 /* bad protocol */ ++#define ICMP_UNREACH_PORT 3 /* bad port */ ++#define ICMP_UNREACH_NEEDFRAG 4 /* IP_DF caused drop */ ++#define ICMP_UNREACH_SRCFAIL 5 /* src route failed */ ++#define ICMP_UNREACH_NET_UNKNOWN 6 /* unknown net */ ++#define ICMP_UNREACH_HOST_UNKNOWN 7 /* unknown host */ ++#define ICMP_UNREACH_ISOLATED 8 /* src host isolated */ ++#define ICMP_UNREACH_NET_PROHIB 9 /* prohibited access */ ++#define ICMP_UNREACH_HOST_PROHIB 10 /* ditto */ ++#define ICMP_UNREACH_TOSNET 11 /* bad tos for net */ ++#define ICMP_UNREACH_TOSHOST 12 /* bad tos for host */ ++#define ICMP_SOURCEQUENCH 4 /* packet lost, slow down */ ++#define ICMP_REDIRECT 5 /* shorter route, codes: */ ++#define ICMP_REDIRECT_NET 0 /* for network */ ++#define ICMP_REDIRECT_HOST 1 /* for host */ ++#define ICMP_REDIRECT_TOSNET 2 /* for tos and net */ ++#define ICMP_REDIRECT_TOSHOST 3 /* for tos and host */ ++#define ICMP_ECHO 8 /* echo service */ ++#define ICMP_ROUTERADVERT 9 /* router advertisement */ ++#define ICMP_ROUTERSOLICIT 10 /* router solicitation */ ++#define ICMP_TIMXCEED 11 /* time exceeded, code: */ ++#define ICMP_TIMXCEED_INTRANS 0 /* ttl==0 in transit */ ++#define ICMP_TIMXCEED_REASS 1 /* ttl==0 in reass */ ++#define ICMP_PARAMPROB 12 /* ip header bad */ ++#define ICMP_PARAMPROB_OPTABSENT 1 /* req. opt. absent */ ++#define ICMP_TSTAMP 13 /* timestamp request */ ++#define ICMP_TSTAMPREPLY 14 /* timestamp reply */ ++#define ICMP_IREQ 15 /* information request */ ++#define ICMP_IREQREPLY 16 /* information reply */ ++#define ICMP_MASKREQ 17 /* address mask request */ ++#define ICMP_MASKREPLY 18 /* address mask reply */ ++ ++#define ICMP_MAXTYPE 18 ++ ++#define ICMP_INFOTYPE(type) \ ++ ((type) == ICMP_ECHOREPLY || (type) == ICMP_ECHO || \ ++ (type) == ICMP_ROUTERADVERT || (type) == ICMP_ROUTERSOLICIT || \ ++ (type) == ICMP_TSTAMP || (type) == ICMP_TSTAMPREPLY || \ ++ (type) == ICMP_IREQ || (type) == ICMP_IREQREPLY || \ ++ (type) == ICMP_MASKREQ || (type) == ICMP_MASKREPLY) ++ ++void icmp_init(Slirp *slirp); ++void icmp_cleanup(Slirp *slirp); ++void icmp_input(struct mbuf *, int); ++void icmp_forward_error(struct mbuf *msrc, uint8_t type, uint8_t code, int minsize, ++ const char *message, struct in_addr *src); ++void icmp_send_error(struct mbuf *msrc, uint8_t type, uint8_t code, int minsize, ++ const char *message); ++void icmp_reflect(struct mbuf *); ++void icmp_receive(struct socket *so); ++void icmp_detach(struct socket *so); ++ ++#endif +diff --git a/slirp/src/ip_input.c b/slirp/src/ip_input.c +new file mode 100644 +index 0000000000..a29c324cce +--- /dev/null ++++ b/slirp/src/ip_input.c +@@ -0,0 +1,463 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 ++ * ip_input.c,v 1.11 1994/11/16 10:17:08 jkh Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP are ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++#include "ip_icmp.h" ++ ++static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp); ++static void ip_freef(Slirp *slirp, struct ipq *fp); ++static void ip_enq(register struct ipasfrag *p, register struct ipasfrag *prev); ++static void ip_deq(register struct ipasfrag *p); ++ ++/* ++ * IP initialization: fill in IP protocol switch table. ++ * All protocols not implemented in kernel go to raw IP protocol handler. ++ */ ++void ip_init(Slirp *slirp) ++{ ++ slirp->ipq.ip_link.next = slirp->ipq.ip_link.prev = &slirp->ipq.ip_link; ++ udp_init(slirp); ++ tcp_init(slirp); ++ icmp_init(slirp); ++} ++ ++void ip_cleanup(Slirp *slirp) ++{ ++ udp_cleanup(slirp); ++ tcp_cleanup(slirp); ++ icmp_cleanup(slirp); ++} ++ ++/* ++ * Ip input routine. Checksum and byte swap header. If fragmented ++ * try to reassemble. Process options. Pass to next level. ++ */ ++void ip_input(struct mbuf *m) ++{ ++ Slirp *slirp = m->slirp; ++ M_DUP_DEBUG(slirp, m, 0, TCPIPHDR_DELTA); ++ ++ register struct ip *ip; ++ int hlen; ++ ++ if (!slirp->in_enabled) { ++ goto bad; ++ } ++ ++ DEBUG_CALL("ip_input"); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("m_len = %d", m->m_len); ++ ++ if (m->m_len < sizeof(struct ip)) { ++ goto bad; ++ } ++ ++ ip = mtod(m, struct ip *); ++ ++ if (ip->ip_v != IPVERSION) { ++ goto bad; ++ } ++ ++ hlen = ip->ip_hl << 2; ++ if (hlen < sizeof(struct ip) || hlen > m->m_len) { /* min header length */ ++ goto bad; /* or packet too short */ ++ } ++ ++ /* keep ip header intact for ICMP reply ++ * ip->ip_sum = cksum(m, hlen); ++ * if (ip->ip_sum) { ++ */ ++ if (cksum(m, hlen)) { ++ goto bad; ++ } ++ ++ /* ++ * Convert fields to host representation. ++ */ ++ NTOHS(ip->ip_len); ++ if (ip->ip_len < hlen) { ++ goto bad; ++ } ++ NTOHS(ip->ip_id); ++ NTOHS(ip->ip_off); ++ ++ /* ++ * Check that the amount of data in the buffers ++ * is as at least much as the IP header would have us expect. ++ * Trim mbufs if longer than we expect. ++ * Drop packet if shorter than we expect. ++ */ ++ if (m->m_len < ip->ip_len) { ++ goto bad; ++ } ++ ++ /* Should drop packet if mbuf too long? hmmm... */ ++ if (m->m_len > ip->ip_len) ++ m_adj(m, ip->ip_len - m->m_len); ++ ++ /* check ip_ttl for a correct ICMP reply */ ++ if (ip->ip_ttl == 0) { ++ icmp_send_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, "ttl"); ++ goto bad; ++ } ++ ++ /* ++ * If offset or IP_MF are set, must reassemble. ++ * Otherwise, nothing need be done. ++ * (We could look in the reassembly queue to see ++ * if the packet was previously fragmented, ++ * but it's not worth the time; just let them time out.) ++ * ++ * XXX This should fail, don't fragment yet ++ */ ++ if (ip->ip_off & ~IP_DF) { ++ register struct ipq *fp; ++ struct qlink *l; ++ /* ++ * Look for queue of fragments ++ * of this datagram. ++ */ ++ for (l = slirp->ipq.ip_link.next; l != &slirp->ipq.ip_link; ++ l = l->next) { ++ fp = container_of(l, struct ipq, ip_link); ++ if (ip->ip_id == fp->ipq_id && ++ ip->ip_src.s_addr == fp->ipq_src.s_addr && ++ ip->ip_dst.s_addr == fp->ipq_dst.s_addr && ++ ip->ip_p == fp->ipq_p) ++ goto found; ++ } ++ fp = NULL; ++ found: ++ ++ /* ++ * Adjust ip_len to not reflect header, ++ * set ip_mff if more fragments are expected, ++ * convert offset of this to bytes. ++ */ ++ ip->ip_len -= hlen; ++ if (ip->ip_off & IP_MF) ++ ip->ip_tos |= 1; ++ else ++ ip->ip_tos &= ~1; ++ ++ ip->ip_off <<= 3; ++ ++ /* ++ * If datagram marked as having more fragments ++ * or if this is not the first fragment, ++ * attempt reassembly; if it succeeds, proceed. ++ */ ++ if (ip->ip_tos & 1 || ip->ip_off) { ++ ip = ip_reass(slirp, ip, fp); ++ if (ip == NULL) ++ return; ++ m = dtom(slirp, ip); ++ } else if (fp) ++ ip_freef(slirp, fp); ++ ++ } else ++ ip->ip_len -= hlen; ++ ++ /* ++ * Switch out to protocol's input routine. ++ */ ++ switch (ip->ip_p) { ++ case IPPROTO_TCP: ++ tcp_input(m, hlen, (struct socket *)NULL, AF_INET); ++ break; ++ case IPPROTO_UDP: ++ udp_input(m, hlen); ++ break; ++ case IPPROTO_ICMP: ++ icmp_input(m, hlen); ++ break; ++ default: ++ m_free(m); ++ } ++ return; ++bad: ++ m_free(m); ++} ++ ++#define iptofrag(P) ((struct ipasfrag *)(((char *)(P)) - sizeof(struct qlink))) ++#define fragtoip(P) ((struct ip *)(((char *)(P)) + sizeof(struct qlink))) ++/* ++ * Take incoming datagram fragment and try to ++ * reassemble it into whole datagram. If a chain for ++ * reassembly of this datagram already exists, then it ++ * is given as fp; otherwise have to make a chain. ++ */ ++static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp) ++{ ++ register struct mbuf *m = dtom(slirp, ip); ++ register struct ipasfrag *q; ++ int hlen = ip->ip_hl << 2; ++ int i, next; ++ ++ DEBUG_CALL("ip_reass"); ++ DEBUG_ARG("ip = %p", ip); ++ DEBUG_ARG("fp = %p", fp); ++ DEBUG_ARG("m = %p", m); ++ ++ /* ++ * Presence of header sizes in mbufs ++ * would confuse code below. ++ * Fragment m_data is concatenated. ++ */ ++ m->m_data += hlen; ++ m->m_len -= hlen; ++ ++ /* ++ * If first fragment to arrive, create a reassembly queue. ++ */ ++ if (fp == NULL) { ++ struct mbuf *t = m_get(slirp); ++ ++ if (t == NULL) { ++ goto dropfrag; ++ } ++ fp = mtod(t, struct ipq *); ++ insque(&fp->ip_link, &slirp->ipq.ip_link); ++ fp->ipq_ttl = IPFRAGTTL; ++ fp->ipq_p = ip->ip_p; ++ fp->ipq_id = ip->ip_id; ++ fp->frag_link.next = fp->frag_link.prev = &fp->frag_link; ++ fp->ipq_src = ip->ip_src; ++ fp->ipq_dst = ip->ip_dst; ++ q = (struct ipasfrag *)fp; ++ goto insert; ++ } ++ ++ /* ++ * Find a segment which begins after this one does. ++ */ ++ for (q = fp->frag_link.next; q != (struct ipasfrag *)&fp->frag_link; ++ q = q->ipf_next) ++ if (q->ipf_off > ip->ip_off) ++ break; ++ ++ /* ++ * If there is a preceding segment, it may provide some of ++ * our data already. If so, drop the data from the incoming ++ * segment. If it provides all of our data, drop us. ++ */ ++ if (q->ipf_prev != &fp->frag_link) { ++ struct ipasfrag *pq = q->ipf_prev; ++ i = pq->ipf_off + pq->ipf_len - ip->ip_off; ++ if (i > 0) { ++ if (i >= ip->ip_len) ++ goto dropfrag; ++ m_adj(dtom(slirp, ip), i); ++ ip->ip_off += i; ++ ip->ip_len -= i; ++ } ++ } ++ ++ /* ++ * While we overlap succeeding segments trim them or, ++ * if they are completely covered, dequeue them. ++ */ ++ while (q != (struct ipasfrag *)&fp->frag_link && ++ ip->ip_off + ip->ip_len > q->ipf_off) { ++ struct ipasfrag *prev; ++ i = (ip->ip_off + ip->ip_len) - q->ipf_off; ++ if (i < q->ipf_len) { ++ q->ipf_len -= i; ++ q->ipf_off += i; ++ m_adj(dtom(slirp, q), i); ++ break; ++ } ++ prev = q; ++ q = q->ipf_next; ++ ip_deq(prev); ++ m_free(dtom(slirp, prev)); ++ } ++ ++insert: ++ /* ++ * Stick new segment in its place; ++ * check for complete reassembly. ++ */ ++ ip_enq(iptofrag(ip), q->ipf_prev); ++ next = 0; ++ for (q = fp->frag_link.next; q != (struct ipasfrag *)&fp->frag_link; ++ q = q->ipf_next) { ++ if (q->ipf_off != next) ++ return NULL; ++ next += q->ipf_len; ++ } ++ if (((struct ipasfrag *)(q->ipf_prev))->ipf_tos & 1) ++ return NULL; ++ ++ /* ++ * Reassembly is complete; concatenate fragments. ++ */ ++ q = fp->frag_link.next; ++ m = dtom(slirp, q); ++ int delta = (char *)q - (m->m_flags & M_EXT ? m->m_ext : m->m_dat); ++ ++ q = (struct ipasfrag *)q->ipf_next; ++ while (q != (struct ipasfrag *)&fp->frag_link) { ++ struct mbuf *t = dtom(slirp, q); ++ q = (struct ipasfrag *)q->ipf_next; ++ m_cat(m, t); ++ } ++ ++ /* ++ * Create header for new ip packet by ++ * modifying header of first packet; ++ * dequeue and discard fragment reassembly header. ++ * Make header visible. ++ */ ++ q = fp->frag_link.next; ++ ++ /* ++ * If the fragments concatenated to an mbuf that's bigger than the total ++ * size of the fragment and the mbuf was not already using an m_ext buffer, ++ * then an m_ext buffer was allocated. But fp->ipq_next points to the old ++ * buffer (in the mbuf), so we must point ip into the new buffer. ++ */ ++ if (m->m_flags & M_EXT) { ++ q = (struct ipasfrag *)(m->m_ext + delta); ++ } ++ ++ ip = fragtoip(q); ++ ip->ip_len = next; ++ ip->ip_tos &= ~1; ++ ip->ip_src = fp->ipq_src; ++ ip->ip_dst = fp->ipq_dst; ++ remque(&fp->ip_link); ++ m_free(dtom(slirp, fp)); ++ m->m_len += (ip->ip_hl << 2); ++ m->m_data -= (ip->ip_hl << 2); ++ ++ return ip; ++ ++dropfrag: ++ m_free(m); ++ return NULL; ++} ++ ++/* ++ * Free a fragment reassembly header and all ++ * associated datagrams. ++ */ ++static void ip_freef(Slirp *slirp, struct ipq *fp) ++{ ++ register struct ipasfrag *q, *p; ++ ++ for (q = fp->frag_link.next; q != (struct ipasfrag *)&fp->frag_link; ++ q = p) { ++ p = q->ipf_next; ++ ip_deq(q); ++ m_free(dtom(slirp, q)); ++ } ++ remque(&fp->ip_link); ++ m_free(dtom(slirp, fp)); ++} ++ ++/* ++ * Put an ip fragment on a reassembly chain. ++ * Like insque, but pointers in middle of structure. ++ */ ++static void ip_enq(register struct ipasfrag *p, register struct ipasfrag *prev) ++{ ++ DEBUG_CALL("ip_enq"); ++ DEBUG_ARG("prev = %p", prev); ++ p->ipf_prev = prev; ++ p->ipf_next = prev->ipf_next; ++ ((struct ipasfrag *)(prev->ipf_next))->ipf_prev = p; ++ prev->ipf_next = p; ++} ++ ++/* ++ * To ip_enq as remque is to insque. ++ */ ++static void ip_deq(register struct ipasfrag *p) ++{ ++ ((struct ipasfrag *)(p->ipf_prev))->ipf_next = p->ipf_next; ++ ((struct ipasfrag *)(p->ipf_next))->ipf_prev = p->ipf_prev; ++} ++ ++/* ++ * IP timer processing; ++ * if a timer expires on a reassembly ++ * queue, discard it. ++ */ ++void ip_slowtimo(Slirp *slirp) ++{ ++ struct qlink *l; ++ ++ DEBUG_CALL("ip_slowtimo"); ++ ++ l = slirp->ipq.ip_link.next; ++ ++ if (l == NULL) ++ return; ++ ++ while (l != &slirp->ipq.ip_link) { ++ struct ipq *fp = container_of(l, struct ipq, ip_link); ++ l = l->next; ++ if (--fp->ipq_ttl == 0) { ++ ip_freef(slirp, fp); ++ } ++ } ++} ++ ++/* ++ * Strip out IP options, at higher ++ * level protocol in the kernel. ++ * Second argument is buffer to which options ++ * will be moved, and return value is their length. ++ * (XXX) should be deleted; last arg currently ignored. ++ */ ++void ip_stripoptions(register struct mbuf *m, struct mbuf *mopt) ++{ ++ register int i; ++ struct ip *ip = mtod(m, struct ip *); ++ register char *opts; ++ int olen; ++ ++ olen = (ip->ip_hl << 2) - sizeof(struct ip); ++ opts = (char *)(ip + 1); ++ i = m->m_len - (sizeof(struct ip) + olen); ++ memmove(opts, opts + olen, (unsigned)i); ++ m->m_len -= olen; ++ ++ ip->ip_hl = sizeof(struct ip) >> 2; ++} +diff --git a/slirp/src/ip_output.c b/slirp/src/ip_output.c +new file mode 100644 +index 0000000000..4f62605915 +--- /dev/null ++++ b/slirp/src/ip_output.c +@@ -0,0 +1,171 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 ++ * ip_output.c,v 1.9 1994/11/16 10:17:10 jkh Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP are ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++ ++/* Number of packets queued before we start sending ++ * (to prevent allocing too many mbufs) */ ++#define IF_THRESH 10 ++ ++/* ++ * IP output. The packet in mbuf chain m contains a skeletal IP ++ * header (with len, off, ttl, proto, tos, src, dst). ++ * The mbuf chain containing the packet will be freed. ++ * The mbuf opt, if present, will not be freed. ++ */ ++int ip_output(struct socket *so, struct mbuf *m0) ++{ ++ Slirp *slirp = m0->slirp; ++ M_DUP_DEBUG(slirp, m0, 0, 0); ++ ++ register struct ip *ip; ++ register struct mbuf *m = m0; ++ register int hlen = sizeof(struct ip); ++ int len, off, error = 0; ++ ++ DEBUG_CALL("ip_output"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m0 = %p", m0); ++ ++ ip = mtod(m, struct ip *); ++ /* ++ * Fill in IP header. ++ */ ++ ip->ip_v = IPVERSION; ++ ip->ip_off &= IP_DF; ++ ip->ip_id = htons(slirp->ip_id++); ++ ip->ip_hl = hlen >> 2; ++ ++ /* ++ * If small enough for interface, can just send directly. ++ */ ++ if ((uint16_t)ip->ip_len <= slirp->if_mtu) { ++ ip->ip_len = htons((uint16_t)ip->ip_len); ++ ip->ip_off = htons((uint16_t)ip->ip_off); ++ ip->ip_sum = 0; ++ ip->ip_sum = cksum(m, hlen); ++ ++ if_output(so, m); ++ goto done; ++ } ++ ++ /* ++ * Too large for interface; fragment if possible. ++ * Must be able to put at least 8 bytes per fragment. ++ */ ++ if (ip->ip_off & IP_DF) { ++ error = -1; ++ goto bad; ++ } ++ ++ len = (slirp->if_mtu - hlen) & ~7; /* ip databytes per packet */ ++ if (len < 8) { ++ error = -1; ++ goto bad; ++ } ++ ++ { ++ int mhlen, firstlen = len; ++ struct mbuf **mnext = &m->m_nextpkt; ++ ++ /* ++ * Loop through length of segment after first fragment, ++ * make new header and copy data of each part and link onto chain. ++ */ ++ m0 = m; ++ mhlen = sizeof(struct ip); ++ for (off = hlen + len; off < (uint16_t)ip->ip_len; off += len) { ++ register struct ip *mhip; ++ m = m_get(slirp); ++ if (m == NULL) { ++ error = -1; ++ goto sendorfree; ++ } ++ m->m_data += IF_MAXLINKHDR; ++ mhip = mtod(m, struct ip *); ++ *mhip = *ip; ++ ++ m->m_len = mhlen; ++ mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF); ++ if (ip->ip_off & IP_MF) ++ mhip->ip_off |= IP_MF; ++ if (off + len >= (uint16_t)ip->ip_len) ++ len = (uint16_t)ip->ip_len - off; ++ else ++ mhip->ip_off |= IP_MF; ++ mhip->ip_len = htons((uint16_t)(len + mhlen)); ++ ++ if (m_copy(m, m0, off, len) < 0) { ++ error = -1; ++ goto sendorfree; ++ } ++ ++ mhip->ip_off = htons((uint16_t)mhip->ip_off); ++ mhip->ip_sum = 0; ++ mhip->ip_sum = cksum(m, mhlen); ++ *mnext = m; ++ mnext = &m->m_nextpkt; ++ } ++ /* ++ * Update first fragment by trimming what's been copied out ++ * and updating header, then send each fragment (in order). ++ */ ++ m = m0; ++ m_adj(m, hlen + firstlen - (uint16_t)ip->ip_len); ++ ip->ip_len = htons((uint16_t)m->m_len); ++ ip->ip_off = htons((uint16_t)(ip->ip_off | IP_MF)); ++ ip->ip_sum = 0; ++ ip->ip_sum = cksum(m, hlen); ++ sendorfree: ++ for (m = m0; m; m = m0) { ++ m0 = m->m_nextpkt; ++ m->m_nextpkt = NULL; ++ if (error == 0) ++ if_output(so, m); ++ else ++ m_free(m); ++ } ++ } ++ ++done: ++ return (error); ++ ++bad: ++ m_free(m0); ++ goto done; ++} +diff --git a/slirp/src/libslirp-version.h.in b/slirp/src/libslirp-version.h.in +new file mode 100644 +index 0000000000..faa6c85952 +--- /dev/null ++++ b/slirp/src/libslirp-version.h.in +@@ -0,0 +1,24 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++#ifndef LIBSLIRP_VERSION_H_ ++#define LIBSLIRP_VERSION_H_ ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#define SLIRP_MAJOR_VERSION @SLIRP_MAJOR_VERSION@ ++#define SLIRP_MINOR_VERSION @SLIRP_MINOR_VERSION@ ++#define SLIRP_MICRO_VERSION @SLIRP_MICRO_VERSION@ ++#define SLIRP_VERSION_STRING @SLIRP_VERSION_STRING@ ++ ++#define SLIRP_CHECK_VERSION(major,minor,micro) \ ++ (SLIRP_MAJOR_VERSION > (major) || \ ++ (SLIRP_MAJOR_VERSION == (major) && SLIRP_MINOR_VERSION > (minor)) || \ ++ (SLIRP_MAJOR_VERSION == (major) && SLIRP_MINOR_VERSION == (minor) && \ ++ SLIRP_MICRO_VERSION >= (micro))) ++ ++#ifdef __cplusplus ++} /* extern "C" */ ++#endif ++ ++#endif /* LIBSLIRP_VERSION_H_ */ +diff --git a/slirp/src/libslirp.h b/slirp/src/libslirp.h +new file mode 100644 +index 0000000000..5760d53cea +--- /dev/null ++++ b/slirp/src/libslirp.h +@@ -0,0 +1,236 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++#ifndef LIBSLIRP_H ++#define LIBSLIRP_H ++ ++#include ++#include ++#include ++ ++#ifdef _WIN32 ++#include ++#include ++#include ++#else ++#include ++#include ++#endif ++ ++#include "libslirp-version.h" ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* Opaque structure containing the slirp state */ ++typedef struct Slirp Slirp; ++ ++/* Flags passed to SlirpAddPollCb and to be returned by SlirpGetREventsCb. */ ++enum { ++ SLIRP_POLL_IN = 1 << 0, ++ SLIRP_POLL_OUT = 1 << 1, ++ SLIRP_POLL_PRI = 1 << 2, ++ SLIRP_POLL_ERR = 1 << 3, ++ SLIRP_POLL_HUP = 1 << 4, ++}; ++ ++typedef ssize_t (*SlirpReadCb)(void *buf, size_t len, void *opaque); ++typedef ssize_t (*SlirpWriteCb)(const void *buf, size_t len, void *opaque); ++typedef void (*SlirpTimerCb)(void *opaque); ++typedef int (*SlirpAddPollCb)(int fd, int events, void *opaque); ++typedef int (*SlirpGetREventsCb)(int idx, void *opaque); ++ ++/* ++ * Callbacks from slirp, to be set by the application. ++ * ++ * The opaque parameter is set to the opaque pointer given in the slirp_new / ++ * slirp_init call. ++ */ ++typedef struct SlirpCb { ++ /* ++ * Send an ethernet frame to the guest network. The opaque parameter is the ++ * one given to slirp_init(). If the guest is not ready to receive a frame, ++ * the function can just drop the data. TCP will then handle retransmissions ++ * at a lower pace. ++ * <0 reports an IO error. ++ */ ++ SlirpWriteCb send_packet; ++ /* Print a message for an error due to guest misbehavior. */ ++ void (*guest_error)(const char *msg, void *opaque); ++ /* Return the virtual clock value in nanoseconds */ ++ int64_t (*clock_get_ns)(void *opaque); ++ /* Create a new timer with the given callback and opaque data */ ++ void *(*timer_new)(SlirpTimerCb cb, void *cb_opaque, void *opaque); ++ /* Remove and free a timer */ ++ void (*timer_free)(void *timer, void *opaque); ++ /* Modify a timer to expire at @expire_time (ms) */ ++ void (*timer_mod)(void *timer, int64_t expire_time, void *opaque); ++ /* Register a fd for future polling */ ++ void (*register_poll_fd)(int fd, void *opaque); ++ /* Unregister a fd */ ++ void (*unregister_poll_fd)(int fd, void *opaque); ++ /* Kick the io-thread, to signal that new events may be processed */ ++ void (*notify)(void *opaque); ++} SlirpCb; ++ ++#define SLIRP_CONFIG_VERSION_MIN 1 ++#define SLIRP_CONFIG_VERSION_MAX 3 ++ ++typedef struct SlirpConfig { ++ /* Version must be provided */ ++ uint32_t version; ++ /* ++ * Fields introduced in SlirpConfig version 1 begin ++ */ ++ int restricted; ++ bool in_enabled; ++ struct in_addr vnetwork; ++ struct in_addr vnetmask; ++ struct in_addr vhost; ++ bool in6_enabled; ++ struct in6_addr vprefix_addr6; ++ uint8_t vprefix_len; ++ struct in6_addr vhost6; ++ const char *vhostname; ++ const char *tftp_server_name; ++ const char *tftp_path; ++ const char *bootfile; ++ struct in_addr vdhcp_start; ++ struct in_addr vnameserver; ++ struct in6_addr vnameserver6; ++ const char **vdnssearch; ++ const char *vdomainname; ++ /* Default: IF_MTU_DEFAULT */ ++ size_t if_mtu; ++ /* Default: IF_MRU_DEFAULT */ ++ size_t if_mru; ++ /* Prohibit connecting to 127.0.0.1:* */ ++ bool disable_host_loopback; ++ /* ++ * Enable emulation code (*warning*: this code isn't safe, it is not ++ * recommended to enable it) ++ */ ++ bool enable_emu; ++ /* ++ * Fields introduced in SlirpConfig version 2 begin ++ */ ++ struct sockaddr_in *outbound_addr; ++ struct sockaddr_in6 *outbound_addr6; ++ /* ++ * Fields introduced in SlirpConfig version 3 begin ++ */ ++ bool disable_dns; /* slirp will not redirect/serve any DNS packet */ ++} SlirpConfig; ++ ++/* Create a new instance of a slirp stack */ ++Slirp *slirp_new(const SlirpConfig *cfg, const SlirpCb *callbacks, ++ void *opaque); ++/* slirp_init is deprecated in favor of slirp_new */ ++Slirp *slirp_init(int restricted, bool in_enabled, struct in_addr vnetwork, ++ struct in_addr vnetmask, struct in_addr vhost, ++ bool in6_enabled, struct in6_addr vprefix_addr6, ++ uint8_t vprefix_len, struct in6_addr vhost6, ++ const char *vhostname, const char *tftp_server_name, ++ const char *tftp_path, const char *bootfile, ++ struct in_addr vdhcp_start, struct in_addr vnameserver, ++ struct in6_addr vnameserver6, const char **vdnssearch, ++ const char *vdomainname, const SlirpCb *callbacks, ++ void *opaque); ++/* Shut down an instance of a slirp stack */ ++void slirp_cleanup(Slirp *slirp); ++ ++/* This is called by the application when it is about to sleep through poll(). ++ * *timeout is set to the amount of virtual time (in ms) that the application intends to ++ * wait (UINT32_MAX if infinite). slirp_pollfds_fill updates it according to ++ * e.g. TCP timers, so the application knows it should sleep a smaller amount of ++ * time. slirp_pollfds_fill calls add_poll for each file descriptor ++ * that should be monitored along the sleep. The opaque pointer is passed as ++ * such to add_poll, and add_poll returns an index. */ ++void slirp_pollfds_fill(Slirp *slirp, uint32_t *timeout, ++ SlirpAddPollCb add_poll, void *opaque); ++ ++/* This is called by the application after sleeping, to report which file ++ * descriptors are available. slirp_pollfds_poll calls get_revents on each file ++ * descriptor, giving it the index that add_poll returned during the ++ * slirp_pollfds_fill call, to know whether the descriptor is available for ++ * read/write/etc. (SLIRP_POLL_*) ++ * select_error should be passed 1 if poll() returned an error. */ ++void slirp_pollfds_poll(Slirp *slirp, int select_error, ++ SlirpGetREventsCb get_revents, void *opaque); ++ ++/* This is called by the application when the guest emits a packet on the ++ * guest network, to be interpreted by slirp. */ ++void slirp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len); ++ ++/* These set up / remove port forwarding between a host port in the real world ++ * and the guest network. */ ++int slirp_add_hostfwd(Slirp *slirp, int is_udp, struct in_addr host_addr, ++ int host_port, struct in_addr guest_addr, int guest_port); ++int slirp_remove_hostfwd(Slirp *slirp, int is_udp, struct in_addr host_addr, ++ int host_port); ++ ++#define SLIRP_HOSTFWD_UDP 1 ++#define SLIRP_HOSTFWD_V6ONLY 2 ++int slirp_add_hostxfwd(Slirp *slirp, ++ const struct sockaddr *haddr, socklen_t haddrlen, ++ const struct sockaddr *gaddr, socklen_t gaddrlen, ++ int flags); ++int slirp_remove_hostxfwd(Slirp *slirp, ++ const struct sockaddr *haddr, socklen_t haddrlen, ++ int flags); ++ ++/* Set up port forwarding between a port in the guest network and a ++ * command running on the host */ ++int slirp_add_exec(Slirp *slirp, const char *cmdline, ++ struct in_addr *guest_addr, int guest_port); ++/* Set up port forwarding between a port in the guest network and a ++ * Unix port on the host */ ++int slirp_add_unix(Slirp *slirp, const char *unixsock, ++ struct in_addr *guest_addr, int guest_port); ++/* Set up port forwarding between a port in the guest network and a ++ * callback that will receive the data coming from the port */ ++int slirp_add_guestfwd(Slirp *slirp, SlirpWriteCb write_cb, void *opaque, ++ struct in_addr *guest_addr, int guest_port); ++ ++/* TODO: rather identify a guestfwd through an opaque pointer instead of through ++ * the guest_addr */ ++ ++/* This is called by the application for a guestfwd, to determine how much data ++ * can be received by the forwarded port through a call to slirp_socket_recv. */ ++size_t slirp_socket_can_recv(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port); ++/* This is called by the application for a guestfwd, to provide the data to be ++ * sent on the forwarded port */ ++void slirp_socket_recv(Slirp *slirp, struct in_addr guest_addr, int guest_port, ++ const uint8_t *buf, int size); ++ ++/* Remove entries added by slirp_add_exec, slirp_add_unix or slirp_add_guestfwd */ ++int slirp_remove_guestfwd(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port); ++ ++/* Return a human-readable state of the slirp stack */ ++char *slirp_connection_info(Slirp *slirp); ++ ++/* Return a human-readable state of the NDP/ARP tables */ ++char *slirp_neighbor_info(Slirp *slirp); ++ ++/* Save the slirp state through the write_cb. The opaque pointer is passed as ++ * such to the write_cb. */ ++void slirp_state_save(Slirp *s, SlirpWriteCb write_cb, void *opaque); ++ ++/* Returns the version of the slirp state, to be saved along the state */ ++int slirp_state_version(void); ++ ++/* Load the slirp state through the read_cb. The opaque pointer is passed as ++ * such to the read_cb. The version should be given as it was obtained from ++ * slirp_state_version when slirp_state_save was called. */ ++int slirp_state_load(Slirp *s, int version_id, SlirpReadCb read_cb, ++ void *opaque); ++ ++/* Return the version of the slirp implementation */ ++const char *slirp_version_string(void); ++ ++#ifdef __cplusplus ++} /* extern "C" */ ++#endif ++ ++#endif /* LIBSLIRP_H */ +diff --git a/slirp/src/libslirp.map b/slirp/src/libslirp.map +new file mode 100644 +index 0000000000..792b0a94ab +--- /dev/null ++++ b/slirp/src/libslirp.map +@@ -0,0 +1,36 @@ ++SLIRP_4.0 { ++global: ++ slirp_add_exec; ++ slirp_add_guestfwd; ++ slirp_add_hostfwd; ++ slirp_cleanup; ++ slirp_connection_info; ++ slirp_init; ++ slirp_input; ++ slirp_pollfds_fill; ++ slirp_pollfds_poll; ++ slirp_remove_hostfwd; ++ slirp_socket_can_recv; ++ slirp_socket_recv; ++ slirp_state_load; ++ slirp_state_save; ++ slirp_state_version; ++ slirp_version_string; ++local: ++ *; ++}; ++ ++SLIRP_4.1 { ++ slirp_new; ++} SLIRP_4.0; ++ ++SLIRP_4.2 { ++ slirp_add_unix; ++ slirp_remove_guestfwd; ++} SLIRP_4.1; ++ ++SLIRP_4.5 { ++ slirp_add_hostxfwd; ++ slirp_remove_hostxfwd; ++ slirp_neighbor_info; ++} SLIRP_4.2; +diff --git a/slirp/src/main.h b/slirp/src/main.h +new file mode 100644 +index 0000000000..3b3f883703 +--- /dev/null ++++ b/slirp/src/main.h +@@ -0,0 +1,16 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#ifndef SLIRP_MAIN_H ++#define SLIRP_MAIN_H ++ ++extern unsigned curtime; ++extern struct in_addr loopback_addr; ++extern unsigned long loopback_mask; ++ ++int if_encap(Slirp *slirp, struct mbuf *ifm); ++ssize_t slirp_send(struct socket *so, const void *buf, size_t len, int flags); ++ ++#endif +diff --git a/slirp/src/mbuf.c b/slirp/src/mbuf.c +new file mode 100644 +index 0000000000..36864a401f +--- /dev/null ++++ b/slirp/src/mbuf.c +@@ -0,0 +1,281 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski ++ */ ++ ++/* ++ * mbuf's in SLiRP are much simpler than the real mbufs in ++ * FreeBSD. They are fixed size, determined by the MTU, ++ * so that one whole packet can fit. Mbuf's cannot be ++ * chained together. If there's more data than the mbuf ++ * could hold, an external g_malloced buffer is pointed to ++ * by m_ext (and the data pointers) and M_EXT is set in ++ * the flags ++ */ ++ ++#include "slirp.h" ++ ++#define MBUF_THRESH 30 ++ ++/* ++ * Find a nice value for msize ++ */ ++#define SLIRP_MSIZE(mtu) \ ++ (offsetof(struct mbuf, m_dat) + IF_MAXLINKHDR + TCPIPHDR_DELTA + (mtu)) ++ ++void m_init(Slirp *slirp) ++{ ++ slirp->m_freelist.qh_link = slirp->m_freelist.qh_rlink = &slirp->m_freelist; ++ slirp->m_usedlist.qh_link = slirp->m_usedlist.qh_rlink = &slirp->m_usedlist; ++} ++ ++static void m_cleanup_list(struct quehead *list_head) ++{ ++ struct mbuf *m, *next; ++ ++ m = (struct mbuf *)list_head->qh_link; ++ while ((struct quehead *)m != list_head) { ++ next = m->m_next; ++ if (m->m_flags & M_EXT) { ++ g_free(m->m_ext); ++ } ++ g_free(m); ++ m = next; ++ } ++ list_head->qh_link = list_head; ++ list_head->qh_rlink = list_head; ++} ++ ++void m_cleanup(Slirp *slirp) ++{ ++ m_cleanup_list(&slirp->m_usedlist); ++ m_cleanup_list(&slirp->m_freelist); ++ m_cleanup_list(&slirp->if_batchq); ++ m_cleanup_list(&slirp->if_fastq); ++} ++ ++/* ++ * Get an mbuf from the free list, if there are none ++ * allocate one ++ * ++ * Because fragmentation can occur if we alloc new mbufs and ++ * free old mbufs, we mark all mbufs above mbuf_thresh as M_DOFREE, ++ * which tells m_free to actually g_free() it ++ */ ++struct mbuf *m_get(Slirp *slirp) ++{ ++ register struct mbuf *m; ++ int flags = 0; ++ ++ DEBUG_CALL("m_get"); ++ ++ if (MBUF_DEBUG || slirp->m_freelist.qh_link == &slirp->m_freelist) { ++ m = g_malloc(SLIRP_MSIZE(slirp->if_mtu)); ++ slirp->mbuf_alloced++; ++ if (MBUF_DEBUG || slirp->mbuf_alloced > MBUF_THRESH) ++ flags = M_DOFREE; ++ m->slirp = slirp; ++ } else { ++ m = (struct mbuf *)slirp->m_freelist.qh_link; ++ remque(m); ++ } ++ ++ /* Insert it in the used list */ ++ insque(m, &slirp->m_usedlist); ++ m->m_flags = (flags | M_USEDLIST); ++ ++ /* Initialise it */ ++ m->m_size = SLIRP_MSIZE(slirp->if_mtu) - offsetof(struct mbuf, m_dat); ++ m->m_data = m->m_dat; ++ m->m_len = 0; ++ m->m_nextpkt = NULL; ++ m->m_prevpkt = NULL; ++ m->resolution_requested = false; ++ m->expiration_date = (uint64_t)-1; ++ DEBUG_ARG("m = %p", m); ++ return m; ++} ++ ++void m_free(struct mbuf *m) ++{ ++ DEBUG_CALL("m_free"); ++ DEBUG_ARG("m = %p", m); ++ ++ if (m) { ++ /* Remove from m_usedlist */ ++ if (m->m_flags & M_USEDLIST) ++ remque(m); ++ ++ /* If it's M_EXT, free() it */ ++ if (m->m_flags & M_EXT) { ++ g_free(m->m_ext); ++ m->m_flags &= ~M_EXT; ++ } ++ /* ++ * Either free() it or put it on the free list ++ */ ++ if (m->m_flags & M_DOFREE) { ++ m->slirp->mbuf_alloced--; ++ g_free(m); ++ } else if ((m->m_flags & M_FREELIST) == 0) { ++ insque(m, &m->slirp->m_freelist); ++ m->m_flags = M_FREELIST; /* Clobber other flags */ ++ } ++ } /* if(m) */ ++} ++ ++/* ++ * Copy data from one mbuf to the end of ++ * the other.. if result is too big for one mbuf, allocate ++ * an M_EXT data segment ++ */ ++void m_cat(struct mbuf *m, struct mbuf *n) ++{ ++ /* ++ * If there's no room, realloc ++ */ ++ if (M_FREEROOM(m) < n->m_len) ++ m_inc(m, m->m_len + n->m_len); ++ ++ memcpy(m->m_data + m->m_len, n->m_data, n->m_len); ++ m->m_len += n->m_len; ++ ++ m_free(n); ++} ++ ++ ++/* make m 'size' bytes large from m_data */ ++void m_inc(struct mbuf *m, int size) ++{ ++ int gapsize; ++ ++ /* some compilers throw up on gotos. This one we can fake. */ ++ if (M_ROOM(m) > size) { ++ return; ++ } ++ ++ if (m->m_flags & M_EXT) { ++ gapsize = m->m_data - m->m_ext; ++ m->m_ext = g_realloc(m->m_ext, size + gapsize); ++ } else { ++ gapsize = m->m_data - m->m_dat; ++ m->m_ext = g_malloc(size + gapsize); ++ memcpy(m->m_ext, m->m_dat, m->m_size); ++ m->m_flags |= M_EXT; ++ } ++ ++ m->m_data = m->m_ext + gapsize; ++ m->m_size = size + gapsize; ++} ++ ++ ++void m_adj(struct mbuf *m, int len) ++{ ++ if (m == NULL) ++ return; ++ if (len >= 0) { ++ /* Trim from head */ ++ m->m_data += len; ++ m->m_len -= len; ++ } else { ++ /* Trim from tail */ ++ len = -len; ++ m->m_len -= len; ++ } ++} ++ ++ ++/* ++ * Copy len bytes from m, starting off bytes into n ++ */ ++int m_copy(struct mbuf *n, struct mbuf *m, int off, int len) ++{ ++ if (len > M_FREEROOM(n)) ++ return -1; ++ ++ memcpy((n->m_data + n->m_len), (m->m_data + off), len); ++ n->m_len += len; ++ return 0; ++} ++ ++ ++/* ++ * Given a pointer into an mbuf, return the mbuf ++ * XXX This is a kludge, I should eliminate the need for it ++ * Fortunately, it's not used often ++ */ ++struct mbuf *dtom(Slirp *slirp, void *dat) ++{ ++ struct mbuf *m; ++ ++ DEBUG_CALL("dtom"); ++ DEBUG_ARG("dat = %p", dat); ++ ++ /* bug corrected for M_EXT buffers */ ++ for (m = (struct mbuf *)slirp->m_usedlist.qh_link; ++ (struct quehead *)m != &slirp->m_usedlist; m = m->m_next) { ++ if (m->m_flags & M_EXT) { ++ if ((char *)dat >= m->m_ext && (char *)dat < (m->m_ext + m->m_size)) ++ return m; ++ } else { ++ if ((char *)dat >= m->m_dat && (char *)dat < (m->m_dat + m->m_size)) ++ return m; ++ } ++ } ++ ++ DEBUG_ERROR("dtom failed"); ++ ++ return (struct mbuf *)0; ++} ++ ++/* ++ * Duplicate the mbuf ++ * ++ * copy_header specifies whether the bytes before m_data should also be copied. ++ * header_size specifies how many bytes are to be reserved before m_data. ++ */ ++struct mbuf *m_dup(Slirp *slirp, struct mbuf *m, ++ bool copy_header, ++ size_t header_size) ++{ ++ struct mbuf *n; ++ int mcopy_result; ++ ++ /* The previous mbuf was supposed to have it already, we can check it along ++ * the way */ ++ assert(M_ROOMBEFORE(m) >= header_size); ++ ++ n = m_get(slirp); ++ m_inc(n, m->m_len + header_size); ++ ++ if (copy_header) { ++ m->m_len += header_size; ++ m->m_data -= header_size; ++ mcopy_result = m_copy(n, m, 0, m->m_len + header_size); ++ n->m_data += header_size; ++ m->m_len -= header_size; ++ m->m_data += header_size; ++ } else { ++ n->m_data += header_size; ++ mcopy_result = m_copy(n, m, 0, m->m_len); ++ } ++ g_assert(mcopy_result == 0); ++ ++ return n; ++} ++ ++void *mtod_check(struct mbuf *m, size_t len) ++{ ++ if (m->m_len >= len) { ++ return m->m_data; ++ } ++ ++ DEBUG_ERROR("mtod failed"); ++ ++ return NULL; ++} ++ ++void *m_end(struct mbuf *m) ++{ ++ return m->m_data + m->m_len; ++} +diff --git a/slirp/src/mbuf.h b/slirp/src/mbuf.h +new file mode 100644 +index 0000000000..34e697a914 +--- /dev/null ++++ b/slirp/src/mbuf.h +@@ -0,0 +1,192 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)mbuf.h 8.3 (Berkeley) 1/21/94 ++ * mbuf.h,v 1.9 1994/11/14 13:54:20 bde Exp ++ */ ++ ++#ifndef MBUF_H ++#define MBUF_H ++ ++/* ++ * Macros for type conversion ++ * mtod(m,t) - convert mbuf pointer to data pointer of correct type ++ */ ++#define mtod(m, t) ((t)(m)->m_data) ++ ++/* XXX About mbufs for slirp: ++ * Only one mbuf is ever used in a chain, for each "cell" of data. ++ * m_nextpkt points to the next packet, if fragmented. ++ * If the data is too large, the M_EXT is used, and a larger block ++ * is alloced. Therefore, m_free[m] must check for M_EXT and if set ++ * free the m_ext. This is inefficient memory-wise, but who cares. ++ */ ++ ++/* ++ * mbufs allow to have a gap between the start of the allocated buffer (m_ext if ++ * M_EXT is set, m_dat otherwise) and the in-use data: ++ * ++ * |--gapsize----->|---m_len-------> ++ * |----------m_size------------------------------> ++ * |----M_ROOM--------------------> ++ * |-M_FREEROOM--> ++ * ++ * ^ ^ ^ ++ * m_dat/m_ext m_data end of buffer ++ */ ++ ++/* ++ * How much room is in the mbuf, from m_data to the end of the mbuf ++ */ ++#define M_ROOM(m) \ ++ ((m->m_flags & M_EXT) ? (((m)->m_ext + (m)->m_size) - (m)->m_data) : \ ++ (((m)->m_dat + (m)->m_size) - (m)->m_data)) ++ ++/* ++ * How much free room there is ++ */ ++#define M_FREEROOM(m) (M_ROOM(m) - (m)->m_len) ++ ++/* ++ * How much free room there is before m_data ++ */ ++#define M_ROOMBEFORE(m) \ ++ (((m)->m_flags & M_EXT) ? (m)->m_data - (m)->m_ext \ ++ : (m)->m_data - (m)->m_dat) ++ ++struct mbuf { ++ /* XXX should union some of these! */ ++ /* header at beginning of each mbuf: */ ++ struct mbuf *m_next; /* Linked list of mbufs */ ++ struct mbuf *m_prev; ++ struct mbuf *m_nextpkt; /* Next packet in queue/record */ ++ struct mbuf *m_prevpkt; /* Flags aren't used in the output queue */ ++ int m_flags; /* Misc flags */ ++ ++ int m_size; /* Size of mbuf, from m_dat or m_ext */ ++ struct socket *m_so; ++ ++ char *m_data; /* Current location of data */ ++ int m_len; /* Amount of data in this mbuf, from m_data */ ++ ++ Slirp *slirp; ++ bool resolution_requested; ++ uint64_t expiration_date; ++ char *m_ext; ++ /* start of dynamic buffer area, must be last element */ ++ char m_dat[]; ++}; ++ ++#define ifq_prev m_prev ++#define ifq_next m_next ++#define ifs_prev m_prevpkt ++#define ifs_next m_nextpkt ++#define ifq_so m_so ++ ++#define M_EXT 0x01 /* m_ext points to more (malloced) data */ ++#define M_FREELIST 0x02 /* mbuf is on free list */ ++#define M_USEDLIST 0x04 /* XXX mbuf is on used list (for dtom()) */ ++#define M_DOFREE \ ++ 0x08 /* when m_free is called on the mbuf, free() \ ++ * it rather than putting it on the free list */ ++ ++void m_init(Slirp *); ++void m_cleanup(Slirp *slirp); ++struct mbuf *m_get(Slirp *); ++void m_free(struct mbuf *); ++void m_cat(register struct mbuf *, register struct mbuf *); ++void m_inc(struct mbuf *, int); ++void m_adj(struct mbuf *, int); ++int m_copy(struct mbuf *, struct mbuf *, int, int); ++struct mbuf *m_dup(Slirp *slirp, struct mbuf *m, bool copy_header, size_t header_size); ++struct mbuf *dtom(Slirp *, void *); ++void *mtod_check(struct mbuf *, size_t len); ++void *m_end(struct mbuf *); ++ ++static inline void ifs_init(struct mbuf *ifm) ++{ ++ ifm->ifs_next = ifm->ifs_prev = ifm; ++} ++ ++#ifdef DEBUG ++# define MBUF_DEBUG 1 ++#else ++# ifdef HAVE_VALGRIND ++# include ++# define MBUF_DEBUG RUNNING_ON_VALGRIND ++# else ++# define MBUF_DEBUG 0 ++# endif ++#endif ++ ++/* ++ * When a function is given an mbuf as well as the responsibility to free it, we ++ * want valgrind etc. to properly identify the new responsible for the ++ * free. Achieve this by making a new copy. For instance: ++ * ++ * f0(void) { ++ * struct mbuf *m = m_get(slirp); ++ * [...] ++ * switch (something) { ++ * case 1: ++ * f1(m); ++ * break; ++ * case 2: ++ * f2(m); ++ * break; ++ * [...] ++ * } ++ * } ++ * ++ * f1(struct mbuf *m) { ++ * M_DUP_DEBUG(m->slirp, m); ++ * [...] ++ * m_free(m); // but author of f1 might be forgetting this ++ * } ++ * ++ * f0 transfers the freeing responsibility to f1, f2, etc. Without the ++ * M_DUP_DEBUG call in f1, valgrind would tell us that it is f0 where the buffer ++ * was allocated, but it's difficult to know whether a leak is actually in f0, ++ * or in f1, or in f2, etc. Duplicating the mbuf in M_DUP_DEBUG each time the ++ * responsibility is transferred allows to immediately know where the leak ++ * actually is. ++ */ ++#define M_DUP_DEBUG(slirp, m, copy_header, header_size) do { \ ++ if (MBUF_DEBUG) { \ ++ struct mbuf *__n; \ ++ __n = m_dup((slirp), (m), (copy_header), (header_size)); \ ++ m_free(m); \ ++ (m) = __n; \ ++ } else { \ ++ (void) (slirp); (void) (copy_header); \ ++ g_assert(M_ROOMBEFORE(m) >= (header_size)); \ ++ } \ ++} while(0) ++ ++#endif +diff --git a/slirp/src/misc.c b/slirp/src/misc.c +new file mode 100644 +index 0000000000..48f180be43 +--- /dev/null ++++ b/slirp/src/misc.c +@@ -0,0 +1,440 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++#ifdef G_OS_UNIX ++#include ++#endif ++ ++inline void insque(void *a, void *b) ++{ ++ register struct quehead *element = (struct quehead *)a; ++ register struct quehead *head = (struct quehead *)b; ++ element->qh_link = head->qh_link; ++ head->qh_link = (struct quehead *)element; ++ element->qh_rlink = (struct quehead *)head; ++ ((struct quehead *)(element->qh_link))->qh_rlink = ++ (struct quehead *)element; ++} ++ ++inline void remque(void *a) ++{ ++ register struct quehead *element = (struct quehead *)a; ++ ((struct quehead *)(element->qh_link))->qh_rlink = element->qh_rlink; ++ ((struct quehead *)(element->qh_rlink))->qh_link = element->qh_link; ++ element->qh_rlink = NULL; ++} ++ ++/* TODO: IPv6 */ ++struct gfwd_list *add_guestfwd(struct gfwd_list **ex_ptr, SlirpWriteCb write_cb, ++ void *opaque, struct in_addr addr, int port) ++{ ++ struct gfwd_list *f = g_new0(struct gfwd_list, 1); ++ ++ f->write_cb = write_cb; ++ f->opaque = opaque; ++ f->ex_fport = port; ++ f->ex_addr = addr; ++ f->ex_next = *ex_ptr; ++ *ex_ptr = f; ++ ++ return f; ++} ++ ++struct gfwd_list *add_exec(struct gfwd_list **ex_ptr, const char *cmdline, ++ struct in_addr addr, int port) ++{ ++ struct gfwd_list *f = add_guestfwd(ex_ptr, NULL, NULL, addr, port); ++ ++ f->ex_exec = g_strdup(cmdline); ++ ++ return f; ++} ++ ++struct gfwd_list *add_unix(struct gfwd_list **ex_ptr, const char *unixsock, ++ struct in_addr addr, int port) ++{ ++ struct gfwd_list *f = add_guestfwd(ex_ptr, NULL, NULL, addr, port); ++ ++ f->ex_unix = g_strdup(unixsock); ++ ++ return f; ++} ++ ++int remove_guestfwd(struct gfwd_list **ex_ptr, struct in_addr addr, int port) ++{ ++ for (; *ex_ptr != NULL; ex_ptr = &((*ex_ptr)->ex_next)) { ++ struct gfwd_list *f = *ex_ptr; ++ if (f->ex_addr.s_addr == addr.s_addr && f->ex_fport == port) { ++ *ex_ptr = f->ex_next; ++ g_free(f->ex_exec); ++ g_free(f); ++ return 0; ++ } ++ } ++ return -1; ++} ++ ++static int slirp_socketpair_with_oob(int sv[2]) ++{ ++ struct sockaddr_in addr = { ++ .sin_family = AF_INET, ++ .sin_port = 0, ++ .sin_addr.s_addr = INADDR_ANY, ++ }; ++ socklen_t addrlen = sizeof(addr); ++ int ret, s; ++ ++ sv[1] = -1; ++ s = slirp_socket(AF_INET, SOCK_STREAM, 0); ++ if (s < 0 || bind(s, (struct sockaddr *)&addr, addrlen) < 0 || ++ listen(s, 1) < 0 || ++ getsockname(s, (struct sockaddr *)&addr, &addrlen) < 0) { ++ goto err; ++ } ++ ++ sv[1] = slirp_socket(AF_INET, SOCK_STREAM, 0); ++ if (sv[1] < 0) { ++ goto err; ++ } ++ /* ++ * This connect won't block because we've already listen()ed on ++ * the server end (even though we won't accept() the connection ++ * until later on). ++ */ ++ do { ++ ret = connect(sv[1], (struct sockaddr *)&addr, addrlen); ++ } while (ret < 0 && errno == EINTR); ++ if (ret < 0) { ++ goto err; ++ } ++ ++ do { ++ sv[0] = accept(s, (struct sockaddr *)&addr, &addrlen); ++ } while (sv[0] < 0 && errno == EINTR); ++ if (sv[0] < 0) { ++ goto err; ++ } ++ ++ closesocket(s); ++ return 0; ++ ++err: ++ g_critical("slirp_socketpair(): %s", strerror(errno)); ++ if (s >= 0) { ++ closesocket(s); ++ } ++ if (sv[1] >= 0) { ++ closesocket(sv[1]); ++ } ++ return -1; ++} ++ ++static void fork_exec_child_setup(gpointer data) ++{ ++#ifndef _WIN32 ++ setsid(); ++ ++ /* Unblock all signals and leave our exec()-ee to block what it wants */ ++ sigset_t ss; ++ sigemptyset(&ss); ++ sigprocmask(SIG_SETMASK, &ss, NULL); ++ ++ /* POSIX is obnoxious about SIGCHLD specifically across exec() */ ++ signal(SIGCHLD, SIG_DFL); ++#endif ++} ++ ++#pragma GCC diagnostic push ++#pragma GCC diagnostic ignored "-Wdeprecated-declarations" ++ ++#if !GLIB_CHECK_VERSION(2, 58, 0) ++typedef struct SlirpGSpawnFds { ++ GSpawnChildSetupFunc child_setup; ++ gpointer user_data; ++ gint stdin_fd; ++ gint stdout_fd; ++ gint stderr_fd; ++} SlirpGSpawnFds; ++ ++static inline void slirp_gspawn_fds_setup(gpointer user_data) ++{ ++ SlirpGSpawnFds *q = (SlirpGSpawnFds *)user_data; ++ ++ dup2(q->stdin_fd, 0); ++ dup2(q->stdout_fd, 1); ++ dup2(q->stderr_fd, 2); ++ q->child_setup(q->user_data); ++} ++#endif ++ ++static inline gboolean ++g_spawn_async_with_fds_slirp(const gchar *working_directory, gchar **argv, ++ gchar **envp, GSpawnFlags flags, ++ GSpawnChildSetupFunc child_setup, ++ gpointer user_data, GPid *child_pid, gint stdin_fd, ++ gint stdout_fd, gint stderr_fd, GError **error) ++{ ++#if GLIB_CHECK_VERSION(2, 58, 0) ++ return g_spawn_async_with_fds(working_directory, argv, envp, flags, ++ child_setup, user_data, child_pid, stdin_fd, ++ stdout_fd, stderr_fd, error); ++#else ++ SlirpGSpawnFds setup = { ++ .child_setup = child_setup, ++ .user_data = user_data, ++ .stdin_fd = stdin_fd, ++ .stdout_fd = stdout_fd, ++ .stderr_fd = stderr_fd, ++ }; ++ ++ return g_spawn_async(working_directory, argv, envp, flags, ++ slirp_gspawn_fds_setup, &setup, child_pid, error); ++#endif ++} ++ ++#define g_spawn_async_with_fds(wd, argv, env, f, c, d, p, ifd, ofd, efd, err) \ ++ g_spawn_async_with_fds_slirp(wd, argv, env, f, c, d, p, ifd, ofd, efd, err) ++ ++#pragma GCC diagnostic pop ++ ++int fork_exec(struct socket *so, const char *ex) ++{ ++ GError *err = NULL; ++ gint argc = 0; ++ gchar **argv = NULL; ++ int opt, sp[2]; ++ ++ DEBUG_CALL("fork_exec"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("ex = %p", ex); ++ ++ if (slirp_socketpair_with_oob(sp) < 0) { ++ return 0; ++ } ++ ++ if (!g_shell_parse_argv(ex, &argc, &argv, &err)) { ++ g_critical("fork_exec invalid command: %s\nerror: %s", ex, err->message); ++ g_error_free(err); ++ return 0; ++ } ++ ++ g_spawn_async_with_fds(NULL /* cwd */, argv, NULL /* env */, ++ G_SPAWN_SEARCH_PATH, fork_exec_child_setup, ++ NULL /* data */, NULL /* child_pid */, sp[1], sp[1], ++ sp[1], &err); ++ g_strfreev(argv); ++ ++ if (err) { ++ g_critical("fork_exec: %s", err->message); ++ g_error_free(err); ++ closesocket(sp[0]); ++ closesocket(sp[1]); ++ return 0; ++ } ++ ++ so->s = sp[0]; ++ closesocket(sp[1]); ++ slirp_socket_set_fast_reuse(so->s); ++ opt = 1; ++ setsockopt(so->s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int)); ++ slirp_set_nonblock(so->s); ++ so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); ++ return 1; ++} ++ ++int open_unix(struct socket *so, const char *unixpath) ++{ ++#ifdef G_OS_UNIX ++ struct sockaddr_un sa; ++ int s; ++ ++ DEBUG_CALL("open_unix"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("unixpath = %s", unixpath); ++ ++ memset(&sa, 0, sizeof(sa)); ++ sa.sun_family = AF_UNIX; ++ if (g_strlcpy(sa.sun_path, unixpath, sizeof(sa.sun_path)) >= sizeof(sa.sun_path)) { ++ g_critical("Bad unix path: %s", unixpath); ++ return 0; ++ } ++ ++ s = slirp_socket(PF_UNIX, SOCK_STREAM, 0); ++ if (s < 0) { ++ g_critical("open_unix(): %s", strerror(errno)); ++ return 0; ++ } ++ ++ if (connect(s, (struct sockaddr *)&sa, sizeof(sa)) < 0) { ++ g_critical("open_unix(): %s", strerror(errno)); ++ closesocket(s); ++ return 0; ++ } ++ ++ so->s = s; ++ slirp_set_nonblock(so->s); ++ so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); ++ ++ return 1; ++#else ++ g_assert_not_reached(); ++#endif ++} ++ ++char *slirp_connection_info(Slirp *slirp) ++{ ++ GString *str = g_string_new(NULL); ++ const char *const tcpstates[] = { ++ [TCPS_CLOSED] = "CLOSED", [TCPS_LISTEN] = "LISTEN", ++ [TCPS_SYN_SENT] = "SYN_SENT", [TCPS_SYN_RECEIVED] = "SYN_RCVD", ++ [TCPS_ESTABLISHED] = "ESTABLISHED", [TCPS_CLOSE_WAIT] = "CLOSE_WAIT", ++ [TCPS_FIN_WAIT_1] = "FIN_WAIT_1", [TCPS_CLOSING] = "CLOSING", ++ [TCPS_LAST_ACK] = "LAST_ACK", [TCPS_FIN_WAIT_2] = "FIN_WAIT_2", ++ [TCPS_TIME_WAIT] = "TIME_WAIT", ++ }; ++ struct in_addr dst_addr; ++ struct sockaddr_in src; ++ socklen_t src_len; ++ uint16_t dst_port; ++ struct socket *so; ++ const char *state; ++ char buf[20]; ++ ++ g_string_append_printf(str, ++ " Protocol[State] FD Source Address Port " ++ "Dest. Address Port RecvQ SendQ\n"); ++ ++ /* TODO: IPv6 */ ++ ++ for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so->so_next) { ++ if (so->so_state & SS_HOSTFWD) { ++ state = "HOST_FORWARD"; ++ } else if (so->so_tcpcb) { ++ state = tcpstates[so->so_tcpcb->t_state]; ++ } else { ++ state = "NONE"; ++ } ++ if (so->so_state & (SS_HOSTFWD | SS_INCOMING)) { ++ src_len = sizeof(src); ++ getsockname(so->s, (struct sockaddr *)&src, &src_len); ++ dst_addr = so->so_laddr; ++ dst_port = so->so_lport; ++ } else { ++ src.sin_addr = so->so_laddr; ++ src.sin_port = so->so_lport; ++ dst_addr = so->so_faddr; ++ dst_port = so->so_fport; ++ } ++ slirp_fmt0(buf, sizeof(buf), " TCP[%s]", state); ++ g_string_append_printf(str, "%-19s %3d %15s %5d ", buf, so->s, ++ src.sin_addr.s_addr ? inet_ntoa(src.sin_addr) : ++ "*", ++ ntohs(src.sin_port)); ++ g_string_append_printf(str, "%15s %5d %5d %5d\n", inet_ntoa(dst_addr), ++ ntohs(dst_port), so->so_rcv.sb_cc, ++ so->so_snd.sb_cc); ++ } ++ ++ for (so = slirp->udb.so_next; so != &slirp->udb; so = so->so_next) { ++ if (so->so_state & SS_HOSTFWD) { ++ slirp_fmt0(buf, sizeof(buf), " UDP[HOST_FORWARD]"); ++ src_len = sizeof(src); ++ getsockname(so->s, (struct sockaddr *)&src, &src_len); ++ dst_addr = so->so_laddr; ++ dst_port = so->so_lport; ++ } else { ++ slirp_fmt0(buf, sizeof(buf), " UDP[%d sec]", ++ (so->so_expire - curtime) / 1000); ++ src.sin_addr = so->so_laddr; ++ src.sin_port = so->so_lport; ++ dst_addr = so->so_faddr; ++ dst_port = so->so_fport; ++ } ++ g_string_append_printf(str, "%-19s %3d %15s %5d ", buf, so->s, ++ src.sin_addr.s_addr ? inet_ntoa(src.sin_addr) : ++ "*", ++ ntohs(src.sin_port)); ++ g_string_append_printf(str, "%15s %5d %5d %5d\n", inet_ntoa(dst_addr), ++ ntohs(dst_port), so->so_rcv.sb_cc, ++ so->so_snd.sb_cc); ++ } ++ ++ for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so->so_next) { ++ slirp_fmt0(buf, sizeof(buf), " ICMP[%d sec]", ++ (so->so_expire - curtime) / 1000); ++ src.sin_addr = so->so_laddr; ++ dst_addr = so->so_faddr; ++ g_string_append_printf(str, "%-19s %3d %15s - ", buf, so->s, ++ src.sin_addr.s_addr ? inet_ntoa(src.sin_addr) : ++ "*"); ++ g_string_append_printf(str, "%15s - %5d %5d\n", inet_ntoa(dst_addr), ++ so->so_rcv.sb_cc, so->so_snd.sb_cc); ++ } ++ ++ return g_string_free(str, FALSE); ++} ++ ++char *slirp_neighbor_info(Slirp *slirp) ++{ ++ GString *str = g_string_new(NULL); ++ ArpTable *arp_table = &slirp->arp_table; ++ NdpTable *ndp_table = &slirp->ndp_table; ++ char ip_addr[INET6_ADDRSTRLEN]; ++ char eth_addr[ETH_ADDRSTRLEN]; ++ const char *ip; ++ ++ g_string_append_printf(str, " %5s %-17s %s\n", ++ "Table", "MacAddr", "IP Address"); ++ ++ for (int i = 0; i < ARP_TABLE_SIZE; ++i) { ++ struct in_addr addr; ++ addr.s_addr = arp_table->table[i].ar_sip; ++ if (!addr.s_addr) { ++ continue; ++ } ++ ip = inet_ntop(AF_INET, &addr, ip_addr, sizeof(ip_addr)); ++ g_assert(ip != NULL); ++ g_string_append_printf(str, " %5s %-17s %s\n", "ARP", ++ slirp_ether_ntoa(arp_table->table[i].ar_sha, ++ eth_addr, sizeof(eth_addr)), ++ ip); ++ } ++ ++ for (int i = 0; i < NDP_TABLE_SIZE; ++i) { ++ if (in6_zero(&ndp_table->table[i].ip_addr)) { ++ continue; ++ } ++ ip = inet_ntop(AF_INET6, &ndp_table->table[i].ip_addr, ip_addr, ++ sizeof(ip_addr)); ++ g_assert(ip != NULL); ++ g_string_append_printf(str, " %5s %-17s %s\n", "NDP", ++ slirp_ether_ntoa(ndp_table->table[i].eth_addr, ++ eth_addr, sizeof(eth_addr)), ++ ip); ++ } ++ ++ return g_string_free(str, FALSE); ++} ++ ++int slirp_bind_outbound(struct socket *so, unsigned short af) ++{ ++ int ret = 0; ++ struct sockaddr *addr = NULL; ++ int addr_size = 0; ++ ++ if (af == AF_INET && so->slirp->outbound_addr != NULL) { ++ addr = (struct sockaddr *)so->slirp->outbound_addr; ++ addr_size = sizeof(struct sockaddr_in); ++ } else if (af == AF_INET6 && so->slirp->outbound_addr6 != NULL) { ++ addr = (struct sockaddr *)so->slirp->outbound_addr6; ++ addr_size = sizeof(struct sockaddr_in6); ++ } ++ ++ if (addr != NULL) { ++ ret = bind(so->s, addr, addr_size); ++ } ++ return ret; ++} +diff --git a/slirp/src/misc.h b/slirp/src/misc.h +new file mode 100644 +index 0000000000..81b370cfb1 +--- /dev/null ++++ b/slirp/src/misc.h +@@ -0,0 +1,72 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#ifndef MISC_H ++#define MISC_H ++ ++#include "libslirp.h" ++ ++struct gfwd_list { ++ SlirpWriteCb write_cb; ++ void *opaque; ++ struct in_addr ex_addr; /* Server address */ ++ int ex_fport; /* Port to telnet to */ ++ char *ex_exec; /* Command line of what to exec */ ++ char *ex_unix; /* unix socket */ ++ struct gfwd_list *ex_next; ++}; ++ ++#define EMU_NONE 0x0 ++ ++/* TCP emulations */ ++#define EMU_CTL 0x1 ++#define EMU_FTP 0x2 ++#define EMU_KSH 0x3 ++#define EMU_IRC 0x4 ++#define EMU_REALAUDIO 0x5 ++#define EMU_RLOGIN 0x6 ++#define EMU_IDENT 0x7 ++ ++#define EMU_NOCONNECT 0x10 /* Don't connect */ ++ ++struct tos_t { ++ uint16_t lport; ++ uint16_t fport; ++ uint8_t tos; ++ uint8_t emu; ++}; ++ ++struct emu_t { ++ uint16_t lport; ++ uint16_t fport; ++ uint8_t tos; ++ uint8_t emu; ++ struct emu_t *next; ++}; ++ ++struct slirp_quehead { ++ struct slirp_quehead *qh_link; ++ struct slirp_quehead *qh_rlink; ++}; ++ ++void slirp_insque(void *, void *); ++void slirp_remque(void *); ++int fork_exec(struct socket *so, const char *ex); ++int open_unix(struct socket *so, const char *unixsock); ++ ++struct gfwd_list *add_guestfwd(struct gfwd_list **ex_ptr, SlirpWriteCb write_cb, ++ void *opaque, struct in_addr addr, int port); ++ ++struct gfwd_list *add_exec(struct gfwd_list **ex_ptr, const char *cmdline, ++ struct in_addr addr, int port); ++ ++struct gfwd_list *add_unix(struct gfwd_list **ex_ptr, const char *unixsock, ++ struct in_addr addr, int port); ++ ++int remove_guestfwd(struct gfwd_list **ex_ptr, struct in_addr addr, int port); ++ ++int slirp_bind_outbound(struct socket *so, unsigned short af); ++ ++#endif +diff --git a/slirp/src/ncsi-pkt.h b/slirp/src/ncsi-pkt.h +new file mode 100644 +index 0000000000..39cf8446d6 +--- /dev/null ++++ b/slirp/src/ncsi-pkt.h +@@ -0,0 +1,445 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright Gavin Shan, IBM Corporation 2016. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++ ++#ifndef NCSI_PKT_H ++#define NCSI_PKT_H ++ ++/* from linux/net/ncsi/ncsi-pkt.h */ ++#define __be32 uint32_t ++#define __be16 uint16_t ++ ++struct ncsi_pkt_hdr { ++ unsigned char mc_id; /* Management controller ID */ ++ unsigned char revision; /* NCSI version - 0x01 */ ++ unsigned char reserved; /* Reserved */ ++ unsigned char id; /* Packet sequence number */ ++ unsigned char type; /* Packet type */ ++ unsigned char channel; /* Network controller ID */ ++ __be16 length; /* Payload length */ ++ __be32 reserved1[2]; /* Reserved */ ++} SLIRP_PACKED; ++ ++struct ncsi_cmd_pkt_hdr { ++ struct ncsi_pkt_hdr common; /* Common NCSI packet header */ ++} SLIRP_PACKED; ++ ++struct ncsi_rsp_pkt_hdr { ++ struct ncsi_pkt_hdr common; /* Common NCSI packet header */ ++ __be16 code; /* Response code */ ++ __be16 reason; /* Response reason */ ++} SLIRP_PACKED; ++ ++struct ncsi_aen_pkt_hdr { ++ struct ncsi_pkt_hdr common; /* Common NCSI packet header */ ++ unsigned char reserved2[3]; /* Reserved */ ++ unsigned char type; /* AEN packet type */ ++} SLIRP_PACKED; ++ ++/* NCSI common command packet */ ++struct ncsi_cmd_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[26]; ++} SLIRP_PACKED; ++ ++struct ncsi_rsp_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++} SLIRP_PACKED; ++ ++/* Select Package */ ++struct ncsi_cmd_sp_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ unsigned char reserved[3]; /* Reserved */ ++ unsigned char hw_arbitration; /* HW arbitration */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++} SLIRP_PACKED; ++ ++/* Disable Channel */ ++struct ncsi_cmd_dc_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ unsigned char reserved[3]; /* Reserved */ ++ unsigned char ald; /* Allow link down */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++} SLIRP_PACKED; ++ ++/* Reset Channel */ ++struct ncsi_cmd_rc_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ __be32 reserved; /* Reserved */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++} SLIRP_PACKED; ++ ++/* AEN Enable */ ++struct ncsi_cmd_ae_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ unsigned char reserved[3]; /* Reserved */ ++ unsigned char mc_id; /* MC ID */ ++ __be32 mode; /* AEN working mode */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[18]; ++} SLIRP_PACKED; ++ ++/* Set Link */ ++struct ncsi_cmd_sl_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ __be32 mode; /* Link working mode */ ++ __be32 oem_mode; /* OEM link mode */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[18]; ++} SLIRP_PACKED; ++ ++/* Set VLAN Filter */ ++struct ncsi_cmd_svf_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ __be16 reserved; /* Reserved */ ++ __be16 vlan; /* VLAN ID */ ++ __be16 reserved1; /* Reserved */ ++ unsigned char index; /* VLAN table index */ ++ unsigned char enable; /* Enable or disable */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[14]; ++} SLIRP_PACKED; ++ ++/* Enable VLAN */ ++struct ncsi_cmd_ev_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ unsigned char reserved[3]; /* Reserved */ ++ unsigned char mode; /* VLAN filter mode */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++} SLIRP_PACKED; ++ ++/* Set MAC Address */ ++struct ncsi_cmd_sma_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ unsigned char mac[6]; /* MAC address */ ++ unsigned char index; /* MAC table index */ ++ unsigned char at_e; /* Addr type and operation */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[18]; ++} SLIRP_PACKED; ++ ++/* Enable Broadcast Filter */ ++struct ncsi_cmd_ebf_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ __be32 mode; /* Filter mode */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++} SLIRP_PACKED; ++ ++/* Enable Global Multicast Filter */ ++struct ncsi_cmd_egmf_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ __be32 mode; /* Global MC mode */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++} SLIRP_PACKED; ++ ++/* Set NCSI Flow Control */ ++struct ncsi_cmd_snfc_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ unsigned char reserved[3]; /* Reserved */ ++ unsigned char mode; /* Flow control mode */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++} SLIRP_PACKED; ++ ++/* Get Link Status */ ++struct ncsi_rsp_gls_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 status; /* Link status */ ++ __be32 other; /* Other indications */ ++ __be32 oem_status; /* OEM link status */ ++ __be32 checksum; ++ unsigned char pad[10]; ++} SLIRP_PACKED; ++ ++/* Get Version ID */ ++struct ncsi_rsp_gvi_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 ncsi_version; /* NCSI version */ ++ unsigned char reserved[3]; /* Reserved */ ++ unsigned char alpha2; /* NCSI version */ ++ unsigned char fw_name[12]; /* f/w name string */ ++ __be32 fw_version; /* f/w version */ ++ __be16 pci_ids[4]; /* PCI IDs */ ++ __be32 mf_id; /* Manufacture ID */ ++ __be32 checksum; ++} SLIRP_PACKED; ++ ++/* Get Capabilities */ ++struct ncsi_rsp_gc_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 cap; /* Capabilities */ ++ __be32 bc_cap; /* Broadcast cap */ ++ __be32 mc_cap; /* Multicast cap */ ++ __be32 buf_cap; /* Buffering cap */ ++ __be32 aen_cap; /* AEN cap */ ++ unsigned char vlan_cnt; /* VLAN filter count */ ++ unsigned char mixed_cnt; /* Mix filter count */ ++ unsigned char mc_cnt; /* MC filter count */ ++ unsigned char uc_cnt; /* UC filter count */ ++ unsigned char reserved[2]; /* Reserved */ ++ unsigned char vlan_mode; /* VLAN mode */ ++ unsigned char channel_cnt; /* Channel count */ ++ __be32 checksum; /* Checksum */ ++} SLIRP_PACKED; ++ ++/* Get Parameters */ ++struct ncsi_rsp_gp_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ unsigned char mac_cnt; /* Number of MAC addr */ ++ unsigned char reserved[2]; /* Reserved */ ++ unsigned char mac_enable; /* MAC addr enable flags */ ++ unsigned char vlan_cnt; /* VLAN tag count */ ++ unsigned char reserved1; /* Reserved */ ++ __be16 vlan_enable; /* VLAN tag enable flags */ ++ __be32 link_mode; /* Link setting */ ++ __be32 bc_mode; /* BC filter mode */ ++ __be32 valid_modes; /* Valid mode parameters */ ++ unsigned char vlan_mode; /* VLAN mode */ ++ unsigned char fc_mode; /* Flow control mode */ ++ unsigned char reserved2[2]; /* Reserved */ ++ __be32 aen_mode; /* AEN mode */ ++ unsigned char mac[6]; /* Supported MAC addr */ ++ __be16 vlan; /* Supported VLAN tags */ ++ __be32 checksum; /* Checksum */ ++} SLIRP_PACKED; ++ ++/* Get Controller Packet Statistics */ ++struct ncsi_rsp_gcps_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 cnt_hi; /* Counter cleared */ ++ __be32 cnt_lo; /* Counter cleared */ ++ __be32 rx_bytes; /* Rx bytes */ ++ __be32 tx_bytes; /* Tx bytes */ ++ __be32 rx_uc_pkts; /* Rx UC packets */ ++ __be32 rx_mc_pkts; /* Rx MC packets */ ++ __be32 rx_bc_pkts; /* Rx BC packets */ ++ __be32 tx_uc_pkts; /* Tx UC packets */ ++ __be32 tx_mc_pkts; /* Tx MC packets */ ++ __be32 tx_bc_pkts; /* Tx BC packets */ ++ __be32 fcs_err; /* FCS errors */ ++ __be32 align_err; /* Alignment errors */ ++ __be32 false_carrier; /* False carrier detection */ ++ __be32 runt_pkts; /* Rx runt packets */ ++ __be32 jabber_pkts; /* Rx jabber packets */ ++ __be32 rx_pause_xon; /* Rx pause XON frames */ ++ __be32 rx_pause_xoff; /* Rx XOFF frames */ ++ __be32 tx_pause_xon; /* Tx XON frames */ ++ __be32 tx_pause_xoff; /* Tx XOFF frames */ ++ __be32 tx_s_collision; /* Single collision frames */ ++ __be32 tx_m_collision; /* Multiple collision frames */ ++ __be32 l_collision; /* Late collision frames */ ++ __be32 e_collision; /* Excessive collision frames */ ++ __be32 rx_ctl_frames; /* Rx control frames */ ++ __be32 rx_64_frames; /* Rx 64-bytes frames */ ++ __be32 rx_127_frames; /* Rx 65-127 bytes frames */ ++ __be32 rx_255_frames; /* Rx 128-255 bytes frames */ ++ __be32 rx_511_frames; /* Rx 256-511 bytes frames */ ++ __be32 rx_1023_frames; /* Rx 512-1023 bytes frames */ ++ __be32 rx_1522_frames; /* Rx 1024-1522 bytes frames */ ++ __be32 rx_9022_frames; /* Rx 1523-9022 bytes frames */ ++ __be32 tx_64_frames; /* Tx 64-bytes frames */ ++ __be32 tx_127_frames; /* Tx 65-127 bytes frames */ ++ __be32 tx_255_frames; /* Tx 128-255 bytes frames */ ++ __be32 tx_511_frames; /* Tx 256-511 bytes frames */ ++ __be32 tx_1023_frames; /* Tx 512-1023 bytes frames */ ++ __be32 tx_1522_frames; /* Tx 1024-1522 bytes frames */ ++ __be32 tx_9022_frames; /* Tx 1523-9022 bytes frames */ ++ __be32 rx_valid_bytes; /* Rx valid bytes */ ++ __be32 rx_runt_pkts; /* Rx error runt packets */ ++ __be32 rx_jabber_pkts; /* Rx error jabber packets */ ++ __be32 checksum; /* Checksum */ ++} SLIRP_PACKED; ++ ++/* Get NCSI Statistics */ ++struct ncsi_rsp_gns_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 rx_cmds; /* Rx NCSI commands */ ++ __be32 dropped_cmds; /* Dropped commands */ ++ __be32 cmd_type_errs; /* Command type errors */ ++ __be32 cmd_csum_errs; /* Command checksum errors */ ++ __be32 rx_pkts; /* Rx NCSI packets */ ++ __be32 tx_pkts; /* Tx NCSI packets */ ++ __be32 tx_aen_pkts; /* Tx AEN packets */ ++ __be32 checksum; /* Checksum */ ++} SLIRP_PACKED; ++ ++/* Get NCSI Pass-through Statistics */ ++struct ncsi_rsp_gnpts_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 tx_pkts; /* Tx packets */ ++ __be32 tx_dropped; /* Tx dropped packets */ ++ __be32 tx_channel_err; /* Tx channel errors */ ++ __be32 tx_us_err; /* Tx undersize errors */ ++ __be32 rx_pkts; /* Rx packets */ ++ __be32 rx_dropped; /* Rx dropped packets */ ++ __be32 rx_channel_err; /* Rx channel errors */ ++ __be32 rx_us_err; /* Rx undersize errors */ ++ __be32 rx_os_err; /* Rx oversize errors */ ++ __be32 checksum; /* Checksum */ ++} SLIRP_PACKED; ++ ++/* Get package status */ ++struct ncsi_rsp_gps_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 status; /* Hardware arbitration status */ ++ __be32 checksum; ++} SLIRP_PACKED; ++ ++/* Get package UUID */ ++struct ncsi_rsp_gpuuid_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ unsigned char uuid[16]; /* UUID */ ++ __be32 checksum; ++} SLIRP_PACKED; ++ ++/* AEN: Link State Change */ ++struct ncsi_aen_lsc_pkt { ++ struct ncsi_aen_pkt_hdr aen; /* AEN header */ ++ __be32 status; /* Link status */ ++ __be32 oem_status; /* OEM link status */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[14]; ++} SLIRP_PACKED; ++ ++/* AEN: Configuration Required */ ++struct ncsi_aen_cr_pkt { ++ struct ncsi_aen_pkt_hdr aen; /* AEN header */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++} SLIRP_PACKED; ++ ++/* AEN: Host Network Controller Driver Status Change */ ++struct ncsi_aen_hncdsc_pkt { ++ struct ncsi_aen_pkt_hdr aen; /* AEN header */ ++ __be32 status; /* Status */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[18]; ++} SLIRP_PACKED; ++ ++/* NCSI packet revision */ ++#define NCSI_PKT_REVISION 0x01 ++ ++/* NCSI packet commands */ ++#define NCSI_PKT_CMD_CIS 0x00 /* Clear Initial State */ ++#define NCSI_PKT_CMD_SP 0x01 /* Select Package */ ++#define NCSI_PKT_CMD_DP 0x02 /* Deselect Package */ ++#define NCSI_PKT_CMD_EC 0x03 /* Enable Channel */ ++#define NCSI_PKT_CMD_DC 0x04 /* Disable Channel */ ++#define NCSI_PKT_CMD_RC 0x05 /* Reset Channel */ ++#define NCSI_PKT_CMD_ECNT 0x06 /* Enable Channel Network Tx */ ++#define NCSI_PKT_CMD_DCNT 0x07 /* Disable Channel Network Tx */ ++#define NCSI_PKT_CMD_AE 0x08 /* AEN Enable */ ++#define NCSI_PKT_CMD_SL 0x09 /* Set Link */ ++#define NCSI_PKT_CMD_GLS 0x0a /* Get Link */ ++#define NCSI_PKT_CMD_SVF 0x0b /* Set VLAN Filter */ ++#define NCSI_PKT_CMD_EV 0x0c /* Enable VLAN */ ++#define NCSI_PKT_CMD_DV 0x0d /* Disable VLAN */ ++#define NCSI_PKT_CMD_SMA 0x0e /* Set MAC address */ ++#define NCSI_PKT_CMD_EBF 0x10 /* Enable Broadcast Filter */ ++#define NCSI_PKT_CMD_DBF 0x11 /* Disable Broadcast Filter */ ++#define NCSI_PKT_CMD_EGMF 0x12 /* Enable Global Multicast Filter */ ++#define NCSI_PKT_CMD_DGMF 0x13 /* Disable Global Multicast Filter */ ++#define NCSI_PKT_CMD_SNFC 0x14 /* Set NCSI Flow Control */ ++#define NCSI_PKT_CMD_GVI 0x15 /* Get Version ID */ ++#define NCSI_PKT_CMD_GC 0x16 /* Get Capabilities */ ++#define NCSI_PKT_CMD_GP 0x17 /* Get Parameters */ ++#define NCSI_PKT_CMD_GCPS 0x18 /* Get Controller Packet Statistics */ ++#define NCSI_PKT_CMD_GNS 0x19 /* Get NCSI Statistics */ ++#define NCSI_PKT_CMD_GNPTS 0x1a /* Get NCSI Pass-throu Statistics */ ++#define NCSI_PKT_CMD_GPS 0x1b /* Get package status */ ++#define NCSI_PKT_CMD_OEM 0x50 /* OEM */ ++#define NCSI_PKT_CMD_PLDM 0x51 /* PLDM request over NCSI over RBT */ ++#define NCSI_PKT_CMD_GPUUID 0x52 /* Get package UUID */ ++ ++/* NCSI packet responses */ ++#define NCSI_PKT_RSP_CIS (NCSI_PKT_CMD_CIS + 0x80) ++#define NCSI_PKT_RSP_SP (NCSI_PKT_CMD_SP + 0x80) ++#define NCSI_PKT_RSP_DP (NCSI_PKT_CMD_DP + 0x80) ++#define NCSI_PKT_RSP_EC (NCSI_PKT_CMD_EC + 0x80) ++#define NCSI_PKT_RSP_DC (NCSI_PKT_CMD_DC + 0x80) ++#define NCSI_PKT_RSP_RC (NCSI_PKT_CMD_RC + 0x80) ++#define NCSI_PKT_RSP_ECNT (NCSI_PKT_CMD_ECNT + 0x80) ++#define NCSI_PKT_RSP_DCNT (NCSI_PKT_CMD_DCNT + 0x80) ++#define NCSI_PKT_RSP_AE (NCSI_PKT_CMD_AE + 0x80) ++#define NCSI_PKT_RSP_SL (NCSI_PKT_CMD_SL + 0x80) ++#define NCSI_PKT_RSP_GLS (NCSI_PKT_CMD_GLS + 0x80) ++#define NCSI_PKT_RSP_SVF (NCSI_PKT_CMD_SVF + 0x80) ++#define NCSI_PKT_RSP_EV (NCSI_PKT_CMD_EV + 0x80) ++#define NCSI_PKT_RSP_DV (NCSI_PKT_CMD_DV + 0x80) ++#define NCSI_PKT_RSP_SMA (NCSI_PKT_CMD_SMA + 0x80) ++#define NCSI_PKT_RSP_EBF (NCSI_PKT_CMD_EBF + 0x80) ++#define NCSI_PKT_RSP_DBF (NCSI_PKT_CMD_DBF + 0x80) ++#define NCSI_PKT_RSP_EGMF (NCSI_PKT_CMD_EGMF + 0x80) ++#define NCSI_PKT_RSP_DGMF (NCSI_PKT_CMD_DGMF + 0x80) ++#define NCSI_PKT_RSP_SNFC (NCSI_PKT_CMD_SNFC + 0x80) ++#define NCSI_PKT_RSP_GVI (NCSI_PKT_CMD_GVI + 0x80) ++#define NCSI_PKT_RSP_GC (NCSI_PKT_CMD_GC + 0x80) ++#define NCSI_PKT_RSP_GP (NCSI_PKT_CMD_GP + 0x80) ++#define NCSI_PKT_RSP_GCPS (NCSI_PKT_CMD_GCPS + 0x80) ++#define NCSI_PKT_RSP_GNS (NCSI_PKT_CMD_GNS + 0x80) ++#define NCSI_PKT_RSP_GNPTS (NCSI_PKT_CMD_GNPTS + 0x80) ++#define NCSI_PKT_RSP_GPS (NCSI_PKT_CMD_GPS + 0x80) ++#define NCSI_PKT_RSP_OEM (NCSI_PKT_CMD_OEM + 0x80) ++#define NCSI_PKT_RSP_PLDM (NCSI_PKT_CMD_PLDM + 0x80) ++#define NCSI_PKT_RSP_GPUUID (NCSI_PKT_CMD_GPUUID + 0x80) ++ ++/* NCSI response code/reason */ ++#define NCSI_PKT_RSP_C_COMPLETED 0x0000 /* Command Completed */ ++#define NCSI_PKT_RSP_C_FAILED 0x0001 /* Command Failed */ ++#define NCSI_PKT_RSP_C_UNAVAILABLE 0x0002 /* Command Unavailable */ ++#define NCSI_PKT_RSP_C_UNSUPPORTED 0x0003 /* Command Unsupported */ ++#define NCSI_PKT_RSP_R_NO_ERROR 0x0000 /* No Error */ ++#define NCSI_PKT_RSP_R_INTERFACE 0x0001 /* Interface not ready */ ++#define NCSI_PKT_RSP_R_PARAM 0x0002 /* Invalid Parameter */ ++#define NCSI_PKT_RSP_R_CHANNEL 0x0003 /* Channel not Ready */ ++#define NCSI_PKT_RSP_R_PACKAGE 0x0004 /* Package not Ready */ ++#define NCSI_PKT_RSP_R_LENGTH 0x0005 /* Invalid payload length */ ++#define NCSI_PKT_RSP_R_UNKNOWN 0x7fff /* Command type unsupported */ ++ ++/* NCSI AEN packet type */ ++#define NCSI_PKT_AEN 0xFF /* AEN Packet */ ++#define NCSI_PKT_AEN_LSC 0x00 /* Link status change */ ++#define NCSI_PKT_AEN_CR 0x01 /* Configuration required */ ++#define NCSI_PKT_AEN_HNCDSC 0x02 /* HNC driver status change */ ++ ++#endif /* NCSI_PKT_H */ +diff --git a/slirp/src/ncsi.c b/slirp/src/ncsi.c +new file mode 100644 +index 0000000000..f3427bd66d +--- /dev/null ++++ b/slirp/src/ncsi.c +@@ -0,0 +1,197 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * NC-SI (Network Controller Sideband Interface) "echo" model ++ * ++ * Copyright (C) 2016-2018 IBM Corp. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++#include "slirp.h" ++ ++#include "ncsi-pkt.h" ++ ++static uint32_t ncsi_calculate_checksum(uint8_t *data, int len) ++{ ++ uint32_t checksum = 0; ++ int i; ++ ++ /* ++ * 32-bit unsigned sum of the NC-SI packet header and NC-SI packet ++ * payload interpreted as a series of 16-bit unsigned integer values. ++ */ ++ for (i = 0; i < len; i += 2) { ++ checksum += (((uint16_t) data[i]) << 8) + data[i+1]; ++ } ++ ++ checksum = (~checksum + 1); ++ return checksum; ++} ++ ++/* Get Capabilities */ ++static int ncsi_rsp_handler_gc(struct ncsi_rsp_pkt_hdr *rnh) ++{ ++ struct ncsi_rsp_gc_pkt *rsp = (struct ncsi_rsp_gc_pkt *)rnh; ++ ++ rsp->cap = htonl(~0); ++ rsp->bc_cap = htonl(~0); ++ rsp->mc_cap = htonl(~0); ++ rsp->buf_cap = htonl(~0); ++ rsp->aen_cap = htonl(~0); ++ rsp->vlan_mode = 0xff; ++ rsp->uc_cnt = 2; ++ return 0; ++} ++ ++/* Get Link status */ ++static int ncsi_rsp_handler_gls(struct ncsi_rsp_pkt_hdr *rnh) ++{ ++ struct ncsi_rsp_gls_pkt *rsp = (struct ncsi_rsp_gls_pkt *)rnh; ++ ++ rsp->status = htonl(0x1); ++ return 0; ++} ++ ++/* Get Parameters */ ++static int ncsi_rsp_handler_gp(struct ncsi_rsp_pkt_hdr *rnh) ++{ ++ struct ncsi_rsp_gp_pkt *rsp = (struct ncsi_rsp_gp_pkt *)rnh; ++ ++ /* no MAC address filters or VLAN filters on the channel */ ++ rsp->mac_cnt = 0; ++ rsp->mac_enable = 0; ++ rsp->vlan_cnt = 0; ++ rsp->vlan_enable = 0; ++ ++ return 0; ++} ++ ++static const struct ncsi_rsp_handler { ++ unsigned char type; ++ int payload; ++ int (*handler)(struct ncsi_rsp_pkt_hdr *rnh); ++} ncsi_rsp_handlers[] = { { NCSI_PKT_RSP_CIS, 4, NULL }, ++ { NCSI_PKT_RSP_SP, 4, NULL }, ++ { NCSI_PKT_RSP_DP, 4, NULL }, ++ { NCSI_PKT_RSP_EC, 4, NULL }, ++ { NCSI_PKT_RSP_DC, 4, NULL }, ++ { NCSI_PKT_RSP_RC, 4, NULL }, ++ { NCSI_PKT_RSP_ECNT, 4, NULL }, ++ { NCSI_PKT_RSP_DCNT, 4, NULL }, ++ { NCSI_PKT_RSP_AE, 4, NULL }, ++ { NCSI_PKT_RSP_SL, 4, NULL }, ++ { NCSI_PKT_RSP_GLS, 16, ncsi_rsp_handler_gls }, ++ { NCSI_PKT_RSP_SVF, 4, NULL }, ++ { NCSI_PKT_RSP_EV, 4, NULL }, ++ { NCSI_PKT_RSP_DV, 4, NULL }, ++ { NCSI_PKT_RSP_SMA, 4, NULL }, ++ { NCSI_PKT_RSP_EBF, 4, NULL }, ++ { NCSI_PKT_RSP_DBF, 4, NULL }, ++ { NCSI_PKT_RSP_EGMF, 4, NULL }, ++ { NCSI_PKT_RSP_DGMF, 4, NULL }, ++ { NCSI_PKT_RSP_SNFC, 4, NULL }, ++ { NCSI_PKT_RSP_GVI, 40, NULL }, ++ { NCSI_PKT_RSP_GC, 32, ncsi_rsp_handler_gc }, ++ { NCSI_PKT_RSP_GP, 40, ncsi_rsp_handler_gp }, ++ { NCSI_PKT_RSP_GCPS, 172, NULL }, ++ { NCSI_PKT_RSP_GNS, 172, NULL }, ++ { NCSI_PKT_RSP_GNPTS, 172, NULL }, ++ { NCSI_PKT_RSP_GPS, 8, NULL }, ++ { NCSI_PKT_RSP_OEM, 0, NULL }, ++ { NCSI_PKT_RSP_PLDM, 0, NULL }, ++ { NCSI_PKT_RSP_GPUUID, 20, NULL } }; ++ ++/* ++ * packet format : ncsi header + payload + checksum ++ */ ++#define NCSI_MAX_PAYLOAD 172 ++#define NCSI_MAX_LEN (sizeof(struct ncsi_pkt_hdr) + NCSI_MAX_PAYLOAD + 4) ++ ++void ncsi_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) ++{ ++ const struct ncsi_pkt_hdr *nh = ++ (const struct ncsi_pkt_hdr *)(pkt + ETH_HLEN); ++ uint8_t ncsi_reply[ETH_HLEN + NCSI_MAX_LEN]; ++ struct ethhdr *reh = (struct ethhdr *)ncsi_reply; ++ struct ncsi_rsp_pkt_hdr *rnh = ++ (struct ncsi_rsp_pkt_hdr *)(ncsi_reply + ETH_HLEN); ++ const struct ncsi_rsp_handler *handler = NULL; ++ int i; ++ int ncsi_rsp_len = sizeof(*nh); ++ uint32_t checksum; ++ uint32_t *pchecksum; ++ ++ if (pkt_len < ETH_HLEN + sizeof(struct ncsi_pkt_hdr)) { ++ return; /* packet too short */ ++ } ++ ++ memset(ncsi_reply, 0, sizeof(ncsi_reply)); ++ ++ memset(reh->h_dest, 0xff, ETH_ALEN); ++ memset(reh->h_source, 0xff, ETH_ALEN); ++ reh->h_proto = htons(ETH_P_NCSI); ++ ++ for (i = 0; i < G_N_ELEMENTS(ncsi_rsp_handlers); i++) { ++ if (ncsi_rsp_handlers[i].type == nh->type + 0x80) { ++ handler = &ncsi_rsp_handlers[i]; ++ break; ++ } ++ } ++ ++ rnh->common.mc_id = nh->mc_id; ++ rnh->common.revision = NCSI_PKT_REVISION; ++ rnh->common.id = nh->id; ++ rnh->common.type = nh->type + 0x80; ++ rnh->common.channel = nh->channel; ++ ++ if (handler) { ++ rnh->common.length = htons(handler->payload); ++ rnh->code = htons(NCSI_PKT_RSP_C_COMPLETED); ++ rnh->reason = htons(NCSI_PKT_RSP_R_NO_ERROR); ++ ++ if (handler->handler) { ++ /* TODO: handle errors */ ++ handler->handler(rnh); ++ } ++ ncsi_rsp_len += handler->payload; ++ } else { ++ rnh->common.length = 0; ++ rnh->code = htons(NCSI_PKT_RSP_C_UNAVAILABLE); ++ rnh->reason = htons(NCSI_PKT_RSP_R_UNKNOWN); ++ } ++ ++ /* Add the optional checksum at the end of the frame. */ ++ checksum = ncsi_calculate_checksum((uint8_t *)rnh, ncsi_rsp_len); ++ pchecksum = (uint32_t *)((void *)rnh + ncsi_rsp_len); ++ *pchecksum = htonl(checksum); ++ ncsi_rsp_len += 4; ++ ++ slirp_send_packet_all(slirp, ncsi_reply, ETH_HLEN + ncsi_rsp_len); ++} +diff --git a/slirp/src/ndp_table.c b/slirp/src/ndp_table.c +new file mode 100644 +index 0000000000..fdb189d595 +--- /dev/null ++++ b/slirp/src/ndp_table.c +@@ -0,0 +1,98 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. ++ */ ++ ++#include "slirp.h" ++ ++void ndp_table_add(Slirp *slirp, struct in6_addr ip_addr, ++ uint8_t ethaddr[ETH_ALEN]) ++{ ++ char addrstr[INET6_ADDRSTRLEN]; ++ NdpTable *ndp_table = &slirp->ndp_table; ++ int i; ++ char ethaddr_str[ETH_ADDRSTRLEN]; ++ ++ inet_ntop(AF_INET6, &(ip_addr), addrstr, INET6_ADDRSTRLEN); ++ ++ DEBUG_CALL("ndp_table_add"); ++ DEBUG_ARG("ip = %s", addrstr); ++ DEBUG_ARG("hw addr = %s", slirp_ether_ntoa(ethaddr, ethaddr_str, ++ sizeof(ethaddr_str))); ++ ++ if (IN6_IS_ADDR_MULTICAST(&ip_addr) || in6_zero(&ip_addr)) { ++ /* Do not register multicast or unspecified addresses */ ++ DEBUG_CALL(" abort: do not register multicast or unspecified address"); ++ return; ++ } ++ ++ /* Search for an entry */ ++ for (i = 0; i < NDP_TABLE_SIZE; i++) { ++ if (in6_equal(&ndp_table->table[i].ip_addr, &ip_addr)) { ++ DEBUG_CALL(" already in table: update the entry"); ++ /* Update the entry */ ++ memcpy(ndp_table->table[i].eth_addr, ethaddr, ETH_ALEN); ++ return; ++ } ++ } ++ ++ /* No entry found, create a new one */ ++ DEBUG_CALL(" create new entry"); ++ /* Save the first entry, it is the guest. */ ++ if (in6_zero(&ndp_table->guest_in6_addr)) { ++ ndp_table->guest_in6_addr = ip_addr; ++ } ++ ndp_table->table[ndp_table->next_victim].ip_addr = ip_addr; ++ memcpy(ndp_table->table[ndp_table->next_victim].eth_addr, ethaddr, ++ ETH_ALEN); ++ ndp_table->next_victim = (ndp_table->next_victim + 1) % NDP_TABLE_SIZE; ++} ++ ++bool ndp_table_search(Slirp *slirp, struct in6_addr ip_addr, ++ uint8_t out_ethaddr[ETH_ALEN]) ++{ ++ char addrstr[INET6_ADDRSTRLEN]; ++ NdpTable *ndp_table = &slirp->ndp_table; ++ int i; ++ char ethaddr_str[ETH_ADDRSTRLEN]; ++ ++ inet_ntop(AF_INET6, &(ip_addr), addrstr, INET6_ADDRSTRLEN); ++ ++ DEBUG_CALL("ndp_table_search"); ++ DEBUG_ARG("ip = %s", addrstr); ++ ++ /* If unspecified address */ ++ if (in6_zero(&ip_addr)) { ++ /* return Ethernet broadcast address */ ++ memset(out_ethaddr, 0xff, ETH_ALEN); ++ return 1; ++ } ++ ++ /* Multicast address: fec0::abcd:efgh/8 -> 33:33:ab:cd:ef:gh */ ++ if (IN6_IS_ADDR_MULTICAST(&ip_addr)) { ++ out_ethaddr[0] = 0x33; ++ out_ethaddr[1] = 0x33; ++ out_ethaddr[2] = ip_addr.s6_addr[12]; ++ out_ethaddr[3] = ip_addr.s6_addr[13]; ++ out_ethaddr[4] = ip_addr.s6_addr[14]; ++ out_ethaddr[5] = ip_addr.s6_addr[15]; ++ DEBUG_ARG("multicast addr = %s", ++ slirp_ether_ntoa(out_ethaddr, ethaddr_str, ++ sizeof(ethaddr_str))); ++ return 1; ++ } ++ ++ for (i = 0; i < NDP_TABLE_SIZE; i++) { ++ if (in6_equal(&ndp_table->table[i].ip_addr, &ip_addr)) { ++ memcpy(out_ethaddr, ndp_table->table[i].eth_addr, ETH_ALEN); ++ DEBUG_ARG("found hw addr = %s", ++ slirp_ether_ntoa(out_ethaddr, ethaddr_str, ++ sizeof(ethaddr_str))); ++ return 1; ++ } ++ } ++ ++ DEBUG_CALL(" ip not found in table"); ++ return 0; ++} +diff --git a/slirp/src/sbuf.c b/slirp/src/sbuf.c +new file mode 100644 +index 0000000000..b357091705 +--- /dev/null ++++ b/slirp/src/sbuf.c +@@ -0,0 +1,168 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++ ++static void sbappendsb(struct sbuf *sb, struct mbuf *m); ++ ++void sbfree(struct sbuf *sb) ++{ ++ g_free(sb->sb_data); ++} ++ ++bool sbdrop(struct sbuf *sb, size_t num) ++{ ++ int limit = sb->sb_datalen / 2; ++ ++ g_warn_if_fail(num <= sb->sb_cc); ++ if (num > sb->sb_cc) ++ num = sb->sb_cc; ++ ++ sb->sb_cc -= num; ++ sb->sb_rptr += num; ++ if (sb->sb_rptr >= sb->sb_data + sb->sb_datalen) ++ sb->sb_rptr -= sb->sb_datalen; ++ ++ if (sb->sb_cc < limit && sb->sb_cc + num >= limit) { ++ return true; ++ } ++ ++ return false; ++} ++ ++void sbreserve(struct sbuf *sb, size_t size) ++{ ++ sb->sb_wptr = sb->sb_rptr = sb->sb_data = g_realloc(sb->sb_data, size); ++ sb->sb_cc = 0; ++ sb->sb_datalen = size; ++} ++ ++/* ++ * Try and write() to the socket, whatever doesn't get written ++ * append to the buffer... for a host with a fast net connection, ++ * this prevents an unnecessary copy of the data ++ * (the socket is non-blocking, so we won't hang) ++ */ ++void sbappend(struct socket *so, struct mbuf *m) ++{ ++ int ret = 0; ++ ++ DEBUG_CALL("sbappend"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("m->m_len = %d", m->m_len); ++ ++ /* Shouldn't happen, but... e.g. foreign host closes connection */ ++ if (m->m_len <= 0) { ++ m_free(m); ++ return; ++ } ++ ++ /* ++ * If there is urgent data, call sosendoob ++ * if not all was sent, sowrite will take care of the rest ++ * (The rest of this function is just an optimisation) ++ */ ++ if (so->so_urgc) { ++ sbappendsb(&so->so_rcv, m); ++ m_free(m); ++ sosendoob(so); ++ return; ++ } ++ ++ /* ++ * We only write if there's nothing in the buffer, ++ * ottherwise it'll arrive out of order, and hence corrupt ++ */ ++ if (!so->so_rcv.sb_cc) ++ ret = slirp_send(so, m->m_data, m->m_len, 0); ++ ++ if (ret <= 0) { ++ /* ++ * Nothing was written ++ * It's possible that the socket has closed, but ++ * we don't need to check because if it has closed, ++ * it will be detected in the normal way by soread() ++ */ ++ sbappendsb(&so->so_rcv, m); ++ } else if (ret != m->m_len) { ++ /* ++ * Something was written, but not everything.. ++ * sbappendsb the rest ++ */ ++ m->m_len -= ret; ++ m->m_data += ret; ++ sbappendsb(&so->so_rcv, m); ++ } /* else */ ++ /* Whatever happened, we free the mbuf */ ++ m_free(m); ++} ++ ++/* ++ * Copy the data from m into sb ++ * The caller is responsible to make sure there's enough room ++ */ ++static void sbappendsb(struct sbuf *sb, struct mbuf *m) ++{ ++ int len, n, nn; ++ ++ len = m->m_len; ++ ++ if (sb->sb_wptr < sb->sb_rptr) { ++ n = sb->sb_rptr - sb->sb_wptr; ++ if (n > len) ++ n = len; ++ memcpy(sb->sb_wptr, m->m_data, n); ++ } else { ++ /* Do the right edge first */ ++ n = sb->sb_data + sb->sb_datalen - sb->sb_wptr; ++ if (n > len) ++ n = len; ++ memcpy(sb->sb_wptr, m->m_data, n); ++ len -= n; ++ if (len) { ++ /* Now the left edge */ ++ nn = sb->sb_rptr - sb->sb_data; ++ if (nn > len) ++ nn = len; ++ memcpy(sb->sb_data, m->m_data + n, nn); ++ n += nn; ++ } ++ } ++ ++ sb->sb_cc += n; ++ sb->sb_wptr += n; ++ if (sb->sb_wptr >= sb->sb_data + sb->sb_datalen) ++ sb->sb_wptr -= sb->sb_datalen; ++} ++ ++/* ++ * Copy data from sbuf to a normal, straight buffer ++ * Don't update the sbuf rptr, this will be ++ * done in sbdrop when the data is acked ++ */ ++void sbcopy(struct sbuf *sb, size_t off, size_t len, char *to) ++{ ++ char *from; ++ ++ g_assert(len + off <= sb->sb_cc); ++ ++ from = sb->sb_rptr + off; ++ if (from >= sb->sb_data + sb->sb_datalen) ++ from -= sb->sb_datalen; ++ ++ if (from < sb->sb_wptr) { ++ memcpy(to, from, len); ++ } else { ++ /* re-use off */ ++ off = (sb->sb_data + sb->sb_datalen) - from; ++ if (off > len) ++ off = len; ++ memcpy(to, from, off); ++ len -= off; ++ if (len) ++ memcpy(to + off, sb->sb_data, len); ++ } ++} +diff --git a/slirp/src/sbuf.h b/slirp/src/sbuf.h +new file mode 100644 +index 0000000000..01886fbd01 +--- /dev/null ++++ b/slirp/src/sbuf.h +@@ -0,0 +1,27 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#ifndef SBUF_H ++#define SBUF_H ++ ++#define sbspace(sb) ((sb)->sb_datalen - (sb)->sb_cc) ++ ++struct sbuf { ++ uint32_t sb_cc; /* actual chars in buffer */ ++ uint32_t sb_datalen; /* Length of data */ ++ char *sb_wptr; /* write pointer. points to where the next ++ * bytes should be written in the sbuf */ ++ char *sb_rptr; /* read pointer. points to where the next ++ * byte should be read from the sbuf */ ++ char *sb_data; /* Actual data */ ++}; ++ ++void sbfree(struct sbuf *sb); ++bool sbdrop(struct sbuf *sb, size_t len); ++void sbreserve(struct sbuf *sb, size_t size); ++void sbappend(struct socket *sb, struct mbuf *mb); ++void sbcopy(struct sbuf *sb, size_t off, size_t len, char *p); ++ ++#endif +diff --git a/slirp/src/slirp.c b/slirp/src/slirp.c +new file mode 100644 +index 0000000000..9d3fee3e97 +--- /dev/null ++++ b/slirp/src/slirp.c +@@ -0,0 +1,1387 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * libslirp glue ++ * ++ * Copyright (c) 2004-2008 Fabrice Bellard ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. ++ */ ++#include "slirp.h" ++ ++ ++#ifndef _WIN32 ++#include ++#endif ++ ++/* https://gitlab.freedesktop.org/slirp/libslirp/issues/18 */ ++#if defined(__NetBSD__) && defined(if_mtu) ++#undef if_mtu ++#endif ++ ++int slirp_debug; ++ ++/* Define to 1 if you want KEEPALIVE timers */ ++bool slirp_do_keepalive; ++ ++/* host loopback address */ ++struct in_addr loopback_addr; ++/* host loopback network mask */ ++unsigned long loopback_mask; ++ ++/* emulated hosts use the MAC addr 52:55:IP:IP:IP:IP */ ++static const uint8_t special_ethaddr[ETH_ALEN] = { 0x52, 0x55, 0x00, ++ 0x00, 0x00, 0x00 }; ++ ++unsigned curtime; ++ ++static struct in_addr dns_addr; ++#ifndef _WIN32 ++static struct in6_addr dns6_addr; ++#endif ++static unsigned dns_addr_time; ++#ifndef _WIN32 ++static unsigned dns6_addr_time; ++#endif ++ ++#define TIMEOUT_FAST 2 /* milliseconds */ ++#define TIMEOUT_SLOW 499 /* milliseconds */ ++/* for the aging of certain requests like DNS */ ++#define TIMEOUT_DEFAULT 1000 /* milliseconds */ ++ ++#if defined(_WIN32) ++ ++int get_dns_addr(struct in_addr *pdns_addr) ++{ ++ FIXED_INFO *FixedInfo = NULL; ++ ULONG BufLen; ++ DWORD ret; ++ IP_ADDR_STRING *pIPAddr; ++ struct in_addr tmp_addr; ++ ++ if (dns_addr.s_addr != 0 && (curtime - dns_addr_time) < TIMEOUT_DEFAULT) { ++ *pdns_addr = dns_addr; ++ return 0; ++ } ++ ++ FixedInfo = (FIXED_INFO *)GlobalAlloc(GPTR, sizeof(FIXED_INFO)); ++ BufLen = sizeof(FIXED_INFO); ++ ++ if (ERROR_BUFFER_OVERFLOW == GetNetworkParams(FixedInfo, &BufLen)) { ++ if (FixedInfo) { ++ GlobalFree(FixedInfo); ++ FixedInfo = NULL; ++ } ++ FixedInfo = GlobalAlloc(GPTR, BufLen); ++ } ++ ++ if ((ret = GetNetworkParams(FixedInfo, &BufLen)) != ERROR_SUCCESS) { ++ printf("GetNetworkParams failed. ret = %08x\n", (unsigned)ret); ++ if (FixedInfo) { ++ GlobalFree(FixedInfo); ++ FixedInfo = NULL; ++ } ++ return -1; ++ } ++ ++ pIPAddr = &(FixedInfo->DnsServerList); ++ inet_aton(pIPAddr->IpAddress.String, &tmp_addr); ++ *pdns_addr = tmp_addr; ++ dns_addr = tmp_addr; ++ dns_addr_time = curtime; ++ if (FixedInfo) { ++ GlobalFree(FixedInfo); ++ FixedInfo = NULL; ++ } ++ return 0; ++} ++ ++int get_dns6_addr(struct in6_addr *pdns6_addr, uint32_t *scope_id) ++{ ++ return -1; ++} ++ ++static void winsock_cleanup(void) ++{ ++ WSACleanup(); ++} ++ ++#elif defined(__APPLE__) ++ ++#include ++ ++static int get_dns_addr_cached(void *pdns_addr, void *cached_addr, ++ socklen_t addrlen, unsigned *cached_time) ++{ ++ if (curtime - *cached_time < TIMEOUT_DEFAULT) { ++ memcpy(pdns_addr, cached_addr, addrlen); ++ return 0; ++ } ++ return 1; ++} ++ ++static int get_dns_addr_libresolv(int af, void *pdns_addr, void *cached_addr, ++ socklen_t addrlen, uint32_t *scope_id, ++ unsigned *cached_time) ++{ ++ struct __res_state state; ++ union res_sockaddr_union servers[NI_MAXSERV]; ++ int count; ++ int found; ++ ++ if (res_ninit(&state) != 0) { ++ return -1; ++ } ++ ++ count = res_getservers(&state, servers, NI_MAXSERV); ++ found = 0; ++ DEBUG_MISC("IP address of your DNS(s):"); ++ for (int i = 0; i < count; i++) { ++ if (af == servers[i].sin.sin_family) { ++ found++; ++ } ++ ++ // we use the first found entry ++ if (found == 1) { ++ memcpy(pdns_addr, &servers[i].sin.sin_addr, addrlen); ++ memcpy(cached_addr, &servers[i].sin.sin_addr, addrlen); ++ if (scope_id) { ++ *scope_id = 0; ++ } ++ *cached_time = curtime; ++ } ++ ++ if (found > 3) { ++ DEBUG_MISC(" (more)"); ++ break; ++ } else if (slirp_debug & DBG_MISC) { ++ char s[INET6_ADDRSTRLEN]; ++ const char *res = inet_ntop(servers[i].sin.sin_family, ++ &servers[i].sin.sin_addr, ++ s, ++ sizeof(s)); ++ if (!res) { ++ res = " (string conversion error)"; ++ } ++ DEBUG_MISC(" %s", res); ++ } ++ } ++ ++ res_nclose(&state); ++ if (!found) ++ return -1; ++ return 0; ++} ++ ++int get_dns_addr(struct in_addr *pdns_addr) ++{ ++ if (dns_addr.s_addr != 0) { ++ int ret; ++ ret = get_dns_addr_cached(pdns_addr, &dns_addr, sizeof(dns_addr), ++ &dns_addr_time); ++ if (ret <= 0) { ++ return ret; ++ } ++ } ++ return get_dns_addr_libresolv(AF_INET, pdns_addr, &dns_addr, ++ sizeof(dns_addr), NULL, &dns_addr_time); ++} ++ ++int get_dns6_addr(struct in6_addr *pdns6_addr, uint32_t *scope_id) ++{ ++ if (!in6_zero(&dns6_addr)) { ++ int ret; ++ ret = get_dns_addr_cached(pdns6_addr, &dns6_addr, sizeof(dns6_addr), ++ &dns6_addr_time); ++ if (ret <= 0) { ++ return ret; ++ } ++ } ++ return get_dns_addr_libresolv(AF_INET6, pdns6_addr, &dns6_addr, ++ sizeof(dns6_addr), scope_id, &dns6_addr_time); ++} ++ ++#else // !defined(_WIN32) && !defined(__APPLE__) ++ ++#if defined(__HAIKU__) ++#define RESOLV_CONF_PATH "/boot/system/settings/network/resolv.conf" ++#else ++#define RESOLV_CONF_PATH "/etc/resolv.conf" ++#endif ++ ++static int get_dns_addr_cached(void *pdns_addr, void *cached_addr, ++ socklen_t addrlen, struct stat *cached_stat, ++ unsigned *cached_time) ++{ ++ struct stat old_stat; ++ if (curtime - *cached_time < TIMEOUT_DEFAULT) { ++ memcpy(pdns_addr, cached_addr, addrlen); ++ return 0; ++ } ++ old_stat = *cached_stat; ++ if (stat(RESOLV_CONF_PATH, cached_stat) != 0) { ++ return -1; ++ } ++ if (cached_stat->st_dev == old_stat.st_dev && ++ cached_stat->st_ino == old_stat.st_ino && ++ cached_stat->st_size == old_stat.st_size && ++ cached_stat->st_mtime == old_stat.st_mtime) { ++ memcpy(pdns_addr, cached_addr, addrlen); ++ return 0; ++ } ++ return 1; ++} ++ ++static int get_dns_addr_resolv_conf(int af, void *pdns_addr, void *cached_addr, ++ socklen_t addrlen, uint32_t *scope_id, ++ unsigned *cached_time) ++{ ++ char buff[512]; ++ char buff2[257]; ++ FILE *f; ++ int found = 0; ++ union { ++ struct in_addr dns_addr; ++ struct in6_addr dns6_addr; ++ } tmp_addr; ++ unsigned if_index; ++ ++ assert(sizeof(tmp_addr) >= addrlen); ++ f = fopen(RESOLV_CONF_PATH, "r"); ++ if (!f) ++ return -1; ++ ++ DEBUG_MISC("IP address of your DNS(s):"); ++ while (fgets(buff, 512, f) != NULL) { ++ if (sscanf(buff, "nameserver%*[ \t]%256s", buff2) == 1) { ++ char *c = strchr(buff2, '%'); ++ if (c) { ++ if_index = if_nametoindex(c + 1); ++ *c = '\0'; ++ } else { ++ if_index = 0; ++ } ++ ++ if (!inet_pton(af, buff2, &tmp_addr)) { ++ continue; ++ } ++ /* If it's the first one, set it to dns_addr */ ++ if (!found) { ++ memcpy(pdns_addr, &tmp_addr, addrlen); ++ memcpy(cached_addr, &tmp_addr, addrlen); ++ if (scope_id) { ++ *scope_id = if_index; ++ } ++ *cached_time = curtime; ++ } ++ ++ if (++found > 3) { ++ DEBUG_MISC(" (more)"); ++ break; ++ } else if (slirp_debug & DBG_MISC) { ++ char s[INET6_ADDRSTRLEN]; ++ const char *res = inet_ntop(af, &tmp_addr, s, sizeof(s)); ++ if (!res) { ++ res = " (string conversion error)"; ++ } ++ DEBUG_MISC(" %s", res); ++ } ++ } ++ } ++ fclose(f); ++ if (!found) ++ return -1; ++ return 0; ++} ++ ++int get_dns_addr(struct in_addr *pdns_addr) ++{ ++ static struct stat dns_addr_stat; ++ ++ if (dns_addr.s_addr != 0) { ++ int ret; ++ ret = get_dns_addr_cached(pdns_addr, &dns_addr, sizeof(dns_addr), ++ &dns_addr_stat, &dns_addr_time); ++ if (ret <= 0) { ++ return ret; ++ } ++ } ++ return get_dns_addr_resolv_conf(AF_INET, pdns_addr, &dns_addr, ++ sizeof(dns_addr), NULL, &dns_addr_time); ++} ++ ++int get_dns6_addr(struct in6_addr *pdns6_addr, uint32_t *scope_id) ++{ ++ static struct stat dns6_addr_stat; ++ ++ if (!in6_zero(&dns6_addr)) { ++ int ret; ++ ret = get_dns_addr_cached(pdns6_addr, &dns6_addr, sizeof(dns6_addr), ++ &dns6_addr_stat, &dns6_addr_time); ++ if (ret <= 0) { ++ return ret; ++ } ++ } ++ return get_dns_addr_resolv_conf(AF_INET6, pdns6_addr, &dns6_addr, ++ sizeof(dns6_addr), scope_id, ++ &dns6_addr_time); ++} ++ ++#endif ++ ++static void slirp_init_once(void) ++{ ++ static int initialized; ++ const char *debug; ++#ifdef _WIN32 ++ WSADATA Data; ++#endif ++ ++ if (initialized) { ++ return; ++ } ++ initialized = 1; ++ ++#ifdef _WIN32 ++ WSAStartup(MAKEWORD(2, 0), &Data); ++ atexit(winsock_cleanup); ++#endif ++ ++ loopback_addr.s_addr = htonl(INADDR_LOOPBACK); ++ loopback_mask = htonl(IN_CLASSA_NET); ++ ++ debug = g_getenv("SLIRP_DEBUG"); ++ if (debug) { ++ const GDebugKey keys[] = { ++ { "call", DBG_CALL }, ++ { "misc", DBG_MISC }, ++ { "error", DBG_ERROR }, ++ { "tftp", DBG_TFTP }, ++ { "verbose_call", DBG_VERBOSE_CALL }, ++ }; ++ slirp_debug = g_parse_debug_string(debug, keys, G_N_ELEMENTS(keys)); ++ } ++} ++ ++Slirp *slirp_new(const SlirpConfig *cfg, const SlirpCb *callbacks, void *opaque) ++{ ++ Slirp *slirp; ++ ++ g_return_val_if_fail(cfg != NULL, NULL); ++ g_return_val_if_fail(cfg->version >= SLIRP_CONFIG_VERSION_MIN, NULL); ++ g_return_val_if_fail(cfg->version <= SLIRP_CONFIG_VERSION_MAX, NULL); ++ g_return_val_if_fail(cfg->if_mtu >= IF_MTU_MIN || cfg->if_mtu == 0, NULL); ++ g_return_val_if_fail(cfg->if_mtu <= IF_MTU_MAX, NULL); ++ g_return_val_if_fail(cfg->if_mru >= IF_MRU_MIN || cfg->if_mru == 0, NULL); ++ g_return_val_if_fail(cfg->if_mru <= IF_MRU_MAX, NULL); ++ g_return_val_if_fail(!cfg->bootfile || ++ (strlen(cfg->bootfile) < ++ G_SIZEOF_MEMBER(struct bootp_t, bp_file)), NULL); ++ ++ slirp = g_malloc0(sizeof(Slirp)); ++ ++ slirp_init_once(); ++ ++ slirp->opaque = opaque; ++ slirp->cb = callbacks; ++ slirp->grand = g_rand_new(); ++ slirp->restricted = cfg->restricted; ++ ++ slirp->in_enabled = cfg->in_enabled; ++ slirp->in6_enabled = cfg->in6_enabled; ++ ++ if_init(slirp); ++ ip_init(slirp); ++ ip6_init(slirp); ++ ++ m_init(slirp); ++ ++ slirp->vnetwork_addr = cfg->vnetwork; ++ slirp->vnetwork_mask = cfg->vnetmask; ++ slirp->vhost_addr = cfg->vhost; ++ slirp->vprefix_addr6 = cfg->vprefix_addr6; ++ slirp->vprefix_len = cfg->vprefix_len; ++ slirp->vhost_addr6 = cfg->vhost6; ++ if (cfg->vhostname) { ++ slirp_pstrcpy(slirp->client_hostname, sizeof(slirp->client_hostname), ++ cfg->vhostname); ++ } ++ slirp->tftp_prefix = g_strdup(cfg->tftp_path); ++ slirp->bootp_filename = g_strdup(cfg->bootfile); ++ slirp->vdomainname = g_strdup(cfg->vdomainname); ++ slirp->vdhcp_startaddr = cfg->vdhcp_start; ++ slirp->vnameserver_addr = cfg->vnameserver; ++ slirp->vnameserver_addr6 = cfg->vnameserver6; ++ slirp->tftp_server_name = g_strdup(cfg->tftp_server_name); ++ ++ if (cfg->vdnssearch) { ++ translate_dnssearch(slirp, cfg->vdnssearch); ++ } ++ slirp->if_mtu = cfg->if_mtu == 0 ? IF_MTU_DEFAULT : cfg->if_mtu; ++ slirp->if_mru = cfg->if_mru == 0 ? IF_MRU_DEFAULT : cfg->if_mru; ++ slirp->disable_host_loopback = cfg->disable_host_loopback; ++ slirp->enable_emu = cfg->enable_emu; ++ ++ if (cfg->version >= 2) { ++ slirp->outbound_addr = cfg->outbound_addr; ++ slirp->outbound_addr6 = cfg->outbound_addr6; ++ } else { ++ slirp->outbound_addr = NULL; ++ slirp->outbound_addr6 = NULL; ++ } ++ ++ if (cfg->version >= 3) { ++ slirp->disable_dns = cfg->disable_dns; ++ } else { ++ slirp->disable_dns = false; ++ } ++ ++ return slirp; ++} ++ ++Slirp *slirp_init(int restricted, bool in_enabled, struct in_addr vnetwork, ++ struct in_addr vnetmask, struct in_addr vhost, ++ bool in6_enabled, struct in6_addr vprefix_addr6, ++ uint8_t vprefix_len, struct in6_addr vhost6, ++ const char *vhostname, const char *tftp_server_name, ++ const char *tftp_path, const char *bootfile, ++ struct in_addr vdhcp_start, struct in_addr vnameserver, ++ struct in6_addr vnameserver6, const char **vdnssearch, ++ const char *vdomainname, const SlirpCb *callbacks, ++ void *opaque) ++{ ++ SlirpConfig cfg; ++ memset(&cfg, 0, sizeof(cfg)); ++ cfg.version = 1; ++ cfg.restricted = restricted; ++ cfg.in_enabled = in_enabled; ++ cfg.vnetwork = vnetwork; ++ cfg.vnetmask = vnetmask; ++ cfg.vhost = vhost; ++ cfg.in6_enabled = in6_enabled; ++ cfg.vprefix_addr6 = vprefix_addr6; ++ cfg.vprefix_len = vprefix_len; ++ cfg.vhost6 = vhost6; ++ cfg.vhostname = vhostname; ++ cfg.tftp_server_name = tftp_server_name; ++ cfg.tftp_path = tftp_path; ++ cfg.bootfile = bootfile; ++ cfg.vdhcp_start = vdhcp_start; ++ cfg.vnameserver = vnameserver; ++ cfg.vnameserver6 = vnameserver6; ++ cfg.vdnssearch = vdnssearch; ++ cfg.vdomainname = vdomainname; ++ return slirp_new(&cfg, callbacks, opaque); ++} ++ ++void slirp_cleanup(Slirp *slirp) ++{ ++ struct gfwd_list *e, *next; ++ ++ for (e = slirp->guestfwd_list; e; e = next) { ++ next = e->ex_next; ++ g_free(e->ex_exec); ++ g_free(e->ex_unix); ++ g_free(e); ++ } ++ ++ ip_cleanup(slirp); ++ ip6_cleanup(slirp); ++ m_cleanup(slirp); ++ ++ g_rand_free(slirp->grand); ++ ++ g_free(slirp->vdnssearch); ++ g_free(slirp->tftp_prefix); ++ g_free(slirp->bootp_filename); ++ g_free(slirp->vdomainname); ++ g_free(slirp); ++} ++ ++#define CONN_CANFSEND(so) \ ++ (((so)->so_state & (SS_FCANTSENDMORE | SS_ISFCONNECTED)) == SS_ISFCONNECTED) ++#define CONN_CANFRCV(so) \ ++ (((so)->so_state & (SS_FCANTRCVMORE | SS_ISFCONNECTED)) == SS_ISFCONNECTED) ++ ++static void slirp_update_timeout(Slirp *slirp, uint32_t *timeout) ++{ ++ uint32_t t; ++ ++ if (*timeout <= TIMEOUT_FAST) { ++ return; ++ } ++ ++ t = MIN(1000, *timeout); ++ ++ /* If we have tcp timeout with slirp, then we will fill @timeout with ++ * more precise value. ++ */ ++ if (slirp->time_fasttimo) { ++ *timeout = TIMEOUT_FAST; ++ return; ++ } ++ if (slirp->do_slowtimo) { ++ t = MIN(TIMEOUT_SLOW, t); ++ } ++ *timeout = t; ++} ++ ++void slirp_pollfds_fill(Slirp *slirp, uint32_t *timeout, ++ SlirpAddPollCb add_poll, void *opaque) ++{ ++ struct socket *so, *so_next; ++ ++ /* ++ * First, TCP sockets ++ */ ++ ++ /* ++ * *_slowtimo needs calling if there are IP fragments ++ * in the fragment queue, or there are TCP connections active ++ */ ++ slirp->do_slowtimo = ((slirp->tcb.so_next != &slirp->tcb) || ++ (&slirp->ipq.ip_link != slirp->ipq.ip_link.next)); ++ ++ for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so_next) { ++ int events = 0; ++ ++ so_next = so->so_next; ++ ++ so->pollfds_idx = -1; ++ ++ /* ++ * See if we need a tcp_fasttimo ++ */ ++ if (slirp->time_fasttimo == 0 && so->so_tcpcb->t_flags & TF_DELACK) { ++ slirp->time_fasttimo = curtime; /* Flag when want a fasttimo */ ++ } ++ ++ /* ++ * NOFDREF can include still connecting to local-host, ++ * newly socreated() sockets etc. Don't want to select these. ++ */ ++ if (so->so_state & SS_NOFDREF || so->s == -1) { ++ continue; ++ } ++ ++ /* ++ * Set for reading sockets which are accepting ++ */ ++ if (so->so_state & SS_FACCEPTCONN) { ++ so->pollfds_idx = add_poll( ++ so->s, SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR, opaque); ++ continue; ++ } ++ ++ /* ++ * Set for writing sockets which are connecting ++ */ ++ if (so->so_state & SS_ISFCONNECTING) { ++ so->pollfds_idx = ++ add_poll(so->s, SLIRP_POLL_OUT | SLIRP_POLL_ERR, opaque); ++ continue; ++ } ++ ++ /* ++ * Set for writing if we are connected, can send more, and ++ * we have something to send ++ */ ++ if (CONN_CANFSEND(so) && so->so_rcv.sb_cc) { ++ events |= SLIRP_POLL_OUT | SLIRP_POLL_ERR; ++ } ++ ++ /* ++ * Set for reading (and urgent data) if we are connected, can ++ * receive more, and we have room for it XXX /2 ? ++ */ ++ if (CONN_CANFRCV(so) && ++ (so->so_snd.sb_cc < (so->so_snd.sb_datalen / 2))) { ++ events |= SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR | ++ SLIRP_POLL_PRI; ++ } ++ ++ if (events) { ++ so->pollfds_idx = add_poll(so->s, events, opaque); ++ } ++ } ++ ++ /* ++ * UDP sockets ++ */ ++ for (so = slirp->udb.so_next; so != &slirp->udb; so = so_next) { ++ so_next = so->so_next; ++ ++ so->pollfds_idx = -1; ++ ++ /* ++ * See if it's timed out ++ */ ++ if (so->so_expire) { ++ if (so->so_expire <= curtime) { ++ udp_detach(so); ++ continue; ++ } else { ++ slirp->do_slowtimo = true; /* Let socket expire */ ++ } ++ } ++ ++ /* ++ * When UDP packets are received from over the ++ * link, they're sendto()'d straight away, so ++ * no need for setting for writing ++ * Limit the number of packets queued by this session ++ * to 4. Note that even though we try and limit this ++ * to 4 packets, the session could have more queued ++ * if the packets needed to be fragmented ++ * (XXX <= 4 ?) ++ */ ++ if ((so->so_state & SS_ISFCONNECTED) && so->so_queued <= 4) { ++ so->pollfds_idx = add_poll( ++ so->s, SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR, opaque); ++ } ++ } ++ ++ /* ++ * ICMP sockets ++ */ ++ for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so_next) { ++ so_next = so->so_next; ++ ++ so->pollfds_idx = -1; ++ ++ /* ++ * See if it's timed out ++ */ ++ if (so->so_expire) { ++ if (so->so_expire <= curtime) { ++ icmp_detach(so); ++ continue; ++ } else { ++ slirp->do_slowtimo = true; /* Let socket expire */ ++ } ++ } ++ ++ if (so->so_state & SS_ISFCONNECTED) { ++ so->pollfds_idx = add_poll( ++ so->s, SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR, opaque); ++ } ++ } ++ ++ slirp_update_timeout(slirp, timeout); ++} ++ ++void slirp_pollfds_poll(Slirp *slirp, int select_error, ++ SlirpGetREventsCb get_revents, void *opaque) ++{ ++ struct socket *so, *so_next; ++ int ret; ++ ++ curtime = slirp->cb->clock_get_ns(slirp->opaque) / SCALE_MS; ++ ++ /* ++ * See if anything has timed out ++ */ ++ if (slirp->time_fasttimo && ++ ((curtime - slirp->time_fasttimo) >= TIMEOUT_FAST)) { ++ tcp_fasttimo(slirp); ++ slirp->time_fasttimo = 0; ++ } ++ if (slirp->do_slowtimo && ++ ((curtime - slirp->last_slowtimo) >= TIMEOUT_SLOW)) { ++ ip_slowtimo(slirp); ++ tcp_slowtimo(slirp); ++ slirp->last_slowtimo = curtime; ++ } ++ ++ /* ++ * Check sockets ++ */ ++ if (!select_error) { ++ /* ++ * Check TCP sockets ++ */ ++ for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so_next) { ++ int revents; ++ ++ so_next = so->so_next; ++ ++ revents = 0; ++ if (so->pollfds_idx != -1) { ++ revents = get_revents(so->pollfds_idx, opaque); ++ } ++ ++ if (so->so_state & SS_NOFDREF || so->s == -1) { ++ continue; ++ } ++ ++#ifndef __APPLE__ ++ /* ++ * Check for URG data ++ * This will soread as well, so no need to ++ * test for SLIRP_POLL_IN below if this succeeds. ++ * ++ * This is however disabled on MacOS, which apparently always ++ * reports data as PRI when it is the last data of the ++ * connection. We would then report it out of band, which the guest ++ * would most probably not be ready for. ++ */ ++ if (revents & SLIRP_POLL_PRI) { ++ ret = sorecvoob(so); ++ if (ret < 0) { ++ /* Socket error might have resulted in the socket being ++ * removed, do not try to do anything more with it. */ ++ continue; ++ } ++ } ++ /* ++ * Check sockets for reading ++ */ ++ else ++#endif ++ if (revents & ++ (SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR | SLIRP_POLL_PRI)) { ++ /* ++ * Check for incoming connections ++ */ ++ if (so->so_state & SS_FACCEPTCONN) { ++ tcp_connect(so); ++ continue; ++ } /* else */ ++ ret = soread(so); ++ ++ /* Output it if we read something */ ++ if (ret > 0) { ++ tcp_output(sototcpcb(so)); ++ } ++ if (ret < 0) { ++ /* Socket error might have resulted in the socket being ++ * removed, do not try to do anything more with it. */ ++ continue; ++ } ++ } ++ ++ /* ++ * Check sockets for writing ++ */ ++ if (!(so->so_state & SS_NOFDREF) && ++ (revents & (SLIRP_POLL_OUT | SLIRP_POLL_ERR))) { ++ /* ++ * Check for non-blocking, still-connecting sockets ++ */ ++ if (so->so_state & SS_ISFCONNECTING) { ++ /* Connected */ ++ so->so_state &= ~SS_ISFCONNECTING; ++ ++ ret = send(so->s, (const void *)&ret, 0, 0); ++ if (ret < 0) { ++ /* XXXXX Must fix, zero bytes is a NOP */ ++ if (errno == EAGAIN || errno == EWOULDBLOCK || ++ errno == EINPROGRESS || errno == ENOTCONN) { ++ continue; ++ } ++ ++ /* else failed */ ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_NOFDREF; ++ } ++ /* else so->so_state &= ~SS_ISFCONNECTING; */ ++ ++ /* ++ * Continue tcp_input ++ */ ++ tcp_input((struct mbuf *)NULL, sizeof(struct ip), so, ++ so->so_ffamily); ++ /* continue; */ ++ } else { ++ ret = sowrite(so); ++ if (ret > 0) { ++ /* Call tcp_output in case we need to send a window ++ * update to the guest, otherwise it will be stuck ++ * until it sends a window probe. */ ++ tcp_output(sototcpcb(so)); ++ } ++ } ++ } ++ } ++ ++ /* ++ * Now UDP sockets. ++ * Incoming packets are sent straight away, they're not buffered. ++ * Incoming UDP data isn't buffered either. ++ */ ++ for (so = slirp->udb.so_next; so != &slirp->udb; so = so_next) { ++ int revents; ++ ++ so_next = so->so_next; ++ ++ revents = 0; ++ if (so->pollfds_idx != -1) { ++ revents = get_revents(so->pollfds_idx, opaque); ++ } ++ ++ if (so->s != -1 && ++ (revents & (SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR))) { ++ sorecvfrom(so); ++ } ++ } ++ ++ /* ++ * Check incoming ICMP relies. ++ */ ++ for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so_next) { ++ int revents; ++ ++ so_next = so->so_next; ++ ++ revents = 0; ++ if (so->pollfds_idx != -1) { ++ revents = get_revents(so->pollfds_idx, opaque); ++ } ++ ++ if (so->s != -1 && ++ (revents & (SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR))) { ++ icmp_receive(so); ++ } ++ } ++ } ++ ++ if_start(slirp); ++} ++ ++static void arp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) ++{ ++ const struct slirp_arphdr *ah = ++ (const struct slirp_arphdr *)(pkt + ETH_HLEN); ++ uint8_t arp_reply[MAX(ETH_HLEN + sizeof(struct slirp_arphdr), 64)]; ++ struct ethhdr *reh = (struct ethhdr *)arp_reply; ++ struct slirp_arphdr *rah = (struct slirp_arphdr *)(arp_reply + ETH_HLEN); ++ int ar_op; ++ struct gfwd_list *ex_ptr; ++ ++ if (!slirp->in_enabled) { ++ return; ++ } ++ ++ if (pkt_len < ETH_HLEN + sizeof(struct slirp_arphdr)) { ++ return; /* packet too short */ ++ } ++ ++ ar_op = ntohs(ah->ar_op); ++ switch (ar_op) { ++ case ARPOP_REQUEST: ++ if (ah->ar_tip == ah->ar_sip) { ++ /* Gratuitous ARP */ ++ arp_table_add(slirp, ah->ar_sip, ah->ar_sha); ++ return; ++ } ++ ++ if ((ah->ar_tip & slirp->vnetwork_mask.s_addr) == ++ slirp->vnetwork_addr.s_addr) { ++ if (ah->ar_tip == slirp->vnameserver_addr.s_addr || ++ ah->ar_tip == slirp->vhost_addr.s_addr) ++ goto arp_ok; ++ /* TODO: IPv6 */ ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ++ ex_ptr = ex_ptr->ex_next) { ++ if (ex_ptr->ex_addr.s_addr == ah->ar_tip) ++ goto arp_ok; ++ } ++ return; ++ arp_ok: ++ memset(arp_reply, 0, sizeof(arp_reply)); ++ ++ arp_table_add(slirp, ah->ar_sip, ah->ar_sha); ++ ++ /* ARP request for alias/dns mac address */ ++ memcpy(reh->h_dest, pkt + ETH_ALEN, ETH_ALEN); ++ memcpy(reh->h_source, special_ethaddr, ETH_ALEN - 4); ++ memcpy(&reh->h_source[2], &ah->ar_tip, 4); ++ reh->h_proto = htons(ETH_P_ARP); ++ ++ rah->ar_hrd = htons(1); ++ rah->ar_pro = htons(ETH_P_IP); ++ rah->ar_hln = ETH_ALEN; ++ rah->ar_pln = 4; ++ rah->ar_op = htons(ARPOP_REPLY); ++ memcpy(rah->ar_sha, reh->h_source, ETH_ALEN); ++ rah->ar_sip = ah->ar_tip; ++ memcpy(rah->ar_tha, ah->ar_sha, ETH_ALEN); ++ rah->ar_tip = ah->ar_sip; ++ slirp_send_packet_all(slirp, arp_reply, sizeof(arp_reply)); ++ } ++ break; ++ case ARPOP_REPLY: ++ arp_table_add(slirp, ah->ar_sip, ah->ar_sha); ++ break; ++ default: ++ break; ++ } ++} ++ ++void slirp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) ++{ ++ struct mbuf *m; ++ int proto; ++ ++ if (pkt_len < ETH_HLEN) ++ return; ++ ++ proto = (((uint16_t)pkt[12]) << 8) + pkt[13]; ++ switch (proto) { ++ case ETH_P_ARP: ++ arp_input(slirp, pkt, pkt_len); ++ break; ++ case ETH_P_IP: ++ case ETH_P_IPV6: ++ m = m_get(slirp); ++ if (!m) ++ return; ++ /* Note: we add 2 to align the IP header on 4 bytes, ++ * and add the margin for the tcpiphdr overhead */ ++ if (M_FREEROOM(m) < pkt_len + TCPIPHDR_DELTA + 2) { ++ m_inc(m, pkt_len + TCPIPHDR_DELTA + 2); ++ } ++ m->m_len = pkt_len + TCPIPHDR_DELTA + 2; ++ memcpy(m->m_data + TCPIPHDR_DELTA + 2, pkt, pkt_len); ++ ++ m->m_data += TCPIPHDR_DELTA + 2 + ETH_HLEN; ++ m->m_len -= TCPIPHDR_DELTA + 2 + ETH_HLEN; ++ ++ if (proto == ETH_P_IP) { ++ ip_input(m); ++ } else if (proto == ETH_P_IPV6) { ++ ip6_input(m); ++ } ++ break; ++ ++ case ETH_P_NCSI: ++ ncsi_input(slirp, pkt, pkt_len); ++ break; ++ ++ default: ++ break; ++ } ++} ++ ++/* Prepare the IPv4 packet to be sent to the ethernet device. Returns 1 if no ++ * packet should be sent, 0 if the packet must be re-queued, 2 if the packet ++ * is ready to go. ++ */ ++static int if_encap4(Slirp *slirp, struct mbuf *ifm, struct ethhdr *eh, ++ uint8_t ethaddr[ETH_ALEN]) ++{ ++ const struct ip *iph = (const struct ip *)ifm->m_data; ++ ++ if (!arp_table_search(slirp, iph->ip_dst.s_addr, ethaddr)) { ++ uint8_t arp_req[ETH_HLEN + sizeof(struct slirp_arphdr)]; ++ struct ethhdr *reh = (struct ethhdr *)arp_req; ++ struct slirp_arphdr *rah = (struct slirp_arphdr *)(arp_req + ETH_HLEN); ++ ++ if (!ifm->resolution_requested) { ++ /* If the client addr is not known, send an ARP request */ ++ memset(reh->h_dest, 0xff, ETH_ALEN); ++ memcpy(reh->h_source, special_ethaddr, ETH_ALEN - 4); ++ memcpy(&reh->h_source[2], &slirp->vhost_addr, 4); ++ reh->h_proto = htons(ETH_P_ARP); ++ rah->ar_hrd = htons(1); ++ rah->ar_pro = htons(ETH_P_IP); ++ rah->ar_hln = ETH_ALEN; ++ rah->ar_pln = 4; ++ rah->ar_op = htons(ARPOP_REQUEST); ++ ++ /* source hw addr */ ++ memcpy(rah->ar_sha, special_ethaddr, ETH_ALEN - 4); ++ memcpy(&rah->ar_sha[2], &slirp->vhost_addr, 4); ++ ++ /* source IP */ ++ rah->ar_sip = slirp->vhost_addr.s_addr; ++ ++ /* target hw addr (none) */ ++ memset(rah->ar_tha, 0, ETH_ALEN); ++ ++ /* target IP */ ++ rah->ar_tip = iph->ip_dst.s_addr; ++ slirp->client_ipaddr = iph->ip_dst; ++ slirp_send_packet_all(slirp, arp_req, sizeof(arp_req)); ++ ifm->resolution_requested = true; ++ ++ /* Expire request and drop outgoing packet after 1 second */ ++ ifm->expiration_date = ++ slirp->cb->clock_get_ns(slirp->opaque) + 1000000000ULL; ++ } ++ return 0; ++ } else { ++ memcpy(eh->h_source, special_ethaddr, ETH_ALEN - 4); ++ /* XXX: not correct */ ++ memcpy(&eh->h_source[2], &slirp->vhost_addr, 4); ++ eh->h_proto = htons(ETH_P_IP); ++ ++ /* Send this */ ++ return 2; ++ } ++} ++ ++/* Prepare the IPv6 packet to be sent to the ethernet device. Returns 1 if no ++ * packet should be sent, 0 if the packet must be re-queued, 2 if the packet ++ * is ready to go. ++ */ ++static int if_encap6(Slirp *slirp, struct mbuf *ifm, struct ethhdr *eh, ++ uint8_t ethaddr[ETH_ALEN]) ++{ ++ const struct ip6 *ip6h = mtod(ifm, const struct ip6 *); ++ if (!ndp_table_search(slirp, ip6h->ip_dst, ethaddr)) { ++ if (!ifm->resolution_requested) { ++ ndp_send_ns(slirp, ip6h->ip_dst); ++ ifm->resolution_requested = true; ++ ifm->expiration_date = ++ slirp->cb->clock_get_ns(slirp->opaque) + 1000000000ULL; ++ } ++ return 0; ++ } else { ++ eh->h_proto = htons(ETH_P_IPV6); ++ in6_compute_ethaddr(ip6h->ip_src, eh->h_source); ++ ++ /* Send this */ ++ return 2; ++ } ++} ++ ++/* Output the IP packet to the ethernet device. Returns 0 if the packet must be ++ * re-queued. ++ */ ++int if_encap(Slirp *slirp, struct mbuf *ifm) ++{ ++ uint8_t buf[IF_MTU_MAX + 100]; ++ struct ethhdr *eh = (struct ethhdr *)buf; ++ uint8_t ethaddr[ETH_ALEN]; ++ const struct ip *iph = (const struct ip *)ifm->m_data; ++ int ret; ++ char ethaddr_str[ETH_ADDRSTRLEN]; ++ ++ if (ifm->m_len + ETH_HLEN > sizeof(buf)) { ++ return 1; ++ } ++ ++ switch (iph->ip_v) { ++ case IPVERSION: ++ ret = if_encap4(slirp, ifm, eh, ethaddr); ++ if (ret < 2) { ++ return ret; ++ } ++ break; ++ ++ case IP6VERSION: ++ ret = if_encap6(slirp, ifm, eh, ethaddr); ++ if (ret < 2) { ++ return ret; ++ } ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ } ++ ++ memcpy(eh->h_dest, ethaddr, ETH_ALEN); ++ DEBUG_ARG("src = %s", slirp_ether_ntoa(eh->h_source, ethaddr_str, ++ sizeof(ethaddr_str))); ++ DEBUG_ARG("dst = %s", slirp_ether_ntoa(eh->h_dest, ethaddr_str, ++ sizeof(ethaddr_str))); ++ memcpy(buf + sizeof(struct ethhdr), ifm->m_data, ifm->m_len); ++ slirp_send_packet_all(slirp, buf, ifm->m_len + ETH_HLEN); ++ return 1; ++} ++ ++/* Drop host forwarding rule, return 0 if found. */ ++int slirp_remove_hostfwd(Slirp *slirp, int is_udp, struct in_addr host_addr, ++ int host_port) ++{ ++ struct socket *so; ++ struct socket *head = (is_udp ? &slirp->udb : &slirp->tcb); ++ struct sockaddr_in addr; ++ int port = htons(host_port); ++ socklen_t addr_len; ++ ++ for (so = head->so_next; so != head; so = so->so_next) { ++ addr_len = sizeof(addr); ++ if ((so->so_state & SS_HOSTFWD) && ++ getsockname(so->s, (struct sockaddr *)&addr, &addr_len) == 0 && ++ addr_len == sizeof(addr) && ++ addr.sin_family == AF_INET && ++ addr.sin_addr.s_addr == host_addr.s_addr && ++ addr.sin_port == port) { ++ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); ++ closesocket(so->s); ++ sofree(so); ++ return 0; ++ } ++ } ++ ++ return -1; ++} ++ ++int slirp_add_hostfwd(Slirp *slirp, int is_udp, struct in_addr host_addr, ++ int host_port, struct in_addr guest_addr, int guest_port) ++{ ++ if (!guest_addr.s_addr) { ++ guest_addr = slirp->vdhcp_startaddr; ++ } ++ if (is_udp) { ++ if (!udp_listen(slirp, host_addr.s_addr, htons(host_port), ++ guest_addr.s_addr, htons(guest_port), SS_HOSTFWD)) ++ return -1; ++ } else { ++ if (!tcp_listen(slirp, host_addr.s_addr, htons(host_port), ++ guest_addr.s_addr, htons(guest_port), SS_HOSTFWD)) ++ return -1; ++ } ++ return 0; ++} ++ ++int slirp_remove_hostxfwd(Slirp *slirp, ++ const struct sockaddr *haddr, socklen_t haddrlen, ++ int flags) ++{ ++ struct socket *so; ++ struct socket *head = (flags & SLIRP_HOSTFWD_UDP ? &slirp->udb : &slirp->tcb); ++ struct sockaddr_storage addr; ++ socklen_t addr_len; ++ ++ for (so = head->so_next; so != head; so = so->so_next) { ++ addr_len = sizeof(addr); ++ if ((so->so_state & SS_HOSTFWD) && ++ getsockname(so->s, (struct sockaddr *)&addr, &addr_len) == 0 && ++ sockaddr_equal(&addr, (const struct sockaddr_storage *) haddr)) { ++ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); ++ closesocket(so->s); ++ sofree(so); ++ return 0; ++ } ++ } ++ ++ return -1; ++} ++ ++int slirp_add_hostxfwd(Slirp *slirp, ++ const struct sockaddr *haddr, socklen_t haddrlen, ++ const struct sockaddr *gaddr, socklen_t gaddrlen, ++ int flags) ++{ ++ struct sockaddr_in gdhcp_addr; ++ int fwd_flags = SS_HOSTFWD; ++ ++ if (flags & SLIRP_HOSTFWD_V6ONLY) ++ fwd_flags |= SS_HOSTFWD_V6ONLY; ++ ++ if (gaddr->sa_family == AF_INET) { ++ const struct sockaddr_in *gaddr_in = (const struct sockaddr_in *) gaddr; ++ ++ if (gaddrlen < sizeof(struct sockaddr_in)) { ++ errno = EINVAL; ++ return -1; ++ } ++ ++ if (!gaddr_in->sin_addr.s_addr) { ++ gdhcp_addr = *gaddr_in; ++ gdhcp_addr.sin_addr = slirp->vdhcp_startaddr; ++ gaddr = (struct sockaddr *) &gdhcp_addr; ++ gaddrlen = sizeof(gdhcp_addr); ++ } ++ } else { ++ if (gaddrlen < sizeof(struct sockaddr_in6)) { ++ errno = EINVAL; ++ return -1; ++ } ++ ++ /* ++ * Libslirp currently only provides a stateless DHCPv6 server, thus ++ * we can't translate "addr-any" to the guest here. Instead, we defer ++ * performing the translation to when it's needed. See ++ * soassign_guest_addr_if_needed(). ++ */ ++ } ++ ++ if (flags & SLIRP_HOSTFWD_UDP) { ++ if (!udpx_listen(slirp, haddr, haddrlen, ++ gaddr, gaddrlen, ++ fwd_flags)) ++ return -1; ++ } else { ++ if (!tcpx_listen(slirp, haddr, haddrlen, ++ gaddr, gaddrlen, ++ fwd_flags)) ++ return -1; ++ } ++ return 0; ++} ++ ++/* TODO: IPv6 */ ++static bool check_guestfwd(Slirp *slirp, struct in_addr *guest_addr, ++ int guest_port) ++{ ++ struct gfwd_list *tmp_ptr; ++ ++ if (!guest_addr->s_addr) { ++ guest_addr->s_addr = slirp->vnetwork_addr.s_addr | ++ (htonl(0x0204) & ~slirp->vnetwork_mask.s_addr); ++ } ++ if ((guest_addr->s_addr & slirp->vnetwork_mask.s_addr) != ++ slirp->vnetwork_addr.s_addr || ++ guest_addr->s_addr == slirp->vhost_addr.s_addr || ++ guest_addr->s_addr == slirp->vnameserver_addr.s_addr) { ++ return false; ++ } ++ ++ /* check if the port is "bound" */ ++ for (tmp_ptr = slirp->guestfwd_list; tmp_ptr; tmp_ptr = tmp_ptr->ex_next) { ++ if (guest_port == tmp_ptr->ex_fport && ++ guest_addr->s_addr == tmp_ptr->ex_addr.s_addr) ++ return false; ++ } ++ ++ return true; ++} ++ ++int slirp_add_exec(Slirp *slirp, const char *cmdline, ++ struct in_addr *guest_addr, int guest_port) ++{ ++ if (!check_guestfwd(slirp, guest_addr, guest_port)) { ++ return -1; ++ } ++ ++ add_exec(&slirp->guestfwd_list, cmdline, *guest_addr, htons(guest_port)); ++ return 0; ++} ++ ++int slirp_add_unix(Slirp *slirp, const char *unixsock, ++ struct in_addr *guest_addr, int guest_port) ++{ ++#ifdef G_OS_UNIX ++ if (!check_guestfwd(slirp, guest_addr, guest_port)) { ++ return -1; ++ } ++ ++ add_unix(&slirp->guestfwd_list, unixsock, *guest_addr, htons(guest_port)); ++ return 0; ++#else ++ g_warn_if_reached(); ++ return -1; ++#endif ++} ++ ++int slirp_add_guestfwd(Slirp *slirp, SlirpWriteCb write_cb, void *opaque, ++ struct in_addr *guest_addr, int guest_port) ++{ ++ if (!check_guestfwd(slirp, guest_addr, guest_port)) { ++ return -1; ++ } ++ ++ add_guestfwd(&slirp->guestfwd_list, write_cb, opaque, *guest_addr, ++ htons(guest_port)); ++ return 0; ++} ++ ++int slirp_remove_guestfwd(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port) ++{ ++ return remove_guestfwd(&slirp->guestfwd_list, guest_addr, ++ htons(guest_port)); ++} ++ ++ssize_t slirp_send(struct socket *so, const void *buf, size_t len, int flags) ++{ ++ if (so->s == -1 && so->guestfwd) { ++ /* XXX this blocks entire thread. Rewrite to use ++ * qemu_chr_fe_write and background I/O callbacks */ ++ so->guestfwd->write_cb(buf, len, so->guestfwd->opaque); ++ return len; ++ } ++ ++ if (so->s == -1) { ++ /* ++ * This should in theory not happen but it is hard to be ++ * sure because some code paths will end up with so->s == -1 ++ * on a failure but don't dispose of the struct socket. ++ * Check specifically, so we don't pass -1 to send(). ++ */ ++ errno = EBADF; ++ return -1; ++ } ++ ++ return send(so->s, buf, len, flags); ++} ++ ++struct socket *slirp_find_ctl_socket(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port) ++{ ++ struct socket *so; ++ ++ /* TODO: IPv6 */ ++ for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so->so_next) { ++ if (so->so_faddr.s_addr == guest_addr.s_addr && ++ htons(so->so_fport) == guest_port) { ++ return so; ++ } ++ } ++ return NULL; ++} ++ ++size_t slirp_socket_can_recv(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port) ++{ ++ struct iovec iov[2]; ++ struct socket *so; ++ ++ so = slirp_find_ctl_socket(slirp, guest_addr, guest_port); ++ ++ if (!so || so->so_state & SS_NOFDREF) { ++ return 0; ++ } ++ ++ if (!CONN_CANFRCV(so) || so->so_snd.sb_cc >= (so->so_snd.sb_datalen / 2)) { ++ return 0; ++ } ++ ++ return sopreprbuf(so, iov, NULL); ++} ++ ++void slirp_socket_recv(Slirp *slirp, struct in_addr guest_addr, int guest_port, ++ const uint8_t *buf, int size) ++{ ++ int ret; ++ struct socket *so = slirp_find_ctl_socket(slirp, guest_addr, guest_port); ++ ++ if (!so) ++ return; ++ ++ ret = soreadbuf(so, (const char *)buf, size); ++ ++ if (ret > 0) ++ tcp_output(sototcpcb(so)); ++} ++ ++void slirp_send_packet_all(Slirp *slirp, const void *buf, size_t len) ++{ ++ ssize_t ret = slirp->cb->send_packet(buf, len, slirp->opaque); ++ ++ if (ret < 0) { ++ g_critical("Failed to send packet, ret: %ld", (long)ret); ++ } else if (ret < len) { ++ DEBUG_ERROR("send_packet() didn't send all data: %ld < %lu", (long)ret, ++ (unsigned long)len); ++ } ++} +diff --git a/slirp/src/slirp.h b/slirp/src/slirp.h +new file mode 100644 +index 0000000000..89d79f3de5 +--- /dev/null ++++ b/slirp/src/slirp.h +@@ -0,0 +1,289 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++#ifndef SLIRP_H ++#define SLIRP_H ++ ++#ifdef _WIN32 ++ ++/* as defined in sdkddkver.h */ ++#ifndef _WIN32_WINNT ++#define _WIN32_WINNT 0x0600 /* Vista */ ++#endif ++/* reduces the number of implicitly included headers */ ++#ifndef WIN32_LEAN_AND_MEAN ++#define WIN32_LEAN_AND_MEAN ++#endif ++ ++#include ++#include ++#include ++#include ++#include ++ ++#else ++#define O_BINARY 0 ++#endif ++ ++#ifndef _WIN32 ++#include ++#include ++#include ++#include ++#include ++#endif ++ ++#ifdef __APPLE__ ++#include ++#endif ++ ++/* Avoid conflicting with the libc insque() and remque(), which ++ have different prototypes. */ ++#define insque slirp_insque ++#define remque slirp_remque ++#define quehead slirp_quehead ++ ++#include "debug.h" ++#include "util.h" ++ ++#include "libslirp.h" ++#include "ip.h" ++#include "ip6.h" ++#include "tcp.h" ++#include "tcp_timer.h" ++#include "tcp_var.h" ++#include "tcpip.h" ++#include "udp.h" ++#include "ip_icmp.h" ++#include "ip6_icmp.h" ++#include "mbuf.h" ++#include "sbuf.h" ++#include "socket.h" ++#include "if.h" ++#include "main.h" ++#include "misc.h" ++ ++#include "bootp.h" ++#include "tftp.h" ++ ++#define ARPOP_REQUEST 1 /* ARP request */ ++#define ARPOP_REPLY 2 /* ARP reply */ ++ ++struct ethhdr { ++ unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ ++ unsigned char h_source[ETH_ALEN]; /* source ether addr */ ++ unsigned short h_proto; /* packet type ID field */ ++}; ++ ++struct slirp_arphdr { ++ unsigned short ar_hrd; /* format of hardware address */ ++ unsigned short ar_pro; /* format of protocol address */ ++ unsigned char ar_hln; /* length of hardware address */ ++ unsigned char ar_pln; /* length of protocol address */ ++ unsigned short ar_op; /* ARP opcode (command) */ ++ ++ /* ++ * Ethernet looks like this : This bit is variable sized however... ++ */ ++ uint8_t ar_sha[ETH_ALEN]; /* sender hardware address */ ++ uint32_t ar_sip; /* sender IP address */ ++ uint8_t ar_tha[ETH_ALEN]; /* target hardware address */ ++ uint32_t ar_tip; /* target IP address */ ++} SLIRP_PACKED; ++ ++#define ARP_TABLE_SIZE 16 ++ ++typedef struct ArpTable { ++ struct slirp_arphdr table[ARP_TABLE_SIZE]; ++ int next_victim; ++} ArpTable; ++ ++void arp_table_add(Slirp *slirp, uint32_t ip_addr, ++ const uint8_t ethaddr[ETH_ALEN]); ++ ++bool arp_table_search(Slirp *slirp, uint32_t ip_addr, ++ uint8_t out_ethaddr[ETH_ALEN]); ++ ++struct ndpentry { ++ uint8_t eth_addr[ETH_ALEN]; /* sender hardware address */ ++ struct in6_addr ip_addr; /* sender IP address */ ++}; ++ ++#define NDP_TABLE_SIZE 16 ++ ++typedef struct NdpTable { ++ struct ndpentry table[NDP_TABLE_SIZE]; ++ /* ++ * The table is a cache with old entries overwritten when the table fills. ++ * Preserve the first entry: it is the guest, which is needed for lazy ++ * hostfwd guest address assignment. ++ */ ++ struct in6_addr guest_in6_addr; ++ int next_victim; ++} NdpTable; ++ ++void ndp_table_add(Slirp *slirp, struct in6_addr ip_addr, ++ uint8_t ethaddr[ETH_ALEN]); ++bool ndp_table_search(Slirp *slirp, struct in6_addr ip_addr, ++ uint8_t out_ethaddr[ETH_ALEN]); ++ ++struct Slirp { ++ unsigned time_fasttimo; ++ unsigned last_slowtimo; ++ bool do_slowtimo; ++ ++ bool in_enabled, in6_enabled; ++ ++ /* virtual network configuration */ ++ struct in_addr vnetwork_addr; ++ struct in_addr vnetwork_mask; ++ struct in_addr vhost_addr; ++ struct in6_addr vprefix_addr6; ++ uint8_t vprefix_len; ++ struct in6_addr vhost_addr6; ++ struct in_addr vdhcp_startaddr; ++ struct in_addr vnameserver_addr; ++ struct in6_addr vnameserver_addr6; ++ ++ struct in_addr client_ipaddr; ++ char client_hostname[33]; ++ ++ int restricted; ++ struct gfwd_list *guestfwd_list; ++ ++ int if_mtu; ++ int if_mru; ++ ++ bool disable_host_loopback; ++ ++ /* mbuf states */ ++ struct quehead m_freelist; ++ struct quehead m_usedlist; ++ int mbuf_alloced; ++ ++ /* if states */ ++ struct quehead if_fastq; /* fast queue (for interactive data) */ ++ struct quehead if_batchq; /* queue for non-interactive data */ ++ bool if_start_busy; /* avoid if_start recursion */ ++ ++ /* ip states */ ++ struct ipq ipq; /* ip reass. queue */ ++ uint16_t ip_id; /* ip packet ctr, for ids */ ++ ++ /* bootp/dhcp states */ ++ BOOTPClient bootp_clients[NB_BOOTP_CLIENTS]; ++ char *bootp_filename; ++ size_t vdnssearch_len; ++ uint8_t *vdnssearch; ++ char *vdomainname; ++ ++ /* tcp states */ ++ struct socket tcb; ++ struct socket *tcp_last_so; ++ tcp_seq tcp_iss; /* tcp initial send seq # */ ++ uint32_t tcp_now; /* for RFC 1323 timestamps */ ++ ++ /* udp states */ ++ struct socket udb; ++ struct socket *udp_last_so; ++ ++ /* icmp states */ ++ struct socket icmp; ++ struct socket *icmp_last_so; ++ ++ /* tftp states */ ++ char *tftp_prefix; ++ struct tftp_session tftp_sessions[TFTP_SESSIONS_MAX]; ++ char *tftp_server_name; ++ ++ ArpTable arp_table; ++ NdpTable ndp_table; ++ ++ GRand *grand; ++ void *ra_timer; ++ ++ bool enable_emu; ++ ++ const SlirpCb *cb; ++ void *opaque; ++ ++ struct sockaddr_in *outbound_addr; ++ struct sockaddr_in6 *outbound_addr6; ++ bool disable_dns; /* slirp will not redirect/serve any DNS packet */ ++}; ++ ++void if_start(Slirp *); ++ ++int get_dns_addr(struct in_addr *pdns_addr); ++int get_dns6_addr(struct in6_addr *pdns6_addr, uint32_t *scope_id); ++ ++/* ncsi.c */ ++void ncsi_input(Slirp *slirp, const uint8_t *pkt, int pkt_len); ++ ++#ifndef _WIN32 ++#include ++#endif ++ ++ ++extern bool slirp_do_keepalive; ++ ++#define TCP_MAXIDLE (TCPTV_KEEPCNT * TCPTV_KEEPINTVL) ++ ++/* dnssearch.c */ ++int translate_dnssearch(Slirp *s, const char **names); ++ ++/* cksum.c */ ++int cksum(struct mbuf *m, int len); ++int ip6_cksum(struct mbuf *m); ++ ++/* if.c */ ++void if_init(Slirp *); ++void if_output(struct socket *, struct mbuf *); ++ ++/* ip_input.c */ ++void ip_init(Slirp *); ++void ip_cleanup(Slirp *); ++void ip_input(struct mbuf *); ++void ip_slowtimo(Slirp *); ++void ip_stripoptions(register struct mbuf *, struct mbuf *); ++ ++/* ip_output.c */ ++int ip_output(struct socket *, struct mbuf *); ++ ++/* ip6_input.c */ ++void ip6_init(Slirp *); ++void ip6_cleanup(Slirp *); ++void ip6_input(struct mbuf *); ++ ++/* ip6_output */ ++int ip6_output(struct socket *, struct mbuf *, int fast); ++ ++/* tcp_input.c */ ++void tcp_input(register struct mbuf *, int, struct socket *, unsigned short af); ++int tcp_mss(register struct tcpcb *, unsigned); ++ ++/* tcp_output.c */ ++int tcp_output(register struct tcpcb *); ++void tcp_setpersist(register struct tcpcb *); ++ ++/* tcp_subr.c */ ++void tcp_init(Slirp *); ++void tcp_cleanup(Slirp *); ++void tcp_template(struct tcpcb *); ++void tcp_respond(struct tcpcb *, register struct tcpiphdr *, ++ register struct mbuf *, tcp_seq, tcp_seq, int, unsigned short); ++struct tcpcb *tcp_newtcpcb(struct socket *); ++struct tcpcb *tcp_close(register struct tcpcb *); ++void tcp_sockclosed(struct tcpcb *); ++int tcp_fconnect(struct socket *, unsigned short af); ++void tcp_connect(struct socket *); ++void tcp_attach(struct socket *); ++uint8_t tcp_tos(struct socket *); ++int tcp_emu(struct socket *, struct mbuf *); ++int tcp_ctl(struct socket *); ++struct tcpcb *tcp_drop(struct tcpcb *tp, int err); ++ ++struct socket *slirp_find_ctl_socket(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port); ++ ++void slirp_send_packet_all(Slirp *slirp, const void *buf, size_t len); ++ ++#endif +diff --git a/slirp/src/socket.c b/slirp/src/socket.c +new file mode 100644 +index 0000000000..2c1b789d48 +--- /dev/null ++++ b/slirp/src/socket.c +@@ -0,0 +1,1104 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++#include "ip_icmp.h" ++#ifdef __sun__ ++#include ++#endif ++#ifdef __linux__ ++#include ++#endif ++ ++static void sofcantrcvmore(struct socket *so); ++static void sofcantsendmore(struct socket *so); ++ ++struct socket *solookup(struct socket **last, struct socket *head, ++ struct sockaddr_storage *lhost, ++ struct sockaddr_storage *fhost) ++{ ++ struct socket *so = *last; ++ ++ /* Optimisation */ ++ if (so != head && sockaddr_equal(&(so->lhost.ss), lhost) && ++ (!fhost || sockaddr_equal(&so->fhost.ss, fhost))) { ++ return so; ++ } ++ ++ for (so = head->so_next; so != head; so = so->so_next) { ++ if (sockaddr_equal(&(so->lhost.ss), lhost) && ++ (!fhost || sockaddr_equal(&so->fhost.ss, fhost))) { ++ *last = so; ++ return so; ++ } ++ } ++ ++ return (struct socket *)NULL; ++} ++ ++/* ++ * Create a new socket, initialise the fields ++ * It is the responsibility of the caller to ++ * insque() it into the correct linked-list ++ */ ++struct socket *socreate(Slirp *slirp) ++{ ++ struct socket *so = g_new(struct socket, 1); ++ ++ memset(so, 0, sizeof(struct socket)); ++ so->so_state = SS_NOFDREF; ++ so->s = -1; ++ so->slirp = slirp; ++ so->pollfds_idx = -1; ++ ++ return so; ++} ++ ++/* ++ * Remove references to so from the given message queue. ++ */ ++static void soqfree(struct socket *so, struct quehead *qh) ++{ ++ struct mbuf *ifq; ++ ++ for (ifq = (struct mbuf *)qh->qh_link; (struct quehead *)ifq != qh; ++ ifq = ifq->ifq_next) { ++ if (ifq->ifq_so == so) { ++ struct mbuf *ifm; ++ ifq->ifq_so = NULL; ++ for (ifm = ifq->ifs_next; ifm != ifq; ifm = ifm->ifs_next) { ++ ifm->ifq_so = NULL; ++ } ++ } ++ } ++} ++ ++/* ++ * remque and free a socket, clobber cache ++ */ ++void sofree(struct socket *so) ++{ ++ Slirp *slirp = so->slirp; ++ ++ soqfree(so, &slirp->if_fastq); ++ soqfree(so, &slirp->if_batchq); ++ ++ if (so == slirp->tcp_last_so) { ++ slirp->tcp_last_so = &slirp->tcb; ++ } else if (so == slirp->udp_last_so) { ++ slirp->udp_last_so = &slirp->udb; ++ } else if (so == slirp->icmp_last_so) { ++ slirp->icmp_last_so = &slirp->icmp; ++ } ++ m_free(so->so_m); ++ ++ if (so->so_next && so->so_prev) ++ remque(so); /* crashes if so is not in a queue */ ++ ++ if (so->so_tcpcb) { ++ g_free(so->so_tcpcb); ++ } ++ g_free(so); ++} ++ ++size_t sopreprbuf(struct socket *so, struct iovec *iov, int *np) ++{ ++ int n, lss, total; ++ struct sbuf *sb = &so->so_snd; ++ int len = sb->sb_datalen - sb->sb_cc; ++ int mss = so->so_tcpcb->t_maxseg; ++ ++ DEBUG_CALL("sopreprbuf"); ++ DEBUG_ARG("so = %p", so); ++ ++ if (len <= 0) ++ return 0; ++ ++ iov[0].iov_base = sb->sb_wptr; ++ iov[1].iov_base = NULL; ++ iov[1].iov_len = 0; ++ if (sb->sb_wptr < sb->sb_rptr) { ++ iov[0].iov_len = sb->sb_rptr - sb->sb_wptr; ++ /* Should never succeed, but... */ ++ if (iov[0].iov_len > len) ++ iov[0].iov_len = len; ++ if (iov[0].iov_len > mss) ++ iov[0].iov_len -= iov[0].iov_len % mss; ++ n = 1; ++ } else { ++ iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr; ++ /* Should never succeed, but... */ ++ if (iov[0].iov_len > len) ++ iov[0].iov_len = len; ++ len -= iov[0].iov_len; ++ if (len) { ++ iov[1].iov_base = sb->sb_data; ++ iov[1].iov_len = sb->sb_rptr - sb->sb_data; ++ if (iov[1].iov_len > len) ++ iov[1].iov_len = len; ++ total = iov[0].iov_len + iov[1].iov_len; ++ if (total > mss) { ++ lss = total % mss; ++ if (iov[1].iov_len > lss) { ++ iov[1].iov_len -= lss; ++ n = 2; ++ } else { ++ lss -= iov[1].iov_len; ++ iov[0].iov_len -= lss; ++ n = 1; ++ } ++ } else ++ n = 2; ++ } else { ++ if (iov[0].iov_len > mss) ++ iov[0].iov_len -= iov[0].iov_len % mss; ++ n = 1; ++ } ++ } ++ if (np) ++ *np = n; ++ ++ return iov[0].iov_len + (n - 1) * iov[1].iov_len; ++} ++ ++/* ++ * Read from so's socket into sb_snd, updating all relevant sbuf fields ++ * NOTE: This will only be called if it is select()ed for reading, so ++ * a read() of 0 (or less) means it's disconnected ++ */ ++int soread(struct socket *so) ++{ ++ int n, nn; ++ size_t buf_len; ++ struct sbuf *sb = &so->so_snd; ++ struct iovec iov[2]; ++ ++ DEBUG_CALL("soread"); ++ DEBUG_ARG("so = %p", so); ++ ++ /* ++ * No need to check if there's enough room to read. ++ * soread wouldn't have been called if there weren't ++ */ ++ buf_len = sopreprbuf(so, iov, &n); ++ assert(buf_len != 0); ++ ++ nn = recv(so->s, iov[0].iov_base, iov[0].iov_len, 0); ++ if (nn <= 0) { ++ if (nn < 0 && (errno == EINTR || errno == EAGAIN)) ++ return 0; ++ else { ++ int err; ++ socklen_t elen = sizeof err; ++ struct sockaddr_storage addr; ++ struct sockaddr *paddr = (struct sockaddr *)&addr; ++ socklen_t alen = sizeof addr; ++ ++ err = errno; ++ if (nn == 0) { ++ int shutdown_wr = so->so_state & SS_FCANTSENDMORE; ++ ++ if (!shutdown_wr && getpeername(so->s, paddr, &alen) < 0) { ++ err = errno; ++ } else { ++ getsockopt(so->s, SOL_SOCKET, SO_ERROR, &err, &elen); ++ } ++ } ++ ++ DEBUG_MISC(" --- soread() disconnected, nn = %d, errno = %d-%s", nn, ++ errno, strerror(errno)); ++ sofcantrcvmore(so); ++ ++ if (err == ECONNRESET || err == ECONNREFUSED || err == ENOTCONN || ++ err == EPIPE) { ++ tcp_drop(sototcpcb(so), err); ++ } else { ++ tcp_sockclosed(sototcpcb(so)); ++ } ++ return -1; ++ } ++ } ++ ++ /* ++ * If there was no error, try and read the second time round ++ * We read again if n = 2 (ie, there's another part of the buffer) ++ * and we read as much as we could in the first read ++ * We don't test for <= 0 this time, because there legitimately ++ * might not be any more data (since the socket is non-blocking), ++ * a close will be detected on next iteration. ++ * A return of -1 won't (shouldn't) happen, since it didn't happen above ++ */ ++ if (n == 2 && nn == iov[0].iov_len) { ++ int ret; ++ ret = recv(so->s, iov[1].iov_base, iov[1].iov_len, 0); ++ if (ret > 0) ++ nn += ret; ++ } ++ ++ DEBUG_MISC(" ... read nn = %d bytes", nn); ++ ++ /* Update fields */ ++ sb->sb_cc += nn; ++ sb->sb_wptr += nn; ++ if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen)) ++ sb->sb_wptr -= sb->sb_datalen; ++ return nn; ++} ++ ++int soreadbuf(struct socket *so, const char *buf, int size) ++{ ++ int n, nn, copy = size; ++ struct sbuf *sb = &so->so_snd; ++ struct iovec iov[2]; ++ ++ DEBUG_CALL("soreadbuf"); ++ DEBUG_ARG("so = %p", so); ++ ++ /* ++ * No need to check if there's enough room to read. ++ * soread wouldn't have been called if there weren't ++ */ ++ assert(size > 0); ++ if (sopreprbuf(so, iov, &n) < size) ++ goto err; ++ ++ nn = MIN(iov[0].iov_len, copy); ++ memcpy(iov[0].iov_base, buf, nn); ++ ++ copy -= nn; ++ buf += nn; ++ ++ if (copy == 0) ++ goto done; ++ ++ memcpy(iov[1].iov_base, buf, copy); ++ ++done: ++ /* Update fields */ ++ sb->sb_cc += size; ++ sb->sb_wptr += size; ++ if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen)) ++ sb->sb_wptr -= sb->sb_datalen; ++ return size; ++err: ++ ++ sofcantrcvmore(so); ++ tcp_sockclosed(sototcpcb(so)); ++ g_critical("soreadbuf buffer too small"); ++ return -1; ++} ++ ++/* ++ * Get urgent data ++ * ++ * When the socket is created, we set it SO_OOBINLINE, ++ * so when OOB data arrives, we soread() it and everything ++ * in the send buffer is sent as urgent data ++ */ ++int sorecvoob(struct socket *so) ++{ ++ struct tcpcb *tp = sototcpcb(so); ++ int ret; ++ ++ DEBUG_CALL("sorecvoob"); ++ DEBUG_ARG("so = %p", so); ++ ++ /* ++ * We take a guess at how much urgent data has arrived. ++ * In most situations, when urgent data arrives, the next ++ * read() should get all the urgent data. This guess will ++ * be wrong however if more data arrives just after the ++ * urgent data, or the read() doesn't return all the ++ * urgent data. ++ */ ++ ret = soread(so); ++ if (ret > 0) { ++ tp->snd_up = tp->snd_una + so->so_snd.sb_cc; ++ tp->t_force = 1; ++ tcp_output(tp); ++ tp->t_force = 0; ++ } ++ ++ return ret; ++} ++ ++/* ++ * Send urgent data ++ * There's a lot duplicated code here, but... ++ */ ++int sosendoob(struct socket *so) ++{ ++ struct sbuf *sb = &so->so_rcv; ++ char buff[2048]; /* XXX Shouldn't be sending more oob data than this */ ++ ++ int n; ++ ++ DEBUG_CALL("sosendoob"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("sb->sb_cc = %d", sb->sb_cc); ++ ++ if (so->so_urgc > sizeof(buff)) ++ so->so_urgc = sizeof(buff); /* XXXX */ ++ ++ if (sb->sb_rptr < sb->sb_wptr) { ++ /* We can send it directly */ ++ n = slirp_send(so, sb->sb_rptr, so->so_urgc, ++ (MSG_OOB)); /* |MSG_DONTWAIT)); */ ++ } else { ++ /* ++ * Since there's no sendv or sendtov like writev, ++ * we must copy all data to a linear buffer then ++ * send it all ++ */ ++ uint32_t urgc = so->so_urgc; /* Amount of room left in buff */ ++ int len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr; ++ if (len > urgc) { ++ len = urgc; ++ } ++ memcpy(buff, sb->sb_rptr, len); ++ urgc -= len; ++ if (urgc) { ++ /* We still have some room for the rest */ ++ n = sb->sb_wptr - sb->sb_data; ++ if (n > urgc) { ++ n = urgc; ++ } ++ memcpy((buff + len), sb->sb_data, n); ++ len += n; ++ } ++ n = slirp_send(so, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */ ++#ifdef DEBUG ++ if (n != len) { ++ DEBUG_ERROR("Didn't send all data urgently XXXXX"); ++ } ++#endif ++ } ++ ++ if (n < 0) { ++ return n; ++ } ++ so->so_urgc -= n; ++ DEBUG_MISC(" ---2 sent %d bytes urgent data, %d urgent bytes left", n, ++ so->so_urgc); ++ ++ sb->sb_cc -= n; ++ sb->sb_rptr += n; ++ if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen)) ++ sb->sb_rptr -= sb->sb_datalen; ++ ++ return n; ++} ++ ++/* ++ * Write data from so_rcv to so's socket, ++ * updating all sbuf field as necessary ++ */ ++int sowrite(struct socket *so) ++{ ++ int n, nn; ++ struct sbuf *sb = &so->so_rcv; ++ int len = sb->sb_cc; ++ struct iovec iov[2]; ++ ++ DEBUG_CALL("sowrite"); ++ DEBUG_ARG("so = %p", so); ++ ++ if (so->so_urgc) { ++ uint32_t expected = so->so_urgc; ++ if (sosendoob(so) < expected) { ++ /* Treat a short write as a fatal error too, ++ * rather than continuing on and sending the urgent ++ * data as if it were non-urgent and leaving the ++ * so_urgc count wrong. ++ */ ++ goto err_disconnected; ++ } ++ if (sb->sb_cc == 0) ++ return 0; ++ } ++ ++ /* ++ * No need to check if there's something to write, ++ * sowrite wouldn't have been called otherwise ++ */ ++ ++ iov[0].iov_base = sb->sb_rptr; ++ iov[1].iov_base = NULL; ++ iov[1].iov_len = 0; ++ if (sb->sb_rptr < sb->sb_wptr) { ++ iov[0].iov_len = sb->sb_wptr - sb->sb_rptr; ++ /* Should never succeed, but... */ ++ if (iov[0].iov_len > len) ++ iov[0].iov_len = len; ++ n = 1; ++ } else { ++ iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr; ++ if (iov[0].iov_len > len) ++ iov[0].iov_len = len; ++ len -= iov[0].iov_len; ++ if (len) { ++ iov[1].iov_base = sb->sb_data; ++ iov[1].iov_len = sb->sb_wptr - sb->sb_data; ++ if (iov[1].iov_len > len) ++ iov[1].iov_len = len; ++ n = 2; ++ } else ++ n = 1; ++ } ++ /* Check if there's urgent data to send, and if so, send it */ ++ ++ nn = slirp_send(so, iov[0].iov_base, iov[0].iov_len, 0); ++ /* This should never happen, but people tell me it does *shrug* */ ++ if (nn < 0 && (errno == EAGAIN || errno == EINTR)) ++ return 0; ++ ++ if (nn <= 0) { ++ goto err_disconnected; ++ } ++ ++ if (n == 2 && nn == iov[0].iov_len) { ++ int ret; ++ ret = slirp_send(so, iov[1].iov_base, iov[1].iov_len, 0); ++ if (ret > 0) ++ nn += ret; ++ } ++ DEBUG_MISC(" ... wrote nn = %d bytes", nn); ++ ++ /* Update sbuf */ ++ sb->sb_cc -= nn; ++ sb->sb_rptr += nn; ++ if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen)) ++ sb->sb_rptr -= sb->sb_datalen; ++ ++ /* ++ * If in DRAIN mode, and there's no more data, set ++ * it CANTSENDMORE ++ */ ++ if ((so->so_state & SS_FWDRAIN) && sb->sb_cc == 0) ++ sofcantsendmore(so); ++ ++ return nn; ++ ++err_disconnected: ++ DEBUG_MISC(" --- sowrite disconnected, so->so_state = %x, errno = %d", ++ so->so_state, errno); ++ sofcantsendmore(so); ++ tcp_sockclosed(sototcpcb(so)); ++ return -1; ++} ++ ++/* ++ * recvfrom() a UDP socket ++ */ ++void sorecvfrom(struct socket *so) ++{ ++ struct sockaddr_storage addr; ++ struct sockaddr_storage saddr, daddr; ++ socklen_t addrlen = sizeof(struct sockaddr_storage); ++ char buff[256]; ++ ++#ifdef __linux__ ++ ssize_t size; ++ struct msghdr msg; ++ struct iovec iov; ++ char control[1024]; ++ ++ /* First look for errors */ ++ memset(&msg, 0, sizeof(msg)); ++ msg.msg_name = &saddr; ++ msg.msg_namelen = sizeof(saddr); ++ msg.msg_control = control; ++ msg.msg_controllen = sizeof(control); ++ iov.iov_base = buff; ++ iov.iov_len = sizeof(buff); ++ msg.msg_iov = &iov; ++ msg.msg_iovlen = 1; ++ ++ size = recvmsg(so->s, &msg, MSG_ERRQUEUE); ++ if (size >= 0) { ++ struct cmsghdr *cmsg; ++ for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { ++ ++ if (cmsg->cmsg_level == IPPROTO_IP && ++ cmsg->cmsg_type == IP_RECVERR) { ++ struct sock_extended_err *ee = ++ (struct sock_extended_err *) CMSG_DATA(cmsg); ++ ++ if (ee->ee_origin == SO_EE_ORIGIN_ICMP) { ++ /* Got an ICMP error, forward it */ ++ struct sockaddr_in *sin; ++ ++ sin = (struct sockaddr_in *) SO_EE_OFFENDER(ee); ++ icmp_forward_error(so->so_m, ee->ee_type, ee->ee_code, ++ 0, NULL, &sin->sin_addr); ++ } ++ } ++ else if (cmsg->cmsg_level == IPPROTO_IPV6 && ++ cmsg->cmsg_type == IPV6_RECVERR) { ++ struct sock_extended_err *ee = ++ (struct sock_extended_err *) CMSG_DATA(cmsg); ++ ++ if (ee->ee_origin == SO_EE_ORIGIN_ICMP6) { ++ /* Got an ICMPv6 error, forward it */ ++ struct sockaddr_in6 *sin6; ++ ++ sin6 = (struct sockaddr_in6 *) SO_EE_OFFENDER(ee); ++ icmp6_forward_error(so->so_m, ee->ee_type, ee->ee_code, ++ &sin6->sin6_addr); ++ } ++ } ++ } ++ return; ++ } ++#endif ++ ++ DEBUG_CALL("sorecvfrom"); ++ DEBUG_ARG("so = %p", so); ++ ++ if (so->so_type == IPPROTO_ICMP) { /* This is a "ping" reply */ ++ int len; ++ ++ len = recvfrom(so->s, buff, 256, 0, (struct sockaddr *)&addr, &addrlen); ++ /* XXX Check if reply is "correct"? */ ++ ++ if (len == -1 || len == 0) { ++ uint8_t code = ICMP_UNREACH_PORT; ++ ++ if (errno == EHOSTUNREACH) ++ code = ICMP_UNREACH_HOST; ++ else if (errno == ENETUNREACH) ++ code = ICMP_UNREACH_NET; ++ ++ DEBUG_MISC(" udp icmp rx errno = %d-%s", errno, strerror(errno)); ++ icmp_send_error(so->so_m, ICMP_UNREACH, code, 0, strerror(errno)); ++ } else { ++ icmp_reflect(so->so_m); ++ so->so_m = NULL; /* Don't m_free() it again! */ ++ } ++ /* No need for this socket anymore, udp_detach it */ ++ udp_detach(so); ++ } else { /* A "normal" UDP packet */ ++ struct mbuf *m; ++ int len; ++#ifdef _WIN32 ++ unsigned long n; ++#else ++ int n; ++#endif ++ ++ if (ioctlsocket(so->s, FIONREAD, &n) != 0) { ++ DEBUG_MISC(" ioctlsocket errno = %d-%s\n", errno, strerror(errno)); ++ return; ++ } ++ ++ m = m_get(so->slirp); ++ if (!m) { ++ return; ++ } ++ switch (so->so_ffamily) { ++ case AF_INET: ++ m->m_data += IF_MAXLINKHDR + sizeof(struct udpiphdr); ++ break; ++ case AF_INET6: ++ m->m_data += ++ IF_MAXLINKHDR + sizeof(struct ip6) + sizeof(struct udphdr); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ ++ /* ++ * XXX Shouldn't FIONREAD packets destined for port 53, ++ * but I don't know the max packet size for DNS lookups ++ */ ++ len = M_FREEROOM(m); ++ /* if (so->so_fport != htons(53)) { */ ++ ++ if (n > len) { ++ n = (m->m_data - m->m_dat) + m->m_len + n + 1; ++ m_inc(m, n); ++ len = M_FREEROOM(m); ++ } ++ /* } */ ++ ++ m->m_len = recvfrom(so->s, m->m_data, len, 0, (struct sockaddr *)&addr, ++ &addrlen); ++ DEBUG_MISC(" did recvfrom %d, errno = %d-%s", m->m_len, errno, ++ strerror(errno)); ++ if (m->m_len < 0) { ++ /* Report error as ICMP */ ++ switch (so->so_lfamily) { ++ uint8_t code; ++ case AF_INET: ++ code = ICMP_UNREACH_PORT; ++ ++ if (errno == EHOSTUNREACH) { ++ code = ICMP_UNREACH_HOST; ++ } else if (errno == ENETUNREACH) { ++ code = ICMP_UNREACH_NET; ++ } ++ ++ DEBUG_MISC(" rx error, tx icmp ICMP_UNREACH:%i", code); ++ icmp_send_error(so->so_m, ICMP_UNREACH, code, 0, ++ strerror(errno)); ++ break; ++ case AF_INET6: ++ code = ICMP6_UNREACH_PORT; ++ ++ if (errno == EHOSTUNREACH) { ++ code = ICMP6_UNREACH_ADDRESS; ++ } else if (errno == ENETUNREACH) { ++ code = ICMP6_UNREACH_NO_ROUTE; ++ } ++ ++ DEBUG_MISC(" rx error, tx icmp6 ICMP_UNREACH:%i", code); ++ icmp6_send_error(so->so_m, ICMP6_UNREACH, code); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ m_free(m); ++ } else { ++ /* ++ * Hack: domain name lookup will be used the most for UDP, ++ * and since they'll only be used once there's no need ++ * for the 4 minute (or whatever) timeout... So we time them ++ * out much quicker (10 seconds for now...) ++ */ ++ if (so->so_expire) { ++ if (so->so_fport == htons(53)) ++ so->so_expire = curtime + SO_EXPIREFAST; ++ else ++ so->so_expire = curtime + SO_EXPIRE; ++ } ++ ++ /* ++ * If this packet was destined for CTL_ADDR, ++ * make it look like that's where it came from ++ */ ++ saddr = addr; ++ sotranslate_in(so, &saddr); ++ ++ /* Perform lazy guest IP address resolution if needed. */ ++ if (so->so_state & SS_HOSTFWD) { ++ if (soassign_guest_addr_if_needed(so) < 0) { ++ DEBUG_MISC(" guest address not available yet"); ++ switch (so->so_lfamily) { ++ case AF_INET: ++ icmp_send_error(so->so_m, ICMP_UNREACH, ++ ICMP_UNREACH_HOST, 0, ++ "guest address not available yet"); ++ break; ++ case AF_INET6: ++ icmp6_send_error(so->so_m, ICMP6_UNREACH, ++ ICMP6_UNREACH_ADDRESS); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ m_free(m); ++ return; ++ } ++ } ++ daddr = so->lhost.ss; ++ ++ switch (so->so_ffamily) { ++ case AF_INET: ++ udp_output(so, m, (struct sockaddr_in *)&saddr, ++ (struct sockaddr_in *)&daddr, so->so_iptos); ++ break; ++ case AF_INET6: ++ udp6_output(so, m, (struct sockaddr_in6 *)&saddr, ++ (struct sockaddr_in6 *)&daddr); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ } /* rx error */ ++ } /* if ping packet */ ++} ++ ++/* ++ * sendto() a socket ++ */ ++int sosendto(struct socket *so, struct mbuf *m) ++{ ++ int ret; ++ struct sockaddr_storage addr; ++ ++ DEBUG_CALL("sosendto"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ ++ addr = so->fhost.ss; ++ DEBUG_CALL(" sendto()ing)"); ++ if (sotranslate_out(so, &addr) < 0) { ++ return -1; ++ } ++ ++ /* Don't care what port we get */ ++ ret = sendto(so->s, m->m_data, m->m_len, 0, (struct sockaddr *)&addr, ++ sockaddr_size(&addr)); ++ if (ret < 0) ++ return -1; ++ ++ /* ++ * Kill the socket if there's no reply in 4 minutes, ++ * but only if it's an expirable socket ++ */ ++ if (so->so_expire) ++ so->so_expire = curtime + SO_EXPIRE; ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_ISFCONNECTED; /* So that it gets select()ed */ ++ return 0; ++} ++ ++/* ++ * Listen for incoming TCP connections ++ * On failure errno contains the reason. ++ */ ++struct socket *tcpx_listen(Slirp *slirp, ++ const struct sockaddr *haddr, socklen_t haddrlen, ++ const struct sockaddr *laddr, socklen_t laddrlen, ++ int flags) ++{ ++ struct socket *so; ++ int s, opt = 1; ++ socklen_t addrlen; ++ ++ DEBUG_CALL("tcpx_listen"); ++ /* AF_INET6 addresses are bigger than AF_INET, so this is big enough. */ ++ char addrstr[INET6_ADDRSTRLEN]; ++ char portstr[6]; ++ int ret; ++ ret = getnameinfo(haddr, haddrlen, addrstr, sizeof(addrstr), portstr, sizeof(portstr), NI_NUMERICHOST|NI_NUMERICSERV); ++ g_assert(ret == 0); ++ DEBUG_ARG("haddr = %s", addrstr); ++ DEBUG_ARG("hport = %s", portstr); ++ ret = getnameinfo(laddr, laddrlen, addrstr, sizeof(addrstr), portstr, sizeof(portstr), NI_NUMERICHOST|NI_NUMERICSERV); ++ g_assert(ret == 0); ++ DEBUG_ARG("laddr = %s", addrstr); ++ DEBUG_ARG("lport = %s", portstr); ++ DEBUG_ARG("flags = %x", flags); ++ ++ /* ++ * SS_HOSTFWD sockets can be accepted multiple times, so they can't be ++ * SS_FACCEPTONCE. Also, SS_HOSTFWD connections can be accepted and ++ * immediately closed if the guest address isn't available yet, which is ++ * incompatible with the "accept once" concept. Correct code will never ++ * request both, so disallow their combination by assertion. ++ */ ++ g_assert(!((flags & SS_HOSTFWD) && (flags & SS_FACCEPTONCE))); ++ ++ so = socreate(slirp); ++ ++ /* Don't tcp_attach... we don't need so_snd nor so_rcv */ ++ so->so_tcpcb = tcp_newtcpcb(so); ++ insque(so, &slirp->tcb); ++ ++ /* ++ * SS_FACCEPTONCE sockets must time out. ++ */ ++ if (flags & SS_FACCEPTONCE) ++ so->so_tcpcb->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT * 2; ++ ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= (SS_FACCEPTCONN | flags); ++ ++ sockaddr_copy(&so->lhost.sa, sizeof(so->lhost), laddr, laddrlen); ++ ++ s = slirp_socket(haddr->sa_family, SOCK_STREAM, 0); ++ if ((s < 0) || ++ (haddr->sa_family == AF_INET6 && slirp_socket_set_v6only(s, (flags & SS_HOSTFWD_V6ONLY) != 0) < 0) || ++ (slirp_socket_set_fast_reuse(s) < 0) || ++ (bind(s, haddr, haddrlen) < 0) || ++ (listen(s, 1) < 0)) { ++ int tmperrno = errno; /* Don't clobber the real reason we failed */ ++ if (s >= 0) { ++ closesocket(s); ++ } ++ sofree(so); ++ /* Restore the real errno */ ++#ifdef _WIN32 ++ WSASetLastError(tmperrno); ++#else ++ errno = tmperrno; ++#endif ++ return NULL; ++ } ++ setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int)); ++ slirp_socket_set_nodelay(s); ++ ++ addrlen = sizeof(so->fhost); ++ getsockname(s, &so->fhost.sa, &addrlen); ++ sotranslate_accept(so); ++ ++ so->s = s; ++ return so; ++} ++ ++struct socket *tcp_listen(Slirp *slirp, uint32_t haddr, unsigned hport, ++ uint32_t laddr, unsigned lport, int flags) ++{ ++ struct sockaddr_in hsa, lsa; ++ ++ memset(&hsa, 0, sizeof(hsa)); ++ hsa.sin_family = AF_INET; ++ hsa.sin_addr.s_addr = haddr; ++ hsa.sin_port = hport; ++ ++ memset(&lsa, 0, sizeof(lsa)); ++ lsa.sin_family = AF_INET; ++ lsa.sin_addr.s_addr = laddr; ++ lsa.sin_port = lport; ++ ++ return tcpx_listen(slirp, (const struct sockaddr *) &hsa, sizeof(hsa), (struct sockaddr *) &lsa, sizeof(lsa), flags); ++} ++ ++/* ++ * Various session state calls ++ * XXX Should be #define's ++ * The socket state stuff needs work, these often get call 2 or 3 ++ * times each when only 1 was needed ++ */ ++void soisfconnecting(struct socket *so) ++{ ++ so->so_state &= ~(SS_NOFDREF | SS_ISFCONNECTED | SS_FCANTRCVMORE | ++ SS_FCANTSENDMORE | SS_FWDRAIN); ++ so->so_state |= SS_ISFCONNECTING; /* Clobber other states */ ++} ++ ++void soisfconnected(struct socket *so) ++{ ++ so->so_state &= ~(SS_ISFCONNECTING | SS_FWDRAIN | SS_NOFDREF); ++ so->so_state |= SS_ISFCONNECTED; /* Clobber other states */ ++} ++ ++static void sofcantrcvmore(struct socket *so) ++{ ++ if ((so->so_state & SS_NOFDREF) == 0) { ++ shutdown(so->s, 0); ++ } ++ so->so_state &= ~(SS_ISFCONNECTING); ++ if (so->so_state & SS_FCANTSENDMORE) { ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_NOFDREF; /* Don't select it */ ++ } else { ++ so->so_state |= SS_FCANTRCVMORE; ++ } ++} ++ ++static void sofcantsendmore(struct socket *so) ++{ ++ if ((so->so_state & SS_NOFDREF) == 0) { ++ shutdown(so->s, 1); /* send FIN to fhost */ ++ } ++ so->so_state &= ~(SS_ISFCONNECTING); ++ if (so->so_state & SS_FCANTRCVMORE) { ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_NOFDREF; /* as above */ ++ } else { ++ so->so_state |= SS_FCANTSENDMORE; ++ } ++} ++ ++/* ++ * Set write drain mode ++ * Set CANTSENDMORE once all data has been write()n ++ */ ++void sofwdrain(struct socket *so) ++{ ++ if (so->so_rcv.sb_cc) ++ so->so_state |= SS_FWDRAIN; ++ else ++ sofcantsendmore(so); ++} ++ ++static bool sotranslate_out4(Slirp *s, struct socket *so, struct sockaddr_in *sin) ++{ ++ if (!s->disable_dns && so->so_faddr.s_addr == s->vnameserver_addr.s_addr) { ++ return so->so_fport == htons(53) && get_dns_addr(&sin->sin_addr) >= 0; ++ } ++ ++ if (so->so_faddr.s_addr == s->vhost_addr.s_addr || ++ so->so_faddr.s_addr == 0xffffffff) { ++ if (s->disable_host_loopback) { ++ return false; ++ } ++ ++ sin->sin_addr = loopback_addr; ++ } ++ ++ return true; ++} ++ ++static bool sotranslate_out6(Slirp *s, struct socket *so, struct sockaddr_in6 *sin) ++{ ++ if (!s->disable_dns && in6_equal(&so->so_faddr6, &s->vnameserver_addr6)) { ++ uint32_t scope_id; ++ if (so->so_fport == htons(53) && get_dns6_addr(&sin->sin6_addr, &scope_id) >= 0) { ++ sin->sin6_scope_id = scope_id; ++ return true; ++ } ++ return false; ++ } ++ ++ if (in6_equal_net(&so->so_faddr6, &s->vprefix_addr6, s->vprefix_len) || ++ in6_equal(&so->so_faddr6, &(struct in6_addr)ALLNODES_MULTICAST)) { ++ if (s->disable_host_loopback) { ++ return false; ++ } ++ ++ sin->sin6_addr = in6addr_loopback; ++ } ++ ++ return true; ++} ++ ++ ++/* ++ * Translate addr in host addr when it is a virtual address ++ */ ++int sotranslate_out(struct socket *so, struct sockaddr_storage *addr) ++{ ++ bool ok = true; ++ ++ switch (addr->ss_family) { ++ case AF_INET: ++ ok = sotranslate_out4(so->slirp, so, (struct sockaddr_in *)addr); ++ break; ++ case AF_INET6: ++ ok = sotranslate_out6(so->slirp, so, (struct sockaddr_in6 *)addr); ++ break; ++ } ++ ++ if (!ok) { ++ errno = EPERM; ++ return -1; ++ } ++ ++ return 0; ++} ++ ++void sotranslate_in(struct socket *so, struct sockaddr_storage *addr) ++{ ++ Slirp *slirp = so->slirp; ++ struct sockaddr_in *sin = (struct sockaddr_in *)addr; ++ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr; ++ ++ switch (addr->ss_family) { ++ case AF_INET: ++ if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) == ++ slirp->vnetwork_addr.s_addr) { ++ uint32_t inv_mask = ~slirp->vnetwork_mask.s_addr; ++ ++ if ((so->so_faddr.s_addr & inv_mask) == inv_mask) { ++ sin->sin_addr = slirp->vhost_addr; ++ } else if (sin->sin_addr.s_addr == loopback_addr.s_addr || ++ so->so_faddr.s_addr != slirp->vhost_addr.s_addr) { ++ sin->sin_addr = so->so_faddr; ++ } ++ } ++ break; ++ ++ case AF_INET6: ++ if (in6_equal_net(&so->so_faddr6, &slirp->vprefix_addr6, ++ slirp->vprefix_len)) { ++ if (in6_equal(&sin6->sin6_addr, &in6addr_loopback) || ++ !in6_equal(&so->so_faddr6, &slirp->vhost_addr6)) { ++ sin6->sin6_addr = so->so_faddr6; ++ } ++ } ++ break; ++ ++ default: ++ break; ++ } ++} ++ ++/* ++ * Translate connections from localhost to the real hostname ++ */ ++void sotranslate_accept(struct socket *so) ++{ ++ Slirp *slirp = so->slirp; ++ ++ switch (so->so_ffamily) { ++ case AF_INET: ++ if (so->so_faddr.s_addr == INADDR_ANY || ++ (so->so_faddr.s_addr & loopback_mask) == ++ (loopback_addr.s_addr & loopback_mask)) { ++ so->so_faddr = slirp->vhost_addr; ++ } ++ break; ++ ++ case AF_INET6: ++ if (in6_equal(&so->so_faddr6, &in6addr_any) || ++ in6_equal(&so->so_faddr6, &in6addr_loopback)) { ++ so->so_faddr6 = slirp->vhost_addr6; ++ } ++ break; ++ ++ default: ++ break; ++ } ++} ++ ++void sodrop(struct socket *s, int num) ++{ ++ if (sbdrop(&s->so_snd, num)) { ++ s->slirp->cb->notify(s->slirp->opaque); ++ } ++} ++ ++/* ++ * Translate "addr-any" in so->lhost to the guest's actual address. ++ * Returns 0 for success, or -1 if the guest doesn't have an address yet ++ * with errno set to EHOSTUNREACH. ++ * ++ * The guest address is taken from the first entry in the ARP table for IPv4 ++ * and the first entry in the NDP table for IPv6. ++ * Note: The IPv4 path isn't exercised yet as all hostfwd "" guest translations ++ * are handled immediately by using slirp->vdhcp_startaddr. ++ */ ++int soassign_guest_addr_if_needed(struct socket *so) ++{ ++ Slirp *slirp = so->slirp; ++ /* AF_INET6 addresses are bigger than AF_INET, so this is big enough. */ ++ char addrstr[INET6_ADDRSTRLEN]; ++ char portstr[6]; ++ ++ g_assert(so->so_state & SS_HOSTFWD); ++ ++ switch (so->so_ffamily) { ++ case AF_INET: ++ if (so->so_laddr.s_addr == INADDR_ANY) { ++ g_assert_not_reached(); ++ } ++ break; ++ ++ case AF_INET6: ++ if (in6_zero(&so->so_laddr6)) { ++ int ret; ++ if (in6_zero(&slirp->ndp_table.guest_in6_addr)) { ++ errno = EHOSTUNREACH; ++ return -1; ++ } ++ so->so_laddr6 = slirp->ndp_table.guest_in6_addr; ++ ret = getnameinfo((const struct sockaddr *) &so->lhost.ss, ++ sizeof(so->lhost.ss), addrstr, sizeof(addrstr), ++ portstr, sizeof(portstr), ++ NI_NUMERICHOST|NI_NUMERICSERV); ++ g_assert(ret == 0); ++ DEBUG_MISC("%s: new ip = [%s]:%s", __func__, addrstr, portstr); ++ } ++ break; ++ ++ default: ++ break; ++ } ++ ++ return 0; ++} +diff --git a/slirp/src/socket.h b/slirp/src/socket.h +new file mode 100644 +index 0000000000..a73175dc29 +--- /dev/null ++++ b/slirp/src/socket.h +@@ -0,0 +1,186 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#ifndef SLIRP_SOCKET_H ++#define SLIRP_SOCKET_H ++ ++#include "misc.h" ++#include "sbuf.h" ++ ++#define SO_EXPIRE 240000 ++#define SO_EXPIREFAST 10000 ++ ++/* Helps unify some in/in6 routines. */ ++union in4or6_addr { ++ struct in_addr addr4; ++ struct in6_addr addr6; ++}; ++typedef union in4or6_addr in4or6_addr; ++ ++/* ++ * Our socket structure ++ */ ++ ++union slirp_sockaddr { ++ struct sockaddr sa; ++ struct sockaddr_storage ss; ++ struct sockaddr_in sin; ++ struct sockaddr_in6 sin6; ++}; ++ ++struct socket { ++ struct socket *so_next, *so_prev; /* For a linked list of sockets */ ++ ++ int s; /* The actual socket */ ++ struct gfwd_list *guestfwd; ++ ++ int pollfds_idx; /* GPollFD GArray index */ ++ ++ Slirp *slirp; /* managing slirp instance */ ++ ++ /* XXX union these with not-yet-used sbuf params */ ++ struct mbuf *so_m; /* Pointer to the original SYN packet, ++ * for non-blocking connect()'s, and ++ * PING reply's */ ++ struct tcpiphdr *so_ti; /* Pointer to the original ti within ++ * so_mconn, for non-blocking connections */ ++ uint32_t so_urgc; ++ union slirp_sockaddr fhost; /* Foreign host */ ++#define so_faddr fhost.sin.sin_addr ++#define so_fport fhost.sin.sin_port ++#define so_faddr6 fhost.sin6.sin6_addr ++#define so_fport6 fhost.sin6.sin6_port ++#define so_ffamily fhost.ss.ss_family ++ ++ union slirp_sockaddr lhost; /* Local host */ ++#define so_laddr lhost.sin.sin_addr ++#define so_lport lhost.sin.sin_port ++#define so_laddr6 lhost.sin6.sin6_addr ++#define so_lport6 lhost.sin6.sin6_port ++#define so_lfamily lhost.ss.ss_family ++ ++ uint8_t so_iptos; /* Type of service */ ++ uint8_t so_emu; /* Is the socket emulated? */ ++ ++ uint8_t so_type; /* Type of socket, UDP or TCP */ ++ int32_t so_state; /* internal state flags SS_*, below */ ++ ++ struct tcpcb *so_tcpcb; /* pointer to TCP protocol control block */ ++ unsigned so_expire; /* When the socket will expire */ ++ ++ int so_queued; /* Number of packets queued from this socket */ ++ int so_nqueued; /* Number of packets queued in a row ++ * Used to determine when to "downgrade" a session ++ * from fastq to batchq */ ++ ++ struct sbuf so_rcv; /* Receive buffer */ ++ struct sbuf so_snd; /* Send buffer */ ++}; ++ ++ ++/* ++ * Socket state bits. (peer means the host on the Internet, ++ * local host means the host on the other end of the modem) ++ */ ++#define SS_NOFDREF 0x001 /* No fd reference */ ++ ++#define SS_ISFCONNECTING \ ++ 0x002 /* Socket is connecting to peer (non-blocking connect()'s) */ ++#define SS_ISFCONNECTED 0x004 /* Socket is connected to peer */ ++#define SS_FCANTRCVMORE \ ++ 0x008 /* Socket can't receive more from peer (for half-closes) */ ++#define SS_FCANTSENDMORE \ ++ 0x010 /* Socket can't send more to peer (for half-closes) */ ++#define SS_FWDRAIN \ ++ 0x040 /* We received a FIN, drain data and set SS_FCANTSENDMORE */ ++ ++#define SS_CTL 0x080 ++#define SS_FACCEPTCONN \ ++ 0x100 /* Socket is accepting connections from a host on the internet */ ++#define SS_FACCEPTONCE \ ++ 0x200 /* If set, the SS_FACCEPTCONN socket will die after one accept */ ++ ++#define SS_PERSISTENT_MASK 0xf000 /* Unremovable state bits */ ++#define SS_HOSTFWD 0x1000 /* Socket describes host->guest forwarding */ ++#define SS_INCOMING \ ++ 0x2000 /* Connection was initiated by a host on the internet */ ++#define SS_HOSTFWD_V6ONLY 0x4000 /* Only bind on v6 addresses */ ++ ++static inline int sockaddr_equal(const struct sockaddr_storage *a, ++ const struct sockaddr_storage *b) ++{ ++ if (a->ss_family != b->ss_family) { ++ return 0; ++ } ++ ++ switch (a->ss_family) { ++ case AF_INET: { ++ const struct sockaddr_in *a4 = (const struct sockaddr_in *)a; ++ const struct sockaddr_in *b4 = (const struct sockaddr_in *)b; ++ return a4->sin_addr.s_addr == b4->sin_addr.s_addr && ++ a4->sin_port == b4->sin_port; ++ } ++ case AF_INET6: { ++ const struct sockaddr_in6 *a6 = (const struct sockaddr_in6 *)a; ++ const struct sockaddr_in6 *b6 = (const struct sockaddr_in6 *)b; ++ return (in6_equal(&a6->sin6_addr, &b6->sin6_addr) && ++ a6->sin6_port == b6->sin6_port); ++ } ++ default: ++ g_assert_not_reached(); ++ } ++ ++ return 0; ++} ++ ++static inline socklen_t sockaddr_size(const struct sockaddr_storage *a) ++{ ++ switch (a->ss_family) { ++ case AF_INET: ++ return sizeof(struct sockaddr_in); ++ case AF_INET6: ++ return sizeof(struct sockaddr_in6); ++ default: ++ g_assert_not_reached(); ++ } ++} ++ ++static inline void sockaddr_copy(struct sockaddr *dst, socklen_t dstlen, const struct sockaddr *src, socklen_t srclen) ++{ ++ socklen_t len = sockaddr_size((const struct sockaddr_storage *) src); ++ g_assert(len <= srclen); ++ g_assert(len <= dstlen); ++ memcpy(dst, src, len); ++} ++ ++struct socket *solookup(struct socket **, struct socket *, ++ struct sockaddr_storage *, struct sockaddr_storage *); ++struct socket *socreate(Slirp *); ++void sofree(struct socket *); ++int soread(struct socket *); ++int sorecvoob(struct socket *); ++int sosendoob(struct socket *); ++int sowrite(struct socket *); ++void sorecvfrom(struct socket *); ++int sosendto(struct socket *, struct mbuf *); ++struct socket *tcp_listen(Slirp *, uint32_t, unsigned, uint32_t, unsigned, int); ++struct socket *tcpx_listen(Slirp *slirp, ++ const struct sockaddr *haddr, socklen_t haddrlen, ++ const struct sockaddr *laddr, socklen_t laddrlen, ++ int flags); ++void soisfconnecting(register struct socket *); ++void soisfconnected(register struct socket *); ++void sofwdrain(struct socket *); ++struct iovec; /* For win32 */ ++size_t sopreprbuf(struct socket *so, struct iovec *iov, int *np); ++int soreadbuf(struct socket *so, const char *buf, int size); ++ ++int sotranslate_out(struct socket *, struct sockaddr_storage *); ++void sotranslate_in(struct socket *, struct sockaddr_storage *); ++void sotranslate_accept(struct socket *); ++void sodrop(struct socket *, int num); ++int soassign_guest_addr_if_needed(struct socket *so); ++ ++#endif /* SLIRP_SOCKET_H */ +diff --git a/slirp/src/state.c b/slirp/src/state.c +new file mode 100644 +index 0000000000..22af77b256 +--- /dev/null ++++ b/slirp/src/state.c +@@ -0,0 +1,379 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * libslirp ++ * ++ * Copyright (c) 2004-2008 Fabrice Bellard ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. ++ */ ++#include "slirp.h" ++#include "vmstate.h" ++#include "stream.h" ++ ++static int slirp_tcp_post_load(void *opaque, int version) ++{ ++ tcp_template((struct tcpcb *)opaque); ++ ++ return 0; ++} ++ ++static const VMStateDescription vmstate_slirp_tcp = { ++ .name = "slirp-tcp", ++ .version_id = 0, ++ .post_load = slirp_tcp_post_load, ++ .fields = (VMStateField[]){ VMSTATE_INT16(t_state, struct tcpcb), ++ VMSTATE_INT16_ARRAY(t_timer, struct tcpcb, ++ TCPT_NTIMERS), ++ VMSTATE_INT16(t_rxtshift, struct tcpcb), ++ VMSTATE_INT16(t_rxtcur, struct tcpcb), ++ VMSTATE_INT16(t_dupacks, struct tcpcb), ++ VMSTATE_UINT16(t_maxseg, struct tcpcb), ++ VMSTATE_UINT8(t_force, struct tcpcb), ++ VMSTATE_UINT16(t_flags, struct tcpcb), ++ VMSTATE_UINT32(snd_una, struct tcpcb), ++ VMSTATE_UINT32(snd_nxt, struct tcpcb), ++ VMSTATE_UINT32(snd_up, struct tcpcb), ++ VMSTATE_UINT32(snd_wl1, struct tcpcb), ++ VMSTATE_UINT32(snd_wl2, struct tcpcb), ++ VMSTATE_UINT32(iss, struct tcpcb), ++ VMSTATE_UINT32(snd_wnd, struct tcpcb), ++ VMSTATE_UINT32(rcv_wnd, struct tcpcb), ++ VMSTATE_UINT32(rcv_nxt, struct tcpcb), ++ VMSTATE_UINT32(rcv_up, struct tcpcb), ++ VMSTATE_UINT32(irs, struct tcpcb), ++ VMSTATE_UINT32(rcv_adv, struct tcpcb), ++ VMSTATE_UINT32(snd_max, struct tcpcb), ++ VMSTATE_UINT32(snd_cwnd, struct tcpcb), ++ VMSTATE_UINT32(snd_ssthresh, struct tcpcb), ++ VMSTATE_INT16(t_idle, struct tcpcb), ++ VMSTATE_INT16(t_rtt, struct tcpcb), ++ VMSTATE_UINT32(t_rtseq, struct tcpcb), ++ VMSTATE_INT16(t_srtt, struct tcpcb), ++ VMSTATE_INT16(t_rttvar, struct tcpcb), ++ VMSTATE_UINT16(t_rttmin, struct tcpcb), ++ VMSTATE_UINT32(max_sndwnd, struct tcpcb), ++ VMSTATE_UINT8(t_oobflags, struct tcpcb), ++ VMSTATE_UINT8(t_iobc, struct tcpcb), ++ VMSTATE_INT16(t_softerror, struct tcpcb), ++ VMSTATE_UINT8(snd_scale, struct tcpcb), ++ VMSTATE_UINT8(rcv_scale, struct tcpcb), ++ VMSTATE_UINT8(request_r_scale, struct tcpcb), ++ VMSTATE_UINT8(requested_s_scale, struct tcpcb), ++ VMSTATE_UINT32(ts_recent, struct tcpcb), ++ VMSTATE_UINT32(ts_recent_age, struct tcpcb), ++ VMSTATE_UINT32(last_ack_sent, struct tcpcb), ++ VMSTATE_END_OF_LIST() } ++}; ++ ++/* The sbuf has a pair of pointers that are migrated as offsets; ++ * we calculate the offsets and restore the pointers using ++ * pre_save/post_load on a tmp structure. ++ */ ++struct sbuf_tmp { ++ struct sbuf *parent; ++ uint32_t roff, woff; ++}; ++ ++static int sbuf_tmp_pre_save(void *opaque) ++{ ++ struct sbuf_tmp *tmp = opaque; ++ tmp->woff = tmp->parent->sb_wptr - tmp->parent->sb_data; ++ tmp->roff = tmp->parent->sb_rptr - tmp->parent->sb_data; ++ ++ return 0; ++} ++ ++static int sbuf_tmp_post_load(void *opaque, int version) ++{ ++ struct sbuf_tmp *tmp = opaque; ++ uint32_t requested_len = tmp->parent->sb_datalen; ++ ++ /* Allocate the buffer space used by the field after the tmp */ ++ sbreserve(tmp->parent, tmp->parent->sb_datalen); ++ ++ if (tmp->woff >= requested_len || tmp->roff >= requested_len) { ++ g_critical("invalid sbuf offsets r/w=%u/%u len=%u", tmp->roff, ++ tmp->woff, requested_len); ++ return -EINVAL; ++ } ++ ++ tmp->parent->sb_wptr = tmp->parent->sb_data + tmp->woff; ++ tmp->parent->sb_rptr = tmp->parent->sb_data + tmp->roff; ++ ++ return 0; ++} ++ ++ ++static const VMStateDescription vmstate_slirp_sbuf_tmp = { ++ .name = "slirp-sbuf-tmp", ++ .post_load = sbuf_tmp_post_load, ++ .pre_save = sbuf_tmp_pre_save, ++ .version_id = 0, ++ .fields = (VMStateField[]){ VMSTATE_UINT32(woff, struct sbuf_tmp), ++ VMSTATE_UINT32(roff, struct sbuf_tmp), ++ VMSTATE_END_OF_LIST() } ++}; ++ ++static const VMStateDescription vmstate_slirp_sbuf = { ++ .name = "slirp-sbuf", ++ .version_id = 0, ++ .fields = (VMStateField[]){ VMSTATE_UINT32(sb_cc, struct sbuf), ++ VMSTATE_UINT32(sb_datalen, struct sbuf), ++ VMSTATE_WITH_TMP(struct sbuf, struct sbuf_tmp, ++ vmstate_slirp_sbuf_tmp), ++ VMSTATE_VBUFFER_UINT32(sb_data, struct sbuf, 0, ++ NULL, sb_datalen), ++ VMSTATE_END_OF_LIST() } ++}; ++ ++static bool slirp_older_than_v4(void *opaque, int version_id) ++{ ++ return version_id < 4; ++} ++ ++static bool slirp_family_inet(void *opaque, int version_id) ++{ ++ union slirp_sockaddr *ssa = (union slirp_sockaddr *)opaque; ++ return ssa->ss.ss_family == AF_INET; ++} ++ ++static int slirp_socket_pre_load(void *opaque) ++{ ++ struct socket *so = opaque; ++ ++ tcp_attach(so); ++ /* Older versions don't load these fields */ ++ so->so_ffamily = AF_INET; ++ so->so_lfamily = AF_INET; ++ return 0; ++} ++ ++#ifndef _WIN32 ++#define VMSTATE_SIN4_ADDR(f, s, t) VMSTATE_UINT32_TEST(f, s, t) ++#else ++/* Win uses u_long rather than uint32_t - but it's still 32bits long */ ++#define VMSTATE_SIN4_ADDR(f, s, t) \ ++ VMSTATE_SINGLE_TEST(f, s, t, 0, slirp_vmstate_info_uint32, u_long) ++#endif ++ ++/* The OS provided ss_family field isn't that portable; it's size ++ * and type varies (16/8 bit, signed, unsigned) ++ * and the values it contains aren't fully portable. ++ */ ++typedef struct SS_FamilyTmpStruct { ++ union slirp_sockaddr *parent; ++ uint16_t portable_family; ++} SS_FamilyTmpStruct; ++ ++#define SS_FAMILY_MIG_IPV4 2 /* Linux, BSD, Win... */ ++#define SS_FAMILY_MIG_IPV6 10 /* Linux */ ++#define SS_FAMILY_MIG_OTHER 0xffff ++ ++static int ss_family_pre_save(void *opaque) ++{ ++ SS_FamilyTmpStruct *tss = opaque; ++ ++ tss->portable_family = SS_FAMILY_MIG_OTHER; ++ ++ if (tss->parent->ss.ss_family == AF_INET) { ++ tss->portable_family = SS_FAMILY_MIG_IPV4; ++ } else if (tss->parent->ss.ss_family == AF_INET6) { ++ tss->portable_family = SS_FAMILY_MIG_IPV6; ++ } ++ ++ return 0; ++} ++ ++static int ss_family_post_load(void *opaque, int version_id) ++{ ++ SS_FamilyTmpStruct *tss = opaque; ++ ++ switch (tss->portable_family) { ++ case SS_FAMILY_MIG_IPV4: ++ tss->parent->ss.ss_family = AF_INET; ++ break; ++ case SS_FAMILY_MIG_IPV6: ++ case 23: /* compatibility: AF_INET6 from mingw */ ++ case 28: /* compatibility: AF_INET6 from FreeBSD sys/socket.h */ ++ tss->parent->ss.ss_family = AF_INET6; ++ break; ++ default: ++ g_critical("invalid ss_family type %x", tss->portable_family); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static const VMStateDescription vmstate_slirp_ss_family = { ++ .name = "slirp-socket-addr/ss_family", ++ .pre_save = ss_family_pre_save, ++ .post_load = ss_family_post_load, ++ .fields = ++ (VMStateField[]){ VMSTATE_UINT16(portable_family, SS_FamilyTmpStruct), ++ VMSTATE_END_OF_LIST() } ++}; ++ ++static const VMStateDescription vmstate_slirp_socket_addr = { ++ .name = "slirp-socket-addr", ++ .version_id = 4, ++ .fields = ++ (VMStateField[]){ ++ VMSTATE_WITH_TMP(union slirp_sockaddr, SS_FamilyTmpStruct, ++ vmstate_slirp_ss_family), ++ VMSTATE_SIN4_ADDR(sin.sin_addr.s_addr, union slirp_sockaddr, ++ slirp_family_inet), ++ VMSTATE_UINT16_TEST(sin.sin_port, union slirp_sockaddr, ++ slirp_family_inet), ++ ++#if 0 ++ /* Untested: Needs checking by someone with IPv6 test */ ++ VMSTATE_BUFFER_TEST(sin6.sin6_addr, union slirp_sockaddr, ++ slirp_family_inet6), ++ VMSTATE_UINT16_TEST(sin6.sin6_port, union slirp_sockaddr, ++ slirp_family_inet6), ++ VMSTATE_UINT32_TEST(sin6.sin6_flowinfo, union slirp_sockaddr, ++ slirp_family_inet6), ++ VMSTATE_UINT32_TEST(sin6.sin6_scope_id, union slirp_sockaddr, ++ slirp_family_inet6), ++#endif ++ ++ VMSTATE_END_OF_LIST() } ++}; ++ ++static const VMStateDescription vmstate_slirp_socket = { ++ .name = "slirp-socket", ++ .version_id = 4, ++ .pre_load = slirp_socket_pre_load, ++ .fields = ++ (VMStateField[]){ ++ VMSTATE_UINT32(so_urgc, struct socket), ++ /* Pre-v4 versions */ ++ VMSTATE_SIN4_ADDR(so_faddr.s_addr, struct socket, ++ slirp_older_than_v4), ++ VMSTATE_SIN4_ADDR(so_laddr.s_addr, struct socket, ++ slirp_older_than_v4), ++ VMSTATE_UINT16_TEST(so_fport, struct socket, slirp_older_than_v4), ++ VMSTATE_UINT16_TEST(so_lport, struct socket, slirp_older_than_v4), ++ /* v4 and newer */ ++ VMSTATE_STRUCT(fhost, struct socket, 4, vmstate_slirp_socket_addr, ++ union slirp_sockaddr), ++ VMSTATE_STRUCT(lhost, struct socket, 4, vmstate_slirp_socket_addr, ++ union slirp_sockaddr), ++ ++ VMSTATE_UINT8(so_iptos, struct socket), ++ VMSTATE_UINT8(so_emu, struct socket), ++ VMSTATE_UINT8(so_type, struct socket), ++ VMSTATE_INT32(so_state, struct socket), ++ VMSTATE_STRUCT(so_rcv, struct socket, 0, vmstate_slirp_sbuf, ++ struct sbuf), ++ VMSTATE_STRUCT(so_snd, struct socket, 0, vmstate_slirp_sbuf, ++ struct sbuf), ++ VMSTATE_STRUCT_POINTER(so_tcpcb, struct socket, vmstate_slirp_tcp, ++ struct tcpcb), ++ VMSTATE_END_OF_LIST() } ++}; ++ ++static const VMStateDescription vmstate_slirp_bootp_client = { ++ .name = "slirp_bootpclient", ++ .fields = (VMStateField[]){ VMSTATE_UINT16(allocated, BOOTPClient), ++ VMSTATE_BUFFER(macaddr, BOOTPClient), ++ VMSTATE_END_OF_LIST() } ++}; ++ ++static const VMStateDescription vmstate_slirp = { ++ .name = "slirp", ++ .version_id = 4, ++ .fields = (VMStateField[]){ VMSTATE_UINT16_V(ip_id, Slirp, 2), ++ VMSTATE_STRUCT_ARRAY( ++ bootp_clients, Slirp, NB_BOOTP_CLIENTS, 3, ++ vmstate_slirp_bootp_client, BOOTPClient), ++ VMSTATE_END_OF_LIST() } ++}; ++ ++void slirp_state_save(Slirp *slirp, SlirpWriteCb write_cb, void *opaque) ++{ ++ struct gfwd_list *ex_ptr; ++ SlirpOStream f = { ++ .write_cb = write_cb, ++ .opaque = opaque, ++ }; ++ ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ex_ptr = ex_ptr->ex_next) ++ if (ex_ptr->write_cb) { ++ struct socket *so; ++ so = slirp_find_ctl_socket(slirp, ex_ptr->ex_addr, ++ ntohs(ex_ptr->ex_fport)); ++ if (!so) { ++ continue; ++ } ++ ++ slirp_ostream_write_u8(&f, 42); ++ slirp_vmstate_save_state(&f, &vmstate_slirp_socket, so); ++ } ++ slirp_ostream_write_u8(&f, 0); ++ ++ slirp_vmstate_save_state(&f, &vmstate_slirp, slirp); ++} ++ ++ ++int slirp_state_load(Slirp *slirp, int version_id, SlirpReadCb read_cb, ++ void *opaque) ++{ ++ struct gfwd_list *ex_ptr; ++ SlirpIStream f = { ++ .read_cb = read_cb, ++ .opaque = opaque, ++ }; ++ ++ while (slirp_istream_read_u8(&f)) { ++ int ret; ++ struct socket *so = socreate(slirp); ++ ++ ret = ++ slirp_vmstate_load_state(&f, &vmstate_slirp_socket, so, version_id); ++ if (ret < 0) { ++ return ret; ++ } ++ ++ if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) != ++ slirp->vnetwork_addr.s_addr) { ++ return -EINVAL; ++ } ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ex_ptr = ex_ptr->ex_next) { ++ if (ex_ptr->write_cb && ++ so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr && ++ so->so_fport == ex_ptr->ex_fport) { ++ break; ++ } ++ } ++ if (!ex_ptr) { ++ return -EINVAL; ++ } ++ ++ so->guestfwd = ex_ptr; ++ } ++ ++ return slirp_vmstate_load_state(&f, &vmstate_slirp, slirp, version_id); ++} ++ ++int slirp_state_version(void) ++{ ++ return 4; ++} +diff --git a/slirp/src/stream.c b/slirp/src/stream.c +new file mode 100644 +index 0000000000..6cf326f669 +--- /dev/null ++++ b/slirp/src/stream.c +@@ -0,0 +1,120 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * libslirp io streams ++ * ++ * Copyright (c) 2018 Red Hat, Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. ++ */ ++#include "stream.h" ++#include ++ ++bool slirp_istream_read(SlirpIStream *f, void *buf, size_t size) ++{ ++ return f->read_cb(buf, size, f->opaque) == size; ++} ++ ++bool slirp_ostream_write(SlirpOStream *f, const void *buf, size_t size) ++{ ++ return f->write_cb(buf, size, f->opaque) == size; ++} ++ ++uint8_t slirp_istream_read_u8(SlirpIStream *f) ++{ ++ uint8_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return b; ++ } ++ ++ return 0; ++} ++ ++bool slirp_ostream_write_u8(SlirpOStream *f, uint8_t b) ++{ ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} ++ ++uint16_t slirp_istream_read_u16(SlirpIStream *f) ++{ ++ uint16_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return GUINT16_FROM_BE(b); ++ } ++ ++ return 0; ++} ++ ++bool slirp_ostream_write_u16(SlirpOStream *f, uint16_t b) ++{ ++ b = GUINT16_TO_BE(b); ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} ++ ++uint32_t slirp_istream_read_u32(SlirpIStream *f) ++{ ++ uint32_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return GUINT32_FROM_BE(b); ++ } ++ ++ return 0; ++} ++ ++bool slirp_ostream_write_u32(SlirpOStream *f, uint32_t b) ++{ ++ b = GUINT32_TO_BE(b); ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} ++ ++int16_t slirp_istream_read_i16(SlirpIStream *f) ++{ ++ int16_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return GINT16_FROM_BE(b); ++ } ++ ++ return 0; ++} ++ ++bool slirp_ostream_write_i16(SlirpOStream *f, int16_t b) ++{ ++ b = GINT16_TO_BE(b); ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} ++ ++int32_t slirp_istream_read_i32(SlirpIStream *f) ++{ ++ int32_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return GINT32_FROM_BE(b); ++ } ++ ++ return 0; ++} ++ ++bool slirp_ostream_write_i32(SlirpOStream *f, int32_t b) ++{ ++ b = GINT32_TO_BE(b); ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} +diff --git a/slirp/src/stream.h b/slirp/src/stream.h +new file mode 100644 +index 0000000000..08bb5b6610 +--- /dev/null ++++ b/slirp/src/stream.h +@@ -0,0 +1,35 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++#ifndef STREAM_H_ ++#define STREAM_H_ ++ ++#include "libslirp.h" ++ ++typedef struct SlirpIStream { ++ SlirpReadCb read_cb; ++ void *opaque; ++} SlirpIStream; ++ ++typedef struct SlirpOStream { ++ SlirpWriteCb write_cb; ++ void *opaque; ++} SlirpOStream; ++ ++bool slirp_istream_read(SlirpIStream *f, void *buf, size_t size); ++bool slirp_ostream_write(SlirpOStream *f, const void *buf, size_t size); ++ ++uint8_t slirp_istream_read_u8(SlirpIStream *f); ++bool slirp_ostream_write_u8(SlirpOStream *f, uint8_t b); ++ ++uint16_t slirp_istream_read_u16(SlirpIStream *f); ++bool slirp_ostream_write_u16(SlirpOStream *f, uint16_t b); ++ ++uint32_t slirp_istream_read_u32(SlirpIStream *f); ++bool slirp_ostream_write_u32(SlirpOStream *f, uint32_t b); ++ ++int16_t slirp_istream_read_i16(SlirpIStream *f); ++bool slirp_ostream_write_i16(SlirpOStream *f, int16_t b); ++ ++int32_t slirp_istream_read_i32(SlirpIStream *f); ++bool slirp_ostream_write_i32(SlirpOStream *f, int32_t b); ++ ++#endif /* STREAM_H_ */ +diff --git a/slirp/src/tcp.h b/slirp/src/tcp.h +new file mode 100644 +index 0000000000..70a9760664 +--- /dev/null ++++ b/slirp/src/tcp.h +@@ -0,0 +1,169 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp.h 8.1 (Berkeley) 6/10/93 ++ * tcp.h,v 1.3 1994/08/21 05:27:34 paul Exp ++ */ ++ ++#ifndef TCP_H ++#define TCP_H ++ ++#include ++ ++typedef uint32_t tcp_seq; ++ ++#define PR_SLOWHZ 2 /* 2 slow timeouts per second (approx) */ ++#define PR_FASTHZ 5 /* 5 fast timeouts per second (not important) */ ++ ++#define TCP_SNDSPACE 1024 * 128 ++#define TCP_RCVSPACE 1024 * 128 ++#define TCP_MAXSEG_MAX 32768 ++ ++/* ++ * TCP header. ++ * Per RFC 793, September, 1981. ++ */ ++#define tcphdr slirp_tcphdr ++struct tcphdr { ++ uint16_t th_sport; /* source port */ ++ uint16_t th_dport; /* destination port */ ++ tcp_seq th_seq; /* sequence number */ ++ tcp_seq th_ack; /* acknowledgement number */ ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint8_t th_off : 4, /* data offset */ ++ th_x2 : 4; /* (unused) */ ++#else ++ uint8_t th_x2 : 4, /* (unused) */ ++ th_off : 4; /* data offset */ ++#endif ++ uint8_t th_flags; ++ uint16_t th_win; /* window */ ++ uint16_t th_sum; /* checksum */ ++ uint16_t th_urp; /* urgent pointer */ ++}; ++ ++#include "tcp_var.h" ++ ++#ifndef TH_FIN ++#define TH_FIN 0x01 ++#define TH_SYN 0x02 ++#define TH_RST 0x04 ++#define TH_PUSH 0x08 ++#define TH_ACK 0x10 ++#define TH_URG 0x20 ++#endif ++ ++#ifndef TCPOPT_EOL ++#define TCPOPT_EOL 0 ++#define TCPOPT_NOP 1 ++#define TCPOPT_MAXSEG 2 ++#define TCPOPT_WINDOW 3 ++#define TCPOPT_SACK_PERMITTED 4 /* Experimental */ ++#define TCPOPT_SACK 5 /* Experimental */ ++#define TCPOPT_TIMESTAMP 8 ++ ++#define TCPOPT_TSTAMP_HDR \ ++ (TCPOPT_NOP << 24 | TCPOPT_NOP << 16 | TCPOPT_TIMESTAMP << 8 | \ ++ TCPOLEN_TIMESTAMP) ++#endif ++ ++#ifndef TCPOLEN_MAXSEG ++#define TCPOLEN_MAXSEG 4 ++#define TCPOLEN_WINDOW 3 ++#define TCPOLEN_SACK_PERMITTED 2 ++#define TCPOLEN_TIMESTAMP 10 ++#define TCPOLEN_TSTAMP_APPA (TCPOLEN_TIMESTAMP + 2) /* appendix A */ ++#endif ++ ++#undef TCP_MAXWIN ++#define TCP_MAXWIN 65535 /* largest value for (unscaled) window */ ++ ++#undef TCP_MAX_WINSHIFT ++#define TCP_MAX_WINSHIFT 14 /* maximum window shift */ ++ ++/* ++ * User-settable options (used with setsockopt). ++ * ++ * We don't use the system headers on unix because we have conflicting ++ * local structures. We can't avoid the system definitions on Windows, ++ * so we undefine them. ++ */ ++#undef TCP_NODELAY ++#define TCP_NODELAY 0x01 /* don't delay send to coalesce packets */ ++#undef TCP_MAXSEG ++ ++/* ++ * TCP FSM state definitions. ++ * Per RFC793, September, 1981. ++ */ ++ ++#define TCP_NSTATES 11 ++ ++#define TCPS_CLOSED 0 /* closed */ ++#define TCPS_LISTEN 1 /* listening for connection */ ++#define TCPS_SYN_SENT 2 /* active, have sent syn */ ++#define TCPS_SYN_RECEIVED 3 /* have send and received syn */ ++/* states < TCPS_ESTABLISHED are those where connections not established */ ++#define TCPS_ESTABLISHED 4 /* established */ ++#define TCPS_CLOSE_WAIT 5 /* rcvd fin, waiting for close */ ++/* states > TCPS_CLOSE_WAIT are those where user has closed */ ++#define TCPS_FIN_WAIT_1 6 /* have closed, sent fin */ ++#define TCPS_CLOSING 7 /* closed xchd FIN; await FIN ACK */ ++#define TCPS_LAST_ACK 8 /* had fin and close; await FIN ACK */ ++/* states > TCPS_CLOSE_WAIT && < TCPS_FIN_WAIT_2 await ACK of FIN */ ++#define TCPS_FIN_WAIT_2 9 /* have closed, fin is acked */ ++#define TCPS_TIME_WAIT 10 /* in 2*msl quiet wait after close */ ++ ++#define TCPS_HAVERCVDSYN(s) ((s) >= TCPS_SYN_RECEIVED) ++#define TCPS_HAVEESTABLISHED(s) ((s) >= TCPS_ESTABLISHED) ++#define TCPS_HAVERCVDFIN(s) ((s) >= TCPS_TIME_WAIT) ++ ++/* ++ * TCP sequence numbers are 32 bit integers operated ++ * on with modular arithmetic. These macros can be ++ * used to compare such integers. ++ */ ++#define SEQ_LT(a, b) ((int)((a) - (b)) < 0) ++#define SEQ_LEQ(a, b) ((int)((a) - (b)) <= 0) ++#define SEQ_GT(a, b) ((int)((a) - (b)) > 0) ++#define SEQ_GEQ(a, b) ((int)((a) - (b)) >= 0) ++ ++/* ++ * Macros to initialize tcp sequence numbers for ++ * send and receive from initial send and receive ++ * sequence numbers. ++ */ ++#define tcp_rcvseqinit(tp) (tp)->rcv_adv = (tp)->rcv_nxt = (tp)->irs + 1 ++ ++#define tcp_sendseqinit(tp) \ ++ (tp)->snd_una = (tp)->snd_nxt = (tp)->snd_max = (tp)->snd_up = (tp)->iss ++ ++#define TCP_ISSINCR (125 * 1024) /* increment for tcp_iss each second */ ++ ++#endif +diff --git a/slirp/src/tcp_input.c b/slirp/src/tcp_input.c +new file mode 100644 +index 0000000000..36a4844a7d +--- /dev/null ++++ b/slirp/src/tcp_input.c +@@ -0,0 +1,1552 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_input.c 8.5 (Berkeley) 4/10/94 ++ * tcp_input.c,v 1.10 1994/10/13 18:36:32 wollman Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++#include "ip_icmp.h" ++ ++#define TCPREXMTTHRESH 3 ++ ++#define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * PR_SLOWHZ) ++ ++/* for modulo comparisons of timestamps */ ++#define TSTMP_LT(a, b) ((int)((a) - (b)) < 0) ++#define TSTMP_GEQ(a, b) ((int)((a) - (b)) >= 0) ++ ++/* ++ * Insert segment ti into reassembly queue of tcp with ++ * control block tp. Return TH_FIN if reassembly now includes ++ * a segment with FIN. The macro form does the common case inline ++ * (segment is the next to be received on an established connection, ++ * and the queue is empty), avoiding linkage into and removal ++ * from the queue and repetition of various conversions. ++ * Set DELACK for segments received in order, but ack immediately ++ * when segments are out of order (so fast retransmit can work). ++ */ ++#define TCP_REASS(tp, ti, m, so, flags) \ ++ { \ ++ if ((ti)->ti_seq == (tp)->rcv_nxt && tcpfrag_list_empty(tp) && \ ++ (tp)->t_state == TCPS_ESTABLISHED) { \ ++ tp->t_flags |= TF_DELACK; \ ++ (tp)->rcv_nxt += (ti)->ti_len; \ ++ flags = (ti)->ti_flags & TH_FIN; \ ++ if (so->so_emu) { \ ++ if (tcp_emu((so), (m))) \ ++ sbappend(so, (m)); \ ++ } else \ ++ sbappend((so), (m)); \ ++ } else { \ ++ (flags) = tcp_reass((tp), (ti), (m)); \ ++ tp->t_flags |= TF_ACKNOW; \ ++ } \ ++ } ++ ++static void tcp_dooptions(struct tcpcb *tp, uint8_t *cp, int cnt, ++ struct tcpiphdr *ti); ++static void tcp_xmit_timer(register struct tcpcb *tp, int rtt); ++ ++static int tcp_reass(register struct tcpcb *tp, register struct tcpiphdr *ti, ++ struct mbuf *m) ++{ ++ if (m) ++ M_DUP_DEBUG(m->slirp, m, 0, 0); ++ ++ register struct tcpiphdr *q; ++ struct socket *so = tp->t_socket; ++ int flags; ++ ++ /* ++ * Call with ti==NULL after become established to ++ * force pre-ESTABLISHED data up to user socket. ++ */ ++ if (ti == NULL) ++ goto present; ++ ++ /* ++ * Find a segment which begins after this one does. ++ */ ++ for (q = tcpfrag_list_first(tp); !tcpfrag_list_end(q, tp); ++ q = tcpiphdr_next(q)) ++ if (SEQ_GT(q->ti_seq, ti->ti_seq)) ++ break; ++ ++ /* ++ * If there is a preceding segment, it may provide some of ++ * our data already. If so, drop the data from the incoming ++ * segment. If it provides all of our data, drop us. ++ */ ++ if (!tcpfrag_list_end(tcpiphdr_prev(q), tp)) { ++ register int i; ++ q = tcpiphdr_prev(q); ++ /* conversion to int (in i) handles seq wraparound */ ++ i = q->ti_seq + q->ti_len - ti->ti_seq; ++ if (i > 0) { ++ if (i >= ti->ti_len) { ++ m_free(m); ++ /* ++ * Try to present any queued data ++ * at the left window edge to the user. ++ * This is needed after the 3-WHS ++ * completes. ++ */ ++ goto present; /* ??? */ ++ } ++ m_adj(m, i); ++ ti->ti_len -= i; ++ ti->ti_seq += i; ++ } ++ q = tcpiphdr_next(q); ++ } ++ ti->ti_mbuf = m; ++ ++ /* ++ * While we overlap succeeding segments trim them or, ++ * if they are completely covered, dequeue them. ++ */ ++ while (!tcpfrag_list_end(q, tp)) { ++ register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq; ++ if (i <= 0) ++ break; ++ if (i < q->ti_len) { ++ q->ti_seq += i; ++ q->ti_len -= i; ++ m_adj(q->ti_mbuf, i); ++ break; ++ } ++ q = tcpiphdr_next(q); ++ m = tcpiphdr_prev(q)->ti_mbuf; ++ remque(tcpiphdr2qlink(tcpiphdr_prev(q))); ++ m_free(m); ++ } ++ ++ /* ++ * Stick new segment in its place. ++ */ ++ insque(tcpiphdr2qlink(ti), tcpiphdr2qlink(tcpiphdr_prev(q))); ++ ++present: ++ /* ++ * Present data to user, advancing rcv_nxt through ++ * completed sequence space. ++ */ ++ if (!TCPS_HAVEESTABLISHED(tp->t_state)) ++ return (0); ++ ti = tcpfrag_list_first(tp); ++ if (tcpfrag_list_end(ti, tp) || ti->ti_seq != tp->rcv_nxt) ++ return (0); ++ if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len) ++ return (0); ++ do { ++ tp->rcv_nxt += ti->ti_len; ++ flags = ti->ti_flags & TH_FIN; ++ remque(tcpiphdr2qlink(ti)); ++ m = ti->ti_mbuf; ++ ti = tcpiphdr_next(ti); ++ if (so->so_state & SS_FCANTSENDMORE) ++ m_free(m); ++ else { ++ if (so->so_emu) { ++ if (tcp_emu(so, m)) ++ sbappend(so, m); ++ } else ++ sbappend(so, m); ++ } ++ } while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt); ++ return (flags); ++} ++ ++/* ++ * TCP input routine, follows pages 65-76 of the ++ * protocol specification dated September, 1981 very closely. ++ */ ++void tcp_input(struct mbuf *m, int iphlen, struct socket *inso, ++ unsigned short af) ++{ ++ struct ip save_ip, *ip; ++ struct ip6 save_ip6, *ip6; ++ register struct tcpiphdr *ti; ++ char *optp = NULL; ++ int optlen = 0; ++ int len, tlen, off; ++ register struct tcpcb *tp = NULL; ++ register int tiflags; ++ struct socket *so = NULL; ++ int todrop, acked, ourfinisacked, needoutput = 0; ++ int iss = 0; ++ uint32_t tiwin; ++ int ret; ++ struct sockaddr_storage lhost, fhost; ++ struct sockaddr_in *lhost4, *fhost4; ++ struct sockaddr_in6 *lhost6, *fhost6; ++ struct gfwd_list *ex_ptr; ++ Slirp *slirp; ++ ++ DEBUG_CALL("tcp_input"); ++ DEBUG_ARG("m = %p iphlen = %2d inso = %p", m, iphlen, inso); ++ ++ /* ++ * If called with m == 0, then we're continuing the connect ++ */ ++ if (m == NULL) { ++ so = inso; ++ slirp = so->slirp; ++ ++ /* Re-set a few variables */ ++ tp = sototcpcb(so); ++ m = so->so_m; ++ so->so_m = NULL; ++ ti = so->so_ti; ++ tiwin = ti->ti_win; ++ tiflags = ti->ti_flags; ++ ++ goto cont_conn; ++ } ++ slirp = m->slirp; ++ switch (af) { ++ case AF_INET: ++ M_DUP_DEBUG(slirp, m, 0, ++ sizeof(struct tcpiphdr) - sizeof(struct ip) - sizeof(struct tcphdr)); ++ break; ++ case AF_INET6: ++ M_DUP_DEBUG(slirp, m, 0, ++ sizeof(struct tcpiphdr) - sizeof(struct ip6) - sizeof(struct tcphdr)); ++ break; ++ } ++ ++ ip = mtod(m, struct ip *); ++ ip6 = mtod(m, struct ip6 *); ++ ++ switch (af) { ++ case AF_INET: ++ if (iphlen > sizeof(struct ip)) { ++ ip_stripoptions(m, (struct mbuf *)0); ++ iphlen = sizeof(struct ip); ++ } ++ /* XXX Check if too short */ ++ ++ ++ /* ++ * Save a copy of the IP header in case we want restore it ++ * for sending an ICMP error message in response. ++ */ ++ save_ip = *ip; ++ save_ip.ip_len += iphlen; ++ ++ /* ++ * Get IP and TCP header together in first mbuf. ++ * Note: IP leaves IP header in first mbuf. ++ */ ++ m->m_data -= ++ sizeof(struct tcpiphdr) - sizeof(struct ip) - sizeof(struct tcphdr); ++ m->m_len += ++ sizeof(struct tcpiphdr) - sizeof(struct ip) - sizeof(struct tcphdr); ++ ti = mtod(m, struct tcpiphdr *); ++ ++ /* ++ * Checksum extended TCP header and data. ++ */ ++ tlen = ip->ip_len; ++ tcpiphdr2qlink(ti)->next = tcpiphdr2qlink(ti)->prev = NULL; ++ memset(&ti->ih_mbuf, 0, sizeof(struct mbuf_ptr)); ++ memset(&ti->ti, 0, sizeof(ti->ti)); ++ ti->ti_x0 = 0; ++ ti->ti_src = save_ip.ip_src; ++ ti->ti_dst = save_ip.ip_dst; ++ ti->ti_pr = save_ip.ip_p; ++ ti->ti_len = htons((uint16_t)tlen); ++ break; ++ ++ case AF_INET6: ++ /* ++ * Save a copy of the IP header in case we want restore it ++ * for sending an ICMP error message in response. ++ */ ++ save_ip6 = *ip6; ++ /* ++ * Get IP and TCP header together in first mbuf. ++ * Note: IP leaves IP header in first mbuf. ++ */ ++ m->m_data -= sizeof(struct tcpiphdr) - ++ (sizeof(struct ip6) + sizeof(struct tcphdr)); ++ m->m_len += sizeof(struct tcpiphdr) - ++ (sizeof(struct ip6) + sizeof(struct tcphdr)); ++ ti = mtod(m, struct tcpiphdr *); ++ ++ tlen = ip6->ip_pl; ++ tcpiphdr2qlink(ti)->next = tcpiphdr2qlink(ti)->prev = NULL; ++ memset(&ti->ih_mbuf, 0, sizeof(struct mbuf_ptr)); ++ memset(&ti->ti, 0, sizeof(ti->ti)); ++ ti->ti_x0 = 0; ++ ti->ti_src6 = save_ip6.ip_src; ++ ti->ti_dst6 = save_ip6.ip_dst; ++ ti->ti_nh6 = save_ip6.ip_nh; ++ ti->ti_len = htons((uint16_t)tlen); ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ } ++ ++ len = ((sizeof(struct tcpiphdr) - sizeof(struct tcphdr)) + tlen); ++ if (cksum(m, len)) { ++ goto drop; ++ } ++ ++ /* ++ * Check that TCP offset makes sense, ++ * pull out TCP options and adjust length. XXX ++ */ ++ off = ti->ti_off << 2; ++ if (off < sizeof(struct tcphdr) || off > tlen) { ++ goto drop; ++ } ++ tlen -= off; ++ ti->ti_len = tlen; ++ if (off > sizeof(struct tcphdr)) { ++ optlen = off - sizeof(struct tcphdr); ++ optp = mtod(m, char *) + sizeof(struct tcpiphdr); ++ } ++ tiflags = ti->ti_flags; ++ ++ /* ++ * Convert TCP protocol specific fields to host format. ++ */ ++ NTOHL(ti->ti_seq); ++ NTOHL(ti->ti_ack); ++ NTOHS(ti->ti_win); ++ NTOHS(ti->ti_urp); ++ ++ /* ++ * Drop TCP, IP headers and TCP options. ++ */ ++ m->m_data += sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr); ++ m->m_len -= sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr); ++ ++ /* ++ * Locate pcb for segment. ++ */ ++findso: ++ lhost.ss_family = af; ++ fhost.ss_family = af; ++ switch (af) { ++ case AF_INET: ++ lhost4 = (struct sockaddr_in *)&lhost; ++ lhost4->sin_addr = ti->ti_src; ++ lhost4->sin_port = ti->ti_sport; ++ fhost4 = (struct sockaddr_in *)&fhost; ++ fhost4->sin_addr = ti->ti_dst; ++ fhost4->sin_port = ti->ti_dport; ++ break; ++ case AF_INET6: ++ lhost6 = (struct sockaddr_in6 *)&lhost; ++ lhost6->sin6_addr = ti->ti_src6; ++ lhost6->sin6_port = ti->ti_sport; ++ fhost6 = (struct sockaddr_in6 *)&fhost; ++ fhost6->sin6_addr = ti->ti_dst6; ++ fhost6->sin6_port = ti->ti_dport; ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ ++ so = solookup(&slirp->tcp_last_so, &slirp->tcb, &lhost, &fhost); ++ ++ /* ++ * If the state is CLOSED (i.e., TCB does not exist) then ++ * all data in the incoming segment is discarded. ++ * If the TCB exists but is in CLOSED state, it is embryonic, ++ * but should either do a listen or a connect soon. ++ * ++ * state == CLOSED means we've done socreate() but haven't ++ * attached it to a protocol yet... ++ * ++ * XXX If a TCB does not exist, and the TH_SYN flag is ++ * the only flag set, then create a session, mark it ++ * as if it was LISTENING, and continue... ++ */ ++ if (so == NULL) { ++ /* TODO: IPv6 */ ++ if (slirp->restricted) { ++ /* Any hostfwds will have an existing socket, so we only get here ++ * for non-hostfwd connections. These should be dropped, unless it ++ * happens to be a guestfwd. ++ */ ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ++ ex_ptr = ex_ptr->ex_next) { ++ if (ex_ptr->ex_fport == ti->ti_dport && ++ ti->ti_dst.s_addr == ex_ptr->ex_addr.s_addr) { ++ break; ++ } ++ } ++ if (!ex_ptr) { ++ goto dropwithreset; ++ } ++ } ++ ++ if ((tiflags & (TH_SYN | TH_FIN | TH_RST | TH_URG | TH_ACK)) != TH_SYN) ++ goto dropwithreset; ++ ++ so = socreate(slirp); ++ tcp_attach(so); ++ ++ sbreserve(&so->so_snd, TCP_SNDSPACE); ++ sbreserve(&so->so_rcv, TCP_RCVSPACE); ++ ++ so->lhost.ss = lhost; ++ so->fhost.ss = fhost; ++ ++ so->so_iptos = tcp_tos(so); ++ if (so->so_iptos == 0) { ++ switch (af) { ++ case AF_INET: ++ so->so_iptos = ((struct ip *)ti)->ip_tos; ++ break; ++ case AF_INET6: ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ } ++ ++ tp = sototcpcb(so); ++ tp->t_state = TCPS_LISTEN; ++ } ++ ++ /* ++ * If this is a still-connecting socket, this probably ++ * a retransmit of the SYN. Whether it's a retransmit SYN ++ * or something else, we nuke it. ++ */ ++ if (so->so_state & SS_ISFCONNECTING) ++ goto drop; ++ ++ tp = sototcpcb(so); ++ ++ /* XXX Should never fail */ ++ if (tp == NULL) ++ goto dropwithreset; ++ if (tp->t_state == TCPS_CLOSED) ++ goto drop; ++ ++ tiwin = ti->ti_win; ++ ++ /* ++ * Segment received on connection. ++ * Reset idle time and keep-alive timer. ++ */ ++ tp->t_idle = 0; ++ if (slirp_do_keepalive) ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEPINTVL; ++ else ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_IDLE; ++ ++ /* ++ * Process options if not in LISTEN state, ++ * else do it below (after getting remote address). ++ */ ++ if (optp && tp->t_state != TCPS_LISTEN) ++ tcp_dooptions(tp, (uint8_t *)optp, optlen, ti); ++ ++ /* ++ * Header prediction: check for the two common cases ++ * of a uni-directional data xfer. If the packet has ++ * no control flags, is in-sequence, the window didn't ++ * change and we're not retransmitting, it's a ++ * candidate. If the length is zero and the ack moved ++ * forward, we're the sender side of the xfer. Just ++ * free the data acked & wake any higher level process ++ * that was blocked waiting for space. If the length ++ * is non-zero and the ack didn't move, we're the ++ * receiver side. If we're getting packets in-order ++ * (the reassembly queue is empty), add the data to ++ * the socket buffer and note that we need a delayed ack. ++ * ++ * XXX Some of these tests are not needed ++ * eg: the tiwin == tp->snd_wnd prevents many more ++ * predictions.. with no *real* advantage.. ++ */ ++ if (tp->t_state == TCPS_ESTABLISHED && ++ (tiflags & (TH_SYN | TH_FIN | TH_RST | TH_URG | TH_ACK)) == TH_ACK && ++ ti->ti_seq == tp->rcv_nxt && tiwin && tiwin == tp->snd_wnd && ++ tp->snd_nxt == tp->snd_max) { ++ if (ti->ti_len == 0) { ++ if (SEQ_GT(ti->ti_ack, tp->snd_una) && ++ SEQ_LEQ(ti->ti_ack, tp->snd_max) && ++ tp->snd_cwnd >= tp->snd_wnd) { ++ /* ++ * this is a pure ack for outstanding data. ++ */ ++ if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) ++ tcp_xmit_timer(tp, tp->t_rtt); ++ acked = ti->ti_ack - tp->snd_una; ++ sodrop(so, acked); ++ tp->snd_una = ti->ti_ack; ++ m_free(m); ++ ++ /* ++ * If all outstanding data are acked, stop ++ * retransmit timer, otherwise restart timer ++ * using current (possibly backed-off) value. ++ * If process is waiting for space, ++ * wakeup/selwakeup/signal. If data ++ * are ready to send, let tcp_output ++ * decide between more output or persist. ++ */ ++ if (tp->snd_una == tp->snd_max) ++ tp->t_timer[TCPT_REXMT] = 0; ++ else if (tp->t_timer[TCPT_PERSIST] == 0) ++ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; ++ ++ /* ++ * This is called because sowwakeup might have ++ * put data into so_snd. Since we don't so sowwakeup, ++ * we don't need this.. XXX??? ++ */ ++ if (so->so_snd.sb_cc) ++ tcp_output(tp); ++ ++ return; ++ } ++ } else if (ti->ti_ack == tp->snd_una && tcpfrag_list_empty(tp) && ++ ti->ti_len <= sbspace(&so->so_rcv)) { ++ /* ++ * this is a pure, in-sequence data packet ++ * with nothing on the reassembly queue and ++ * we have enough buffer space to take it. ++ */ ++ tp->rcv_nxt += ti->ti_len; ++ /* ++ * Add data to socket buffer. ++ */ ++ if (so->so_emu) { ++ if (tcp_emu(so, m)) ++ sbappend(so, m); ++ } else ++ sbappend(so, m); ++ ++ /* ++ * If this is a short packet, then ACK now - with Nagel ++ * congestion avoidance sender won't send more until ++ * he gets an ACK. ++ * ++ * It is better to not delay acks at all to maximize ++ * TCP throughput. See RFC 2581. ++ */ ++ tp->t_flags |= TF_ACKNOW; ++ tcp_output(tp); ++ return; ++ } ++ } /* header prediction */ ++ /* ++ * Calculate amount of space in receive window, ++ * and then do TCP input processing. ++ * Receive window is amount of space in rcv queue, ++ * but not less than advertised window. ++ */ ++ { ++ int win; ++ win = sbspace(&so->so_rcv); ++ if (win < 0) ++ win = 0; ++ tp->rcv_wnd = MAX(win, (int)(tp->rcv_adv - tp->rcv_nxt)); ++ } ++ ++ switch (tp->t_state) { ++ /* ++ * If the state is LISTEN then ignore segment if it contains an RST. ++ * If the segment contains an ACK then it is bad and send a RST. ++ * If it does not contain a SYN then it is not interesting; drop it. ++ * Don't bother responding if the destination was a broadcast. ++ * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial ++ * tp->iss, and send a segment: ++ * ++ * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss. ++ * Fill in remote peer address fields if not previously specified. ++ * Enter SYN_RECEIVED state, and process any other fields of this ++ * segment in this state. ++ */ ++ case TCPS_LISTEN: { ++ if (tiflags & TH_RST) ++ goto drop; ++ if (tiflags & TH_ACK) ++ goto dropwithreset; ++ if ((tiflags & TH_SYN) == 0) ++ goto drop; ++ ++ /* ++ * This has way too many gotos... ++ * But a bit of spaghetti code never hurt anybody :) ++ */ ++ ++ /* ++ * If this is destined for the control address, then flag to ++ * tcp_ctl once connected, otherwise connect ++ */ ++ /* TODO: IPv6 */ ++ if (af == AF_INET && ++ (so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) == ++ slirp->vnetwork_addr.s_addr) { ++ if (so->so_faddr.s_addr != slirp->vhost_addr.s_addr && ++ so->so_faddr.s_addr != slirp->vnameserver_addr.s_addr) { ++ /* May be an add exec */ ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ++ ex_ptr = ex_ptr->ex_next) { ++ if (ex_ptr->ex_fport == so->so_fport && ++ so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr) { ++ so->so_state |= SS_CTL; ++ break; ++ } ++ } ++ if (so->so_state & SS_CTL) { ++ goto cont_input; ++ } ++ } ++ /* CTL_ALIAS: Do nothing, tcp_fconnect will be called on it */ ++ } ++ ++ if (so->so_emu & EMU_NOCONNECT) { ++ so->so_emu &= ~EMU_NOCONNECT; ++ goto cont_input; ++ } ++ ++ if ((tcp_fconnect(so, so->so_ffamily) == -1) && (errno != EAGAIN) && ++ (errno != EINPROGRESS) && (errno != EWOULDBLOCK)) { ++ uint8_t code; ++ DEBUG_MISC(" tcp fconnect errno = %d-%s", errno, strerror(errno)); ++ if (errno == ECONNREFUSED) { ++ /* ACK the SYN, send RST to refuse the connection */ ++ tcp_respond(tp, ti, m, ti->ti_seq + 1, (tcp_seq)0, ++ TH_RST | TH_ACK, af); ++ } else { ++ switch (af) { ++ case AF_INET: ++ code = ICMP_UNREACH_NET; ++ if (errno == EHOSTUNREACH) { ++ code = ICMP_UNREACH_HOST; ++ } ++ break; ++ case AF_INET6: ++ code = ICMP6_UNREACH_NO_ROUTE; ++ if (errno == EHOSTUNREACH) { ++ code = ICMP6_UNREACH_ADDRESS; ++ } ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ HTONL(ti->ti_seq); /* restore tcp header */ ++ HTONL(ti->ti_ack); ++ HTONS(ti->ti_win); ++ HTONS(ti->ti_urp); ++ m->m_data -= ++ sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr); ++ m->m_len += ++ sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr); ++ switch (af) { ++ case AF_INET: ++ m->m_data += sizeof(struct tcpiphdr) - sizeof(struct ip) - ++ sizeof(struct tcphdr); ++ m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct ip) - ++ sizeof(struct tcphdr); ++ *ip = save_ip; ++ icmp_send_error(m, ICMP_UNREACH, code, 0, strerror(errno)); ++ break; ++ case AF_INET6: ++ m->m_data += sizeof(struct tcpiphdr) - ++ (sizeof(struct ip6) + sizeof(struct tcphdr)); ++ m->m_len -= sizeof(struct tcpiphdr) - ++ (sizeof(struct ip6) + sizeof(struct tcphdr)); ++ *ip6 = save_ip6; ++ icmp6_send_error(m, ICMP6_UNREACH, code); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ } ++ tcp_close(tp); ++ m_free(m); ++ } else { ++ /* ++ * Haven't connected yet, save the current mbuf ++ * and ti, and return ++ * XXX Some OS's don't tell us whether the connect() ++ * succeeded or not. So we must time it out. ++ */ ++ so->so_m = m; ++ so->so_ti = ti; ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; ++ tp->t_state = TCPS_SYN_RECEIVED; ++ /* ++ * Initialize receive sequence numbers now so that we can send a ++ * valid RST if the remote end rejects our connection. ++ */ ++ tp->irs = ti->ti_seq; ++ tcp_rcvseqinit(tp); ++ tcp_template(tp); ++ } ++ return; ++ ++ cont_conn: ++ /* m==NULL ++ * Check if the connect succeeded ++ */ ++ if (so->so_state & SS_NOFDREF) { ++ tp = tcp_close(tp); ++ goto dropwithreset; ++ } ++ cont_input: ++ tcp_template(tp); ++ ++ if (optp) ++ tcp_dooptions(tp, (uint8_t *)optp, optlen, ti); ++ ++ if (iss) ++ tp->iss = iss; ++ else ++ tp->iss = slirp->tcp_iss; ++ slirp->tcp_iss += TCP_ISSINCR / 2; ++ tp->irs = ti->ti_seq; ++ tcp_sendseqinit(tp); ++ tcp_rcvseqinit(tp); ++ tp->t_flags |= TF_ACKNOW; ++ tp->t_state = TCPS_SYN_RECEIVED; ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; ++ goto trimthenstep6; ++ } /* case TCPS_LISTEN */ ++ ++ /* ++ * If the state is SYN_SENT: ++ * if seg contains an ACK, but not for our SYN, drop the input. ++ * if seg contains a RST, then drop the connection. ++ * if seg does not contain SYN, then drop it. ++ * Otherwise this is an acceptable SYN segment ++ * initialize tp->rcv_nxt and tp->irs ++ * if seg contains ack then advance tp->snd_una ++ * if SYN has been acked change to ESTABLISHED else SYN_RCVD state ++ * arrange for segment to be acked (eventually) ++ * continue processing rest of data/controls, beginning with URG ++ */ ++ case TCPS_SYN_SENT: ++ if ((tiflags & TH_ACK) && ++ (SEQ_LEQ(ti->ti_ack, tp->iss) || SEQ_GT(ti->ti_ack, tp->snd_max))) ++ goto dropwithreset; ++ ++ if (tiflags & TH_RST) { ++ if (tiflags & TH_ACK) { ++ tcp_drop(tp, 0); /* XXX Check t_softerror! */ ++ } ++ goto drop; ++ } ++ ++ if ((tiflags & TH_SYN) == 0) ++ goto drop; ++ if (tiflags & TH_ACK) { ++ tp->snd_una = ti->ti_ack; ++ if (SEQ_LT(tp->snd_nxt, tp->snd_una)) ++ tp->snd_nxt = tp->snd_una; ++ } ++ ++ tp->t_timer[TCPT_REXMT] = 0; ++ tp->irs = ti->ti_seq; ++ tcp_rcvseqinit(tp); ++ tp->t_flags |= TF_ACKNOW; ++ if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) { ++ soisfconnected(so); ++ tp->t_state = TCPS_ESTABLISHED; ++ ++ tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0); ++ /* ++ * if we didn't have to retransmit the SYN, ++ * use its rtt as our initial srtt & rtt var. ++ */ ++ if (tp->t_rtt) ++ tcp_xmit_timer(tp, tp->t_rtt); ++ } else ++ tp->t_state = TCPS_SYN_RECEIVED; ++ ++ trimthenstep6: ++ /* ++ * Advance ti->ti_seq to correspond to first data byte. ++ * If data, trim to stay within window, ++ * dropping FIN if necessary. ++ */ ++ ti->ti_seq++; ++ if (ti->ti_len > tp->rcv_wnd) { ++ todrop = ti->ti_len - tp->rcv_wnd; ++ m_adj(m, -todrop); ++ ti->ti_len = tp->rcv_wnd; ++ tiflags &= ~TH_FIN; ++ } ++ tp->snd_wl1 = ti->ti_seq - 1; ++ tp->rcv_up = ti->ti_seq; ++ goto step6; ++ } /* switch tp->t_state */ ++ /* ++ * States other than LISTEN or SYN_SENT. ++ * Check that at least some bytes of segment are within ++ * receive window. If segment begins before rcv_nxt, ++ * drop leading data (and SYN); if nothing left, just ack. ++ */ ++ todrop = tp->rcv_nxt - ti->ti_seq; ++ if (todrop > 0) { ++ if (tiflags & TH_SYN) { ++ tiflags &= ~TH_SYN; ++ ti->ti_seq++; ++ if (ti->ti_urp > 1) ++ ti->ti_urp--; ++ else ++ tiflags &= ~TH_URG; ++ todrop--; ++ } ++ /* ++ * Following if statement from Stevens, vol. 2, p. 960. ++ */ ++ if (todrop > ti->ti_len || ++ (todrop == ti->ti_len && (tiflags & TH_FIN) == 0)) { ++ /* ++ * Any valid FIN must be to the left of the window. ++ * At this point the FIN must be a duplicate or out ++ * of sequence; drop it. ++ */ ++ tiflags &= ~TH_FIN; ++ ++ /* ++ * Send an ACK to resynchronize and drop any data. ++ * But keep on processing for RST or ACK. ++ */ ++ tp->t_flags |= TF_ACKNOW; ++ todrop = ti->ti_len; ++ } ++ m_adj(m, todrop); ++ ti->ti_seq += todrop; ++ ti->ti_len -= todrop; ++ if (ti->ti_urp > todrop) ++ ti->ti_urp -= todrop; ++ else { ++ tiflags &= ~TH_URG; ++ ti->ti_urp = 0; ++ } ++ } ++ /* ++ * If new data are received on a connection after the ++ * user processes are gone, then RST the other end. ++ */ ++ if ((so->so_state & SS_NOFDREF) && tp->t_state > TCPS_CLOSE_WAIT && ++ ti->ti_len) { ++ tp = tcp_close(tp); ++ goto dropwithreset; ++ } ++ ++ /* ++ * If segment ends after window, drop trailing data ++ * (and PUSH and FIN); if nothing left, just ACK. ++ */ ++ todrop = (ti->ti_seq + ti->ti_len) - (tp->rcv_nxt + tp->rcv_wnd); ++ if (todrop > 0) { ++ if (todrop >= ti->ti_len) { ++ /* ++ * If a new connection request is received ++ * while in TIME_WAIT, drop the old connection ++ * and start over if the sequence numbers ++ * are above the previous ones. ++ */ ++ if (tiflags & TH_SYN && tp->t_state == TCPS_TIME_WAIT && ++ SEQ_GT(ti->ti_seq, tp->rcv_nxt)) { ++ iss = tp->rcv_nxt + TCP_ISSINCR; ++ tp = tcp_close(tp); ++ goto findso; ++ } ++ /* ++ * If window is closed can only take segments at ++ * window edge, and have to drop data and PUSH from ++ * incoming segments. Continue processing, but ++ * remember to ack. Otherwise, drop segment ++ * and ack. ++ */ ++ if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) { ++ tp->t_flags |= TF_ACKNOW; ++ } else { ++ goto dropafterack; ++ } ++ } ++ m_adj(m, -todrop); ++ ti->ti_len -= todrop; ++ tiflags &= ~(TH_PUSH | TH_FIN); ++ } ++ ++ /* ++ * If the RST bit is set examine the state: ++ * SYN_RECEIVED STATE: ++ * If passive open, return to LISTEN state. ++ * If active open, inform user that connection was refused. ++ * ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES: ++ * Inform user that connection was reset, and close tcb. ++ * CLOSING, LAST_ACK, TIME_WAIT STATES ++ * Close the tcb. ++ */ ++ if (tiflags & TH_RST) ++ switch (tp->t_state) { ++ case TCPS_SYN_RECEIVED: ++ case TCPS_ESTABLISHED: ++ case TCPS_FIN_WAIT_1: ++ case TCPS_FIN_WAIT_2: ++ case TCPS_CLOSE_WAIT: ++ tp->t_state = TCPS_CLOSED; ++ tcp_close(tp); ++ goto drop; ++ ++ case TCPS_CLOSING: ++ case TCPS_LAST_ACK: ++ case TCPS_TIME_WAIT: ++ tcp_close(tp); ++ goto drop; ++ } ++ ++ /* ++ * If a SYN is in the window, then this is an ++ * error and we send an RST and drop the connection. ++ */ ++ if (tiflags & TH_SYN) { ++ tp = tcp_drop(tp, 0); ++ goto dropwithreset; ++ } ++ ++ /* ++ * If the ACK bit is off we drop the segment and return. ++ */ ++ if ((tiflags & TH_ACK) == 0) ++ goto drop; ++ ++ /* ++ * Ack processing. ++ */ ++ switch (tp->t_state) { ++ /* ++ * In SYN_RECEIVED state if the ack ACKs our SYN then enter ++ * ESTABLISHED state and continue processing, otherwise ++ * send an RST. una<=ack<=max ++ */ ++ case TCPS_SYN_RECEIVED: ++ ++ if (SEQ_GT(tp->snd_una, ti->ti_ack) || SEQ_GT(ti->ti_ack, tp->snd_max)) ++ goto dropwithreset; ++ tp->t_state = TCPS_ESTABLISHED; ++ /* ++ * The sent SYN is ack'ed with our sequence number +1 ++ * The first data byte already in the buffer will get ++ * lost if no correction is made. This is only needed for ++ * SS_CTL since the buffer is empty otherwise. ++ * tp->snd_una++; or: ++ */ ++ tp->snd_una = ti->ti_ack; ++ if (so->so_state & SS_CTL) { ++ /* So tcp_ctl reports the right state */ ++ ret = tcp_ctl(so); ++ if (ret == 1) { ++ soisfconnected(so); ++ so->so_state &= ~SS_CTL; /* success XXX */ ++ } else if (ret == 2) { ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_NOFDREF; /* CTL_CMD */ ++ } else { ++ needoutput = 1; ++ tp->t_state = TCPS_FIN_WAIT_1; ++ } ++ } else { ++ soisfconnected(so); ++ } ++ ++ tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0); ++ tp->snd_wl1 = ti->ti_seq - 1; ++ /* Avoid ack processing; snd_una==ti_ack => dup ack */ ++ goto synrx_to_est; ++ /* fall into ... */ ++ ++ /* ++ * In ESTABLISHED state: drop duplicate ACKs; ACK out of range ++ * ACKs. If the ack is in the range ++ * tp->snd_una < ti->ti_ack <= tp->snd_max ++ * then advance tp->snd_una to ti->ti_ack and drop ++ * data from the retransmission queue. If this ACK reflects ++ * more up to date window information we update our window information. ++ */ ++ case TCPS_ESTABLISHED: ++ case TCPS_FIN_WAIT_1: ++ case TCPS_FIN_WAIT_2: ++ case TCPS_CLOSE_WAIT: ++ case TCPS_CLOSING: ++ case TCPS_LAST_ACK: ++ case TCPS_TIME_WAIT: ++ ++ if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) { ++ if (ti->ti_len == 0 && tiwin == tp->snd_wnd) { ++ DEBUG_MISC(" dup ack m = %p so = %p", m, so); ++ /* ++ * If we have outstanding data (other than ++ * a window probe), this is a completely ++ * duplicate ack (ie, window info didn't ++ * change), the ack is the biggest we've ++ * seen and we've seen exactly our rexmt ++ * threshold of them, assume a packet ++ * has been dropped and retransmit it. ++ * Kludge snd_nxt & the congestion ++ * window so we send only this one ++ * packet. ++ * ++ * We know we're losing at the current ++ * window size so do congestion avoidance ++ * (set ssthresh to half the current window ++ * and pull our congestion window back to ++ * the new ssthresh). ++ * ++ * Dup acks mean that packets have left the ++ * network (they're now cached at the receiver) ++ * so bump cwnd by the amount in the receiver ++ * to keep a constant cwnd packets in the ++ * network. ++ */ ++ if (tp->t_timer[TCPT_REXMT] == 0 || ti->ti_ack != tp->snd_una) ++ tp->t_dupacks = 0; ++ else if (++tp->t_dupacks == TCPREXMTTHRESH) { ++ tcp_seq onxt = tp->snd_nxt; ++ unsigned win = ++ MIN(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; ++ ++ if (win < 2) ++ win = 2; ++ tp->snd_ssthresh = win * tp->t_maxseg; ++ tp->t_timer[TCPT_REXMT] = 0; ++ tp->t_rtt = 0; ++ tp->snd_nxt = ti->ti_ack; ++ tp->snd_cwnd = tp->t_maxseg; ++ tcp_output(tp); ++ tp->snd_cwnd = ++ tp->snd_ssthresh + tp->t_maxseg * tp->t_dupacks; ++ if (SEQ_GT(onxt, tp->snd_nxt)) ++ tp->snd_nxt = onxt; ++ goto drop; ++ } else if (tp->t_dupacks > TCPREXMTTHRESH) { ++ tp->snd_cwnd += tp->t_maxseg; ++ tcp_output(tp); ++ goto drop; ++ } ++ } else ++ tp->t_dupacks = 0; ++ break; ++ } ++ synrx_to_est: ++ /* ++ * If the congestion window was inflated to account ++ * for the other side's cached packets, retract it. ++ */ ++ if (tp->t_dupacks > TCPREXMTTHRESH && tp->snd_cwnd > tp->snd_ssthresh) ++ tp->snd_cwnd = tp->snd_ssthresh; ++ tp->t_dupacks = 0; ++ if (SEQ_GT(ti->ti_ack, tp->snd_max)) { ++ goto dropafterack; ++ } ++ acked = ti->ti_ack - tp->snd_una; ++ ++ /* ++ * If transmit timer is running and timed sequence ++ * number was acked, update smoothed round trip time. ++ * Since we now have an rtt measurement, cancel the ++ * timer backoff (cf., Phil Karn's retransmit alg.). ++ * Recompute the initial retransmit timer. ++ */ ++ if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) ++ tcp_xmit_timer(tp, tp->t_rtt); ++ ++ /* ++ * If all outstanding data is acked, stop retransmit ++ * timer and remember to restart (more output or persist). ++ * If there is more data to be acked, restart retransmit ++ * timer, using current (possibly backed-off) value. ++ */ ++ if (ti->ti_ack == tp->snd_max) { ++ tp->t_timer[TCPT_REXMT] = 0; ++ needoutput = 1; ++ } else if (tp->t_timer[TCPT_PERSIST] == 0) ++ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; ++ /* ++ * When new data is acked, open the congestion window. ++ * If the window gives us less than ssthresh packets ++ * in flight, open exponentially (maxseg per packet). ++ * Otherwise open linearly: maxseg per window ++ * (maxseg^2 / cwnd per packet). ++ */ ++ { ++ register unsigned cw = tp->snd_cwnd; ++ register unsigned incr = tp->t_maxseg; ++ ++ if (cw > tp->snd_ssthresh) ++ incr = incr * incr / cw; ++ tp->snd_cwnd = MIN(cw + incr, TCP_MAXWIN << tp->snd_scale); ++ } ++ if (acked > so->so_snd.sb_cc) { ++ tp->snd_wnd -= so->so_snd.sb_cc; ++ sodrop(so, (int)so->so_snd.sb_cc); ++ ourfinisacked = 1; ++ } else { ++ sodrop(so, acked); ++ tp->snd_wnd -= acked; ++ ourfinisacked = 0; ++ } ++ tp->snd_una = ti->ti_ack; ++ if (SEQ_LT(tp->snd_nxt, tp->snd_una)) ++ tp->snd_nxt = tp->snd_una; ++ ++ switch (tp->t_state) { ++ /* ++ * In FIN_WAIT_1 STATE in addition to the processing ++ * for the ESTABLISHED state if our FIN is now acknowledged ++ * then enter FIN_WAIT_2. ++ */ ++ case TCPS_FIN_WAIT_1: ++ if (ourfinisacked) { ++ /* ++ * If we can't receive any more ++ * data, then closing user can proceed. ++ * Starting the timer is contrary to the ++ * specification, but if we don't get a FIN ++ * we'll hang forever. ++ */ ++ if (so->so_state & SS_FCANTRCVMORE) { ++ tp->t_timer[TCPT_2MSL] = TCP_MAXIDLE; ++ } ++ tp->t_state = TCPS_FIN_WAIT_2; ++ } ++ break; ++ ++ /* ++ * In CLOSING STATE in addition to the processing for ++ * the ESTABLISHED state if the ACK acknowledges our FIN ++ * then enter the TIME-WAIT state, otherwise ignore ++ * the segment. ++ */ ++ case TCPS_CLOSING: ++ if (ourfinisacked) { ++ tp->t_state = TCPS_TIME_WAIT; ++ tcp_canceltimers(tp); ++ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; ++ } ++ break; ++ ++ /* ++ * In LAST_ACK, we may still be waiting for data to drain ++ * and/or to be acked, as well as for the ack of our FIN. ++ * If our FIN is now acknowledged, delete the TCB, ++ * enter the closed state and return. ++ */ ++ case TCPS_LAST_ACK: ++ if (ourfinisacked) { ++ tcp_close(tp); ++ goto drop; ++ } ++ break; ++ ++ /* ++ * In TIME_WAIT state the only thing that should arrive ++ * is a retransmission of the remote FIN. Acknowledge ++ * it and restart the finack timer. ++ */ ++ case TCPS_TIME_WAIT: ++ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; ++ goto dropafterack; ++ } ++ } /* switch(tp->t_state) */ ++ ++step6: ++ /* ++ * Update window information. ++ * Don't look at window if no ACK: TAC's send garbage on first SYN. ++ */ ++ if ((tiflags & TH_ACK) && ++ (SEQ_LT(tp->snd_wl1, ti->ti_seq) || ++ (tp->snd_wl1 == ti->ti_seq && ++ (SEQ_LT(tp->snd_wl2, ti->ti_ack) || ++ (tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))))) { ++ tp->snd_wnd = tiwin; ++ tp->snd_wl1 = ti->ti_seq; ++ tp->snd_wl2 = ti->ti_ack; ++ if (tp->snd_wnd > tp->max_sndwnd) ++ tp->max_sndwnd = tp->snd_wnd; ++ needoutput = 1; ++ } ++ ++ /* ++ * Process segments with URG. ++ */ ++ if ((tiflags & TH_URG) && ti->ti_urp && ++ TCPS_HAVERCVDFIN(tp->t_state) == 0) { ++ /* ++ * This is a kludge, but if we receive and accept ++ * random urgent pointers, we'll crash in ++ * soreceive. It's hard to imagine someone ++ * actually wanting to send this much urgent data. ++ */ ++ if (ti->ti_urp + so->so_rcv.sb_cc > so->so_rcv.sb_datalen) { ++ ti->ti_urp = 0; ++ tiflags &= ~TH_URG; ++ goto dodata; ++ } ++ /* ++ * If this segment advances the known urgent pointer, ++ * then mark the data stream. This should not happen ++ * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since ++ * a FIN has been received from the remote side. ++ * In these states we ignore the URG. ++ * ++ * According to RFC961 (Assigned Protocols), ++ * the urgent pointer points to the last octet ++ * of urgent data. We continue, however, ++ * to consider it to indicate the first octet ++ * of data past the urgent section as the original ++ * spec states (in one of two places). ++ */ ++ if (SEQ_GT(ti->ti_seq + ti->ti_urp, tp->rcv_up)) { ++ tp->rcv_up = ti->ti_seq + ti->ti_urp; ++ so->so_urgc = ++ so->so_rcv.sb_cc + (tp->rcv_up - tp->rcv_nxt); /* -1; */ ++ tp->rcv_up = ti->ti_seq + ti->ti_urp; ++ } ++ } else ++ /* ++ * If no out of band data is expected, ++ * pull receive urgent pointer along ++ * with the receive window. ++ */ ++ if (SEQ_GT(tp->rcv_nxt, tp->rcv_up)) ++ tp->rcv_up = tp->rcv_nxt; ++dodata: ++ ++ /* ++ * If this is a small packet, then ACK now - with Nagel ++ * congestion avoidance sender won't send more until ++ * he gets an ACK. ++ */ ++ if (ti->ti_len && (unsigned)ti->ti_len <= 5 && ++ ((struct tcpiphdr_2 *)ti)->first_char == (char)27) { ++ tp->t_flags |= TF_ACKNOW; ++ } ++ ++ /* ++ * Process the segment text, merging it into the TCP sequencing queue, ++ * and arranging for acknowledgment of receipt if necessary. ++ * This process logically involves adjusting tp->rcv_wnd as data ++ * is presented to the user (this happens in tcp_usrreq.c, ++ * case PRU_RCVD). If a FIN has already been received on this ++ * connection then we just ignore the text. ++ */ ++ if ((ti->ti_len || (tiflags & TH_FIN)) && ++ TCPS_HAVERCVDFIN(tp->t_state) == 0) { ++ TCP_REASS(tp, ti, m, so, tiflags); ++ } else { ++ m_free(m); ++ tiflags &= ~TH_FIN; ++ } ++ ++ /* ++ * If FIN is received ACK the FIN and let the user know ++ * that the connection is closing. ++ */ ++ if (tiflags & TH_FIN) { ++ if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { ++ /* ++ * If we receive a FIN we can't send more data, ++ * set it SS_FDRAIN ++ * Shutdown the socket if there is no rx data in the ++ * buffer. ++ * soread() is called on completion of shutdown() and ++ * will got to TCPS_LAST_ACK, and use tcp_output() ++ * to send the FIN. ++ */ ++ sofwdrain(so); ++ ++ tp->t_flags |= TF_ACKNOW; ++ tp->rcv_nxt++; ++ } ++ switch (tp->t_state) { ++ /* ++ * In SYN_RECEIVED and ESTABLISHED STATES ++ * enter the CLOSE_WAIT state. ++ */ ++ case TCPS_SYN_RECEIVED: ++ case TCPS_ESTABLISHED: ++ if (so->so_emu == EMU_CTL) /* no shutdown on socket */ ++ tp->t_state = TCPS_LAST_ACK; ++ else ++ tp->t_state = TCPS_CLOSE_WAIT; ++ break; ++ ++ /* ++ * If still in FIN_WAIT_1 STATE FIN has not been acked so ++ * enter the CLOSING state. ++ */ ++ case TCPS_FIN_WAIT_1: ++ tp->t_state = TCPS_CLOSING; ++ break; ++ ++ /* ++ * In FIN_WAIT_2 state enter the TIME_WAIT state, ++ * starting the time-wait timer, turning off the other ++ * standard timers. ++ */ ++ case TCPS_FIN_WAIT_2: ++ tp->t_state = TCPS_TIME_WAIT; ++ tcp_canceltimers(tp); ++ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; ++ break; ++ ++ /* ++ * In TIME_WAIT state restart the 2 MSL time_wait timer. ++ */ ++ case TCPS_TIME_WAIT: ++ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; ++ break; ++ } ++ } ++ ++ /* ++ * Return any desired output. ++ */ ++ if (needoutput || (tp->t_flags & TF_ACKNOW)) { ++ tcp_output(tp); ++ } ++ return; ++ ++dropafterack: ++ /* ++ * Generate an ACK dropping incoming segment if it occupies ++ * sequence space, where the ACK reflects our state. ++ */ ++ if (tiflags & TH_RST) ++ goto drop; ++ m_free(m); ++ tp->t_flags |= TF_ACKNOW; ++ tcp_output(tp); ++ return; ++ ++dropwithreset: ++ /* reuses m if m!=NULL, m_free() unnecessary */ ++ if (tiflags & TH_ACK) ++ tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST, af); ++ else { ++ if (tiflags & TH_SYN) ++ ti->ti_len++; ++ tcp_respond(tp, ti, m, ti->ti_seq + ti->ti_len, (tcp_seq)0, ++ TH_RST | TH_ACK, af); ++ } ++ ++ return; ++ ++drop: ++ /* ++ * Drop space held by incoming segment and return. ++ */ ++ m_free(m); ++} ++ ++static void tcp_dooptions(struct tcpcb *tp, uint8_t *cp, int cnt, ++ struct tcpiphdr *ti) ++{ ++ uint16_t mss; ++ int opt, optlen; ++ ++ DEBUG_CALL("tcp_dooptions"); ++ DEBUG_ARG("tp = %p cnt=%i", tp, cnt); ++ ++ for (; cnt > 0; cnt -= optlen, cp += optlen) { ++ opt = cp[0]; ++ if (opt == TCPOPT_EOL) ++ break; ++ if (opt == TCPOPT_NOP) ++ optlen = 1; ++ else { ++ optlen = cp[1]; ++ if (optlen <= 0) ++ break; ++ } ++ switch (opt) { ++ default: ++ continue; ++ ++ case TCPOPT_MAXSEG: ++ if (optlen != TCPOLEN_MAXSEG) ++ continue; ++ if (!(ti->ti_flags & TH_SYN)) ++ continue; ++ memcpy((char *)&mss, (char *)cp + 2, sizeof(mss)); ++ NTOHS(mss); ++ tcp_mss(tp, mss); /* sets t_maxseg */ ++ break; ++ } ++ } ++} ++ ++/* ++ * Collect new round-trip time estimate ++ * and update averages and current timeout. ++ */ ++ ++static void tcp_xmit_timer(register struct tcpcb *tp, int rtt) ++{ ++ register short delta; ++ ++ DEBUG_CALL("tcp_xmit_timer"); ++ DEBUG_ARG("tp = %p", tp); ++ DEBUG_ARG("rtt = %d", rtt); ++ ++ if (tp->t_srtt != 0) { ++ /* ++ * srtt is stored as fixed point with 3 bits after the ++ * binary point (i.e., scaled by 8). The following magic ++ * is equivalent to the smoothing algorithm in rfc793 with ++ * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed ++ * point). Adjust rtt to origin 0. ++ */ ++ delta = rtt - 1 - (tp->t_srtt >> TCP_RTT_SHIFT); ++ if ((tp->t_srtt += delta) <= 0) ++ tp->t_srtt = 1; ++ /* ++ * We accumulate a smoothed rtt variance (actually, a ++ * smoothed mean difference), then set the retransmit ++ * timer to smoothed rtt + 4 times the smoothed variance. ++ * rttvar is stored as fixed point with 2 bits after the ++ * binary point (scaled by 4). The following is ++ * equivalent to rfc793 smoothing with an alpha of .75 ++ * (rttvar = rttvar*3/4 + |delta| / 4). This replaces ++ * rfc793's wired-in beta. ++ */ ++ if (delta < 0) ++ delta = -delta; ++ delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT); ++ if ((tp->t_rttvar += delta) <= 0) ++ tp->t_rttvar = 1; ++ } else { ++ /* ++ * No rtt measurement yet - use the unsmoothed rtt. ++ * Set the variance to half the rtt (so our first ++ * retransmit happens at 3*rtt). ++ */ ++ tp->t_srtt = rtt << TCP_RTT_SHIFT; ++ tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1); ++ } ++ tp->t_rtt = 0; ++ tp->t_rxtshift = 0; ++ ++ /* ++ * the retransmit should happen at rtt + 4 * rttvar. ++ * Because of the way we do the smoothing, srtt and rttvar ++ * will each average +1/2 tick of bias. When we compute ++ * the retransmit timer, we want 1/2 tick of rounding and ++ * 1 extra tick because of +-1/2 tick uncertainty in the ++ * firing of the timer. The bias will give us exactly the ++ * 1.5 tick we need. But, because the bias is ++ * statistical, we have to test that we don't drop below ++ * the minimum feasible timer (which is 2 ticks). ++ */ ++ TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), (short)tp->t_rttmin, ++ TCPTV_REXMTMAX); /* XXX */ ++ ++ /* ++ * We received an ack for a packet that wasn't retransmitted; ++ * it is probably safe to discard any error indications we've ++ * received recently. This isn't quite right, but close enough ++ * for now (a route might have failed after we sent a segment, ++ * and the return path might not be symmetrical). ++ */ ++ tp->t_softerror = 0; ++} ++ ++/* ++ * Determine a reasonable value for maxseg size. ++ * If the route is known, check route for mtu. ++ * If none, use an mss that can be handled on the outgoing ++ * interface without forcing IP to fragment; if bigger than ++ * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES ++ * to utilize large mbufs. If no route is found, route has no mtu, ++ * or the destination isn't local, use a default, hopefully conservative ++ * size (usually 512 or the default IP max size, but no more than the mtu ++ * of the interface), as we can't discover anything about intervening ++ * gateways or networks. We also initialize the congestion/slow start ++ * window to be a single segment if the destination isn't local. ++ * While looking at the routing entry, we also initialize other path-dependent ++ * parameters from pre-set or cached values in the routing entry. ++ */ ++ ++int tcp_mss(struct tcpcb *tp, unsigned offer) ++{ ++ struct socket *so = tp->t_socket; ++ int mss; ++ ++ DEBUG_CALL("tcp_mss"); ++ DEBUG_ARG("tp = %p", tp); ++ DEBUG_ARG("offer = %d", offer); ++ ++ switch (so->so_ffamily) { ++ case AF_INET: ++ mss = MIN(so->slirp->if_mtu, so->slirp->if_mru) - ++ sizeof(struct tcphdr) - sizeof(struct ip); ++ break; ++ case AF_INET6: ++ mss = MIN(so->slirp->if_mtu, so->slirp->if_mru) - ++ sizeof(struct tcphdr) - sizeof(struct ip6); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ ++ if (offer) ++ mss = MIN(mss, offer); ++ mss = MAX(mss, 32); ++ if (mss < tp->t_maxseg || offer != 0) ++ tp->t_maxseg = MIN(mss, TCP_MAXSEG_MAX); ++ ++ tp->snd_cwnd = mss; ++ ++ sbreserve(&so->so_snd, ++ TCP_SNDSPACE + ++ ((TCP_SNDSPACE % mss) ? (mss - (TCP_SNDSPACE % mss)) : 0)); ++ sbreserve(&so->so_rcv, ++ TCP_RCVSPACE + ++ ((TCP_RCVSPACE % mss) ? (mss - (TCP_RCVSPACE % mss)) : 0)); ++ ++ DEBUG_MISC(" returning mss = %d", mss); ++ ++ return mss; ++} +diff --git a/slirp/src/tcp_output.c b/slirp/src/tcp_output.c +new file mode 100644 +index 0000000000..383fe31dcf +--- /dev/null ++++ b/slirp/src/tcp_output.c +@@ -0,0 +1,516 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_output.c 8.3 (Berkeley) 12/30/93 ++ * tcp_output.c,v 1.3 1994/09/15 10:36:55 davidg Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++ ++static const uint8_t tcp_outflags[TCP_NSTATES] = { ++ TH_RST | TH_ACK, 0, TH_SYN, TH_SYN | TH_ACK, ++ TH_ACK, TH_ACK, TH_FIN | TH_ACK, TH_FIN | TH_ACK, ++ TH_FIN | TH_ACK, TH_ACK, TH_ACK, ++}; ++ ++ ++#undef MAX_TCPOPTLEN ++#define MAX_TCPOPTLEN 32 /* max # bytes that go in options */ ++ ++/* ++ * Tcp output routine: figure out what should be sent and send it. ++ */ ++int tcp_output(struct tcpcb *tp) ++{ ++ register struct socket *so = tp->t_socket; ++ register long len, win; ++ int off, flags, error; ++ register struct mbuf *m; ++ register struct tcpiphdr *ti, tcpiph_save; ++ struct ip *ip; ++ struct ip6 *ip6; ++ uint8_t opt[MAX_TCPOPTLEN]; ++ unsigned optlen, hdrlen; ++ int idle, sendalot; ++ ++ DEBUG_CALL("tcp_output"); ++ DEBUG_ARG("tp = %p", tp); ++ ++ /* ++ * Determine length of data that should be transmitted, ++ * and flags that will be used. ++ * If there is some data or critical controls (SYN, RST) ++ * to send, then transmit; otherwise, investigate further. ++ */ ++ idle = (tp->snd_max == tp->snd_una); ++ if (idle && tp->t_idle >= tp->t_rxtcur) ++ /* ++ * We have been idle for "a while" and no acks are ++ * expected to clock out any data we send -- ++ * slow start to get ack "clock" running again. ++ */ ++ tp->snd_cwnd = tp->t_maxseg; ++again: ++ sendalot = 0; ++ off = tp->snd_nxt - tp->snd_una; ++ win = MIN(tp->snd_wnd, tp->snd_cwnd); ++ ++ flags = tcp_outflags[tp->t_state]; ++ ++ DEBUG_MISC(" --- tcp_output flags = 0x%x", flags); ++ ++ /* ++ * If in persist timeout with window of 0, send 1 byte. ++ * Otherwise, if window is small but nonzero ++ * and timer expired, we will send what we can ++ * and go to transmit state. ++ */ ++ if (tp->t_force) { ++ if (win == 0) { ++ /* ++ * If we still have some data to send, then ++ * clear the FIN bit. Usually this would ++ * happen below when it realizes that we ++ * aren't sending all the data. However, ++ * if we have exactly 1 byte of unset data, ++ * then it won't clear the FIN bit below, ++ * and if we are in persist state, we wind ++ * up sending the packet without recording ++ * that we sent the FIN bit. ++ * ++ * We can't just blindly clear the FIN bit, ++ * because if we don't have any more data ++ * to send then the probe will be the FIN ++ * itself. ++ */ ++ if (off < so->so_snd.sb_cc) ++ flags &= ~TH_FIN; ++ win = 1; ++ } else { ++ tp->t_timer[TCPT_PERSIST] = 0; ++ tp->t_rxtshift = 0; ++ } ++ } ++ ++ len = MIN(so->so_snd.sb_cc, win) - off; ++ ++ if (len < 0) { ++ /* ++ * If FIN has been sent but not acked, ++ * but we haven't been called to retransmit, ++ * len will be -1. Otherwise, window shrank ++ * after we sent into it. If window shrank to 0, ++ * cancel pending retransmit and pull snd_nxt ++ * back to (closed) window. We will enter persist ++ * state below. If the window didn't close completely, ++ * just wait for an ACK. ++ */ ++ len = 0; ++ if (win == 0) { ++ tp->t_timer[TCPT_REXMT] = 0; ++ tp->snd_nxt = tp->snd_una; ++ } ++ } ++ ++ if (len > tp->t_maxseg) { ++ len = tp->t_maxseg; ++ sendalot = 1; ++ } ++ if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc)) ++ flags &= ~TH_FIN; ++ ++ win = sbspace(&so->so_rcv); ++ ++ /* ++ * Sender silly window avoidance. If connection is idle ++ * and can send all data, a maximum segment, ++ * at least a maximum default-size segment do it, ++ * or are forced, do it; otherwise don't bother. ++ * If peer's buffer is tiny, then send ++ * when window is at least half open. ++ * If retransmitting (possibly after persist timer forced us ++ * to send into a small window), then must resend. ++ */ ++ if (len) { ++ if (len == tp->t_maxseg) ++ goto send; ++ if ((1 || idle || tp->t_flags & TF_NODELAY) && ++ len + off >= so->so_snd.sb_cc) ++ goto send; ++ if (tp->t_force) ++ goto send; ++ if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0) ++ goto send; ++ if (SEQ_LT(tp->snd_nxt, tp->snd_max)) ++ goto send; ++ } ++ ++ /* ++ * Compare available window to amount of window ++ * known to peer (as advertised window less ++ * next expected input). If the difference is at least two ++ * max size segments, or at least 50% of the maximum possible ++ * window, then want to send a window update to peer. ++ */ ++ if (win > 0) { ++ /* ++ * "adv" is the amount we can increase the window, ++ * taking into account that we are limited by ++ * TCP_MAXWIN << tp->rcv_scale. ++ */ ++ long adv = MIN(win, (long)TCP_MAXWIN << tp->rcv_scale) - ++ (tp->rcv_adv - tp->rcv_nxt); ++ ++ if (adv >= (long)(2 * tp->t_maxseg)) ++ goto send; ++ if (2 * adv >= (long)so->so_rcv.sb_datalen) ++ goto send; ++ } ++ ++ /* ++ * Send if we owe peer an ACK. ++ */ ++ if (tp->t_flags & TF_ACKNOW) ++ goto send; ++ if (flags & (TH_SYN | TH_RST)) ++ goto send; ++ if (SEQ_GT(tp->snd_up, tp->snd_una)) ++ goto send; ++ /* ++ * If our state indicates that FIN should be sent ++ * and we have not yet done so, or we're retransmitting the FIN, ++ * then we need to send. ++ */ ++ if (flags & TH_FIN && ++ ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una)) ++ goto send; ++ ++ /* ++ * TCP window updates are not reliable, rather a polling protocol ++ * using ``persist'' packets is used to insure receipt of window ++ * updates. The three ``states'' for the output side are: ++ * idle not doing retransmits or persists ++ * persisting to move a small or zero window ++ * (re)transmitting and thereby not persisting ++ * ++ * tp->t_timer[TCPT_PERSIST] ++ * is set when we are in persist state. ++ * tp->t_force ++ * is set when we are called to send a persist packet. ++ * tp->t_timer[TCPT_REXMT] ++ * is set when we are retransmitting ++ * The output side is idle when both timers are zero. ++ * ++ * If send window is too small, there is data to transmit, and no ++ * retransmit or persist is pending, then go to persist state. ++ * If nothing happens soon, send when timer expires: ++ * if window is nonzero, transmit what we can, ++ * otherwise force out a byte. ++ */ ++ if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 && ++ tp->t_timer[TCPT_PERSIST] == 0) { ++ tp->t_rxtshift = 0; ++ tcp_setpersist(tp); ++ } ++ ++ /* ++ * No reason to send a segment, just return. ++ */ ++ return (0); ++ ++send: ++ /* ++ * Before ESTABLISHED, force sending of initial options ++ * unless TCP set not to do any options. ++ * NOTE: we assume that the IP/TCP header plus TCP options ++ * always fit in a single mbuf, leaving room for a maximum ++ * link header, i.e. ++ * max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MHLEN ++ */ ++ optlen = 0; ++ hdrlen = sizeof(struct tcpiphdr); ++ if (flags & TH_SYN) { ++ tp->snd_nxt = tp->iss; ++ if ((tp->t_flags & TF_NOOPT) == 0) { ++ uint16_t mss; ++ ++ opt[0] = TCPOPT_MAXSEG; ++ opt[1] = 4; ++ mss = htons((uint16_t)tcp_mss(tp, 0)); ++ memcpy((char *)(opt + 2), (char *)&mss, sizeof(mss)); ++ optlen = 4; ++ } ++ } ++ ++ hdrlen += optlen; ++ ++ /* ++ * Adjust data length if insertion of options will ++ * bump the packet length beyond the t_maxseg length. ++ */ ++ if (len > tp->t_maxseg - optlen) { ++ len = tp->t_maxseg - optlen; ++ sendalot = 1; ++ } ++ ++ /* ++ * Grab a header mbuf, attaching a copy of data to ++ * be transmitted, and initialize the header from ++ * the template for sends on this connection. ++ */ ++ if (len) { ++ m = m_get(so->slirp); ++ if (m == NULL) { ++ error = 1; ++ goto out; ++ } ++ m->m_data += IF_MAXLINKHDR; ++ m->m_len = hdrlen; ++ ++ sbcopy(&so->so_snd, off, (int)len, mtod(m, char *) + hdrlen); ++ m->m_len += len; ++ ++ /* ++ * If we're sending everything we've got, set PUSH. ++ * (This will keep happy those implementations which only ++ * give data to the user when a buffer fills or ++ * a PUSH comes in.) ++ */ ++ if (off + len == so->so_snd.sb_cc) ++ flags |= TH_PUSH; ++ } else { ++ m = m_get(so->slirp); ++ if (m == NULL) { ++ error = 1; ++ goto out; ++ } ++ m->m_data += IF_MAXLINKHDR; ++ m->m_len = hdrlen; ++ } ++ ++ ti = mtod(m, struct tcpiphdr *); ++ ++ memcpy((char *)ti, &tp->t_template, sizeof(struct tcpiphdr)); ++ ++ /* ++ * Fill in fields, remembering maximum advertised ++ * window for use in delaying messages about window sizes. ++ * If resending a FIN, be sure not to use a new sequence number. ++ */ ++ if (flags & TH_FIN && tp->t_flags & TF_SENTFIN && ++ tp->snd_nxt == tp->snd_max) ++ tp->snd_nxt--; ++ /* ++ * If we are doing retransmissions, then snd_nxt will ++ * not reflect the first unsent octet. For ACK only ++ * packets, we do not want the sequence number of the ++ * retransmitted packet, we want the sequence number ++ * of the next unsent octet. So, if there is no data ++ * (and no SYN or FIN), use snd_max instead of snd_nxt ++ * when filling in ti_seq. But if we are in persist ++ * state, snd_max might reflect one byte beyond the ++ * right edge of the window, so use snd_nxt in that ++ * case, since we know we aren't doing a retransmission. ++ * (retransmit and persist are mutually exclusive...) ++ */ ++ if (len || (flags & (TH_SYN | TH_FIN)) || tp->t_timer[TCPT_PERSIST]) ++ ti->ti_seq = htonl(tp->snd_nxt); ++ else ++ ti->ti_seq = htonl(tp->snd_max); ++ ti->ti_ack = htonl(tp->rcv_nxt); ++ if (optlen) { ++ memcpy((char *)(ti + 1), (char *)opt, optlen); ++ ti->ti_off = (sizeof(struct tcphdr) + optlen) >> 2; ++ } ++ ti->ti_flags = flags; ++ /* ++ * Calculate receive window. Don't shrink window, ++ * but avoid silly window syndrome. ++ */ ++ if (win < (long)(so->so_rcv.sb_datalen / 4) && win < (long)tp->t_maxseg) ++ win = 0; ++ if (win > (long)TCP_MAXWIN << tp->rcv_scale) ++ win = (long)TCP_MAXWIN << tp->rcv_scale; ++ if (win < (long)(tp->rcv_adv - tp->rcv_nxt)) ++ win = (long)(tp->rcv_adv - tp->rcv_nxt); ++ ti->ti_win = htons((uint16_t)(win >> tp->rcv_scale)); ++ ++ if (SEQ_GT(tp->snd_up, tp->snd_una)) { ++ ti->ti_urp = htons((uint16_t)(tp->snd_up - ntohl(ti->ti_seq))); ++ ti->ti_flags |= TH_URG; ++ } else ++ /* ++ * If no urgent pointer to send, then we pull ++ * the urgent pointer to the left edge of the send window ++ * so that it doesn't drift into the send window on sequence ++ * number wraparound. ++ */ ++ tp->snd_up = tp->snd_una; /* drag it along */ ++ ++ /* ++ * Put TCP length in extended header, and then ++ * checksum extended header and data. ++ */ ++ if (len + optlen) ++ ti->ti_len = htons((uint16_t)(sizeof(struct tcphdr) + optlen + len)); ++ ti->ti_sum = cksum(m, (int)(hdrlen + len)); ++ ++ /* ++ * In transmit state, time the transmission and arrange for ++ * the retransmit. In persist state, just set snd_max. ++ */ ++ if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) { ++ tcp_seq startseq = tp->snd_nxt; ++ ++ /* ++ * Advance snd_nxt over sequence space of this segment. ++ */ ++ if (flags & (TH_SYN | TH_FIN)) { ++ if (flags & TH_SYN) ++ tp->snd_nxt++; ++ if (flags & TH_FIN) { ++ tp->snd_nxt++; ++ tp->t_flags |= TF_SENTFIN; ++ } ++ } ++ tp->snd_nxt += len; ++ if (SEQ_GT(tp->snd_nxt, tp->snd_max)) { ++ tp->snd_max = tp->snd_nxt; ++ /* ++ * Time this transmission if not a retransmission and ++ * not currently timing anything. ++ */ ++ if (tp->t_rtt == 0) { ++ tp->t_rtt = 1; ++ tp->t_rtseq = startseq; ++ } ++ } ++ ++ /* ++ * Set retransmit timer if not currently set, ++ * and not doing an ack or a keep-alive probe. ++ * Initial value for retransmit timer is smoothed ++ * round-trip time + 2 * round-trip time variance. ++ * Initialize shift counter which is used for backoff ++ * of retransmit time. ++ */ ++ if (tp->t_timer[TCPT_REXMT] == 0 && tp->snd_nxt != tp->snd_una) { ++ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; ++ if (tp->t_timer[TCPT_PERSIST]) { ++ tp->t_timer[TCPT_PERSIST] = 0; ++ tp->t_rxtshift = 0; ++ } ++ } ++ } else if (SEQ_GT(tp->snd_nxt + len, tp->snd_max)) ++ tp->snd_max = tp->snd_nxt + len; ++ ++ /* ++ * Fill in IP length and desired time to live and ++ * send to IP level. There should be a better way ++ * to handle ttl and tos; we could keep them in ++ * the template, but need a way to checksum without them. ++ */ ++ m->m_len = hdrlen + len; /* XXX Needed? m_len should be correct */ ++ tcpiph_save = *mtod(m, struct tcpiphdr *); ++ ++ switch (so->so_ffamily) { ++ case AF_INET: ++ m->m_data += ++ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); ++ m->m_len -= ++ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); ++ ip = mtod(m, struct ip *); ++ ++ ip->ip_len = m->m_len; ++ ip->ip_dst = tcpiph_save.ti_dst; ++ ip->ip_src = tcpiph_save.ti_src; ++ ip->ip_p = tcpiph_save.ti_pr; ++ ++ ip->ip_ttl = IPDEFTTL; ++ ip->ip_tos = so->so_iptos; ++ error = ip_output(so, m); ++ break; ++ ++ case AF_INET6: ++ m->m_data += sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - ++ sizeof(struct ip6); ++ m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - ++ sizeof(struct ip6); ++ ip6 = mtod(m, struct ip6 *); ++ ++ ip6->ip_pl = tcpiph_save.ti_len; ++ ip6->ip_dst = tcpiph_save.ti_dst6; ++ ip6->ip_src = tcpiph_save.ti_src6; ++ ip6->ip_nh = tcpiph_save.ti_nh6; ++ ++ error = ip6_output(so, m, 0); ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ } ++ ++ if (error) { ++ out: ++ return (error); ++ } ++ ++ /* ++ * Data sent (as far as we can tell). ++ * If this advertises a larger window than any other segment, ++ * then remember the size of the advertised window. ++ * Any pending ACK has now been sent. ++ */ ++ if (win > 0 && SEQ_GT(tp->rcv_nxt + win, tp->rcv_adv)) ++ tp->rcv_adv = tp->rcv_nxt + win; ++ tp->last_ack_sent = tp->rcv_nxt; ++ tp->t_flags &= ~(TF_ACKNOW | TF_DELACK); ++ if (sendalot) ++ goto again; ++ ++ return (0); ++} ++ ++void tcp_setpersist(struct tcpcb *tp) ++{ ++ int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1; ++ ++ /* ++ * Start/restart persistence timer. ++ */ ++ TCPT_RANGESET(tp->t_timer[TCPT_PERSIST], t * tcp_backoff[tp->t_rxtshift], ++ TCPTV_PERSMIN, TCPTV_PERSMAX); ++ if (tp->t_rxtshift < TCP_MAXRXTSHIFT) ++ tp->t_rxtshift++; ++} +diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c +new file mode 100644 +index 0000000000..600cfa1456 +--- /dev/null ++++ b/slirp/src/tcp_subr.c +@@ -0,0 +1,1011 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_subr.c 8.1 (Berkeley) 6/10/93 ++ * tcp_subr.c,v 1.5 1994/10/08 22:39:58 phk Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++ ++/* patchable/settable parameters for tcp */ ++/* Don't do rfc1323 performance enhancements */ ++#define TCP_DO_RFC1323 0 ++ ++/* ++ * Tcp initialization ++ */ ++void tcp_init(Slirp *slirp) ++{ ++ slirp->tcp_iss = 1; /* wrong */ ++ slirp->tcb.so_next = slirp->tcb.so_prev = &slirp->tcb; ++ slirp->tcp_last_so = &slirp->tcb; ++} ++ ++void tcp_cleanup(Slirp *slirp) ++{ ++ while (slirp->tcb.so_next != &slirp->tcb) { ++ tcp_close(sototcpcb(slirp->tcb.so_next)); ++ } ++} ++ ++/* ++ * Create template to be used to send tcp packets on a connection. ++ * Call after host entry created, fills ++ * in a skeletal tcp/ip header, minimizing the amount of work ++ * necessary when the connection is used. ++ */ ++void tcp_template(struct tcpcb *tp) ++{ ++ struct socket *so = tp->t_socket; ++ register struct tcpiphdr *n = &tp->t_template; ++ ++ n->ti_mbuf = NULL; ++ memset(&n->ti, 0, sizeof(n->ti)); ++ n->ti_x0 = 0; ++ switch (so->so_ffamily) { ++ case AF_INET: ++ n->ti_pr = IPPROTO_TCP; ++ n->ti_len = htons(sizeof(struct tcphdr)); ++ n->ti_src = so->so_faddr; ++ n->ti_dst = so->so_laddr; ++ n->ti_sport = so->so_fport; ++ n->ti_dport = so->so_lport; ++ break; ++ ++ case AF_INET6: ++ n->ti_nh6 = IPPROTO_TCP; ++ n->ti_len = htons(sizeof(struct tcphdr)); ++ n->ti_src6 = so->so_faddr6; ++ n->ti_dst6 = so->so_laddr6; ++ n->ti_sport = so->so_fport6; ++ n->ti_dport = so->so_lport6; ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ } ++ ++ n->ti_seq = 0; ++ n->ti_ack = 0; ++ n->ti_x2 = 0; ++ n->ti_off = 5; ++ n->ti_flags = 0; ++ n->ti_win = 0; ++ n->ti_sum = 0; ++ n->ti_urp = 0; ++} ++ ++/* ++ * Send a single message to the TCP at address specified by ++ * the given TCP/IP header. If m == 0, then we make a copy ++ * of the tcpiphdr at ti and send directly to the addressed host. ++ * This is used to force keep alive messages out using the TCP ++ * template for a connection tp->t_template. If flags are given ++ * then we send a message back to the TCP which originated the ++ * segment ti, and discard the mbuf containing it and any other ++ * attached mbufs. ++ * ++ * In any case the ack and sequence number of the transmitted ++ * segment are as specified by the parameters. ++ */ ++void tcp_respond(struct tcpcb *tp, struct tcpiphdr *ti, struct mbuf *m, ++ tcp_seq ack, tcp_seq seq, int flags, unsigned short af) ++{ ++ register int tlen; ++ int win = 0; ++ ++ DEBUG_CALL("tcp_respond"); ++ DEBUG_ARG("tp = %p", tp); ++ DEBUG_ARG("ti = %p", ti); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("ack = %u", ack); ++ DEBUG_ARG("seq = %u", seq); ++ DEBUG_ARG("flags = %x", flags); ++ ++ if (tp) ++ win = sbspace(&tp->t_socket->so_rcv); ++ if (m == NULL) { ++ if (!tp || (m = m_get(tp->t_socket->slirp)) == NULL) ++ return; ++ tlen = 0; ++ m->m_data += IF_MAXLINKHDR; ++ *mtod(m, struct tcpiphdr *) = *ti; ++ ti = mtod(m, struct tcpiphdr *); ++ switch (af) { ++ case AF_INET: ++ ti->ti.ti_i4.ih_x1 = 0; ++ break; ++ case AF_INET6: ++ ti->ti.ti_i6.ih_x1 = 0; ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ flags = TH_ACK; ++ } else { ++ /* ++ * ti points into m so the next line is just making ++ * the mbuf point to ti ++ */ ++ m->m_data = (char *)ti; ++ ++ m->m_len = sizeof(struct tcpiphdr); ++ tlen = 0; ++#define xchg(a, b, type) \ ++ { \ ++ type t; \ ++ t = a; \ ++ a = b; \ ++ b = t; \ ++ } ++ switch (af) { ++ case AF_INET: ++ xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, uint32_t); ++ xchg(ti->ti_dport, ti->ti_sport, uint16_t); ++ break; ++ case AF_INET6: ++ xchg(ti->ti_dst6, ti->ti_src6, struct in6_addr); ++ xchg(ti->ti_dport, ti->ti_sport, uint16_t); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++#undef xchg ++ } ++ ti->ti_len = htons((uint16_t)(sizeof(struct tcphdr) + tlen)); ++ tlen += sizeof(struct tcpiphdr); ++ m->m_len = tlen; ++ ++ ti->ti_mbuf = NULL; ++ ti->ti_x0 = 0; ++ ti->ti_seq = htonl(seq); ++ ti->ti_ack = htonl(ack); ++ ti->ti_x2 = 0; ++ ti->ti_off = sizeof(struct tcphdr) >> 2; ++ ti->ti_flags = flags; ++ if (tp) ++ ti->ti_win = htons((uint16_t)(win >> tp->rcv_scale)); ++ else ++ ti->ti_win = htons((uint16_t)win); ++ ti->ti_urp = 0; ++ ti->ti_sum = 0; ++ ti->ti_sum = cksum(m, tlen); ++ ++ struct tcpiphdr tcpiph_save = *(mtod(m, struct tcpiphdr *)); ++ struct ip *ip; ++ struct ip6 *ip6; ++ ++ switch (af) { ++ case AF_INET: ++ m->m_data += ++ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); ++ m->m_len -= ++ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); ++ ip = mtod(m, struct ip *); ++ ip->ip_len = m->m_len; ++ ip->ip_dst = tcpiph_save.ti_dst; ++ ip->ip_src = tcpiph_save.ti_src; ++ ip->ip_p = tcpiph_save.ti_pr; ++ ++ if (flags & TH_RST) { ++ ip->ip_ttl = MAXTTL; ++ } else { ++ ip->ip_ttl = IPDEFTTL; ++ } ++ ++ ip_output(NULL, m); ++ break; ++ ++ case AF_INET6: ++ m->m_data += sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - ++ sizeof(struct ip6); ++ m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - ++ sizeof(struct ip6); ++ ip6 = mtod(m, struct ip6 *); ++ ip6->ip_pl = tcpiph_save.ti_len; ++ ip6->ip_dst = tcpiph_save.ti_dst6; ++ ip6->ip_src = tcpiph_save.ti_src6; ++ ip6->ip_nh = tcpiph_save.ti_nh6; ++ ++ ip6_output(NULL, m, 0); ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ } ++} ++ ++/* ++ * Create a new TCP control block, making an ++ * empty reassembly queue and hooking it to the argument ++ * protocol control block. ++ */ ++struct tcpcb *tcp_newtcpcb(struct socket *so) ++{ ++ register struct tcpcb *tp; ++ ++ tp = g_new0(struct tcpcb, 1); ++ tp->seg_next = tp->seg_prev = (struct tcpiphdr *)tp; ++ /* ++ * 40: length of IPv4 header (20) + TCP header (20) ++ * 60: length of IPv6 header (40) + TCP header (20) ++ */ ++ tp->t_maxseg = ++ MIN(so->slirp->if_mtu - ((so->so_ffamily == AF_INET) ? 40 : 60), ++ TCP_MAXSEG_MAX); ++ ++ tp->t_flags = TCP_DO_RFC1323 ? (TF_REQ_SCALE | TF_REQ_TSTMP) : 0; ++ tp->t_socket = so; ++ ++ /* ++ * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no ++ * rtt estimate. Set rttvar so that srtt + 2 * rttvar gives ++ * reasonable initial retransmit time. ++ */ ++ tp->t_srtt = TCPTV_SRTTBASE; ++ tp->t_rttvar = TCPTV_SRTTDFLT << 2; ++ tp->t_rttmin = TCPTV_MIN; ++ ++ TCPT_RANGESET(tp->t_rxtcur, ++ ((TCPTV_SRTTBASE >> 2) + (TCPTV_SRTTDFLT << 2)) >> 1, ++ TCPTV_MIN, TCPTV_REXMTMAX); ++ ++ tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; ++ tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; ++ tp->t_state = TCPS_CLOSED; ++ ++ so->so_tcpcb = tp; ++ ++ return (tp); ++} ++ ++/* ++ * Drop a TCP connection, reporting ++ * the specified error. If connection is synchronized, ++ * then send a RST to peer. ++ */ ++struct tcpcb *tcp_drop(struct tcpcb *tp, int err) ++{ ++ DEBUG_CALL("tcp_drop"); ++ DEBUG_ARG("tp = %p", tp); ++ DEBUG_ARG("errno = %d", errno); ++ ++ if (TCPS_HAVERCVDSYN(tp->t_state)) { ++ tp->t_state = TCPS_CLOSED; ++ tcp_output(tp); ++ } ++ return (tcp_close(tp)); ++} ++ ++/* ++ * Close a TCP control block: ++ * discard all space held by the tcp ++ * discard internet protocol block ++ * wake up any sleepers ++ */ ++struct tcpcb *tcp_close(struct tcpcb *tp) ++{ ++ register struct tcpiphdr *t; ++ struct socket *so = tp->t_socket; ++ Slirp *slirp = so->slirp; ++ register struct mbuf *m; ++ ++ DEBUG_CALL("tcp_close"); ++ DEBUG_ARG("tp = %p", tp); ++ ++ /* free the reassembly queue, if any */ ++ t = tcpfrag_list_first(tp); ++ while (!tcpfrag_list_end(t, tp)) { ++ t = tcpiphdr_next(t); ++ m = tcpiphdr_prev(t)->ti_mbuf; ++ remque(tcpiphdr2qlink(tcpiphdr_prev(t))); ++ m_free(m); ++ } ++ g_free(tp); ++ so->so_tcpcb = NULL; ++ /* clobber input socket cache if we're closing the cached connection */ ++ if (so == slirp->tcp_last_so) ++ slirp->tcp_last_so = &slirp->tcb; ++ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); ++ closesocket(so->s); ++ sbfree(&so->so_rcv); ++ sbfree(&so->so_snd); ++ sofree(so); ++ return ((struct tcpcb *)0); ++} ++ ++/* ++ * TCP protocol interface to socket abstraction. ++ */ ++ ++/* ++ * User issued close, and wish to trail through shutdown states: ++ * if never received SYN, just forget it. If got a SYN from peer, ++ * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. ++ * If already got a FIN from peer, then almost done; go to LAST_ACK ++ * state. In all other cases, have already sent FIN to peer (e.g. ++ * after PRU_SHUTDOWN), and just have to play tedious game waiting ++ * for peer to send FIN or not respond to keep-alives, etc. ++ * We can let the user exit from the close as soon as the FIN is acked. ++ */ ++void tcp_sockclosed(struct tcpcb *tp) ++{ ++ DEBUG_CALL("tcp_sockclosed"); ++ DEBUG_ARG("tp = %p", tp); ++ ++ if (!tp) { ++ return; ++ } ++ ++ switch (tp->t_state) { ++ case TCPS_CLOSED: ++ case TCPS_LISTEN: ++ case TCPS_SYN_SENT: ++ tp->t_state = TCPS_CLOSED; ++ tcp_close(tp); ++ return; ++ ++ case TCPS_SYN_RECEIVED: ++ case TCPS_ESTABLISHED: ++ tp->t_state = TCPS_FIN_WAIT_1; ++ break; ++ ++ case TCPS_CLOSE_WAIT: ++ tp->t_state = TCPS_LAST_ACK; ++ break; ++ } ++ tcp_output(tp); ++} ++ ++/* ++ * Connect to a host on the Internet ++ * Called by tcp_input ++ * Only do a connect, the tcp fields will be set in tcp_input ++ * return 0 if there's a result of the connect, ++ * else return -1 means we're still connecting ++ * The return value is almost always -1 since the socket is ++ * nonblocking. Connect returns after the SYN is sent, and does ++ * not wait for ACK+SYN. ++ */ ++int tcp_fconnect(struct socket *so, unsigned short af) ++{ ++ int ret = 0; ++ ++ DEBUG_CALL("tcp_fconnect"); ++ DEBUG_ARG("so = %p", so); ++ ++ ret = so->s = slirp_socket(af, SOCK_STREAM, 0); ++ if (ret >= 0) { ++ ret = slirp_bind_outbound(so, af); ++ if (ret < 0) { ++ // bind failed - close socket ++ closesocket(so->s); ++ so->s = -1; ++ return (ret); ++ } ++ } ++ ++ if (ret >= 0) { ++ int opt, s = so->s; ++ struct sockaddr_storage addr; ++ ++ slirp_set_nonblock(s); ++ so->slirp->cb->register_poll_fd(s, so->slirp->opaque); ++ slirp_socket_set_fast_reuse(s); ++ opt = 1; ++ setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(opt)); ++ opt = 1; ++ setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &opt, sizeof(opt)); ++ ++ addr = so->fhost.ss; ++ DEBUG_CALL(" connect()ing"); ++ if (sotranslate_out(so, &addr) < 0) { ++ return -1; ++ } ++ ++ /* We don't care what port we get */ ++ ret = connect(s, (struct sockaddr *)&addr, sockaddr_size(&addr)); ++ ++ /* ++ * If it's not in progress, it failed, so we just return 0, ++ * without clearing SS_NOFDREF ++ */ ++ soisfconnecting(so); ++ } ++ ++ return (ret); ++} ++ ++/* ++ * Accept the socket and connect to the local-host ++ * ++ * We have a problem. The correct thing to do would be ++ * to first connect to the local-host, and only if the ++ * connection is accepted, then do an accept() here. ++ * But, a) we need to know who's trying to connect ++ * to the socket to be able to SYN the local-host, and ++ * b) we are already connected to the foreign host by ++ * the time it gets to accept(), so... We simply accept ++ * here and SYN the local-host. ++ */ ++void tcp_connect(struct socket *inso) ++{ ++ Slirp *slirp = inso->slirp; ++ struct socket *so; ++ struct sockaddr_storage addr; ++ socklen_t addrlen = sizeof(struct sockaddr_storage); ++ struct tcpcb *tp; ++ int s, opt, ret; ++ /* AF_INET6 addresses are bigger than AF_INET, so this is big enough. */ ++ char addrstr[INET6_ADDRSTRLEN]; ++ char portstr[6]; ++ ++ DEBUG_CALL("tcp_connect"); ++ DEBUG_ARG("inso = %p", inso); ++ ret = getnameinfo((const struct sockaddr *) &inso->lhost.ss, sizeof(inso->lhost.ss), addrstr, sizeof(addrstr), portstr, sizeof(portstr), NI_NUMERICHOST|NI_NUMERICSERV); ++ g_assert(ret == 0); ++ DEBUG_ARG("ip = [%s]:%s", addrstr, portstr); ++ DEBUG_ARG("so_state = 0x%x", inso->so_state); ++ ++ /* Perform lazy guest IP address resolution if needed. */ ++ if (inso->so_state & SS_HOSTFWD) { ++ /* ++ * We can only reject the connection request by accepting it and ++ * then immediately closing it. Note that SS_FACCEPTONCE sockets can't ++ * get here. ++ */ ++ if (soassign_guest_addr_if_needed(inso) < 0) { ++ /* ++ * Guest address isn't available yet. We could either try to defer ++ * completing this connection request until the guest address is ++ * available, or punt. It's easier to punt. Otherwise we need to ++ * complicate the mechanism by which we're called to defer calling ++ * us again until the guest address is available. ++ */ ++ DEBUG_MISC(" guest address not available yet"); ++ s = accept(inso->s, (struct sockaddr *)&addr, &addrlen); ++ if (s >= 0) { ++ close(s); ++ } ++ return; ++ } ++ } ++ ++ /* ++ * If it's an SS_ACCEPTONCE socket, no need to socreate() ++ * another socket, just use the accept() socket. ++ */ ++ if (inso->so_state & SS_FACCEPTONCE) { ++ /* FACCEPTONCE already have a tcpcb */ ++ so = inso; ++ } else { ++ so = socreate(slirp); ++ tcp_attach(so); ++ so->lhost = inso->lhost; ++ so->so_ffamily = inso->so_ffamily; ++ } ++ ++ tcp_mss(sototcpcb(so), 0); ++ ++ s = accept(inso->s, (struct sockaddr *)&addr, &addrlen); ++ if (s < 0) { ++ tcp_close(sototcpcb(so)); /* This will sofree() as well */ ++ return; ++ } ++ slirp_set_nonblock(s); ++ so->slirp->cb->register_poll_fd(s, so->slirp->opaque); ++ slirp_socket_set_fast_reuse(s); ++ opt = 1; ++ setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int)); ++ slirp_socket_set_nodelay(s); ++ ++ so->fhost.ss = addr; ++ sotranslate_accept(so); ++ ++ /* Close the accept() socket, set right state */ ++ if (inso->so_state & SS_FACCEPTONCE) { ++ /* If we only accept once, close the accept() socket */ ++ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); ++ closesocket(so->s); ++ ++ /* Don't select it yet, even though we have an FD */ ++ /* if it's not FACCEPTONCE, it's already NOFDREF */ ++ so->so_state = SS_NOFDREF; ++ } ++ so->s = s; ++ so->so_state |= SS_INCOMING; ++ ++ so->so_iptos = tcp_tos(so); ++ tp = sototcpcb(so); ++ ++ tcp_template(tp); ++ ++ tp->t_state = TCPS_SYN_SENT; ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; ++ tp->iss = slirp->tcp_iss; ++ slirp->tcp_iss += TCP_ISSINCR / 2; ++ tcp_sendseqinit(tp); ++ tcp_output(tp); ++} ++ ++/* ++ * Attach a TCPCB to a socket. ++ */ ++void tcp_attach(struct socket *so) ++{ ++ so->so_tcpcb = tcp_newtcpcb(so); ++ insque(so, &so->slirp->tcb); ++} ++ ++/* ++ * Set the socket's type of service field ++ */ ++static const struct tos_t tcptos[] = { ++ { 0, 20, IPTOS_THROUGHPUT, 0 }, /* ftp data */ ++ { 21, 21, IPTOS_LOWDELAY, EMU_FTP }, /* ftp control */ ++ { 0, 23, IPTOS_LOWDELAY, 0 }, /* telnet */ ++ { 0, 80, IPTOS_THROUGHPUT, 0 }, /* WWW */ ++ { 0, 513, IPTOS_LOWDELAY, EMU_RLOGIN | EMU_NOCONNECT }, /* rlogin */ ++ { 0, 544, IPTOS_LOWDELAY, EMU_KSH }, /* kshell */ ++ { 0, 543, IPTOS_LOWDELAY, 0 }, /* klogin */ ++ { 0, 6667, IPTOS_THROUGHPUT, EMU_IRC }, /* IRC */ ++ { 0, 6668, IPTOS_THROUGHPUT, EMU_IRC }, /* IRC undernet */ ++ { 0, 7070, IPTOS_LOWDELAY, EMU_REALAUDIO }, /* RealAudio control */ ++ { 0, 113, IPTOS_LOWDELAY, EMU_IDENT }, /* identd protocol */ ++ { 0, 0, 0, 0 } ++}; ++ ++/* ++ * Return TOS according to the above table ++ */ ++uint8_t tcp_tos(struct socket *so) ++{ ++ int i = 0; ++ ++ while (tcptos[i].tos) { ++ if ((tcptos[i].fport && (ntohs(so->so_fport) == tcptos[i].fport)) || ++ (tcptos[i].lport && (ntohs(so->so_lport) == tcptos[i].lport))) { ++ if (so->slirp->enable_emu) ++ so->so_emu = tcptos[i].emu; ++ return tcptos[i].tos; ++ } ++ i++; ++ } ++ return 0; ++} ++ ++/* ++ * Emulate programs that try and connect to us ++ * This includes ftp (the data connection is ++ * initiated by the server) and IRC (DCC CHAT and ++ * DCC SEND) for now ++ * ++ * NOTE: It's possible to crash SLiRP by sending it ++ * unstandard strings to emulate... if this is a problem, ++ * more checks are needed here ++ * ++ * XXX Assumes the whole command came in one packet ++ * XXX If there is more than one command in the packet, the others may ++ * be truncated. ++ * XXX If the command is too long, it may be truncated. ++ * ++ * XXX Some ftp clients will have their TOS set to ++ * LOWDELAY and so Nagel will kick in. Because of this, ++ * we'll get the first letter, followed by the rest, so ++ * we simply scan for ORT instead of PORT... ++ * DCC doesn't have this problem because there's other stuff ++ * in the packet before the DCC command. ++ * ++ * Return 1 if the mbuf m is still valid and should be ++ * sbappend()ed ++ * ++ * NOTE: if you return 0 you MUST m_free() the mbuf! ++ */ ++int tcp_emu(struct socket *so, struct mbuf *m) ++{ ++ Slirp *slirp = so->slirp; ++ unsigned n1, n2, n3, n4, n5, n6; ++ char buff[257]; ++ uint32_t laddr; ++ unsigned lport; ++ char *bptr; ++ ++ DEBUG_CALL("tcp_emu"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ ++ switch (so->so_emu) { ++ int x, i; ++ ++ /* TODO: IPv6 */ ++ case EMU_IDENT: ++ /* ++ * Identification protocol as per rfc-1413 ++ */ ++ ++ { ++ struct socket *tmpso; ++ struct sockaddr_in addr; ++ socklen_t addrlen = sizeof(struct sockaddr_in); ++ char *eol = g_strstr_len(m->m_data, m->m_len, "\r\n"); ++ ++ if (!eol) { ++ return 1; ++ } ++ ++ *eol = '\0'; ++ if (sscanf(m->m_data, "%u%*[ ,]%u", &n1, &n2) == 2) { ++ HTONS(n1); ++ HTONS(n2); ++ /* n2 is the one on our host */ ++ for (tmpso = slirp->tcb.so_next; tmpso != &slirp->tcb; ++ tmpso = tmpso->so_next) { ++ if (tmpso->so_laddr.s_addr == so->so_laddr.s_addr && ++ tmpso->so_lport == n2 && ++ tmpso->so_faddr.s_addr == so->so_faddr.s_addr && ++ tmpso->so_fport == n1) { ++ if (getsockname(tmpso->s, (struct sockaddr *)&addr, ++ &addrlen) == 0) ++ n2 = addr.sin_port; ++ break; ++ } ++ } ++ NTOHS(n1); ++ NTOHS(n2); ++ m_inc(m, g_snprintf(NULL, 0, "%d,%d\r\n", n1, n2) + 1); ++ m->m_len = slirp_fmt(m->m_data, M_ROOM(m), "%d,%d\r\n", n1, n2); ++ } else { ++ *eol = '\r'; ++ } ++ ++ return 1; ++ } ++ ++ case EMU_FTP: /* ftp */ ++ m_inc(m, m->m_len + 1); ++ *(m->m_data + m->m_len) = 0; /* NUL terminate for strstr */ ++ if ((bptr = (char *)strstr(m->m_data, "ORT")) != NULL) { ++ /* ++ * Need to emulate the PORT command ++ */ ++ x = sscanf(bptr, "ORT %u,%u,%u,%u,%u,%u\r\n%256[^\177]", &n1, &n2, ++ &n3, &n4, &n5, &n6, buff); ++ if (x < 6) ++ return 1; ++ ++ laddr = htonl((n1 << 24) | (n2 << 16) | (n3 << 8) | (n4)); ++ lport = htons((n5 << 8) | (n6)); ++ ++ if ((so = tcp_listen(slirp, INADDR_ANY, 0, laddr, lport, ++ SS_FACCEPTONCE)) == NULL) { ++ return 1; ++ } ++ n6 = ntohs(so->so_fport); ++ ++ n5 = (n6 >> 8) & 0xff; ++ n6 &= 0xff; ++ ++ laddr = ntohl(so->so_faddr.s_addr); ++ ++ n1 = ((laddr >> 24) & 0xff); ++ n2 = ((laddr >> 16) & 0xff); ++ n3 = ((laddr >> 8) & 0xff); ++ n4 = (laddr & 0xff); ++ ++ m->m_len = bptr - m->m_data; /* Adjust length */ ++ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), ++ "ORT %d,%d,%d,%d,%d,%d\r\n%s", ++ n1, n2, n3, n4, n5, n6, x == 7 ? buff : ""); ++ return 1; ++ } else if ((bptr = (char *)strstr(m->m_data, "27 Entering")) != NULL) { ++ /* ++ * Need to emulate the PASV response ++ */ ++ x = sscanf( ++ bptr, ++ "27 Entering Passive Mode (%u,%u,%u,%u,%u,%u)\r\n%256[^\177]", ++ &n1, &n2, &n3, &n4, &n5, &n6, buff); ++ if (x < 6) ++ return 1; ++ ++ laddr = htonl((n1 << 24) | (n2 << 16) | (n3 << 8) | (n4)); ++ lport = htons((n5 << 8) | (n6)); ++ ++ if ((so = tcp_listen(slirp, INADDR_ANY, 0, laddr, lport, ++ SS_FACCEPTONCE)) == NULL) { ++ return 1; ++ } ++ n6 = ntohs(so->so_fport); ++ ++ n5 = (n6 >> 8) & 0xff; ++ n6 &= 0xff; ++ ++ laddr = ntohl(so->so_faddr.s_addr); ++ ++ n1 = ((laddr >> 24) & 0xff); ++ n2 = ((laddr >> 16) & 0xff); ++ n3 = ((laddr >> 8) & 0xff); ++ n4 = (laddr & 0xff); ++ ++ m->m_len = bptr - m->m_data; /* Adjust length */ ++ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), ++ "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s", ++ n1, n2, n3, n4, n5, n6, x == 7 ? buff : ""); ++ return 1; ++ } ++ ++ return 1; ++ ++ case EMU_KSH: ++ /* ++ * The kshell (Kerberos rsh) and shell services both pass ++ * a local port port number to carry signals to the server ++ * and stderr to the client. It is passed at the beginning ++ * of the connection as a NUL-terminated decimal ASCII string. ++ */ ++ so->so_emu = 0; ++ for (lport = 0, i = 0; i < m->m_len - 1; ++i) { ++ if (m->m_data[i] < '0' || m->m_data[i] > '9') ++ return 1; /* invalid number */ ++ lport *= 10; ++ lport += m->m_data[i] - '0'; ++ } ++ if (m->m_data[m->m_len - 1] == '\0' && lport != 0 && ++ (so = tcp_listen(slirp, INADDR_ANY, 0, so->so_laddr.s_addr, ++ htons(lport), SS_FACCEPTONCE)) != NULL) ++ m->m_len = slirp_fmt0(m->m_data, M_ROOM(m), ++ "%d", ntohs(so->so_fport)); ++ return 1; ++ ++ case EMU_IRC: ++ /* ++ * Need to emulate DCC CHAT, DCC SEND and DCC MOVE ++ */ ++ m_inc(m, m->m_len + 1); ++ *(m->m_data + m->m_len) = 0; /* NULL terminate the string for strstr */ ++ if ((bptr = (char *)strstr(m->m_data, "DCC")) == NULL) ++ return 1; ++ ++ /* The %256s is for the broken mIRC */ ++ if (sscanf(bptr, "DCC CHAT %256s %u %u", buff, &laddr, &lport) == 3) { ++ if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr), ++ htons(lport), SS_FACCEPTONCE)) == NULL) { ++ return 1; ++ } ++ m->m_len = bptr - m->m_data; /* Adjust length */ ++ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), ++ "DCC CHAT chat %lu %u%c\n", ++ (unsigned long)ntohl(so->so_faddr.s_addr), ++ ntohs(so->so_fport), 1); ++ } else if (sscanf(bptr, "DCC SEND %256s %u %u %u", buff, &laddr, &lport, ++ &n1) == 4) { ++ if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr), ++ htons(lport), SS_FACCEPTONCE)) == NULL) { ++ return 1; ++ } ++ m->m_len = bptr - m->m_data; /* Adjust length */ ++ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), ++ "DCC SEND %s %lu %u %u%c\n", buff, ++ (unsigned long)ntohl(so->so_faddr.s_addr), ++ ntohs(so->so_fport), n1, 1); ++ } else if (sscanf(bptr, "DCC MOVE %256s %u %u %u", buff, &laddr, &lport, ++ &n1) == 4) { ++ if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr), ++ htons(lport), SS_FACCEPTONCE)) == NULL) { ++ return 1; ++ } ++ m->m_len = bptr - m->m_data; /* Adjust length */ ++ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), ++ "DCC MOVE %s %lu %u %u%c\n", buff, ++ (unsigned long)ntohl(so->so_faddr.s_addr), ++ ntohs(so->so_fport), n1, 1); ++ } ++ return 1; ++ ++ case EMU_REALAUDIO: ++ /* ++ * RealAudio emulation - JP. We must try to parse the incoming ++ * data and try to find the two characters that contain the ++ * port number. Then we redirect an udp port and replace the ++ * number with the real port we got. ++ * ++ * The 1.0 beta versions of the player are not supported ++ * any more. ++ * ++ * A typical packet for player version 1.0 (release version): ++ * ++ * 0000:50 4E 41 00 05 ++ * 0000:00 01 00 02 1B D7 00 00 67 E6 6C DC 63 00 12 50 ........g.l.c..P ++ * 0010:4E 43 4C 49 45 4E 54 20 31 30 31 20 41 4C 50 48 NCLIENT 101 ALPH ++ * 0020:41 6C 00 00 52 00 17 72 61 66 69 6C 65 73 2F 76 Al..R..rafiles/v ++ * 0030:6F 61 2F 65 6E 67 6C 69 73 68 5F 2E 72 61 79 42 oa/english_.rayB ++ * ++ * Now the port number 0x1BD7 is found at offset 0x04 of the ++ * Now the port number 0x1BD7 is found at offset 0x04 of the ++ * second packet. This time we received five bytes first and ++ * then the rest. You never know how many bytes you get. ++ * ++ * A typical packet for player version 2.0 (beta): ++ * ++ * 0000:50 4E 41 00 06 00 02 00 00 00 01 00 02 1B C1 00 PNA............. ++ * 0010:00 67 75 78 F5 63 00 0A 57 69 6E 32 2E 30 2E 30 .gux.c..Win2.0.0 ++ * 0020:2E 35 6C 00 00 52 00 1C 72 61 66 69 6C 65 73 2F .5l..R..rafiles/ ++ * 0030:77 65 62 73 69 74 65 2F 32 30 72 65 6C 65 61 73 website/20releas ++ * 0040:65 2E 72 61 79 53 00 00 06 36 42 e.rayS...6B ++ * ++ * Port number 0x1BC1 is found at offset 0x0d. ++ * ++ * This is just a horrible switch statement. Variable ra tells ++ * us where we're going. ++ */ ++ ++ bptr = m->m_data; ++ while (bptr < m->m_data + m->m_len) { ++ uint16_t p; ++ static int ra = 0; ++ char ra_tbl[4]; ++ ++ ra_tbl[0] = 0x50; ++ ra_tbl[1] = 0x4e; ++ ra_tbl[2] = 0x41; ++ ra_tbl[3] = 0; ++ ++ switch (ra) { ++ case 0: ++ case 2: ++ case 3: ++ if (*bptr++ != ra_tbl[ra]) { ++ ra = 0; ++ continue; ++ } ++ break; ++ ++ case 1: ++ /* ++ * We may get 0x50 several times, ignore them ++ */ ++ if (*bptr == 0x50) { ++ ra = 1; ++ bptr++; ++ continue; ++ } else if (*bptr++ != ra_tbl[ra]) { ++ ra = 0; ++ continue; ++ } ++ break; ++ ++ case 4: ++ /* ++ * skip version number ++ */ ++ bptr++; ++ break; ++ ++ case 5: ++ if (bptr == m->m_data + m->m_len - 1) ++ return 1; /* We need two bytes */ ++ ++ /* ++ * The difference between versions 1.0 and ++ * 2.0 is here. For future versions of ++ * the player this may need to be modified. ++ */ ++ if (*(bptr + 1) == 0x02) ++ bptr += 8; ++ else ++ bptr += 4; ++ break; ++ ++ case 6: ++ /* This is the field containing the port ++ * number that RA-player is listening to. ++ */ ++ ++ if (bptr == m->m_data + m->m_len - 1) ++ return 1; /* We need two bytes */ ++ ++ lport = (((uint8_t *)bptr)[0] << 8) + ((uint8_t *)bptr)[1]; ++ if (lport < 6970) ++ lport += 256; /* don't know why */ ++ if (lport < 6970 || lport > 7170) ++ return 1; /* failed */ ++ ++ /* try to get udp port between 6970 - 7170 */ ++ for (p = 6970; p < 7071; p++) { ++ if (udp_listen(slirp, INADDR_ANY, htons(p), ++ so->so_laddr.s_addr, htons(lport), ++ SS_FACCEPTONCE)) { ++ break; ++ } ++ } ++ if (p == 7071) ++ p = 0; ++ *(uint8_t *)bptr++ = (p >> 8) & 0xff; ++ *(uint8_t *)bptr = p & 0xff; ++ ra = 0; ++ return 1; /* port redirected, we're done */ ++ break; ++ ++ default: ++ ra = 0; ++ } ++ ra++; ++ } ++ return 1; ++ ++ default: ++ /* Ooops, not emulated, won't call tcp_emu again */ ++ so->so_emu = 0; ++ return 1; ++ } ++} ++ ++/* ++ * Do misc. config of SLiRP while its running. ++ * Return 0 if this connections is to be closed, 1 otherwise, ++ * return 2 if this is a command-line connection ++ */ ++int tcp_ctl(struct socket *so) ++{ ++ Slirp *slirp = so->slirp; ++ struct sbuf *sb = &so->so_snd; ++ struct gfwd_list *ex_ptr; ++ ++ DEBUG_CALL("tcp_ctl"); ++ DEBUG_ARG("so = %p", so); ++ ++ /* TODO: IPv6 */ ++ if (so->so_faddr.s_addr != slirp->vhost_addr.s_addr) { ++ /* Check if it's pty_exec */ ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ex_ptr = ex_ptr->ex_next) { ++ if (ex_ptr->ex_fport == so->so_fport && ++ so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr) { ++ if (ex_ptr->write_cb) { ++ so->s = -1; ++ so->guestfwd = ex_ptr; ++ return 1; ++ } ++ DEBUG_MISC(" executing %s", ex_ptr->ex_exec); ++ if (ex_ptr->ex_unix) ++ return open_unix(so, ex_ptr->ex_unix); ++ else ++ return fork_exec(so, ex_ptr->ex_exec); ++ } ++ } ++ } ++ sb->sb_cc = slirp_fmt(sb->sb_wptr, sb->sb_datalen - (sb->sb_wptr - sb->sb_data), ++ "Error: No application configured.\r\n"); ++ sb->sb_wptr += sb->sb_cc; ++ return 0; ++} +diff --git a/slirp/src/tcp_timer.c b/slirp/src/tcp_timer.c +new file mode 100644 +index 0000000000..bc4db2d15e +--- /dev/null ++++ b/slirp/src/tcp_timer.c +@@ -0,0 +1,286 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_timer.c 8.1 (Berkeley) 6/10/93 ++ * tcp_timer.c,v 1.2 1994/08/02 07:49:10 davidg Exp ++ */ ++ ++#include "slirp.h" ++ ++static struct tcpcb *tcp_timers(register struct tcpcb *tp, int timer); ++ ++/* ++ * Fast timeout routine for processing delayed acks ++ */ ++void tcp_fasttimo(Slirp *slirp) ++{ ++ register struct socket *so; ++ register struct tcpcb *tp; ++ ++ DEBUG_CALL("tcp_fasttimo"); ++ ++ so = slirp->tcb.so_next; ++ if (so) ++ for (; so != &slirp->tcb; so = so->so_next) ++ if ((tp = (struct tcpcb *)so->so_tcpcb) && ++ (tp->t_flags & TF_DELACK)) { ++ tp->t_flags &= ~TF_DELACK; ++ tp->t_flags |= TF_ACKNOW; ++ tcp_output(tp); ++ } ++} ++ ++/* ++ * Tcp protocol timeout routine called every 500 ms. ++ * Updates the timers in all active tcb's and ++ * causes finite state machine actions if timers expire. ++ */ ++void tcp_slowtimo(Slirp *slirp) ++{ ++ register struct socket *ip, *ipnxt; ++ register struct tcpcb *tp; ++ register int i; ++ ++ DEBUG_CALL("tcp_slowtimo"); ++ ++ /* ++ * Search through tcb's and update active timers. ++ */ ++ ip = slirp->tcb.so_next; ++ if (ip == NULL) { ++ return; ++ } ++ for (; ip != &slirp->tcb; ip = ipnxt) { ++ ipnxt = ip->so_next; ++ tp = sototcpcb(ip); ++ if (tp == NULL) { ++ continue; ++ } ++ for (i = 0; i < TCPT_NTIMERS; i++) { ++ if (tp->t_timer[i] && --tp->t_timer[i] == 0) { ++ tcp_timers(tp, i); ++ if (ipnxt->so_prev != ip) ++ goto tpgone; ++ } ++ } ++ tp->t_idle++; ++ if (tp->t_rtt) ++ tp->t_rtt++; ++ tpgone:; ++ } ++ slirp->tcp_iss += TCP_ISSINCR / PR_SLOWHZ; /* increment iss */ ++ slirp->tcp_now++; /* for timestamps */ ++} ++ ++/* ++ * Cancel all timers for TCP tp. ++ */ ++void tcp_canceltimers(struct tcpcb *tp) ++{ ++ register int i; ++ ++ for (i = 0; i < TCPT_NTIMERS; i++) ++ tp->t_timer[i] = 0; ++} ++ ++const int tcp_backoff[TCP_MAXRXTSHIFT + 1] = { 1, 2, 4, 8, 16, 32, 64, ++ 64, 64, 64, 64, 64, 64 }; ++ ++/* ++ * TCP timer processing. ++ */ ++static struct tcpcb *tcp_timers(register struct tcpcb *tp, int timer) ++{ ++ register int rexmt; ++ ++ DEBUG_CALL("tcp_timers"); ++ ++ switch (timer) { ++ /* ++ * 2 MSL timeout in shutdown went off. If we're closed but ++ * still waiting for peer to close and connection has been idle ++ * too long, or if 2MSL time is up from TIME_WAIT, delete connection ++ * control block. Otherwise, check again in a bit. ++ */ ++ case TCPT_2MSL: ++ if (tp->t_state != TCPS_TIME_WAIT && tp->t_idle <= TCP_MAXIDLE) ++ tp->t_timer[TCPT_2MSL] = TCPTV_KEEPINTVL; ++ else ++ tp = tcp_close(tp); ++ break; ++ ++ /* ++ * Retransmission timer went off. Message has not ++ * been acked within retransmit interval. Back off ++ * to a longer retransmit interval and retransmit one segment. ++ */ ++ case TCPT_REXMT: ++ ++ /* ++ * XXXXX If a packet has timed out, then remove all the queued ++ * packets for that session. ++ */ ++ ++ if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { ++ /* ++ * This is a hack to suit our terminal server here at the uni of ++ * canberra since they have trouble with zeroes... It usually lets ++ * them through unharmed, but under some conditions, it'll eat the ++ * zeros. If we keep retransmitting it, it'll keep eating the ++ * zeroes, so we keep retransmitting, and eventually the connection ++ * dies... (this only happens on incoming data) ++ * ++ * So, if we were gonna drop the connection from too many ++ * retransmits, don't... instead halve the t_maxseg, which might ++ * break up the NULLs and let them through ++ * ++ * *sigh* ++ */ ++ ++ tp->t_maxseg >>= 1; ++ if (tp->t_maxseg < 32) { ++ /* ++ * We tried our best, now the connection must die! ++ */ ++ tp->t_rxtshift = TCP_MAXRXTSHIFT; ++ tp = tcp_drop(tp, tp->t_softerror); ++ /* tp->t_softerror : ETIMEDOUT); */ /* XXX */ ++ return (tp); /* XXX */ ++ } ++ ++ /* ++ * Set rxtshift to 6, which is still at the maximum ++ * backoff time ++ */ ++ tp->t_rxtshift = 6; ++ } ++ rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; ++ TCPT_RANGESET(tp->t_rxtcur, rexmt, (short)tp->t_rttmin, ++ TCPTV_REXMTMAX); /* XXX */ ++ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; ++ /* ++ * If losing, let the lower level know and try for ++ * a better route. Also, if we backed off this far, ++ * our srtt estimate is probably bogus. Clobber it ++ * so we'll take the next rtt measurement as our srtt; ++ * move the current srtt into rttvar to keep the current ++ * retransmit times until then. ++ */ ++ if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { ++ tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); ++ tp->t_srtt = 0; ++ } ++ tp->snd_nxt = tp->snd_una; ++ /* ++ * If timing a segment in this window, stop the timer. ++ */ ++ tp->t_rtt = 0; ++ /* ++ * Close the congestion window down to one segment ++ * (we'll open it by one segment for each ack we get). ++ * Since we probably have a window's worth of unacked ++ * data accumulated, this "slow start" keeps us from ++ * dumping all that data as back-to-back packets (which ++ * might overwhelm an intermediate gateway). ++ * ++ * There are two phases to the opening: Initially we ++ * open by one mss on each ack. This makes the window ++ * size increase exponentially with time. If the ++ * window is larger than the path can handle, this ++ * exponential growth results in dropped packet(s) ++ * almost immediately. To get more time between ++ * drops but still "push" the network to take advantage ++ * of improving conditions, we switch from exponential ++ * to linear window opening at some threshold size. ++ * For a threshold, we use half the current window ++ * size, truncated to a multiple of the mss. ++ * ++ * (the minimum cwnd that will give us exponential ++ * growth is 2 mss. We don't allow the threshold ++ * to go below this.) ++ */ ++ { ++ unsigned win = MIN(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; ++ if (win < 2) ++ win = 2; ++ tp->snd_cwnd = tp->t_maxseg; ++ tp->snd_ssthresh = win * tp->t_maxseg; ++ tp->t_dupacks = 0; ++ } ++ tcp_output(tp); ++ break; ++ ++ /* ++ * Persistence timer into zero window. ++ * Force a byte to be output, if possible. ++ */ ++ case TCPT_PERSIST: ++ tcp_setpersist(tp); ++ tp->t_force = 1; ++ tcp_output(tp); ++ tp->t_force = 0; ++ break; ++ ++ /* ++ * Keep-alive timer went off; send something ++ * or drop connection if idle for too long. ++ */ ++ case TCPT_KEEP: ++ if (tp->t_state < TCPS_ESTABLISHED) ++ goto dropit; ++ ++ if (slirp_do_keepalive && tp->t_state <= TCPS_CLOSE_WAIT) { ++ if (tp->t_idle >= TCPTV_KEEP_IDLE + TCP_MAXIDLE) ++ goto dropit; ++ /* ++ * Send a packet designed to force a response ++ * if the peer is up and reachable: ++ * either an ACK if the connection is still alive, ++ * or an RST if the peer has closed the connection ++ * due to timeout or reboot. ++ * Using sequence number tp->snd_una-1 ++ * causes the transmitted zero-length segment ++ * to lie outside the receive window; ++ * by the protocol spec, this requires the ++ * correspondent TCP to respond. ++ */ ++ tcp_respond(tp, &tp->t_template, (struct mbuf *)NULL, tp->rcv_nxt, ++ tp->snd_una - 1, 0, tp->t_socket->so_ffamily); ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEPINTVL; ++ } else ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_IDLE; ++ break; ++ ++ dropit: ++ tp = tcp_drop(tp, 0); ++ break; ++ } ++ ++ return (tp); ++} +diff --git a/slirp/src/tcp_timer.h b/slirp/src/tcp_timer.h +new file mode 100644 +index 0000000000..584a5594e4 +--- /dev/null ++++ b/slirp/src/tcp_timer.h +@@ -0,0 +1,130 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_timer.h 8.1 (Berkeley) 6/10/93 ++ * tcp_timer.h,v 1.4 1994/08/21 05:27:38 paul Exp ++ */ ++ ++#ifndef TCP_TIMER_H ++#define TCP_TIMER_H ++ ++/* ++ * Definitions of the TCP timers. These timers are counted ++ * down PR_SLOWHZ times a second. ++ */ ++#define TCPT_NTIMERS 4 ++ ++#define TCPT_REXMT 0 /* retransmit */ ++#define TCPT_PERSIST 1 /* retransmit persistence */ ++#define TCPT_KEEP 2 /* keep alive */ ++#define TCPT_2MSL 3 /* 2*msl quiet time timer */ ++ ++/* ++ * The TCPT_REXMT timer is used to force retransmissions. ++ * The TCP has the TCPT_REXMT timer set whenever segments ++ * have been sent for which ACKs are expected but not yet ++ * received. If an ACK is received which advances tp->snd_una, ++ * then the retransmit timer is cleared (if there are no more ++ * outstanding segments) or reset to the base value (if there ++ * are more ACKs expected). Whenever the retransmit timer goes off, ++ * we retransmit one unacknowledged segment, and do a backoff ++ * on the retransmit timer. ++ * ++ * The TCPT_PERSIST timer is used to keep window size information ++ * flowing even if the window goes shut. If all previous transmissions ++ * have been acknowledged (so that there are no retransmissions in progress), ++ * and the window is too small to bother sending anything, then we start ++ * the TCPT_PERSIST timer. When it expires, if the window is nonzero, ++ * we go to transmit state. Otherwise, at intervals send a single byte ++ * into the peer's window to force him to update our window information. ++ * We do this at most as often as TCPT_PERSMIN time intervals, ++ * but no more frequently than the current estimate of round-trip ++ * packet time. The TCPT_PERSIST timer is cleared whenever we receive ++ * a window update from the peer. ++ * ++ * The TCPT_KEEP timer is used to keep connections alive. If an ++ * connection is idle (no segments received) for TCPTV_KEEP_INIT amount of time, ++ * but not yet established, then we drop the connection. Once the connection ++ * is established, if the connection is idle for TCPTV_KEEP_IDLE time ++ * (and keepalives have been enabled on the socket), we begin to probe ++ * the connection. We force the peer to send us a segment by sending: ++ * ++ * This segment is (deliberately) outside the window, and should elicit ++ * an ack segment in response from the peer. If, despite the TCPT_KEEP ++ * initiated segments we cannot elicit a response from a peer in TCPT_MAXIDLE ++ * amount of time probing, then we drop the connection. ++ */ ++ ++/* ++ * Time constants. ++ */ ++#define TCPTV_MSL (5 * PR_SLOWHZ) /* max seg lifetime (hah!) */ ++ ++#define TCPTV_SRTTBASE \ ++ 0 /* base roundtrip time; \ ++ if 0, no idea yet */ ++#define TCPTV_SRTTDFLT (3 * PR_SLOWHZ) /* assumed RTT if no info */ ++ ++#define TCPTV_PERSMIN (5 * PR_SLOWHZ) /* retransmit persistence */ ++#define TCPTV_PERSMAX (60 * PR_SLOWHZ) /* maximum persist interval */ ++ ++#define TCPTV_KEEP_INIT (75 * PR_SLOWHZ) /* initial connect keep alive */ ++#define TCPTV_KEEP_IDLE (120 * 60 * PR_SLOWHZ) /* dflt time before probing */ ++#define TCPTV_KEEPINTVL (75 * PR_SLOWHZ) /* default probe interval */ ++#define TCPTV_KEEPCNT 8 /* max probes before drop */ ++ ++#define TCPTV_MIN (1 * PR_SLOWHZ) /* minimum allowable value */ ++#define TCPTV_REXMTMAX (12 * PR_SLOWHZ) /* max allowable REXMT value */ ++ ++#define TCP_LINGERTIME 120 /* linger at most 2 minutes */ ++ ++#define TCP_MAXRXTSHIFT 12 /* maximum retransmits */ ++ ++ ++/* ++ * Force a time value to be in a certain range. ++ */ ++#define TCPT_RANGESET(tv, value, tvmin, tvmax) \ ++ { \ ++ (tv) = (value); \ ++ if ((tv) < (tvmin)) \ ++ (tv) = (tvmin); \ ++ else if ((tv) > (tvmax)) \ ++ (tv) = (tvmax); \ ++ } ++ ++extern const int tcp_backoff[]; ++ ++struct tcpcb; ++ ++void tcp_fasttimo(Slirp *); ++void tcp_slowtimo(Slirp *); ++void tcp_canceltimers(struct tcpcb *); ++ ++#endif +diff --git a/slirp/src/tcp_var.h b/slirp/src/tcp_var.h +new file mode 100644 +index 0000000000..c8da8cbd16 +--- /dev/null ++++ b/slirp/src/tcp_var.h +@@ -0,0 +1,161 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993, 1994 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_var.h 8.3 (Berkeley) 4/10/94 ++ * tcp_var.h,v 1.3 1994/08/21 05:27:39 paul Exp ++ */ ++ ++#ifndef TCP_VAR_H ++#define TCP_VAR_H ++ ++#include "tcpip.h" ++#include "tcp_timer.h" ++ ++/* ++ * Tcp control block, one per tcp; fields: ++ */ ++struct tcpcb { ++ struct tcpiphdr *seg_next; /* sequencing queue */ ++ struct tcpiphdr *seg_prev; ++ short t_state; /* state of this connection */ ++ short t_timer[TCPT_NTIMERS]; /* tcp timers */ ++ short t_rxtshift; /* log(2) of rexmt exp. backoff */ ++ short t_rxtcur; /* current retransmit value */ ++ short t_dupacks; /* consecutive dup acks recd */ ++ uint16_t t_maxseg; /* maximum segment size */ ++ uint8_t t_force; /* 1 if forcing out a byte */ ++ uint16_t t_flags; ++#define TF_ACKNOW 0x0001 /* ack peer immediately */ ++#define TF_DELACK 0x0002 /* ack, but try to delay it */ ++#define TF_NODELAY 0x0004 /* don't delay packets to coalesce */ ++#define TF_NOOPT 0x0008 /* don't use tcp options */ ++#define TF_SENTFIN 0x0010 /* have sent FIN */ ++#define TF_REQ_SCALE 0x0020 /* have/will request window scaling */ ++#define TF_RCVD_SCALE 0x0040 /* other side has requested scaling */ ++#define TF_REQ_TSTMP 0x0080 /* have/will request timestamps */ ++#define TF_RCVD_TSTMP 0x0100 /* a timestamp was received in SYN */ ++#define TF_SACK_PERMIT 0x0200 /* other side said I could SACK */ ++ ++ struct tcpiphdr t_template; /* static skeletal packet for xmit */ ++ ++ struct socket *t_socket; /* back pointer to socket */ ++ /* ++ * The following fields are used as in the protocol specification. ++ * See RFC783, Dec. 1981, page 21. ++ */ ++ /* send sequence variables */ ++ tcp_seq snd_una; /* send unacknowledged */ ++ tcp_seq snd_nxt; /* send next */ ++ tcp_seq snd_up; /* send urgent pointer */ ++ tcp_seq snd_wl1; /* window update seg seq number */ ++ tcp_seq snd_wl2; /* window update seg ack number */ ++ tcp_seq iss; /* initial send sequence number */ ++ uint32_t snd_wnd; /* send window */ ++ /* receive sequence variables */ ++ uint32_t rcv_wnd; /* receive window */ ++ tcp_seq rcv_nxt; /* receive next */ ++ tcp_seq rcv_up; /* receive urgent pointer */ ++ tcp_seq irs; /* initial receive sequence number */ ++ /* ++ * Additional variables for this implementation. ++ */ ++ /* receive variables */ ++ tcp_seq rcv_adv; /* advertised window */ ++ /* retransmit variables */ ++ tcp_seq snd_max; /* highest sequence number sent; ++ * used to recognize retransmits ++ */ ++ /* congestion control (for slow start, source quench, retransmit after loss) ++ */ ++ uint32_t snd_cwnd; /* congestion-controlled window */ ++ uint32_t snd_ssthresh; /* snd_cwnd size threshold for ++ * for slow start exponential to ++ * linear switch ++ */ ++ /* ++ * transmit timing stuff. See below for scale of srtt and rttvar. ++ * "Variance" is actually smoothed difference. ++ */ ++ short t_idle; /* inactivity time */ ++ short t_rtt; /* round trip time */ ++ tcp_seq t_rtseq; /* sequence number being timed */ ++ short t_srtt; /* smoothed round-trip time */ ++ short t_rttvar; /* variance in round-trip time */ ++ uint16_t t_rttmin; /* minimum rtt allowed */ ++ uint32_t max_sndwnd; /* largest window peer has offered */ ++ ++ /* out-of-band data */ ++ uint8_t t_oobflags; /* have some */ ++ uint8_t t_iobc; /* input character */ ++#define TCPOOB_HAVEDATA 0x01 ++#define TCPOOB_HADDATA 0x02 ++ short t_softerror; /* possible error not yet reported */ ++ ++ /* RFC 1323 variables */ ++ uint8_t snd_scale; /* window scaling for send window */ ++ uint8_t rcv_scale; /* window scaling for recv window */ ++ uint8_t request_r_scale; /* pending window scaling */ ++ uint8_t requested_s_scale; ++ uint32_t ts_recent; /* timestamp echo data */ ++ uint32_t ts_recent_age; /* when last updated */ ++ tcp_seq last_ack_sent; ++}; ++ ++#define sototcpcb(so) ((so)->so_tcpcb) ++ ++/* ++ * The smoothed round-trip time and estimated variance ++ * are stored as fixed point numbers scaled by the values below. ++ * For convenience, these scales are also used in smoothing the average ++ * (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed). ++ * With these scales, srtt has 3 bits to the right of the binary point, ++ * and thus an "ALPHA" of 0.875. rttvar has 2 bits to the right of the ++ * binary point, and is smoothed with an ALPHA of 0.75. ++ */ ++#define TCP_RTT_SCALE 8 /* multiplier for srtt; 3 bits frac. */ ++#define TCP_RTT_SHIFT 3 /* shift for srtt; 3 bits frac. */ ++#define TCP_RTTVAR_SCALE 4 /* multiplier for rttvar; 2 bits */ ++#define TCP_RTTVAR_SHIFT 2 /* multiplier for rttvar; 2 bits */ ++ ++/* ++ * The initial retransmission should happen at rtt + 4 * rttvar. ++ * Because of the way we do the smoothing, srtt and rttvar ++ * will each average +1/2 tick of bias. When we compute ++ * the retransmit timer, we want 1/2 tick of rounding and ++ * 1 extra tick because of +-1/2 tick uncertainty in the ++ * firing of the timer. The bias will give us exactly the ++ * 1.5 tick we need. But, because the bias is ++ * statistical, we have to test that we don't drop below ++ * the minimum feasible timer (which is 2 ticks). ++ * This macro assumes that the value of TCP_RTTVAR_SCALE ++ * is the same as the multiplier for rttvar. ++ */ ++#define TCP_REXMTVAL(tp) (((tp)->t_srtt >> TCP_RTT_SHIFT) + (tp)->t_rttvar) ++ ++#endif +diff --git a/slirp/src/tcpip.h b/slirp/src/tcpip.h +new file mode 100644 +index 0000000000..a0fb2282f2 +--- /dev/null ++++ b/slirp/src/tcpip.h +@@ -0,0 +1,104 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcpip.h 8.1 (Berkeley) 6/10/93 ++ * tcpip.h,v 1.3 1994/08/21 05:27:40 paul Exp ++ */ ++ ++#ifndef TCPIP_H ++#define TCPIP_H ++ ++/* ++ * Tcp+ip header, after ip options removed. ++ */ ++struct tcpiphdr { ++ struct mbuf_ptr ih_mbuf; /* backpointer to mbuf */ ++ union { ++ struct { ++ struct in_addr ih_src; /* source internet address */ ++ struct in_addr ih_dst; /* destination internet address */ ++ uint8_t ih_x1; /* (unused) */ ++ uint8_t ih_pr; /* protocol */ ++ } ti_i4; ++ struct { ++ struct in6_addr ih_src; ++ struct in6_addr ih_dst; ++ uint8_t ih_x1; ++ uint8_t ih_nh; ++ } ti_i6; ++ } ti; ++ uint16_t ti_x0; ++ uint16_t ti_len; /* protocol length */ ++ struct tcphdr ti_t; /* tcp header */ ++}; ++#define ti_mbuf ih_mbuf.mptr ++#define ti_pr ti.ti_i4.ih_pr ++#define ti_src ti.ti_i4.ih_src ++#define ti_dst ti.ti_i4.ih_dst ++#define ti_src6 ti.ti_i6.ih_src ++#define ti_dst6 ti.ti_i6.ih_dst ++#define ti_nh6 ti.ti_i6.ih_nh ++#define ti_sport ti_t.th_sport ++#define ti_dport ti_t.th_dport ++#define ti_seq ti_t.th_seq ++#define ti_ack ti_t.th_ack ++#define ti_x2 ti_t.th_x2 ++#define ti_off ti_t.th_off ++#define ti_flags ti_t.th_flags ++#define ti_win ti_t.th_win ++#define ti_sum ti_t.th_sum ++#define ti_urp ti_t.th_urp ++ ++#define tcpiphdr2qlink(T) \ ++ ((struct qlink *)(((char *)(T)) - sizeof(struct qlink))) ++#define qlink2tcpiphdr(Q) \ ++ ((struct tcpiphdr *)(((char *)(Q)) + sizeof(struct qlink))) ++#define tcpiphdr_next(T) qlink2tcpiphdr(tcpiphdr2qlink(T)->next) ++#define tcpiphdr_prev(T) qlink2tcpiphdr(tcpiphdr2qlink(T)->prev) ++#define tcpfrag_list_first(T) qlink2tcpiphdr((T)->seg_next) ++#define tcpfrag_list_end(F, T) (tcpiphdr2qlink(F) == (struct qlink *)(T)) ++#define tcpfrag_list_empty(T) ((T)->seg_next == (struct tcpiphdr *)(T)) ++ ++/* This is the difference between the size of a tcpiphdr structure, and the ++ * size of actual ip+tcp headers, rounded up since we need to align data. */ ++#define TCPIPHDR_DELTA \ ++ (MAX(0, ((int) sizeof(struct tcpiphdr) - (int) sizeof(struct ip) - \ ++ (int) sizeof(struct tcphdr) + 3) & \ ++ ~3)) ++ ++/* ++ * Just a clean way to get to the first byte ++ * of the packet ++ */ ++struct tcpiphdr_2 { ++ struct tcpiphdr dummy; ++ char first_char; ++}; ++ ++#endif +diff --git a/slirp/src/tftp.c b/slirp/src/tftp.c +new file mode 100644 +index 0000000000..a19c889d34 +--- /dev/null ++++ b/slirp/src/tftp.c +@@ -0,0 +1,470 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * tftp.c - a simple, read-only tftp server for qemu ++ * ++ * Copyright (c) 2004 Magnus Damm ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. ++ */ ++ ++#include "slirp.h" ++ ++#include ++#include ++#include ++ ++static inline int tftp_session_in_use(struct tftp_session *spt) ++{ ++ return (spt->slirp != NULL); ++} ++ ++static inline void tftp_session_update(struct tftp_session *spt) ++{ ++ spt->timestamp = curtime; ++} ++ ++static void tftp_session_terminate(struct tftp_session *spt) ++{ ++ if (spt->fd >= 0) { ++ close(spt->fd); ++ spt->fd = -1; ++ } ++ g_free(spt->filename); ++ spt->slirp = NULL; ++} ++ ++static int tftp_session_allocate(Slirp *slirp, struct sockaddr_storage *srcsas, ++ struct tftphdr *hdr) ++{ ++ struct tftp_session *spt; ++ int k; ++ ++ for (k = 0; k < TFTP_SESSIONS_MAX; k++) { ++ spt = &slirp->tftp_sessions[k]; ++ ++ if (!tftp_session_in_use(spt)) ++ goto found; ++ ++ /* sessions time out after 5 inactive seconds */ ++ if ((int)(curtime - spt->timestamp) > 5000) { ++ tftp_session_terminate(spt); ++ goto found; ++ } ++ } ++ ++ return -1; ++ ++found: ++ memset(spt, 0, sizeof(*spt)); ++ memcpy(&spt->client_addr, srcsas, sockaddr_size(srcsas)); ++ spt->fd = -1; ++ spt->block_size = 512; ++ spt->client_port = hdr->udp.uh_sport; ++ spt->slirp = slirp; ++ ++ tftp_session_update(spt); ++ ++ return k; ++} ++ ++static int tftp_session_find(Slirp *slirp, struct sockaddr_storage *srcsas, ++ struct tftphdr *hdr) ++{ ++ struct tftp_session *spt; ++ int k; ++ ++ for (k = 0; k < TFTP_SESSIONS_MAX; k++) { ++ spt = &slirp->tftp_sessions[k]; ++ ++ if (tftp_session_in_use(spt)) { ++ if (sockaddr_equal(&spt->client_addr, srcsas)) { ++ if (spt->client_port == hdr->udp.uh_sport) { ++ return k; ++ } ++ } ++ } ++ } ++ ++ return -1; ++} ++ ++static int tftp_read_data(struct tftp_session *spt, uint32_t block_nr, ++ uint8_t *buf, int len) ++{ ++ int bytes_read = 0; ++ ++ if (spt->fd < 0) { ++ spt->fd = open(spt->filename, O_RDONLY | O_BINARY); ++ } ++ ++ if (spt->fd < 0) { ++ return -1; ++ } ++ ++ if (len) { ++ if (lseek(spt->fd, block_nr * spt->block_size, SEEK_SET) == (off_t)-1) { ++ return -1; ++ } ++ ++ bytes_read = read(spt->fd, buf, len); ++ } ++ ++ return bytes_read; ++} ++ ++static struct tftp_t *tftp_prep_mbuf_data(struct tftp_session *spt, ++ struct mbuf *m) ++{ ++ struct tftp_t *tp; ++ ++ memset(m->m_data, 0, m->m_size); ++ ++ m->m_data += IF_MAXLINKHDR; ++ if (spt->client_addr.ss_family == AF_INET6) { ++ m->m_data += sizeof(struct ip6); ++ } else { ++ m->m_data += sizeof(struct ip); ++ } ++ tp = (void *)m->m_data; ++ m->m_data += sizeof(struct udphdr); ++ ++ return tp; ++} ++ ++static void tftp_udp_output(struct tftp_session *spt, struct mbuf *m, ++ struct tftphdr *hdr) ++{ ++ if (spt->client_addr.ss_family == AF_INET6) { ++ struct sockaddr_in6 sa6, da6; ++ ++ sa6.sin6_addr = spt->slirp->vhost_addr6; ++ sa6.sin6_port = hdr->udp.uh_dport; ++ da6.sin6_addr = ((struct sockaddr_in6 *)&spt->client_addr)->sin6_addr; ++ da6.sin6_port = spt->client_port; ++ ++ udp6_output(NULL, m, &sa6, &da6); ++ } else { ++ struct sockaddr_in sa4, da4; ++ ++ sa4.sin_addr = spt->slirp->vhost_addr; ++ sa4.sin_port = hdr->udp.uh_dport; ++ da4.sin_addr = ((struct sockaddr_in *)&spt->client_addr)->sin_addr; ++ da4.sin_port = spt->client_port; ++ ++ udp_output(NULL, m, &sa4, &da4, IPTOS_LOWDELAY); ++ } ++} ++ ++static int tftp_send_oack(struct tftp_session *spt, const char *keys[], ++ uint32_t values[], int nb, struct tftp_t *recv_tp) ++{ ++ struct mbuf *m; ++ struct tftp_t *tp; ++ int i, n = 0; ++ ++ m = m_get(spt->slirp); ++ ++ if (!m) ++ return -1; ++ ++ tp = tftp_prep_mbuf_data(spt, m); ++ ++ tp->hdr.tp_op = htons(TFTP_OACK); ++ for (i = 0; i < nb; i++) { ++ n += slirp_fmt0(tp->x.tp_buf + n, sizeof(tp->x.tp_buf) - n, "%s", keys[i]); ++ n += slirp_fmt0(tp->x.tp_buf + n, sizeof(tp->x.tp_buf) - n, "%u", values[i]); ++ } ++ ++ m->m_len = G_SIZEOF_MEMBER(struct tftp_t, hdr.tp_op) + n; ++ tftp_udp_output(spt, m, &recv_tp->hdr); ++ ++ return 0; ++} ++ ++static void tftp_send_error(struct tftp_session *spt, uint16_t errorcode, ++ const char *msg, struct tftp_t *recv_tp) ++{ ++ struct mbuf *m; ++ struct tftp_t *tp; ++ ++ DEBUG_TFTP("tftp error msg: %s", msg); ++ ++ m = m_get(spt->slirp); ++ ++ if (!m) { ++ goto out; ++ } ++ ++ tp = tftp_prep_mbuf_data(spt, m); ++ ++ tp->hdr.tp_op = htons(TFTP_ERROR); ++ tp->x.tp_error.tp_error_code = htons(errorcode); ++ slirp_pstrcpy((char *)tp->x.tp_error.tp_msg, sizeof(tp->x.tp_error.tp_msg), ++ msg); ++ ++ m->m_len = sizeof(struct tftp_t) - (TFTP_BLOCKSIZE_MAX + 2) + 3 + ++ strlen(msg) - sizeof(struct udphdr); ++ tftp_udp_output(spt, m, &recv_tp->hdr); ++ ++out: ++ tftp_session_terminate(spt); ++} ++ ++static void tftp_send_next_block(struct tftp_session *spt, ++ struct tftphdr *hdr) ++{ ++ struct mbuf *m; ++ struct tftp_t *tp; ++ int nobytes; ++ ++ m = m_get(spt->slirp); ++ ++ if (!m) { ++ return; ++ } ++ ++ tp = tftp_prep_mbuf_data(spt, m); ++ ++ tp->hdr.tp_op = htons(TFTP_DATA); ++ tp->x.tp_data.tp_block_nr = htons((spt->block_nr + 1) & 0xffff); ++ ++ nobytes = tftp_read_data(spt, spt->block_nr, tp->x.tp_data.tp_buf, ++ spt->block_size); ++ ++ if (nobytes < 0) { ++ m_free(m); ++ ++ /* send "file not found" error back */ ++ ++ tftp_send_error(spt, 1, "File not found", tp); ++ ++ return; ++ } ++ ++ m->m_len = sizeof(struct tftp_t) - (TFTP_BLOCKSIZE_MAX - nobytes) - ++ sizeof(struct udphdr); ++ tftp_udp_output(spt, m, hdr); ++ ++ if (nobytes == spt->block_size) { ++ tftp_session_update(spt); ++ } else { ++ tftp_session_terminate(spt); ++ } ++ ++ spt->block_nr++; ++} ++ ++static void tftp_handle_rrq(Slirp *slirp, struct sockaddr_storage *srcsas, ++ struct tftp_t *tp, int pktlen) ++{ ++ struct tftp_session *spt; ++ int s, k; ++ size_t prefix_len; ++ char *req_fname; ++ const char *option_name[2]; ++ uint32_t option_value[2]; ++ int nb_options = 0; ++ ++ /* check if a session already exists and if so terminate it */ ++ s = tftp_session_find(slirp, srcsas, &tp->hdr); ++ if (s >= 0) { ++ tftp_session_terminate(&slirp->tftp_sessions[s]); ++ } ++ ++ s = tftp_session_allocate(slirp, srcsas, &tp->hdr); ++ ++ if (s < 0) { ++ return; ++ } ++ ++ spt = &slirp->tftp_sessions[s]; ++ ++ /* unspecified prefix means service disabled */ ++ if (!slirp->tftp_prefix) { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ ++ /* skip header fields */ ++ k = 0; ++ pktlen -= offsetof(struct tftp_t, x.tp_buf); ++ ++ /* prepend tftp_prefix */ ++ prefix_len = strlen(slirp->tftp_prefix); ++ spt->filename = g_malloc(prefix_len + TFTP_FILENAME_MAX + 2); ++ memcpy(spt->filename, slirp->tftp_prefix, prefix_len); ++ spt->filename[prefix_len] = '/'; ++ ++ /* get name */ ++ req_fname = spt->filename + prefix_len + 1; ++ ++ while (1) { ++ if (k >= TFTP_FILENAME_MAX || k >= pktlen) { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ req_fname[k] = tp->x.tp_buf[k]; ++ if (req_fname[k++] == '\0') { ++ break; ++ } ++ } ++ ++ DEBUG_TFTP("tftp rrq file: %s", req_fname); ++ ++ /* check mode */ ++ if ((pktlen - k) < 6) { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ ++ if (strcasecmp(&tp->x.tp_buf[k], "octet") != 0) { ++ tftp_send_error(spt, 4, "Unsupported transfer mode", tp); ++ return; ++ } ++ ++ k += 6; /* skipping octet */ ++ ++ /* do sanity checks on the filename */ ++ if ( ++#ifdef G_OS_WIN32 ++ strstr(req_fname, "..\\") || ++ req_fname[strlen(req_fname) - 1] == '\\' || ++#endif ++ strstr(req_fname, "../") || ++ req_fname[strlen(req_fname) - 1] == '/') { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ ++ /* check if the file exists */ ++ if (tftp_read_data(spt, 0, NULL, 0) < 0) { ++ tftp_send_error(spt, 1, "File not found", tp); ++ return; ++ } ++ ++ if (tp->x.tp_buf[pktlen - 1] != 0) { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ ++ while (k < pktlen && nb_options < G_N_ELEMENTS(option_name)) { ++ const char *key, *value; ++ ++ key = &tp->x.tp_buf[k]; ++ k += strlen(key) + 1; ++ ++ if (k >= pktlen) { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ ++ value = &tp->x.tp_buf[k]; ++ k += strlen(value) + 1; ++ ++ if (strcasecmp(key, "tsize") == 0) { ++ int tsize = atoi(value); ++ struct stat stat_p; ++ ++ if (tsize == 0) { ++ if (stat(spt->filename, &stat_p) == 0) ++ tsize = stat_p.st_size; ++ else { ++ tftp_send_error(spt, 1, "File not found", tp); ++ return; ++ } ++ } ++ ++ option_name[nb_options] = "tsize"; ++ option_value[nb_options] = tsize; ++ nb_options++; ++ } else if (strcasecmp(key, "blksize") == 0) { ++ int blksize = atoi(value); ++ ++ /* Accept blksize up to our maximum size */ ++ if (blksize > 0) { ++ spt->block_size = MIN(blksize, TFTP_BLOCKSIZE_MAX); ++ option_name[nb_options] = "blksize"; ++ option_value[nb_options] = spt->block_size; ++ nb_options++; ++ } ++ } ++ } ++ ++ if (nb_options > 0) { ++ assert(nb_options <= G_N_ELEMENTS(option_name)); ++ tftp_send_oack(spt, option_name, option_value, nb_options, tp); ++ return; ++ } ++ ++ spt->block_nr = 0; ++ tftp_send_next_block(spt, &tp->hdr); ++} ++ ++static void tftp_handle_ack(Slirp *slirp, struct sockaddr_storage *srcsas, ++ struct tftphdr *hdr) ++{ ++ int s; ++ ++ s = tftp_session_find(slirp, srcsas, hdr); ++ ++ if (s < 0) { ++ return; ++ } ++ ++ tftp_send_next_block(&slirp->tftp_sessions[s], hdr); ++} ++ ++static void tftp_handle_error(Slirp *slirp, struct sockaddr_storage *srcsas, ++ struct tftphdr *hdr) ++{ ++ int s; ++ ++ s = tftp_session_find(slirp, srcsas, hdr); ++ ++ if (s < 0) { ++ return; ++ } ++ ++ tftp_session_terminate(&slirp->tftp_sessions[s]); ++} ++ ++void tftp_input(struct sockaddr_storage *srcsas, struct mbuf *m) ++{ ++ struct tftphdr *hdr = mtod_check(m, sizeof(struct tftphdr)); ++ ++ if (hdr == NULL) { ++ return; ++ } ++ ++ switch (ntohs(hdr->tp_op)) { ++ case TFTP_RRQ: ++ tftp_handle_rrq(m->slirp, srcsas, ++ mtod(m, struct tftp_t *), ++ m->m_len); ++ break; ++ ++ case TFTP_ACK: ++ tftp_handle_ack(m->slirp, srcsas, hdr); ++ break; ++ ++ case TFTP_ERROR: ++ tftp_handle_error(m->slirp, srcsas, hdr); ++ break; ++ } ++} +diff --git a/slirp/src/tftp.h b/slirp/src/tftp.h +new file mode 100644 +index 0000000000..cafab03f2f +--- /dev/null ++++ b/slirp/src/tftp.h +@@ -0,0 +1,58 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* tftp defines */ ++ ++#ifndef SLIRP_TFTP_H ++#define SLIRP_TFTP_H ++ ++#include "util.h" ++ ++#define TFTP_SESSIONS_MAX 20 ++ ++#define TFTP_SERVER 69 ++ ++#define TFTP_RRQ 1 ++#define TFTP_WRQ 2 ++#define TFTP_DATA 3 ++#define TFTP_ACK 4 ++#define TFTP_ERROR 5 ++#define TFTP_OACK 6 ++ ++#define TFTP_FILENAME_MAX 512 ++#define TFTP_BLOCKSIZE_MAX 1428 ++ ++struct tftphdr { ++ struct udphdr udp; ++ uint16_t tp_op; ++} SLIRP_PACKED; ++ ++struct tftp_t { ++ struct tftphdr hdr; ++ union { ++ struct { ++ uint16_t tp_block_nr; ++ uint8_t tp_buf[TFTP_BLOCKSIZE_MAX]; ++ } tp_data; ++ struct { ++ uint16_t tp_error_code; ++ uint8_t tp_msg[TFTP_BLOCKSIZE_MAX]; ++ } tp_error; ++ char tp_buf[TFTP_BLOCKSIZE_MAX + 2]; ++ } x; ++} SLIRP_PACKED; ++ ++struct tftp_session { ++ Slirp *slirp; ++ char *filename; ++ int fd; ++ uint16_t block_size; ++ ++ struct sockaddr_storage client_addr; ++ uint16_t client_port; ++ uint32_t block_nr; ++ ++ int timestamp; ++}; ++ ++void tftp_input(struct sockaddr_storage *srcsas, struct mbuf *m); ++ ++#endif +diff --git a/slirp/src/udp.c b/slirp/src/udp.c +new file mode 100644 +index 0000000000..06b7b7d032 +--- /dev/null ++++ b/slirp/src/udp.c +@@ -0,0 +1,425 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)udp_usrreq.c 8.4 (Berkeley) 1/21/94 ++ * udp_usrreq.c,v 1.4 1994/10/02 17:48:45 phk Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP ++ * Copyright (c) 1995 Danny Gasparovski. ++ * ++ * Please read the file COPYRIGHT for the ++ * terms and conditions of the copyright. ++ */ ++ ++#include "slirp.h" ++#include "ip_icmp.h" ++ ++static uint8_t udp_tos(struct socket *so); ++ ++void udp_init(Slirp *slirp) ++{ ++ slirp->udb.so_next = slirp->udb.so_prev = &slirp->udb; ++ slirp->udp_last_so = &slirp->udb; ++} ++ ++void udp_cleanup(Slirp *slirp) ++{ ++ struct socket *so, *so_next; ++ ++ for (so = slirp->udb.so_next; so != &slirp->udb; so = so_next) { ++ so_next = so->so_next; ++ udp_detach(slirp->udb.so_next); ++ } ++} ++ ++/* m->m_data points at ip packet header ++ * m->m_len length ip packet ++ * ip->ip_len length data (IPDU) ++ */ ++void udp_input(register struct mbuf *m, int iphlen) ++{ ++ Slirp *slirp = m->slirp; ++ M_DUP_DEBUG(slirp, m, 0, 0); ++ ++ register struct ip *ip; ++ register struct udphdr *uh; ++ int len; ++ struct ip save_ip; ++ struct socket *so; ++ struct sockaddr_storage lhost; ++ struct sockaddr_in *lhost4; ++ int ttl; ++ ++ DEBUG_CALL("udp_input"); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("iphlen = %d", iphlen); ++ ++ /* ++ * Strip IP options, if any; should skip this, ++ * make available to user, and use on returned packets, ++ * but we don't yet have a way to check the checksum ++ * with options still present. ++ */ ++ if (iphlen > sizeof(struct ip)) { ++ ip_stripoptions(m, (struct mbuf *)0); ++ iphlen = sizeof(struct ip); ++ } ++ ++ /* ++ * Get IP and UDP header together in first mbuf. ++ */ ++ ip = mtod_check(m, iphlen + sizeof(struct udphdr)); ++ if (ip == NULL) { ++ goto bad; ++ } ++ uh = (struct udphdr *)((char *)ip + iphlen); ++ ++ /* ++ * Make mbuf data length reflect UDP length. ++ * If not enough data to reflect UDP length, drop. ++ */ ++ len = ntohs((uint16_t)uh->uh_ulen); ++ ++ if (ip->ip_len != len) { ++ if (len > ip->ip_len) { ++ goto bad; ++ } ++ m_adj(m, len - ip->ip_len); ++ ip->ip_len = len; ++ } ++ ++ /* ++ * Save a copy of the IP header in case we want restore it ++ * for sending an ICMP error message in response. ++ */ ++ save_ip = *ip; ++ save_ip.ip_len += iphlen; /* tcp_input subtracts this */ ++ ++ /* ++ * Checksum extended UDP header and data. ++ */ ++ if (uh->uh_sum) { ++ memset(&((struct ipovly *)ip)->ih_mbuf, 0, sizeof(struct mbuf_ptr)); ++ ((struct ipovly *)ip)->ih_x1 = 0; ++ ((struct ipovly *)ip)->ih_len = uh->uh_ulen; ++ if (cksum(m, len + sizeof(struct ip))) { ++ goto bad; ++ } ++ } ++ ++ lhost.ss_family = AF_INET; ++ lhost4 = (struct sockaddr_in *)&lhost; ++ lhost4->sin_addr = ip->ip_src; ++ lhost4->sin_port = uh->uh_sport; ++ ++ /* ++ * handle DHCP/BOOTP ++ */ ++ if (ntohs(uh->uh_dport) == BOOTP_SERVER && ++ (ip->ip_dst.s_addr == slirp->vhost_addr.s_addr || ++ ip->ip_dst.s_addr == 0xffffffff)) { ++ bootp_input(m); ++ goto bad; ++ } ++ ++ /* ++ * handle TFTP ++ */ ++ if (ntohs(uh->uh_dport) == TFTP_SERVER && ++ ip->ip_dst.s_addr == slirp->vhost_addr.s_addr) { ++ m->m_data += iphlen; ++ m->m_len -= iphlen; ++ tftp_input(&lhost, m); ++ m->m_data -= iphlen; ++ m->m_len += iphlen; ++ goto bad; ++ } ++ ++ if (slirp->restricted) { ++ goto bad; ++ } ++ ++ /* ++ * Locate pcb for datagram. ++ */ ++ so = solookup(&slirp->udp_last_so, &slirp->udb, &lhost, NULL); ++ ++ if (so == NULL) { ++ /* ++ * If there's no socket for this packet, ++ * create one ++ */ ++ so = socreate(slirp); ++ if (udp_attach(so, AF_INET) == -1) { ++ DEBUG_MISC(" udp_attach errno = %d-%s", errno, strerror(errno)); ++ sofree(so); ++ goto bad; ++ } ++ ++ /* ++ * Setup fields ++ */ ++ so->so_lfamily = AF_INET; ++ so->so_laddr = ip->ip_src; ++ so->so_lport = uh->uh_sport; ++ ++ if ((so->so_iptos = udp_tos(so)) == 0) ++ so->so_iptos = ip->ip_tos; ++ ++ /* ++ * XXXXX Here, check if it's in udpexec_list, ++ * and if it is, do the fork_exec() etc. ++ */ ++ } ++ ++ so->so_ffamily = AF_INET; ++ so->so_faddr = ip->ip_dst; /* XXX */ ++ so->so_fport = uh->uh_dport; /* XXX */ ++ ++ iphlen += sizeof(struct udphdr); ++ m->m_len -= iphlen; ++ m->m_data += iphlen; ++ ++ /* ++ * Check for TTL ++ */ ++ ttl = save_ip.ip_ttl-1; ++ if (ttl <= 0) { ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ *ip = save_ip; ++ DEBUG_MISC("udp ttl exceeded"); ++ icmp_send_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, NULL); ++ goto bad; ++ } ++ setsockopt(so->s, IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)); ++ ++ /* ++ * Now we sendto() the packet. ++ */ ++ if (sosendto(so, m) == -1) { ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ *ip = save_ip; ++ DEBUG_MISC("udp tx errno = %d-%s", errno, strerror(errno)); ++ icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, strerror(errno)); ++ goto bad; ++ } ++ ++ m_free(so->so_m); /* used for ICMP if error on sorecvfrom */ ++ ++ /* restore the orig mbuf packet */ ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ *ip = save_ip; ++ so->so_m = m; /* ICMP backup */ ++ ++ return; ++bad: ++ m_free(m); ++} ++ ++int udp_output(struct socket *so, struct mbuf *m, struct sockaddr_in *saddr, ++ struct sockaddr_in *daddr, int iptos) ++{ ++ Slirp *slirp = m->slirp; ++ M_DUP_DEBUG(slirp, m, 0, sizeof(struct udpiphdr)); ++ ++ register struct udpiphdr *ui; ++ int error = 0; ++ ++ DEBUG_CALL("udp_output"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("saddr = %s", inet_ntoa(saddr->sin_addr)); ++ DEBUG_ARG("daddr = %s", inet_ntoa(daddr->sin_addr)); ++ ++ /* ++ * Adjust for header ++ */ ++ m->m_data -= sizeof(struct udpiphdr); ++ m->m_len += sizeof(struct udpiphdr); ++ ++ /* ++ * Fill in mbuf with extended UDP header ++ * and addresses and length put into network format. ++ */ ++ ui = mtod(m, struct udpiphdr *); ++ memset(&ui->ui_i.ih_mbuf, 0, sizeof(struct mbuf_ptr)); ++ ui->ui_x1 = 0; ++ ui->ui_pr = IPPROTO_UDP; ++ ui->ui_len = htons(m->m_len - sizeof(struct ip)); ++ /* XXXXX Check for from-one-location sockets, or from-any-location sockets ++ */ ++ ui->ui_src = saddr->sin_addr; ++ ui->ui_dst = daddr->sin_addr; ++ ui->ui_sport = saddr->sin_port; ++ ui->ui_dport = daddr->sin_port; ++ ui->ui_ulen = ui->ui_len; ++ ++ /* ++ * Stuff checksum and output datagram. ++ */ ++ ui->ui_sum = 0; ++ if ((ui->ui_sum = cksum(m, m->m_len)) == 0) ++ ui->ui_sum = 0xffff; ++ ((struct ip *)ui)->ip_len = m->m_len; ++ ++ ((struct ip *)ui)->ip_ttl = IPDEFTTL; ++ ((struct ip *)ui)->ip_tos = iptos; ++ ++ error = ip_output(so, m); ++ ++ return (error); ++} ++ ++int udp_attach(struct socket *so, unsigned short af) ++{ ++ so->s = slirp_socket(af, SOCK_DGRAM, 0); ++ if (so->s != -1) { ++ if (slirp_bind_outbound(so, af) != 0) { ++ // bind failed - close socket ++ closesocket(so->s); ++ so->s = -1; ++ return -1; ++ } ++ ++#ifdef __linux__ ++ { ++ int opt = 1; ++ switch (af) { ++ case AF_INET: ++ setsockopt(so->s, IPPROTO_IP, IP_RECVERR, &opt, sizeof(opt)); ++ break; ++ case AF_INET6: ++ setsockopt(so->s, IPPROTO_IPV6, IPV6_RECVERR, &opt, sizeof(opt)); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ } ++#endif ++ ++ so->so_expire = curtime + SO_EXPIRE; ++ insque(so, &so->slirp->udb); ++ } ++ so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); ++ return (so->s); ++} ++ ++void udp_detach(struct socket *so) ++{ ++ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); ++ closesocket(so->s); ++ sofree(so); ++} ++ ++static const struct tos_t udptos[] = { { 0, 53, IPTOS_LOWDELAY, 0 }, /* DNS */ ++ { 0, 0, 0, 0 } }; ++ ++static uint8_t udp_tos(struct socket *so) ++{ ++ int i = 0; ++ ++ while (udptos[i].tos) { ++ if ((udptos[i].fport && ntohs(so->so_fport) == udptos[i].fport) || ++ (udptos[i].lport && ntohs(so->so_lport) == udptos[i].lport)) { ++ if (so->slirp->enable_emu) ++ so->so_emu = udptos[i].emu; ++ return udptos[i].tos; ++ } ++ i++; ++ } ++ ++ return 0; ++} ++ ++struct socket *udpx_listen(Slirp *slirp, ++ const struct sockaddr *haddr, socklen_t haddrlen, ++ const struct sockaddr *laddr, socklen_t laddrlen, ++ int flags) ++{ ++ struct socket *so; ++ socklen_t addrlen; ++ int save_errno; ++ ++ so = socreate(slirp); ++ so->s = slirp_socket(haddr->sa_family, SOCK_DGRAM, 0); ++ if (so->s < 0) { ++ save_errno = errno; ++ sofree(so); ++ errno = save_errno; ++ return NULL; ++ } ++ if (haddr->sa_family == AF_INET6) ++ slirp_socket_set_v6only(so->s, (flags & SS_HOSTFWD_V6ONLY) != 0); ++ so->so_expire = curtime + SO_EXPIRE; ++ insque(so, &slirp->udb); ++ ++ if (bind(so->s, haddr, haddrlen) < 0) { ++ save_errno = errno; ++ udp_detach(so); ++ errno = save_errno; ++ return NULL; ++ } ++ slirp_socket_set_fast_reuse(so->s); ++ ++ addrlen = sizeof(so->fhost); ++ getsockname(so->s, &so->fhost.sa, &addrlen); ++ sotranslate_accept(so); ++ ++ sockaddr_copy(&so->lhost.sa, sizeof(so->lhost), laddr, laddrlen); ++ ++ if (flags != SS_FACCEPTONCE) ++ so->so_expire = 0; ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_ISFCONNECTED | flags; ++ ++ return so; ++} ++ ++struct socket *udp_listen(Slirp *slirp, uint32_t haddr, unsigned hport, ++ uint32_t laddr, unsigned lport, int flags) ++{ ++ struct sockaddr_in hsa, lsa; ++ ++ memset(&hsa, 0, sizeof(hsa)); ++ hsa.sin_family = AF_INET; ++ hsa.sin_addr.s_addr = haddr; ++ hsa.sin_port = hport; ++ ++ memset(&lsa, 0, sizeof(lsa)); ++ lsa.sin_family = AF_INET; ++ lsa.sin_addr.s_addr = laddr; ++ lsa.sin_port = lport; ++ ++ return udpx_listen(slirp, (const struct sockaddr *) &hsa, sizeof(hsa), (struct sockaddr *) &lsa, sizeof(lsa), flags); ++} +diff --git a/slirp/src/udp.h b/slirp/src/udp.h +new file mode 100644 +index 0000000000..47f4ed34d8 +--- /dev/null ++++ b/slirp/src/udp.h +@@ -0,0 +1,96 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)udp.h 8.1 (Berkeley) 6/10/93 ++ * udp.h,v 1.3 1994/08/21 05:27:41 paul Exp ++ */ ++ ++#ifndef UDP_H ++#define UDP_H ++ ++#include "socket.h" ++ ++#define UDP_TTL 0x60 ++#define UDP_UDPDATALEN 16192 ++ ++/* ++ * Udp protocol header. ++ * Per RFC 768, September, 1981. ++ */ ++struct udphdr { ++ uint16_t uh_sport; /* source port */ ++ uint16_t uh_dport; /* destination port */ ++ int16_t uh_ulen; /* udp length */ ++ uint16_t uh_sum; /* udp checksum */ ++}; ++ ++/* ++ * UDP kernel structures and variables. ++ */ ++struct udpiphdr { ++ struct ipovly ui_i; /* overlaid ip structure */ ++ struct udphdr ui_u; /* udp header */ ++}; ++#define ui_mbuf ui_i.ih_mbuf.mptr ++#define ui_x1 ui_i.ih_x1 ++#define ui_pr ui_i.ih_pr ++#define ui_len ui_i.ih_len ++#define ui_src ui_i.ih_src ++#define ui_dst ui_i.ih_dst ++#define ui_sport ui_u.uh_sport ++#define ui_dport ui_u.uh_dport ++#define ui_ulen ui_u.uh_ulen ++#define ui_sum ui_u.uh_sum ++ ++/* ++ * Names for UDP sysctl objects ++ */ ++#define UDPCTL_CHECKSUM 1 /* checksum UDP packets */ ++#define UDPCTL_MAXID 2 ++ ++struct mbuf; ++ ++void udp_init(Slirp *); ++void udp_cleanup(Slirp *); ++void udp_input(register struct mbuf *, int); ++int udp_attach(struct socket *, unsigned short af); ++void udp_detach(struct socket *); ++struct socket *udp_listen(Slirp *, uint32_t, unsigned, uint32_t, unsigned, int); ++struct socket *udpx_listen(Slirp *, ++ const struct sockaddr *haddr, socklen_t haddrlen, ++ const struct sockaddr *laddr, socklen_t laddrlen, ++ int flags); ++int udp_output(struct socket *so, struct mbuf *m, struct sockaddr_in *saddr, ++ struct sockaddr_in *daddr, int iptos); ++ ++void udp6_input(register struct mbuf *); ++int udp6_output(struct socket *so, struct mbuf *m, struct sockaddr_in6 *saddr, ++ struct sockaddr_in6 *daddr); ++ ++#endif +diff --git a/slirp/src/udp6.c b/slirp/src/udp6.c +new file mode 100644 +index 0000000000..efeac5c19a +--- /dev/null ++++ b/slirp/src/udp6.c +@@ -0,0 +1,196 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron ++ */ ++ ++#include "slirp.h" ++#include "udp.h" ++#include "dhcpv6.h" ++ ++void udp6_input(struct mbuf *m) ++{ ++ Slirp *slirp = m->slirp; ++ M_DUP_DEBUG(slirp, m, 0, 0); ++ ++ struct ip6 *ip, save_ip; ++ struct udphdr *uh; ++ int iphlen = sizeof(struct ip6); ++ int len; ++ struct socket *so; ++ struct sockaddr_in6 lhost; ++ int hop_limit; ++ ++ DEBUG_CALL("udp6_input"); ++ DEBUG_ARG("m = %p", m); ++ ++ if (slirp->restricted) { ++ goto bad; ++ } ++ ++ ip = mtod(m, struct ip6 *); ++ m->m_len -= iphlen; ++ m->m_data += iphlen; ++ uh = mtod_check(m, sizeof(struct udphdr)); ++ if (uh == NULL) { ++ goto bad; ++ } ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ ++ if (ip6_cksum(m)) { ++ goto bad; ++ } ++ ++ len = ntohs((uint16_t)uh->uh_ulen); ++ ++ /* ++ * Make mbuf data length reflect UDP length. ++ * If not enough data to reflect UDP length, drop. ++ */ ++ if (ntohs(ip->ip_pl) != len) { ++ if (len > ntohs(ip->ip_pl)) { ++ goto bad; ++ } ++ m_adj(m, len - ntohs(ip->ip_pl)); ++ ip->ip_pl = htons(len); ++ } ++ ++ /* ++ * Save a copy of the IP header in case we want restore it ++ * for sending an ICMP error message in response. ++ */ ++ save_ip = *ip; ++ ++ /* Locate pcb for datagram. */ ++ lhost.sin6_family = AF_INET6; ++ lhost.sin6_addr = ip->ip_src; ++ lhost.sin6_port = uh->uh_sport; ++ ++ /* handle DHCPv6 */ ++ if (ntohs(uh->uh_dport) == DHCPV6_SERVER_PORT && ++ (in6_equal(&ip->ip_dst, &slirp->vhost_addr6) || ++ in6_dhcp_multicast(&ip->ip_dst))) { ++ m->m_data += iphlen; ++ m->m_len -= iphlen; ++ dhcpv6_input(&lhost, m); ++ m->m_data -= iphlen; ++ m->m_len += iphlen; ++ goto bad; ++ } ++ ++ /* handle TFTP */ ++ if (ntohs(uh->uh_dport) == TFTP_SERVER && ++ !memcmp(ip->ip_dst.s6_addr, slirp->vhost_addr6.s6_addr, 16)) { ++ m->m_data += iphlen; ++ m->m_len -= iphlen; ++ tftp_input((struct sockaddr_storage *)&lhost, m); ++ m->m_data -= iphlen; ++ m->m_len += iphlen; ++ goto bad; ++ } ++ ++ so = solookup(&slirp->udp_last_so, &slirp->udb, ++ (struct sockaddr_storage *)&lhost, NULL); ++ ++ if (so == NULL) { ++ /* If there's no socket for this packet, create one. */ ++ so = socreate(slirp); ++ if (udp_attach(so, AF_INET6) == -1) { ++ DEBUG_MISC(" udp6_attach errno = %d-%s", errno, strerror(errno)); ++ sofree(so); ++ goto bad; ++ } ++ ++ /* Setup fields */ ++ so->so_lfamily = AF_INET6; ++ so->so_laddr6 = ip->ip_src; ++ so->so_lport6 = uh->uh_sport; ++ } ++ ++ so->so_ffamily = AF_INET6; ++ so->so_faddr6 = ip->ip_dst; /* XXX */ ++ so->so_fport6 = uh->uh_dport; /* XXX */ ++ ++ iphlen += sizeof(struct udphdr); ++ m->m_len -= iphlen; ++ m->m_data += iphlen; ++ ++ /* ++ * Check for TTL ++ */ ++ hop_limit = save_ip.ip_hl-1; ++ if (hop_limit <= 0) { ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ *ip = save_ip; ++ DEBUG_MISC("udp ttl exceeded"); ++ icmp6_send_error(m, ICMP6_TIMXCEED, ICMP6_TIMXCEED_INTRANS); ++ goto bad; ++ } ++ setsockopt(so->s, IPPROTO_IPV6, IPV6_UNICAST_HOPS, &hop_limit, sizeof(hop_limit)); ++ ++ /* ++ * Now we sendto() the packet. ++ */ ++ if (sosendto(so, m) == -1) { ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ *ip = save_ip; ++ DEBUG_MISC("udp tx errno = %d-%s", errno, strerror(errno)); ++ icmp6_send_error(m, ICMP6_UNREACH, ICMP6_UNREACH_NO_ROUTE); ++ goto bad; ++ } ++ ++ m_free(so->so_m); /* used for ICMP if error on sorecvfrom */ ++ ++ /* restore the orig mbuf packet */ ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ *ip = save_ip; ++ so->so_m = m; ++ ++ return; ++bad: ++ m_free(m); ++} ++ ++int udp6_output(struct socket *so, struct mbuf *m, struct sockaddr_in6 *saddr, ++ struct sockaddr_in6 *daddr) ++{ ++ Slirp *slirp = m->slirp; ++ M_DUP_DEBUG(slirp, m, 0, sizeof(struct ip6) + sizeof(struct udphdr)); ++ ++ struct ip6 *ip; ++ struct udphdr *uh; ++ ++ DEBUG_CALL("udp6_output"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ ++ /* adjust for header */ ++ m->m_data -= sizeof(struct udphdr); ++ m->m_len += sizeof(struct udphdr); ++ uh = mtod(m, struct udphdr *); ++ m->m_data -= sizeof(struct ip6); ++ m->m_len += sizeof(struct ip6); ++ ip = mtod(m, struct ip6 *); ++ ++ /* Build IP header */ ++ ip->ip_pl = htons(m->m_len - sizeof(struct ip6)); ++ ip->ip_nh = IPPROTO_UDP; ++ ip->ip_src = saddr->sin6_addr; ++ ip->ip_dst = daddr->sin6_addr; ++ ++ /* Build UDP header */ ++ uh->uh_sport = saddr->sin6_port; ++ uh->uh_dport = daddr->sin6_port; ++ uh->uh_ulen = ip->ip_pl; ++ uh->uh_sum = 0; ++ uh->uh_sum = ip6_cksum(m); ++ if (uh->uh_sum == 0) { ++ uh->uh_sum = 0xffff; ++ } ++ ++ return ip6_output(so, m, 0); ++} +diff --git a/slirp/src/util.c b/slirp/src/util.c +new file mode 100644 +index 0000000000..e6bccbe0fa +--- /dev/null ++++ b/slirp/src/util.c +@@ -0,0 +1,441 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * util.c (mostly based on QEMU os-win32.c) ++ * ++ * Copyright (c) 2003-2008 Fabrice Bellard ++ * Copyright (c) 2010-2016 Red Hat, Inc. ++ * ++ * QEMU library functions for win32 which are shared between QEMU and ++ * the QEMU tools. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. ++ */ ++#include "util.h" ++ ++#include ++#include ++#include ++ ++#if defined(_WIN32) ++int slirp_inet_aton(const char *cp, struct in_addr *ia) ++{ ++ uint32_t addr = inet_addr(cp); ++ if (addr == 0xffffffff) { ++ return 0; ++ } ++ ia->s_addr = addr; ++ return 1; ++} ++#endif ++ ++void slirp_set_nonblock(int fd) ++{ ++#ifndef _WIN32 ++ int f; ++ f = fcntl(fd, F_GETFL); ++ assert(f != -1); ++ f = fcntl(fd, F_SETFL, f | O_NONBLOCK); ++ assert(f != -1); ++#else ++ unsigned long opt = 1; ++ ioctlsocket(fd, FIONBIO, &opt); ++#endif ++} ++ ++static void slirp_set_cloexec(int fd) ++{ ++#ifndef _WIN32 ++ int f; ++ f = fcntl(fd, F_GETFD); ++ assert(f != -1); ++ f = fcntl(fd, F_SETFD, f | FD_CLOEXEC); ++ assert(f != -1); ++#endif ++} ++ ++/* ++ * Opens a socket with FD_CLOEXEC set ++ * On failure errno contains the reason. ++ */ ++int slirp_socket(int domain, int type, int protocol) ++{ ++ int ret; ++ ++#ifdef SOCK_CLOEXEC ++ ret = socket(domain, type | SOCK_CLOEXEC, protocol); ++ if (ret != -1 || errno != EINVAL) { ++ return ret; ++ } ++#endif ++ ret = socket(domain, type, protocol); ++ if (ret >= 0) { ++ slirp_set_cloexec(ret); ++ } ++ ++ return ret; ++} ++ ++#ifdef _WIN32 ++static int socket_error(void) ++{ ++ switch (WSAGetLastError()) { ++ case 0: ++ return 0; ++ case WSAEINTR: ++ return EINTR; ++ case WSAEINVAL: ++ return EINVAL; ++ case WSA_INVALID_HANDLE: ++ return EBADF; ++ case WSA_NOT_ENOUGH_MEMORY: ++ return ENOMEM; ++ case WSA_INVALID_PARAMETER: ++ return EINVAL; ++ case WSAENAMETOOLONG: ++ return ENAMETOOLONG; ++ case WSAENOTEMPTY: ++ return ENOTEMPTY; ++ case WSAEWOULDBLOCK: ++ /* not using EWOULDBLOCK as we don't want code to have ++ * to check both EWOULDBLOCK and EAGAIN */ ++ return EAGAIN; ++ case WSAEINPROGRESS: ++ return EINPROGRESS; ++ case WSAEALREADY: ++ return EALREADY; ++ case WSAENOTSOCK: ++ return ENOTSOCK; ++ case WSAEDESTADDRREQ: ++ return EDESTADDRREQ; ++ case WSAEMSGSIZE: ++ return EMSGSIZE; ++ case WSAEPROTOTYPE: ++ return EPROTOTYPE; ++ case WSAENOPROTOOPT: ++ return ENOPROTOOPT; ++ case WSAEPROTONOSUPPORT: ++ return EPROTONOSUPPORT; ++ case WSAEOPNOTSUPP: ++ return EOPNOTSUPP; ++ case WSAEAFNOSUPPORT: ++ return EAFNOSUPPORT; ++ case WSAEADDRINUSE: ++ return EADDRINUSE; ++ case WSAEADDRNOTAVAIL: ++ return EADDRNOTAVAIL; ++ case WSAENETDOWN: ++ return ENETDOWN; ++ case WSAENETUNREACH: ++ return ENETUNREACH; ++ case WSAENETRESET: ++ return ENETRESET; ++ case WSAECONNABORTED: ++ return ECONNABORTED; ++ case WSAECONNRESET: ++ return ECONNRESET; ++ case WSAENOBUFS: ++ return ENOBUFS; ++ case WSAEISCONN: ++ return EISCONN; ++ case WSAENOTCONN: ++ return ENOTCONN; ++ case WSAETIMEDOUT: ++ return ETIMEDOUT; ++ case WSAECONNREFUSED: ++ return ECONNREFUSED; ++ case WSAELOOP: ++ return ELOOP; ++ case WSAEHOSTUNREACH: ++ return EHOSTUNREACH; ++ default: ++ return EIO; ++ } ++} ++ ++#undef ioctlsocket ++int slirp_ioctlsocket_wrap(int fd, int req, void *val) ++{ ++ int ret; ++ ret = ioctlsocket(fd, req, val); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef closesocket ++int slirp_closesocket_wrap(int fd) ++{ ++ int ret; ++ ret = closesocket(fd); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef connect ++int slirp_connect_wrap(int sockfd, const struct sockaddr *addr, int addrlen) ++{ ++ int ret; ++ ret = connect(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef listen ++int slirp_listen_wrap(int sockfd, int backlog) ++{ ++ int ret; ++ ret = listen(sockfd, backlog); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef bind ++int slirp_bind_wrap(int sockfd, const struct sockaddr *addr, int addrlen) ++{ ++ int ret; ++ ret = bind(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef socket ++int slirp_socket_wrap(int domain, int type, int protocol) ++{ ++ int ret; ++ ret = socket(domain, type, protocol); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef accept ++int slirp_accept_wrap(int sockfd, struct sockaddr *addr, int *addrlen) ++{ ++ int ret; ++ ret = accept(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef shutdown ++int slirp_shutdown_wrap(int sockfd, int how) ++{ ++ int ret; ++ ret = shutdown(sockfd, how); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef getsockopt ++int slirp_getsockopt_wrap(int sockfd, int level, int optname, void *optval, ++ int *optlen) ++{ ++ int ret; ++ ret = getsockopt(sockfd, level, optname, optval, optlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef setsockopt ++int slirp_setsockopt_wrap(int sockfd, int level, int optname, ++ const void *optval, int optlen) ++{ ++ int ret; ++ ret = setsockopt(sockfd, level, optname, optval, optlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef getpeername ++int slirp_getpeername_wrap(int sockfd, struct sockaddr *addr, int *addrlen) ++{ ++ int ret; ++ ret = getpeername(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef getsockname ++int slirp_getsockname_wrap(int sockfd, struct sockaddr *addr, int *addrlen) ++{ ++ int ret; ++ ret = getsockname(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef send ++ssize_t slirp_send_wrap(int sockfd, const void *buf, size_t len, int flags) ++{ ++ int ret; ++ ret = send(sockfd, buf, len, flags); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef sendto ++ssize_t slirp_sendto_wrap(int sockfd, const void *buf, size_t len, int flags, ++ const struct sockaddr *addr, int addrlen) ++{ ++ int ret; ++ ret = sendto(sockfd, buf, len, flags, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef recv ++ssize_t slirp_recv_wrap(int sockfd, void *buf, size_t len, int flags) ++{ ++ int ret; ++ ret = recv(sockfd, buf, len, flags); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef recvfrom ++ssize_t slirp_recvfrom_wrap(int sockfd, void *buf, size_t len, int flags, ++ struct sockaddr *addr, int *addrlen) ++{ ++ int ret; ++ ret = recvfrom(sockfd, buf, len, flags, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++#endif /* WIN32 */ ++ ++void slirp_pstrcpy(char *buf, int buf_size, const char *str) ++{ ++ int c; ++ char *q = buf; ++ ++ if (buf_size <= 0) ++ return; ++ ++ for (;;) { ++ c = *str++; ++ if (c == 0 || q >= buf + buf_size - 1) ++ break; ++ *q++ = c; ++ } ++ *q = '\0'; ++} ++ ++G_GNUC_PRINTF(3, 0) ++static int slirp_vsnprintf(char *str, size_t size, ++ const char *format, va_list args) ++{ ++ int rv = g_vsnprintf(str, size, format, args); ++ ++ if (rv < 0) { ++ g_error("g_vsnprintf() failed: %s", g_strerror(errno)); ++ } ++ ++ return rv; ++} ++ ++/* ++ * A snprintf()-like function that: ++ * - returns the number of bytes written (excluding optional \0-ending) ++ * - dies on error ++ * - warn on truncation ++ */ ++int slirp_fmt(char *str, size_t size, const char *format, ...) ++{ ++ va_list args; ++ int rv; ++ ++ va_start(args, format); ++ rv = slirp_vsnprintf(str, size, format, args); ++ va_end(args); ++ ++ if (rv >= size) { ++ g_critical("slirp_fmt() truncation"); ++ } ++ ++ return MIN(rv, size); ++} ++ ++/* ++ * A snprintf()-like function that: ++ * - always \0-end (unless size == 0) ++ * - returns the number of bytes actually written, including \0 ending ++ * - dies on error ++ * - warn on truncation ++ */ ++int slirp_fmt0(char *str, size_t size, const char *format, ...) ++{ ++ va_list args; ++ int rv; ++ ++ va_start(args, format); ++ rv = slirp_vsnprintf(str, size, format, args); ++ va_end(args); ++ ++ if (rv >= size) { ++ g_critical("slirp_fmt0() truncation"); ++ if (size > 0) ++ str[size - 1] = '\0'; ++ rv = size; ++ } else { ++ rv += 1; /* include \0 */ ++ } ++ ++ return rv; ++} ++ ++const char *slirp_ether_ntoa(const uint8_t *addr, char *out_str, ++ size_t out_str_size) ++{ ++ assert(out_str_size >= ETH_ADDRSTRLEN); ++ ++ slirp_fmt0(out_str, out_str_size, "%02x:%02x:%02x:%02x:%02x:%02x", ++ addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); ++ ++ return out_str; ++} +diff --git a/slirp/src/util.h b/slirp/src/util.h +new file mode 100644 +index 0000000000..07654ecf37 +--- /dev/null ++++ b/slirp/src/util.h +@@ -0,0 +1,203 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * Copyright (c) 2003-2008 Fabrice Bellard ++ * Copyright (c) 2010-2019 Red Hat, Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. ++ */ ++#ifndef UTIL_H_ ++#define UTIL_H_ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef _WIN32 ++#include ++#include ++#include ++#else ++#include ++#include ++#include ++#endif ++ ++#if defined(_WIN32) && (defined(__x86_64__) || defined(__i386__)) ++#define SLIRP_PACKED __attribute__((gcc_struct, packed)) ++#else ++#define SLIRP_PACKED __attribute__((packed)) ++#endif ++ ++#ifndef DIV_ROUND_UP ++#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d)) ++#endif ++ ++#ifndef container_of ++#define container_of(ptr, type, member) \ ++ __extension__({ \ ++ void *__mptr = (void *)(ptr); \ ++ ((type *)(__mptr - offsetof(type, member))); \ ++ }) ++#endif ++ ++#ifndef G_SIZEOF_MEMBER ++#define G_SIZEOF_MEMBER(type, member) sizeof(((type *)0)->member) ++#endif ++ ++#if defined(_WIN32) /* CONFIG_IOVEC */ ++#if !defined(IOV_MAX) /* XXX: to avoid duplicate with QEMU osdep.h */ ++struct iovec { ++ void *iov_base; ++ size_t iov_len; ++}; ++#endif ++#else ++#include ++#endif ++ ++#define stringify(s) tostring(s) ++#define tostring(s) #s ++ ++#define SCALE_MS 1000000 ++ ++#define ETH_ALEN 6 ++#define ETH_ADDRSTRLEN 18 /* "xx:xx:xx:xx:xx:xx", with trailing NUL */ ++#define ETH_HLEN 14 ++#define ETH_P_IP (0x0800) /* Internet Protocol packet */ ++#define ETH_P_ARP (0x0806) /* Address Resolution packet */ ++#define ETH_P_IPV6 (0x86dd) ++#define ETH_P_VLAN (0x8100) ++#define ETH_P_DVLAN (0x88a8) ++#define ETH_P_NCSI (0x88f8) ++#define ETH_P_UNKNOWN (0xffff) ++ ++/* FIXME: remove me when made standalone */ ++#ifdef _WIN32 ++#undef accept ++#undef bind ++#undef closesocket ++#undef connect ++#undef getpeername ++#undef getsockname ++#undef getsockopt ++#undef ioctlsocket ++#undef listen ++#undef recv ++#undef recvfrom ++#undef send ++#undef sendto ++#undef setsockopt ++#undef shutdown ++#undef socket ++#endif ++ ++#ifdef _WIN32 ++#define connect slirp_connect_wrap ++int slirp_connect_wrap(int fd, const struct sockaddr *addr, int addrlen); ++#define listen slirp_listen_wrap ++int slirp_listen_wrap(int fd, int backlog); ++#define bind slirp_bind_wrap ++int slirp_bind_wrap(int fd, const struct sockaddr *addr, int addrlen); ++#define socket slirp_socket_wrap ++int slirp_socket_wrap(int domain, int type, int protocol); ++#define accept slirp_accept_wrap ++int slirp_accept_wrap(int fd, struct sockaddr *addr, int *addrlen); ++#define shutdown slirp_shutdown_wrap ++int slirp_shutdown_wrap(int fd, int how); ++#define getpeername slirp_getpeername_wrap ++int slirp_getpeername_wrap(int fd, struct sockaddr *addr, int *addrlen); ++#define getsockname slirp_getsockname_wrap ++int slirp_getsockname_wrap(int fd, struct sockaddr *addr, int *addrlen); ++#define send slirp_send_wrap ++ssize_t slirp_send_wrap(int fd, const void *buf, size_t len, int flags); ++#define sendto slirp_sendto_wrap ++ssize_t slirp_sendto_wrap(int fd, const void *buf, size_t len, int flags, ++ const struct sockaddr *dest_addr, int addrlen); ++#define recv slirp_recv_wrap ++ssize_t slirp_recv_wrap(int fd, void *buf, size_t len, int flags); ++#define recvfrom slirp_recvfrom_wrap ++ssize_t slirp_recvfrom_wrap(int fd, void *buf, size_t len, int flags, ++ struct sockaddr *src_addr, int *addrlen); ++#define closesocket slirp_closesocket_wrap ++int slirp_closesocket_wrap(int fd); ++#define ioctlsocket slirp_ioctlsocket_wrap ++int slirp_ioctlsocket_wrap(int fd, int req, void *val); ++#define getsockopt slirp_getsockopt_wrap ++int slirp_getsockopt_wrap(int sockfd, int level, int optname, void *optval, ++ int *optlen); ++#define setsockopt slirp_setsockopt_wrap ++int slirp_setsockopt_wrap(int sockfd, int level, int optname, ++ const void *optval, int optlen); ++#define inet_aton slirp_inet_aton ++int slirp_inet_aton(const char *cp, struct in_addr *ia); ++#else ++#define closesocket(s) close(s) ++#define ioctlsocket(s, r, v) ioctl(s, r, v) ++#endif ++ ++int slirp_socket(int domain, int type, int protocol); ++void slirp_set_nonblock(int fd); ++ ++static inline int slirp_socket_set_v6only(int fd, int v) ++{ ++ return setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &v, sizeof(v)); ++} ++ ++static inline int slirp_socket_set_nodelay(int fd) ++{ ++ int v = 1; ++ return setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &v, sizeof(v)); ++} ++ ++static inline int slirp_socket_set_fast_reuse(int fd) ++{ ++#ifndef _WIN32 ++ int v = 1; ++ return setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &v, sizeof(v)); ++#else ++ /* Enabling the reuse of an endpoint that was used by a socket still in ++ * TIME_WAIT state is usually performed by setting SO_REUSEADDR. On Windows ++ * fast reuse is the default and SO_REUSEADDR does strange things. So we ++ * don't have to do anything here. More info can be found at: ++ * http://msdn.microsoft.com/en-us/library/windows/desktop/ms740621.aspx */ ++ return 0; ++#endif ++} ++ ++void slirp_pstrcpy(char *buf, int buf_size, const char *str); ++ ++int slirp_fmt(char *str, size_t size, const char *format, ...) G_GNUC_PRINTF(3, 4); ++int slirp_fmt0(char *str, size_t size, const char *format, ...) G_GNUC_PRINTF(3, 4); ++ ++/* ++ * Pretty print a MAC address into out_str. ++ * As a convenience returns out_str. ++ */ ++const char *slirp_ether_ntoa(const uint8_t *addr, char *out_str, ++ size_t out_str_len); ++ ++#endif +diff --git a/slirp/src/version.c b/slirp/src/version.c +new file mode 100644 +index 0000000000..93e0be9c24 +--- /dev/null ++++ b/slirp/src/version.c +@@ -0,0 +1,8 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++#include "libslirp.h" ++ ++const char * ++slirp_version_string(void) ++{ ++ return SLIRP_VERSION_STRING; ++} +diff --git a/slirp/src/vmstate.c b/slirp/src/vmstate.c +new file mode 100644 +index 0000000000..68cc1729c5 +--- /dev/null ++++ b/slirp/src/vmstate.c +@@ -0,0 +1,444 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * VMState interpreter ++ * ++ * Copyright (c) 2009-2018 Red Hat Inc ++ * ++ * Authors: ++ * Juan Quintela ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++#include ++#include ++#include ++#include ++ ++#include "stream.h" ++#include "vmstate.h" ++ ++static int get_nullptr(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ if (slirp_istream_read_u8(f) == VMS_NULLPTR_MARKER) { ++ return 0; ++ } ++ g_warning("vmstate: get_nullptr expected VMS_NULLPTR_MARKER"); ++ return -EINVAL; ++} ++ ++static int put_nullptr(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++ ++{ ++ if (pv == NULL) { ++ slirp_ostream_write_u8(f, VMS_NULLPTR_MARKER); ++ return 0; ++ } ++ g_warning("vmstate: put_nullptr must be called with pv == NULL"); ++ return -EINVAL; ++} ++ ++const VMStateInfo slirp_vmstate_info_nullptr = { ++ .name = "uint64", ++ .get = get_nullptr, ++ .put = put_nullptr, ++}; ++ ++/* 8 bit unsigned int */ ++ ++static int get_uint8(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint8_t *v = pv; ++ *v = slirp_istream_read_u8(f); ++ return 0; ++} ++ ++static int put_uint8(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint8_t *v = pv; ++ slirp_ostream_write_u8(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_uint8 = { ++ .name = "uint8", ++ .get = get_uint8, ++ .put = put_uint8, ++}; ++ ++/* 16 bit unsigned int */ ++ ++static int get_uint16(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint16_t *v = pv; ++ *v = slirp_istream_read_u16(f); ++ return 0; ++} ++ ++static int put_uint16(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint16_t *v = pv; ++ slirp_ostream_write_u16(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_uint16 = { ++ .name = "uint16", ++ .get = get_uint16, ++ .put = put_uint16, ++}; ++ ++/* 32 bit unsigned int */ ++ ++static int get_uint32(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint32_t *v = pv; ++ *v = slirp_istream_read_u32(f); ++ return 0; ++} ++ ++static int put_uint32(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint32_t *v = pv; ++ slirp_ostream_write_u32(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_uint32 = { ++ .name = "uint32", ++ .get = get_uint32, ++ .put = put_uint32, ++}; ++ ++/* 16 bit int */ ++ ++static int get_int16(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int16_t *v = pv; ++ *v = slirp_istream_read_i16(f); ++ return 0; ++} ++ ++static int put_int16(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int16_t *v = pv; ++ slirp_ostream_write_i16(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_int16 = { ++ .name = "int16", ++ .get = get_int16, ++ .put = put_int16, ++}; ++ ++/* 32 bit int */ ++ ++static int get_int32(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int32_t *v = pv; ++ *v = slirp_istream_read_i32(f); ++ return 0; ++} ++ ++static int put_int32(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int32_t *v = pv; ++ slirp_ostream_write_i32(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_int32 = { ++ .name = "int32", ++ .get = get_int32, ++ .put = put_int32, ++}; ++ ++/* vmstate_info_tmp, see VMSTATE_WITH_TMP, the idea is that we allocate ++ * a temporary buffer and the pre_load/pre_save methods in the child vmsd ++ * copy stuff from the parent into the child and do calculations to fill ++ * in fields that don't really exist in the parent but need to be in the ++ * stream. ++ */ ++static int get_tmp(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int ret; ++ const VMStateDescription *vmsd = field->vmsd; ++ int version_id = field->version_id; ++ void *tmp = g_malloc(size); ++ ++ /* Writes the parent field which is at the start of the tmp */ ++ *(void **)tmp = pv; ++ ret = slirp_vmstate_load_state(f, vmsd, tmp, version_id); ++ g_free(tmp); ++ return ret; ++} ++ ++static int put_tmp(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ const VMStateDescription *vmsd = field->vmsd; ++ void *tmp = g_malloc(size); ++ int ret; ++ ++ /* Writes the parent field which is at the start of the tmp */ ++ *(void **)tmp = pv; ++ ret = slirp_vmstate_save_state(f, vmsd, tmp); ++ g_free(tmp); ++ ++ return ret; ++} ++ ++const VMStateInfo slirp_vmstate_info_tmp = { ++ .name = "tmp", ++ .get = get_tmp, ++ .put = put_tmp, ++}; ++ ++/* uint8_t buffers */ ++ ++static int get_buffer(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ slirp_istream_read(f, pv, size); ++ return 0; ++} ++ ++static int put_buffer(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ slirp_ostream_write(f, pv, size); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_buffer = { ++ .name = "buffer", ++ .get = get_buffer, ++ .put = put_buffer, ++}; ++ ++static int vmstate_n_elems(void *opaque, const VMStateField *field) ++{ ++ int n_elems = 1; ++ ++ if (field->flags & VMS_ARRAY) { ++ n_elems = field->num; ++ } else if (field->flags & VMS_VARRAY_INT32) { ++ n_elems = *(int32_t *)(opaque + field->num_offset); ++ } else if (field->flags & VMS_VARRAY_UINT32) { ++ n_elems = *(uint32_t *)(opaque + field->num_offset); ++ } else if (field->flags & VMS_VARRAY_UINT16) { ++ n_elems = *(uint16_t *)(opaque + field->num_offset); ++ } else if (field->flags & VMS_VARRAY_UINT8) { ++ n_elems = *(uint8_t *)(opaque + field->num_offset); ++ } ++ ++ if (field->flags & VMS_MULTIPLY_ELEMENTS) { ++ n_elems *= field->num; ++ } ++ ++ return n_elems; ++} ++ ++static int vmstate_size(void *opaque, const VMStateField *field) ++{ ++ int size = field->size; ++ ++ if (field->flags & VMS_VBUFFER) { ++ size = *(int32_t *)(opaque + field->size_offset); ++ if (field->flags & VMS_MULTIPLY) { ++ size *= field->size; ++ } ++ } ++ ++ return size; ++} ++ ++static int vmstate_save_state_v(SlirpOStream *f, const VMStateDescription *vmsd, ++ void *opaque, int version_id) ++{ ++ int ret = 0; ++ const VMStateField *field = vmsd->fields; ++ ++ if (vmsd->pre_save) { ++ ret = vmsd->pre_save(opaque); ++ if (ret) { ++ g_warning("pre-save failed: %s", vmsd->name); ++ return ret; ++ } ++ } ++ ++ while (field->name) { ++ if ((field->field_exists && field->field_exists(opaque, version_id)) || ++ (!field->field_exists && field->version_id <= version_id)) { ++ void *first_elem = opaque + field->offset; ++ int i, n_elems = vmstate_n_elems(opaque, field); ++ int size = vmstate_size(opaque, field); ++ ++ if (field->flags & VMS_POINTER) { ++ first_elem = *(void **)first_elem; ++ assert(first_elem || !n_elems || !size); ++ } ++ for (i = 0; i < n_elems; i++) { ++ void *curr_elem = first_elem + size * i; ++ ++ if (field->flags & VMS_ARRAY_OF_POINTER) { ++ assert(curr_elem); ++ curr_elem = *(void **)curr_elem; ++ } ++ if (!curr_elem && size) { ++ /* if null pointer write placeholder and do not follow */ ++ assert(field->flags & VMS_ARRAY_OF_POINTER); ++ ret = slirp_vmstate_info_nullptr.put(f, curr_elem, size, ++ NULL); ++ } else if (field->flags & VMS_STRUCT) { ++ ret = slirp_vmstate_save_state(f, field->vmsd, curr_elem); ++ } else if (field->flags & VMS_VSTRUCT) { ++ ret = vmstate_save_state_v(f, field->vmsd, curr_elem, ++ field->struct_version_id); ++ } else { ++ ret = field->info->put(f, curr_elem, size, field); ++ } ++ if (ret) { ++ g_warning("Save of field %s/%s failed", vmsd->name, ++ field->name); ++ return ret; ++ } ++ } ++ } else { ++ if (field->flags & VMS_MUST_EXIST) { ++ g_warning("Output state validation failed: %s/%s", vmsd->name, ++ field->name); ++ assert(!(field->flags & VMS_MUST_EXIST)); ++ } ++ } ++ field++; ++ } ++ ++ return 0; ++} ++ ++int slirp_vmstate_save_state(SlirpOStream *f, const VMStateDescription *vmsd, ++ void *opaque) ++{ ++ return vmstate_save_state_v(f, vmsd, opaque, vmsd->version_id); ++} ++ ++static void vmstate_handle_alloc(void *ptr, VMStateField *field, void *opaque) ++{ ++ if (field->flags & VMS_POINTER && field->flags & VMS_ALLOC) { ++ size_t size = vmstate_size(opaque, field); ++ size *= vmstate_n_elems(opaque, field); ++ if (size) { ++ *(void **)ptr = g_malloc(size); ++ } ++ } ++} ++ ++int slirp_vmstate_load_state(SlirpIStream *f, const VMStateDescription *vmsd, ++ void *opaque, int version_id) ++{ ++ VMStateField *field = vmsd->fields; ++ int ret = 0; ++ ++ if (version_id > vmsd->version_id) { ++ g_warning("%s: incoming version_id %d is too new " ++ "for local version_id %d", ++ vmsd->name, version_id, vmsd->version_id); ++ return -EINVAL; ++ } ++ if (vmsd->pre_load) { ++ int ret = vmsd->pre_load(opaque); ++ if (ret) { ++ return ret; ++ } ++ } ++ while (field->name) { ++ if ((field->field_exists && field->field_exists(opaque, version_id)) || ++ (!field->field_exists && field->version_id <= version_id)) { ++ void *first_elem = opaque + field->offset; ++ int i, n_elems = vmstate_n_elems(opaque, field); ++ int size = vmstate_size(opaque, field); ++ ++ vmstate_handle_alloc(first_elem, field, opaque); ++ if (field->flags & VMS_POINTER) { ++ first_elem = *(void **)first_elem; ++ assert(first_elem || !n_elems || !size); ++ } ++ for (i = 0; i < n_elems; i++) { ++ void *curr_elem = first_elem + size * i; ++ ++ if (field->flags & VMS_ARRAY_OF_POINTER) { ++ curr_elem = *(void **)curr_elem; ++ } ++ if (!curr_elem && size) { ++ /* if null pointer check placeholder and do not follow */ ++ assert(field->flags & VMS_ARRAY_OF_POINTER); ++ ret = slirp_vmstate_info_nullptr.get(f, curr_elem, size, ++ NULL); ++ } else if (field->flags & VMS_STRUCT) { ++ ret = slirp_vmstate_load_state(f, field->vmsd, curr_elem, ++ field->vmsd->version_id); ++ } else if (field->flags & VMS_VSTRUCT) { ++ ret = slirp_vmstate_load_state(f, field->vmsd, curr_elem, ++ field->struct_version_id); ++ } else { ++ ret = field->info->get(f, curr_elem, size, field); ++ } ++ if (ret < 0) { ++ g_warning("Failed to load %s:%s", vmsd->name, field->name); ++ return ret; ++ } ++ } ++ } else if (field->flags & VMS_MUST_EXIST) { ++ g_warning("Input validation failed: %s/%s", vmsd->name, ++ field->name); ++ return -1; ++ } ++ field++; ++ } ++ if (vmsd->post_load) { ++ ret = vmsd->post_load(opaque, version_id); ++ } ++ return ret; ++} +diff --git a/slirp/src/vmstate.h b/slirp/src/vmstate.h +new file mode 100644 +index 0000000000..94c6a4bc7b +--- /dev/null ++++ b/slirp/src/vmstate.h +@@ -0,0 +1,391 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * QEMU migration/snapshot declarations ++ * ++ * Copyright (c) 2009-2011 Red Hat, Inc. ++ * ++ * Original author: Juan Quintela ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++#ifndef VMSTATE_H_ ++#define VMSTATE_H_ ++ ++#include ++#include ++#include ++#include "slirp.h" ++#include "stream.h" ++ ++#define stringify(s) tostring(s) ++#define tostring(s) #s ++ ++typedef struct VMStateInfo VMStateInfo; ++typedef struct VMStateDescription VMStateDescription; ++typedef struct VMStateField VMStateField; ++ ++int slirp_vmstate_save_state(SlirpOStream *f, const VMStateDescription *vmsd, ++ void *opaque); ++int slirp_vmstate_load_state(SlirpIStream *f, const VMStateDescription *vmsd, ++ void *opaque, int version_id); ++ ++/* VMStateInfo allows customized migration of objects that don't fit in ++ * any category in VMStateFlags. Additional information is always passed ++ * into get and put in terms of field and vmdesc parameters. However ++ * these two parameters should only be used in cases when customized ++ * handling is needed, such as QTAILQ. For primitive data types such as ++ * integer, field and vmdesc parameters should be ignored inside get/put. ++ */ ++struct VMStateInfo { ++ const char *name; ++ int (*get)(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field); ++ int (*put)(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field); ++}; ++ ++enum VMStateFlags { ++ /* Ignored */ ++ VMS_SINGLE = 0x001, ++ ++ /* The struct member at opaque + VMStateField.offset is a pointer ++ * to the actual field (e.g. struct a { uint8_t *b; ++ * }). Dereference the pointer before using it as basis for ++ * further pointer arithmetic (see e.g. VMS_ARRAY). Does not ++ * affect the meaning of VMStateField.num_offset or ++ * VMStateField.size_offset; see VMS_VARRAY* and VMS_VBUFFER for ++ * those. */ ++ VMS_POINTER = 0x002, ++ ++ /* The field is an array of fixed size. VMStateField.num contains ++ * the number of entries in the array. The size of each entry is ++ * given by VMStateField.size and / or opaque + ++ * VMStateField.size_offset; see VMS_VBUFFER and ++ * VMS_MULTIPLY. Each array entry will be processed individually ++ * (VMStateField.info.get()/put() if VMS_STRUCT is not set, ++ * recursion into VMStateField.vmsd if VMS_STRUCT is set). May not ++ * be combined with VMS_VARRAY*. */ ++ VMS_ARRAY = 0x004, ++ ++ /* The field is itself a struct, containing one or more ++ * fields. Recurse into VMStateField.vmsd. Most useful in ++ * combination with VMS_ARRAY / VMS_VARRAY*, recursing into each ++ * array entry. */ ++ VMS_STRUCT = 0x008, ++ ++ /* The field is an array of variable size. The int32_t at opaque + ++ * VMStateField.num_offset contains the number of entries in the ++ * array. See the VMS_ARRAY description regarding array handling ++ * in general. May not be combined with VMS_ARRAY or any other ++ * VMS_VARRAY*. */ ++ VMS_VARRAY_INT32 = 0x010, ++ ++ /* Ignored */ ++ VMS_BUFFER = 0x020, ++ ++ /* The field is a (fixed-size or variable-size) array of pointers ++ * (e.g. struct a { uint8_t *b[]; }). Dereference each array entry ++ * before using it. Note: Does not imply any one of VMS_ARRAY / ++ * VMS_VARRAY*; these need to be set explicitly. */ ++ VMS_ARRAY_OF_POINTER = 0x040, ++ ++ /* The field is an array of variable size. The uint16_t at opaque ++ * + VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) ++ * contains the number of entries in the array. See the VMS_ARRAY ++ * description regarding array handling in general. May not be ++ * combined with VMS_ARRAY or any other VMS_VARRAY*. */ ++ VMS_VARRAY_UINT16 = 0x080, ++ ++ /* The size of the individual entries (a single array entry if ++ * VMS_ARRAY or any of VMS_VARRAY* are set, or the field itself if ++ * neither is set) is variable (i.e. not known at compile-time), ++ * but the same for all entries. Use the int32_t at opaque + ++ * VMStateField.size_offset (subject to VMS_MULTIPLY) to determine ++ * the size of each (and every) entry. */ ++ VMS_VBUFFER = 0x100, ++ ++ /* Multiply the entry size given by the int32_t at opaque + ++ * VMStateField.size_offset (see VMS_VBUFFER description) with ++ * VMStateField.size to determine the number of bytes to be ++ * allocated. Only valid in combination with VMS_VBUFFER. */ ++ VMS_MULTIPLY = 0x200, ++ ++ /* The field is an array of variable size. The uint8_t at opaque + ++ * VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) ++ * contains the number of entries in the array. See the VMS_ARRAY ++ * description regarding array handling in general. May not be ++ * combined with VMS_ARRAY or any other VMS_VARRAY*. */ ++ VMS_VARRAY_UINT8 = 0x400, ++ ++ /* The field is an array of variable size. The uint32_t at opaque ++ * + VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) ++ * contains the number of entries in the array. See the VMS_ARRAY ++ * description regarding array handling in general. May not be ++ * combined with VMS_ARRAY or any other VMS_VARRAY*. */ ++ VMS_VARRAY_UINT32 = 0x800, ++ ++ /* Fail loading the serialised VM state if this field is missing ++ * from the input. */ ++ VMS_MUST_EXIST = 0x1000, ++ ++ /* When loading serialised VM state, allocate memory for the ++ * (entire) field. Only valid in combination with ++ * VMS_POINTER. Note: Not all combinations with other flags are ++ * currently supported, e.g. VMS_ALLOC|VMS_ARRAY_OF_POINTER won't ++ * cause the individual entries to be allocated. */ ++ VMS_ALLOC = 0x2000, ++ ++ /* Multiply the number of entries given by the integer at opaque + ++ * VMStateField.num_offset (see VMS_VARRAY*) with VMStateField.num ++ * to determine the number of entries in the array. Only valid in ++ * combination with one of VMS_VARRAY*. */ ++ VMS_MULTIPLY_ELEMENTS = 0x4000, ++ ++ /* A structure field that is like VMS_STRUCT, but uses ++ * VMStateField.struct_version_id to tell which version of the ++ * structure we are referencing to use. */ ++ VMS_VSTRUCT = 0x8000, ++}; ++ ++struct VMStateField { ++ const char *name; ++ size_t offset; ++ size_t size; ++ size_t start; ++ int num; ++ size_t num_offset; ++ size_t size_offset; ++ const VMStateInfo *info; ++ enum VMStateFlags flags; ++ const VMStateDescription *vmsd; ++ int version_id; ++ int struct_version_id; ++ bool (*field_exists)(void *opaque, int version_id); ++}; ++ ++struct VMStateDescription { ++ const char *name; ++ int version_id; ++ int (*pre_load)(void *opaque); ++ int (*post_load)(void *opaque, int version_id); ++ int (*pre_save)(void *opaque); ++ VMStateField *fields; ++}; ++ ++ ++extern const VMStateInfo slirp_vmstate_info_int16; ++extern const VMStateInfo slirp_vmstate_info_int32; ++extern const VMStateInfo slirp_vmstate_info_uint8; ++extern const VMStateInfo slirp_vmstate_info_uint16; ++extern const VMStateInfo slirp_vmstate_info_uint32; ++ ++/** Put this in the stream when migrating a null pointer.*/ ++#define VMS_NULLPTR_MARKER (0x30U) /* '0' */ ++extern const VMStateInfo slirp_vmstate_info_nullptr; ++ ++extern const VMStateInfo slirp_vmstate_info_buffer; ++extern const VMStateInfo slirp_vmstate_info_tmp; ++ ++#define type_check_array(t1, t2, n) ((t1(*)[n])0 - (t2 *)0) ++#define type_check_pointer(t1, t2) ((t1 **)0 - (t2 *)0) ++#define typeof_field(type, field) typeof(((type *)0)->field) ++#define type_check(t1, t2) ((t1 *)0 - (t2 *)0) ++ ++#define vmstate_offset_value(_state, _field, _type) \ ++ (offsetof(_state, _field) + type_check(_type, typeof_field(_state, _field))) ++ ++#define vmstate_offset_pointer(_state, _field, _type) \ ++ (offsetof(_state, _field) + \ ++ type_check_pointer(_type, typeof_field(_state, _field))) ++ ++#define vmstate_offset_array(_state, _field, _type, _num) \ ++ (offsetof(_state, _field) + \ ++ type_check_array(_type, typeof_field(_state, _field), _num)) ++ ++#define vmstate_offset_buffer(_state, _field) \ ++ vmstate_offset_array(_state, _field, uint8_t, \ ++ sizeof(typeof_field(_state, _field))) ++ ++/* In the macros below, if there is a _version, that means the macro's ++ * field will be processed only if the version being received is >= ++ * the _version specified. In general, if you add a new field, you ++ * would increment the structure's version and put that version ++ * number into the new field so it would only be processed with the ++ * new version. ++ * ++ * In particular, for VMSTATE_STRUCT() and friends the _version does ++ * *NOT* pick the version of the sub-structure. It works just as ++ * specified above. The version of the top-level structure received ++ * is passed down to all sub-structures. This means that the ++ * sub-structures must have version that are compatible with all the ++ * structures that use them. ++ * ++ * If you want to specify the version of the sub-structure, use ++ * VMSTATE_VSTRUCT(), which allows the specific sub-structure version ++ * to be directly specified. ++ */ ++ ++#define VMSTATE_SINGLE_TEST(_field, _state, _test, _version, _info, _type) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .field_exists = (_test), .size = sizeof(_type), .info = &(_info), \ ++ .flags = VMS_SINGLE, \ ++ .offset = vmstate_offset_value(_state, _field, _type), \ ++ } ++ ++#define VMSTATE_ARRAY(_field, _state, _num, _version, _info, _type) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), .num = (_num), \ ++ .info = &(_info), .size = sizeof(_type), .flags = VMS_ARRAY, \ ++ .offset = vmstate_offset_array(_state, _field, _type, _num), \ ++ } ++ ++#define VMSTATE_STRUCT_TEST(_field, _state, _test, _version, _vmsd, _type) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .field_exists = (_test), .vmsd = &(_vmsd), .size = sizeof(_type), \ ++ .flags = VMS_STRUCT, \ ++ .offset = vmstate_offset_value(_state, _field, _type), \ ++ } ++ ++#define VMSTATE_STRUCT_POINTER_V(_field, _state, _version, _vmsd, _type) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .vmsd = &(_vmsd), .size = sizeof(_type *), \ ++ .flags = VMS_STRUCT | VMS_POINTER, \ ++ .offset = vmstate_offset_pointer(_state, _field, _type), \ ++ } ++ ++#define VMSTATE_STRUCT_ARRAY_TEST(_field, _state, _num, _test, _version, \ ++ _vmsd, _type) \ ++ { \ ++ .name = (stringify(_field)), .num = (_num), .field_exists = (_test), \ ++ .version_id = (_version), .vmsd = &(_vmsd), .size = sizeof(_type), \ ++ .flags = VMS_STRUCT | VMS_ARRAY, \ ++ .offset = vmstate_offset_array(_state, _field, _type, _num), \ ++ } ++ ++#define VMSTATE_STATIC_BUFFER(_field, _state, _version, _test, _start, _size) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .field_exists = (_test), .size = (_size - _start), \ ++ .info = &slirp_vmstate_info_buffer, .flags = VMS_BUFFER, \ ++ .offset = vmstate_offset_buffer(_state, _field) + _start, \ ++ } ++ ++#define VMSTATE_VBUFFER_UINT32(_field, _state, _version, _test, _field_size) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .field_exists = (_test), \ ++ .size_offset = vmstate_offset_value(_state, _field_size, uint32_t), \ ++ .info = &slirp_vmstate_info_buffer, \ ++ .flags = VMS_VBUFFER | VMS_POINTER, \ ++ .offset = offsetof(_state, _field), \ ++ } ++ ++#define QEMU_BUILD_BUG_ON_STRUCT(x) \ ++ struct { \ ++ int : (x) ? -1 : 1; \ ++ } ++ ++#define QEMU_BUILD_BUG_ON_ZERO(x) \ ++ (sizeof(QEMU_BUILD_BUG_ON_STRUCT(x)) - sizeof(QEMU_BUILD_BUG_ON_STRUCT(x))) ++ ++/* Allocate a temporary of type 'tmp_type', set tmp->parent to _state ++ * and execute the vmsd on the temporary. Note that we're working with ++ * the whole of _state here, not a field within it. ++ * We compile time check that: ++ * That _tmp_type contains a 'parent' member that's a pointer to the ++ * '_state' type ++ * That the pointer is right at the start of _tmp_type. ++ */ ++#define VMSTATE_WITH_TMP(_state, _tmp_type, _vmsd) \ ++ { \ ++ .name = "tmp", \ ++ .size = sizeof(_tmp_type) + \ ++ QEMU_BUILD_BUG_ON_ZERO(offsetof(_tmp_type, parent) != 0) + \ ++ type_check_pointer(_state, typeof_field(_tmp_type, parent)), \ ++ .vmsd = &(_vmsd), .info = &slirp_vmstate_info_tmp, \ ++ } ++ ++#define VMSTATE_SINGLE(_field, _state, _version, _info, _type) \ ++ VMSTATE_SINGLE_TEST(_field, _state, NULL, _version, _info, _type) ++ ++#define VMSTATE_STRUCT(_field, _state, _version, _vmsd, _type) \ ++ VMSTATE_STRUCT_TEST(_field, _state, NULL, _version, _vmsd, _type) ++ ++#define VMSTATE_STRUCT_POINTER(_field, _state, _vmsd, _type) \ ++ VMSTATE_STRUCT_POINTER_V(_field, _state, 0, _vmsd, _type) ++ ++#define VMSTATE_STRUCT_ARRAY(_field, _state, _num, _version, _vmsd, _type) \ ++ VMSTATE_STRUCT_ARRAY_TEST(_field, _state, _num, NULL, _version, _vmsd, \ ++ _type) ++ ++#define VMSTATE_INT16_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_int16, int16_t) ++#define VMSTATE_INT32_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_int32, int32_t) ++ ++#define VMSTATE_UINT8_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_uint8, uint8_t) ++#define VMSTATE_UINT16_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_uint16, uint16_t) ++#define VMSTATE_UINT32_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_uint32, uint32_t) ++ ++#define VMSTATE_INT16(_f, _s) VMSTATE_INT16_V(_f, _s, 0) ++#define VMSTATE_INT32(_f, _s) VMSTATE_INT32_V(_f, _s, 0) ++ ++#define VMSTATE_UINT8(_f, _s) VMSTATE_UINT8_V(_f, _s, 0) ++#define VMSTATE_UINT16(_f, _s) VMSTATE_UINT16_V(_f, _s, 0) ++#define VMSTATE_UINT32(_f, _s) VMSTATE_UINT32_V(_f, _s, 0) ++ ++#define VMSTATE_UINT16_TEST(_f, _s, _t) \ ++ VMSTATE_SINGLE_TEST(_f, _s, _t, 0, slirp_vmstate_info_uint16, uint16_t) ++ ++#define VMSTATE_UINT32_TEST(_f, _s, _t) \ ++ VMSTATE_SINGLE_TEST(_f, _s, _t, 0, slirp_vmstate_info_uint32, uint32_t) ++ ++#define VMSTATE_INT16_ARRAY_V(_f, _s, _n, _v) \ ++ VMSTATE_ARRAY(_f, _s, _n, _v, slirp_vmstate_info_int16, int16_t) ++ ++#define VMSTATE_INT16_ARRAY(_f, _s, _n) VMSTATE_INT16_ARRAY_V(_f, _s, _n, 0) ++ ++#define VMSTATE_BUFFER_V(_f, _s, _v) \ ++ VMSTATE_STATIC_BUFFER(_f, _s, _v, NULL, 0, sizeof(typeof_field(_s, _f))) ++ ++#define VMSTATE_BUFFER(_f, _s) VMSTATE_BUFFER_V(_f, _s, 0) ++ ++#define VMSTATE_END_OF_LIST() \ ++ { \ ++ } ++ ++#endif +-- +2.27.0 + diff --git a/SOURCES/0005-Initial-redhat-build.patch b/SOURCES/0005-Initial-redhat-build.patch new file mode 100644 index 0000000..ddae98d --- /dev/null +++ b/SOURCES/0005-Initial-redhat-build.patch @@ -0,0 +1,351 @@ +From 19ce5ff93ddd6b8a998348f2a5f59f603c5e11b7 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 12 Oct 2018 07:31:11 +0200 +Subject: Initial redhat build + +This patch introduces redhat build structure in redhat subdirectory. In addition, +several issues are fixed in QEMU tree: + + - Change of app name for sasl_server_init in VNC code from qemu to qemu-kvm + - As we use qemu-kvm as name in all places, this is updated to be consistent + - Man page renamed from qemu to qemu-kvm + - man page is installed using make install so we have to fix it in qemu tree + +This rebase includes changes up to qemu-kvm-6.1.0-5.el9 + +Rebase notes (3.1.0): +- added new configure options + +Rebase notes (4.0.0): +- Added dependency to perl-Test-Harness (upstream) +- Added dependency to python3-sphinx (upstream) +- Change location of icons (upstream) +- Remove .desktop file (added upstream) +- Added qemu-trace-stap (added upstream) +- Removed elf2dmp (added upstream) +- Remove .buildinfo +- Added pvh.bin rom (added upstream) +- Added interop documentation files +- Use python module instead of qemu.py (upstream) + +Rebase notes (4.1.0): +- Remove edk2 files generated by build +- Switch to rhel-8.1-candidate build target +- Remove specs documentation +- Switched from libssh2 to libssh +- Add rc0 tarball usage hacks +- Added BuildRequires for wget, rpm-build and python3-sphinx +- Removed new unpacked files +- Update configure line to use new options + +Rebase notes (4.2.0): +- Disable iotest run during make check +- README renamed to README.rst (upstream) +- Removed ui-spice-app.so +- Added relevant changes from "505f7f4 redhat: Adding slirp to the exploded tree" +- Removed qemu-ga.8 install from spec file - installed by make +- Removed spapr-rtas.bin (upstream) +- Require newer SLOF (20191022) + +Rebase notes (5.1.0): +- Use python3 for virtio_seg_max_adjust.py test +- Removed qemu-trace-stap shebang from spec file +- Added virtiofsd.1 (upstream) +- Use out-of-tree build +- New documentation structure (upstream) +- Update local build +- Removing installed qemu-storage-daemon (added upstream) +- Removing opensbi-riscv32-sifive_u-fw_jump.bin (added upstream) +- Disable iotests (moved from Enable make check commit) +- Added missing configure options +- Reorder configure options +- qemu-pr-helper moved to /usr/libexec/ (upstream) +- Added submodules for usb-redir, smartcard-reader and qxl display (upstream) +- Added setting rc version in Makefile for build +- removed --disable-vxhs configure option (removed upstream) +- bumped required libusbx-devel version to 1.0.23 +- bumped libfdt version to 1.6.0 + +Rebase notes (5.2.0 rc0): +- Move libfdt dependency to qemu-kvm-core +- Move manpage rename from Makefile to spec file +- rename with-confsuffix configure option to with-suffix (upstream) +- Bump libusbx Requires version to 1.0.234 +- Manual copy of keymaps in spec file (BZ 1875217) +- Removed /usr/share/qemu-kvm/npcm7xx_bootrom.bin, considering it + unpackaged for now. +- Removed /usr/share/qemu-kvm/qboot.rom, considering unpackaged. +- Added build dependency for meson and ninja-build +- hw/s390/s390-pci-vfio.c hack - set NULL for g_autofree variables +- Removed Chanelog (upstream) +- Fix in directory used for docs (upstream add %name so we do not pass it in configure) +- Package various .so as part of qemu-kvm-core package. + +Rebase notes (5.2.0 rc2): +- Added fix for dtrace build on RHEL 8.4.0 + +Rebase notes (5.2.0 rc3): +- Added man page for qemu-pr-helper +- Added new configure options +- Update qemu-kiwi patches to v4 + +Rebase notes (6.0.0): +- update tracetool usage in spec file +- remove qemu-storage-daemon-qmp-ref man page +- remove qemu-storage-daemon man page +- Added devel documentation +- do not package virtfs-proxy-helper files +- Use --with-git-submodules instead of --(enable|disable)-git-update +- Minor build fixes for sending upstream +- g_autofree initialization fixed upstream +- Updated rc information usage +- do not package package hw-s390x-virtio-gpu-ccw.so +- Disable new switch options + +Rebase notes (6.1.0): +- Fix warning issue in block.c +- Download tarball from dist-git cache +- Removed sheepdog driver +- Added new display modules: + - hw-display-virtio-gpu-gl.so + - hw-display-virtio-gpu-pci-gl.so + - hw-display-virtio-vga-gl.so +- sasl fix moved from ui/vnc.c to ui/vnc-auth-sasl.c +- Added accel-qtest-%{kvm_target} and accel-tcg-%{kvm_target} +- Added about docs +- Use -q option for setup +- Added hw-usb-host.so +- Disable new options (bpf, nvmm, slirp-smbd) + +Rebase notes (6.2.0): +- Using internal meson +- removed --disable-jemalloc and --disable-tcmalloc configure options +- added audio-oss.so +- added fdt requirement for x86_64 +- tests/acceptance renamed to tests/avocado +- added multiboot_dma.bin +- Removed conflict relics +- Updated configure options + +Merged patches (3.1.0): +- 01f0c9f RHEL8: Add disable configure options to qemu spec file +- Spec file cleanups + +Merged patches (4.0.0): +- aa4297c Add edk2 Requires to qemu-kvm +- d124ff5779 Fixing brew build target +- eb204b5 Introduce the qemu-kvm-tests rpm +- 223cf0c Load kvm module during boot (partial) + +Merged patches (4.1.0): +- ebb6e97 redhat: Fix LOCALVERSION creation +- b0ab0cc redhat: enable tpmdev passthrough (not disabling tests) +- 7cb3c4a Enable libpmem to support nvdimm +- 8943607 qemu-kvm.spec: bump libseccomp >= 2.4.0 +- 27b7c44 rh: set CONFIG_BOCHS_DISPLAY=y for x86 (partial) +- e1fe9fe x86_64-rh-devices: enable TPM emulation (partial) + +Merged patches (4.2.0): +- 69e1fb2 enable virgla +- d4f6115 enable virgl, for real this time ... + +Merged patches (5.1.0): +- 5edf6bd Add support for rh-brew-module +- f77d52d redhat: ship virtiofsd vhost-user device backend +- 63f12d4 redhat: Always use module build target for rh-brew (modified) +- 9b1e140 redhat: updating the modular target +- 44b8bd0 spec: Fix python shenigans for tests + +Merged patches (5.2.0 rc0): +- 9238ce7 Add support for simpletrace +- 5797cff Remove explicit glusterfs-api dependency +- fd62478 disable virgl +- 0205018 redhat: link /etc/qemu-ga/fsfreeze-hook to /etc/qemu-kvm/ +- 3645097 redhat: Make all generated so files executable (not only block-*) + +Merged patches (5.2.0 rc2): +- pjw 99657 redhat: introduces disable_everything macro into the configure call +- pjw 99659 redhat: scripts/extract_build_cmd.py - Avoid listing empty lines +- pjw 99658 redhat: Fixing rh-local build +- pjw 99660 redhat: Add qemu-kiwi subpackage +- d2e59ce redhat: add (un/pre)install systemd hooks for qemu-ga + +Merged patches (5.2.0 rc3): +- pjw 99887 - redhat: allow Makefile rh-prep builddep to fail +- pjw 99885 - redhat: adding rh-rpm target + +Merged patches (6.0.0): +- 5ab9954a3b spec: find system python via meson +- cd0f7db11f build-system: use b_staticpic=false +- 80d2dec42c udev-kvm-check: remove the "exceeded subscription limit" message +- 38959d51c0 redhat: Allow make to inherit params from parent make for rh-local +- 1e0cfe458f redhat: moving all documentation files to qemu-kvm-docs +- d7a594d02b redhat: makes qemu respect system's crypto profile +- e2bbf1572b spec: Package qemu-storage-daemon +- 92f10993ba spec: ui-spice sub-package +- 8931e46069 spec: ui-opengl sub-package + +Merged patches (6.1.0): +- 7bb57541b3 redhat: Install the s390-netboot.img that we've built +- b4a8531f41 redhat: Fix "unversioned Obsoletes" warning +- 141a1693c7 redhat: Move qemu-kvm-docs dependency to qemu-kvm +- d75f59c6f9 redhat: introducting qemu-kvm-hw-usbredir +- a934d8bf44 redhat: use the standard vhost-user JSON path + +Merged patches (6.2.0): +- 4f3f04bbb6 spec: Remove qemu-kiwi build +--- + .gitignore | 1 + + .gitlab-ci.yml | 24 - + .gitlab/issue_templates/bug.md | 64 - + .gitlab/issue_templates/feature_request.md | 32 - + README.systemtap | 43 + + meson.build | 4 +- + redhat/Makefile | 90 + + redhat/Makefile.common | 48 + + redhat/README.tests | 39 + + redhat/qemu-kvm.spec.template | 3896 ++++++++++++++++++++ + redhat/scripts/extract_build_cmd.py | 5 +- + redhat/scripts/process-patches.sh | 20 +- + redhat/scripts/tarball_checksum.sh | 2 +- + redhat/udev-kvm-check.c | 19 +- + scripts/qemu-guest-agent/fsfreeze-hook | 2 +- + scripts/systemtap/conf.d/qemu_kvm.conf | 4 + + scripts/systemtap/script.d/qemu_kvm.stp | 1 + + tests/check-block.sh | 2 + + ui/vnc-auth-sasl.c | 2 +- + 19 files changed, 4142 insertions(+), 156 deletions(-) + delete mode 100644 .gitlab-ci.yml + delete mode 100644 .gitlab/issue_templates/bug.md + delete mode 100644 .gitlab/issue_templates/feature_request.md + create mode 100644 README.systemtap + create mode 100644 redhat/Makefile + create mode 100644 redhat/Makefile.common + create mode 100644 redhat/README.tests + create mode 100644 redhat/qemu-kvm.spec.template + create mode 100644 scripts/systemtap/conf.d/qemu_kvm.conf + create mode 100644 scripts/systemtap/script.d/qemu_kvm.stp + +diff --git a/README.systemtap b/README.systemtap +new file mode 100644 +index 0000000000..ad913fc990 +--- /dev/null ++++ b/README.systemtap +@@ -0,0 +1,43 @@ ++QEMU tracing using systemtap-initscript ++--------------------------------------- ++ ++You can capture QEMU trace data all the time using systemtap-initscript. This ++uses SystemTap's flight recorder mode to trace all running guests to a ++fixed-size buffer on the host. Old trace entries are overwritten by new ++entries when the buffer size wraps. ++ ++1. Install the systemtap-initscript package: ++ # yum install systemtap-initscript ++ ++2. Install the systemtap scripts and the conf file: ++ # cp /usr/share/qemu-kvm/systemtap/script.d/qemu_kvm.stp /etc/systemtap/script.d/ ++ # cp /usr/share/qemu-kvm/systemtap/conf.d/qemu_kvm.conf /etc/systemtap/conf.d/ ++ ++The set of trace events to enable is given in qemu_kvm.stp. This SystemTap ++script can be customized to add or remove trace events provided in ++/usr/share/systemtap/tapset/qemu-kvm-simpletrace.stp. ++ ++SystemTap customizations can be made to qemu_kvm.conf to control the flight ++recorder buffer size and whether to store traces in memory only or disk too. ++See stap(1) for option documentation. ++ ++3. Start the systemtap service. ++ # service systemtap start qemu_kvm ++ ++4. Make the service start at boot time. ++ # chkconfig systemtap on ++ ++5. Confirm that the service works. ++ # service systemtap status qemu_kvm ++ qemu_kvm is running... ++ ++When you want to inspect the trace buffer, perform the following steps: ++ ++1. Dump the trace buffer. ++ # staprun -A qemu_kvm >/tmp/trace.log ++ ++2. Start the systemtap service because the preceding step stops the service. ++ # service systemtap start qemu_kvm ++ ++3. Translate the trace record to readable format. ++ # /usr/share/qemu-kvm/simpletrace.py --no-header /usr/share/qemu-kvm/trace-events /tmp/trace.log +diff --git a/meson.build b/meson.build +index 96de1a6ef9..5f6ba86dbb 100644 +--- a/meson.build ++++ b/meson.build +@@ -2108,7 +2108,9 @@ if capstone_opt == 'internal' + # Include all configuration defines via a header file, which will wind up + # as a dependency on the object file, and thus changes here will result + # in a rebuild. +- '-include', 'capstone-defs.h' ++ '-include', 'capstone-defs.h', ++ ++ '-Wp,-D_GLIBCXX_ASSERTIONS', + ] + + libcapstone = static_library('capstone', +diff --git a/scripts/qemu-guest-agent/fsfreeze-hook b/scripts/qemu-guest-agent/fsfreeze-hook +index 13aafd4845..e9b84ec028 100755 +--- a/scripts/qemu-guest-agent/fsfreeze-hook ++++ b/scripts/qemu-guest-agent/fsfreeze-hook +@@ -8,7 +8,7 @@ + # request, it is issued with "thaw" argument after filesystem is thawed. + + LOGFILE=/var/log/qga-fsfreeze-hook.log +-FSFREEZE_D=$(dirname -- "$0")/fsfreeze-hook.d ++FSFREEZE_D=$(dirname -- "$(realpath $0)")/fsfreeze-hook.d + + # Check whether file $1 is a backup or rpm-generated file and should be ignored + is_ignored_file() { +diff --git a/scripts/systemtap/conf.d/qemu_kvm.conf b/scripts/systemtap/conf.d/qemu_kvm.conf +new file mode 100644 +index 0000000000..372d8160a4 +--- /dev/null ++++ b/scripts/systemtap/conf.d/qemu_kvm.conf +@@ -0,0 +1,4 @@ ++# Force load uprobes (see BZ#1118352) ++stap -e 'probe process("/usr/libexec/qemu-kvm").function("main") { printf("") }' -c true ++ ++qemu_kvm_OPT="-s4" # per-CPU buffer size, in megabytes +diff --git a/scripts/systemtap/script.d/qemu_kvm.stp b/scripts/systemtap/script.d/qemu_kvm.stp +new file mode 100644 +index 0000000000..c04abf9449 +--- /dev/null ++++ b/scripts/systemtap/script.d/qemu_kvm.stp +@@ -0,0 +1 @@ ++probe qemu.kvm.simpletrace.handle_qmp_command,qemu.kvm.simpletrace.monitor_protocol_*,qemu.kvm.simpletrace.migrate_set_state {} +diff --git a/tests/check-block.sh b/tests/check-block.sh +index f86cb863de..6d38340d49 100755 +--- a/tests/check-block.sh ++++ b/tests/check-block.sh +@@ -69,6 +69,8 @@ else + fi + fi + ++exit 0 ++ + cd tests/qemu-iotests + + # QEMU_CHECK_BLOCK_AUTO is used to disable some unstable sub-tests +diff --git a/ui/vnc-auth-sasl.c b/ui/vnc-auth-sasl.c +index 47fdae5b21..2a950caa2a 100644 +--- a/ui/vnc-auth-sasl.c ++++ b/ui/vnc-auth-sasl.c +@@ -42,7 +42,7 @@ + + bool vnc_sasl_server_init(Error **errp) + { +- int saslErr = sasl_server_init(NULL, "qemu"); ++ int saslErr = sasl_server_init(NULL, "qemu-kvm"); + + if (saslErr != SASL_OK) { + error_setg(errp, "Failed to initialize SASL auth: %s", +-- +2.27.0 + diff --git a/SOURCES/0006-Enable-disable-devices-for-RHEL.patch b/SOURCES/0006-Enable-disable-devices-for-RHEL.patch new file mode 100644 index 0000000..a3fa5d1 --- /dev/null +++ b/SOURCES/0006-Enable-disable-devices-for-RHEL.patch @@ -0,0 +1,795 @@ +From 3d5a82d172345d17e300672909835262ff9dc917 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 2 Sep 2020 09:11:07 +0200 +Subject: Enable/disable devices for RHEL + +This commit adds all changes related to changes in supported devices. + +Signed-off-by: Miroslav Rezanina + +Rebase notes (qemu 3.1.0) +- spapr_rng disabled in default_config +- new hyperv.mak in default configs +- Move changes from x86_64-softmmu.mak to i386-softmmu.mak +- Added CONFIG_VIRTIO_MMIO to aarch64-softmmu.mak +- Removed config_vga_isa.c changes as no longer needed +- Removed new devices + +Rebase notes (4.0.0): +- Added CONFIG_PCI_EXPRESS_GENERIC_BRIDGE for aarch64-softmmu.mak +- Added CONFIG_ARM_VIRT for aarch64-softmmu.mak +- Switch to KConfig (upstream) + - Using device whitelist + without-defualt-devices option + +Rebase notes (4.1.0): +- Added CONFIG_USB_OHCI_PCI for ppc64 +- Added CONFIG_XIVE_KVM for ppc64 +- Added CONFIG_ACPI_PCI for x86_64 +- Added CONFIG_SEMIHOSTING for aarch64 +- Cleanup aarch64 devices +- Do not build a15mpcore.c +- Removed ide-isa.c stub file +- Use CONFIG_USB_EHCI_PCI on x86_64 (new upstream) + +Rebase notes (4.2.0-rc0): +- Use conditional build for isa-superio.c (upstream change) +- Rename PCI_PIIX to PCI_I440FX (upstream change) + +Rebase notes (4.2.0-rc3): +- Disabled ccid-card-emulated (patch 92566) +- Disabled vfio-pci-igd-lpc-bridge (patch 92565) + +Rebase notes (5.1.0): +- added CONFIG_PCI_EXPRESS on ppc64 (due to upstream dependency) +- Added CONFIG_NVDIMM +- updated cortex-15 disabling to upstream code +- Add CONFIG_ACPI_APEI for aarch64 +- removed obsolete hw/bt/Makefile.objs chunk +- removed unnecessary changes in target/i386/cpu.c + +Rebase notes (5.2.0 rc0): +- Added CONFIG_USB_XHCI_PCI on aarch64 ppc64 and x86_64 +- remove vl.c hack for no hpet +- Enable CONFIG_PTIMER for aarch64 +- Do not package hw-display-virtio-gpu.so on s390x + +Rebase notes (5.2.0 rc1): +- Added CONFIG_ARM_GIC for aarch64 (required for build) + +Rebase notes (weekly-210113): +- Removed XICS_KVM, XICS_SPAPR, XIVE_KVM and XIVE_SPAPR config (removed upstream) + +Rebase notes (weekly-210120): +- Add CONFIG_ARM_COMPATIBLE_SEMIHOSTING option + +Rebase notes (weekly-210203): +- Rename CONFIG_PVPANIC to CONFIG_PVPANIC_ISA + +Rebase notes (weekly-210317): +- Add new USB_STORAGE_CORE and USB_STORAGE_CLASSIC config for ppc64 and x86_64 +- Update disabling TCG cpus for AArch64 + +Rebase notes (weekly-210519): +- Do not use CONFIG_SPICE and CONFIG_OPENGL in default configs + +Rebase notes (weekly-210623): +- Add CONFIG_TPM for archs with used TPM functionality + +Rebase notes (weekly-210714): +- default_configs moved to configs + +Rebase notes (6.1.0 rc2): +- Use --with-device-ARCH configure option to use redhat config files + +Rebase notes (6.2.0 rc3): +- Do not remove -no-hpet documentation +Merged patches (qemu 3.1.0): +- d51e082 Re-enable CONFIG_HYPERV_TESTDEV +- 4b889f3 Declare cirrus-vga as deprecated +- b579d32 Do not build bluetooth support +- 3eef52a Disable CONFIG_IPMI and CONFIG_I2C for ppc64 +- 9caf292 Disable CONFIG_CAN_BUS and CONFIG_CAN_SJA1000 + +Merged patches (4.1.0): +- 20a51f6 fdc: Revert downstream disablement of device "floppy" +- f869cc0 fdc: Restrict floppy controllers to RHEL-7 machine types +- 5909721 aarch64: Compile out IOH3420 +- 27b7c44 rh: set CONFIG_BOCHS_DISPLAY=y for x86 (partial) +- 495a27d x86_64-rh-devices: add missing TPM passthrough +- e1fe9fe x86_64-rh-devices: enable TPM emulation (partial) + +Merged patches (4.2.0): +- f7587dd RHEL: disable hostmem-memfd + +Merged patches (5.1.0): +- 4543a3c i386: Remove cpu64-rhel6 CPU model +- 96533 aarch64: Remove tcg cpu types (pjw commit) +- 559d589 Revert "RHEL: disable hostmem-memfd" +- 441128e enable ramfb + +Merged patches (5.2.0 rc0): +- f70eb50 RHEL-only: Enable vTPM for POWER in downstream configs +- 69d8ae7 redhat: fix 5.0 rebase missing ISA TPM TIS +- 8310f89 RHEL-only: Enable vTPM for ARM in downstream configs +- 4a8ccfd Disable TPM passthrough backend on ARM + +Merged patches (6.0.0): +- ff817df9e3 config: enable VFIO_CCW +- 70d3924521 redhat: Add some devices for exporting upstream machine types + - without machine type chunks +- efac91b2b4 default-configs: Enable vhost-user-blk + +Merged patches (weekly-210630): +- 59a178acff disable CONFIG_USB_STORAGE_BOT + +Merged patches (6.1.0 rc2): +- 86f0025f16 aarch64: Add USB storage devices +--- + .../aarch64-softmmu/aarch64-rh-devices.mak | 31 ++++++ + .../ppc64-softmmu/ppc64-rh-devices.mak | 36 ++++++ + configs/devices/rh-virtio.mak | 10 ++ + .../s390x-softmmu/s390x-rh-devices.mak | 16 +++ + .../x86_64-softmmu/x86_64-rh-devices.mak | 104 ++++++++++++++++++ + .../x86_64-upstream-devices.mak | 4 + + hw/acpi/ich9.c | 4 +- + hw/arm/meson.build | 2 +- + hw/block/fdc.c | 10 ++ + hw/char/parallel.c | 9 ++ + hw/cpu/meson.build | 5 +- + hw/display/cirrus_vga.c | 3 + + hw/ide/piix.c | 5 +- + hw/input/pckbd.c | 2 + + hw/net/e1000.c | 2 + + hw/ppc/spapr_cpu_core.c | 2 + + hw/timer/hpet.c | 8 ++ + hw/usb/meson.build | 2 +- + redhat/qemu-kvm.spec.template | 9 +- + target/arm/cpu_tcg.c | 10 ++ + target/ppc/cpu-models.c | 10 ++ + target/s390x/cpu_models_sysemu.c | 3 + + target/s390x/kvm/kvm.c | 8 ++ + 23 files changed, 286 insertions(+), 9 deletions(-) + create mode 100644 configs/devices/aarch64-softmmu/aarch64-rh-devices.mak + create mode 100644 configs/devices/ppc64-softmmu/ppc64-rh-devices.mak + create mode 100644 configs/devices/rh-virtio.mak + create mode 100644 configs/devices/s390x-softmmu/s390x-rh-devices.mak + create mode 100644 configs/devices/x86_64-softmmu/x86_64-rh-devices.mak + create mode 100644 configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak + +diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak +new file mode 100644 +index 0000000000..0d4f9e6e4b +--- /dev/null ++++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak +@@ -0,0 +1,31 @@ ++include ../rh-virtio.mak ++ ++CONFIG_ARM_GIC_KVM=y ++CONFIG_ARM_GIC=y ++CONFIG_ARM_SMMUV3=y ++CONFIG_ARM_V7M=y ++CONFIG_ARM_VIRT=y ++CONFIG_EDID=y ++CONFIG_PCIE_PORT=y ++CONFIG_PCI_DEVICES=y ++CONFIG_PCI_TESTDEV=y ++CONFIG_PFLASH_CFI01=y ++CONFIG_SCSI=y ++CONFIG_SEMIHOSTING=y ++CONFIG_USB=y ++CONFIG_USB_XHCI=y ++CONFIG_USB_XHCI_PCI=y ++CONFIG_USB_STORAGE_CORE=y ++CONFIG_USB_STORAGE_CLASSIC=y ++CONFIG_VFIO=y ++CONFIG_VFIO_PCI=y ++CONFIG_VIRTIO_MMIO=y ++CONFIG_VIRTIO_PCI=y ++CONFIG_XIO3130=y ++CONFIG_NVDIMM=y ++CONFIG_ACPI_APEI=y ++CONFIG_TPM=y ++CONFIG_TPM_EMULATOR=y ++CONFIG_TPM_TIS_SYSBUS=y ++CONFIG_PTIMER=y ++CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y +diff --git a/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak b/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak +new file mode 100644 +index 0000000000..73e3ee0293 +--- /dev/null ++++ b/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak +@@ -0,0 +1,36 @@ ++include ../rh-virtio.mak ++ ++CONFIG_DIMM=y ++CONFIG_MEM_DEVICE=y ++CONFIG_NVDIMM=y ++CONFIG_PCI=y ++CONFIG_PCI_DEVICES=y ++CONFIG_PCI_TESTDEV=y ++CONFIG_PCI_EXPRESS=y ++CONFIG_PSERIES=y ++CONFIG_SCSI=y ++CONFIG_SPAPR_VSCSI=y ++CONFIG_TEST_DEVICES=y ++CONFIG_USB=y ++CONFIG_USB_OHCI=y ++CONFIG_USB_OHCI_PCI=y ++CONFIG_USB_SMARTCARD=y ++CONFIG_USB_STORAGE_CORE=y ++CONFIG_USB_STORAGE_CLASSIC=y ++CONFIG_USB_XHCI=y ++CONFIG_USB_XHCI_NEC=y ++CONFIG_USB_XHCI_PCI=y ++CONFIG_VFIO=y ++CONFIG_VFIO_PCI=y ++CONFIG_VGA=y ++CONFIG_VGA_PCI=y ++CONFIG_VHOST_USER=y ++CONFIG_VIRTIO_PCI=y ++CONFIG_VIRTIO_VGA=y ++CONFIG_WDT_IB6300ESB=y ++CONFIG_XICS=y ++CONFIG_XIVE=y ++CONFIG_TPM=y ++CONFIG_TPM_SPAPR=y ++CONFIG_TPM_EMULATOR=y ++CONFIG_TPM_PASSTHROUGH=y +diff --git a/configs/devices/rh-virtio.mak b/configs/devices/rh-virtio.mak +new file mode 100644 +index 0000000000..94ede1b5f6 +--- /dev/null ++++ b/configs/devices/rh-virtio.mak +@@ -0,0 +1,10 @@ ++CONFIG_VIRTIO=y ++CONFIG_VIRTIO_BALLOON=y ++CONFIG_VIRTIO_BLK=y ++CONFIG_VIRTIO_GPU=y ++CONFIG_VIRTIO_INPUT=y ++CONFIG_VIRTIO_INPUT_HOST=y ++CONFIG_VIRTIO_NET=y ++CONFIG_VIRTIO_RNG=y ++CONFIG_VIRTIO_SCSI=y ++CONFIG_VIRTIO_SERIAL=y +diff --git a/configs/devices/s390x-softmmu/s390x-rh-devices.mak b/configs/devices/s390x-softmmu/s390x-rh-devices.mak +new file mode 100644 +index 0000000000..165c082e87 +--- /dev/null ++++ b/configs/devices/s390x-softmmu/s390x-rh-devices.mak +@@ -0,0 +1,16 @@ ++include ../rh-virtio.mak ++ ++CONFIG_PCI=y ++CONFIG_S390_CCW_VIRTIO=y ++CONFIG_S390_FLIC=y ++CONFIG_S390_FLIC_KVM=y ++CONFIG_SCLPCONSOLE=y ++CONFIG_SCSI=y ++CONFIG_TERMINAL3270=y ++CONFIG_VFIO=y ++CONFIG_VFIO_AP=y ++CONFIG_VFIO_CCW=y ++CONFIG_VFIO_PCI=y ++CONFIG_VHOST_USER=y ++CONFIG_VIRTIO_CCW=y ++CONFIG_WDT_DIAG288=y +diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +new file mode 100644 +index 0000000000..ddf036f042 +--- /dev/null ++++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +@@ -0,0 +1,104 @@ ++include ../rh-virtio.mak ++include x86_64-upstream-devices.mak ++ ++CONFIG_AC97=y ++CONFIG_ACPI=y ++CONFIG_ACPI_PCI=y ++CONFIG_ACPI_CPU_HOTPLUG=y ++CONFIG_ACPI_MEMORY_HOTPLUG=y ++CONFIG_ACPI_NVDIMM=y ++CONFIG_ACPI_SMBUS=y ++CONFIG_ACPI_VMGENID=y ++CONFIG_ACPI_X86=y ++CONFIG_ACPI_X86_ICH=y ++CONFIG_AHCI=y ++CONFIG_APIC=y ++CONFIG_APM=y ++CONFIG_BOCHS_DISPLAY=y ++CONFIG_DIMM=y ++CONFIG_E1000E_PCI_EXPRESS=y ++CONFIG_E1000_PCI=y ++CONFIG_EDU=y ++CONFIG_FDC=y ++CONFIG_FDC_SYSBUS=y ++CONFIG_FW_CFG_DMA=y ++CONFIG_HDA=y ++CONFIG_HYPERV=y ++CONFIG_HYPERV_TESTDEV=y ++CONFIG_I2C=y ++CONFIG_I440FX=y ++CONFIG_I8254=y ++CONFIG_I8257=y ++CONFIG_I8259=y ++CONFIG_I82801B11=y ++CONFIG_IDE_CORE=y ++CONFIG_IDE_PCI=y ++CONFIG_IDE_PIIX=y ++CONFIG_IDE_QDEV=y ++CONFIG_IOAPIC=y ++CONFIG_IOH3420=y ++CONFIG_ISA_BUS=y ++CONFIG_ISA_DEBUG=y ++CONFIG_ISA_TESTDEV=y ++CONFIG_LPC_ICH9=y ++CONFIG_MC146818RTC=y ++CONFIG_MEM_DEVICE=y ++CONFIG_NVDIMM=y ++CONFIG_PAM=y ++CONFIG_PC=y ++CONFIG_PCI=y ++CONFIG_PCIE_PORT=y ++CONFIG_PCI_DEVICES=y ++CONFIG_PCI_EXPRESS=y ++CONFIG_PCI_EXPRESS_Q35=y ++CONFIG_PCI_I440FX=y ++CONFIG_PCI_TESTDEV=y ++CONFIG_PCKBD=y ++CONFIG_PCSPK=y ++CONFIG_PC_ACPI=y ++CONFIG_PC_PCI=y ++CONFIG_PFLASH_CFI01=y ++CONFIG_PVPANIC_ISA=y ++CONFIG_PXB=y ++CONFIG_Q35=y ++CONFIG_QXL=y ++CONFIG_RTL8139_PCI=y ++CONFIG_SCSI=y ++CONFIG_SERIAL=y ++CONFIG_SERIAL_ISA=y ++CONFIG_SERIAL_PCI=y ++CONFIG_SEV=y ++CONFIG_SGA=y ++CONFIG_SMBIOS=y ++CONFIG_SMBUS_EEPROM=y ++CONFIG_TEST_DEVICES=y ++CONFIG_USB=y ++CONFIG_USB_EHCI=y ++CONFIG_USB_EHCI_PCI=y ++CONFIG_USB_SMARTCARD=y ++CONFIG_USB_STORAGE_CORE=y ++CONFIG_USB_STORAGE_CLASSIC=y ++CONFIG_USB_UHCI=y ++CONFIG_USB_XHCI=y ++CONFIG_USB_XHCI_NEC=y ++CONFIG_USB_XHCI_PCI=y ++CONFIG_VFIO=y ++CONFIG_VFIO_PCI=y ++CONFIG_VGA=y ++CONFIG_VGA_CIRRUS=y ++CONFIG_VGA_PCI=y ++CONFIG_VHOST_USER=y ++CONFIG_VHOST_USER_BLK=y ++CONFIG_VIRTIO_PCI=y ++CONFIG_VIRTIO_VGA=y ++CONFIG_VMMOUSE=y ++CONFIG_VMPORT=y ++CONFIG_VTD=y ++CONFIG_WDT_IB6300ESB=y ++CONFIG_WDT_IB700=y ++CONFIG_XIO3130=y ++CONFIG_TPM=y ++CONFIG_TPM_CRB=y ++CONFIG_TPM_TIS_ISA=y ++CONFIG_TPM_EMULATOR=y ++CONFIG_TPM_PASSTHROUGH=y +diff --git a/configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak b/configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak +new file mode 100644 +index 0000000000..2cd20f54d2 +--- /dev/null ++++ b/configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak +@@ -0,0 +1,4 @@ ++# We need "isa-parallel" ++CONFIG_PARALLEL=y ++# We need "hpet" ++CONFIG_HPET=y +diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c +index ebe08ed831..381ef2ddcf 100644 +--- a/hw/acpi/ich9.c ++++ b/hw/acpi/ich9.c +@@ -438,8 +438,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) + static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; + pm->acpi_memory_hotplug.is_enabled = true; + pm->cpu_hotplug_legacy = true; +- pm->disable_s3 = 0; +- pm->disable_s4 = 0; ++ pm->disable_s3 = 1; ++ pm->disable_s4 = 1; + pm->s4_val = 2; + pm->use_acpi_hotplug_bridge = true; + pm->keep_pci_slot_hpc = true; +diff --git a/hw/arm/meson.build b/hw/arm/meson.build +index 721a8eb8be..87ed4dd914 100644 +--- a/hw/arm/meson.build ++++ b/hw/arm/meson.build +@@ -31,7 +31,7 @@ arm_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c')) + arm_ss.add(when: 'CONFIG_ZYNQ', if_true: files('xilinx_zynq.c')) + arm_ss.add(when: 'CONFIG_SABRELITE', if_true: files('sabrelite.c')) + +-arm_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m.c')) ++#arm_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m.c')) + arm_ss.add(when: 'CONFIG_EXYNOS4', if_true: files('exynos4210.c')) + arm_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx.c', 'pxa2xx_gpio.c', 'pxa2xx_pic.c')) + arm_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic.c')) +diff --git a/hw/block/fdc.c b/hw/block/fdc.c +index 21d18ac2e3..97fa6de423 100644 +--- a/hw/block/fdc.c ++++ b/hw/block/fdc.c +@@ -48,6 +48,8 @@ + #include "qom/object.h" + #include "fdc-internal.h" + ++#include "hw/boards.h" ++ + /********************************************************/ + /* debug Floppy devices */ + +@@ -2337,6 +2339,14 @@ void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, Error **errp) + FDrive *drive; + static int command_tables_inited = 0; + ++ /* Restricted for Red Hat Enterprise Linux: */ ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); ++ if (!strstr(mc->name, "-rhel7.")) { ++ error_setg(errp, "Device %s is not supported with machine type %s", ++ object_get_typename(OBJECT(dev)), mc->name); ++ return; ++ } ++ + if (fdctrl->fallback == FLOPPY_DRIVE_TYPE_AUTO) { + error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'"); + return; +diff --git a/hw/char/parallel.c b/hw/char/parallel.c +index b45e67bfbb..e5f108211b 100644 +--- a/hw/char/parallel.c ++++ b/hw/char/parallel.c +@@ -29,6 +29,7 @@ + #include "chardev/char-parallel.h" + #include "chardev/char-fe.h" + #include "hw/acpi/aml-build.h" ++#include "hw/boards.h" + #include "hw/irq.h" + #include "hw/isa/isa.h" + #include "hw/qdev-properties.h" +@@ -534,6 +535,14 @@ static void parallel_isa_realizefn(DeviceState *dev, Error **errp) + int base; + uint8_t dummy; + ++ /* Restricted for Red Hat Enterprise Linux */ ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); ++ if (strstr(mc->name, "rhel")) { ++ error_setg(errp, "Device %s is not supported with machine type %s", ++ object_get_typename(OBJECT(dev)), mc->name); ++ return; ++ } ++ + if (!qemu_chr_fe_backend_connected(&s->chr)) { + error_setg(errp, "Can't create parallel device, empty char device"); + return; +diff --git a/hw/cpu/meson.build b/hw/cpu/meson.build +index 9e52fee9e7..bb71c9f3e7 100644 +--- a/hw/cpu/meson.build ++++ b/hw/cpu/meson.build +@@ -1,6 +1,7 @@ +-softmmu_ss.add(files('core.c', 'cluster.c')) ++#softmmu_ss.add(files('core.c', 'cluster.c')) ++softmmu_ss.add(files('core.c')) + + specific_ss.add(when: 'CONFIG_ARM11MPCORE', if_true: files('arm11mpcore.c')) + specific_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview_mpcore.c')) + specific_ss.add(when: 'CONFIG_A9MPCORE', if_true: files('a9mpcore.c')) +-specific_ss.add(when: 'CONFIG_A15MPCORE', if_true: files('a15mpcore.c')) ++#specific_ss.add(when: 'CONFIG_A15MPCORE', if_true: files('a15mpcore.c')) +diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c +index fdca6ca659..fa1a7eee51 100644 +--- a/hw/display/cirrus_vga.c ++++ b/hw/display/cirrus_vga.c +@@ -2945,6 +2945,9 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) + PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); + int16_t device_id = pc->device_id; + ++ warn_report("'cirrus-vga' is deprecated, " ++ "please use a different VGA card instead"); ++ + /* follow real hardware, cirrus card emulated has 4 MB video memory. + Also accept 8 MB/16 MB for backward compatibility. */ + if (s->vga.vram_size_mb != 4 && s->vga.vram_size_mb != 8 && +diff --git a/hw/ide/piix.c b/hw/ide/piix.c +index ce89fd0aa3..fbcf802b13 100644 +--- a/hw/ide/piix.c ++++ b/hw/ide/piix.c +@@ -232,7 +232,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) + k->device_id = PCI_DEVICE_ID_INTEL_82371SB_1; + k->class_id = PCI_CLASS_STORAGE_IDE; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); +- dc->hotpluggable = false; ++ /* Disabled for Red Hat Enterprise Linux: */ ++ dc->user_creatable = false; + } + + static const TypeInfo piix3_ide_info = { +@@ -261,6 +262,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) + k->class_id = PCI_CLASS_STORAGE_IDE; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + dc->hotpluggable = false; ++ /* Disabled for Red Hat Enterprise Linux: */ ++ dc->user_creatable = false; + } + + static const TypeInfo piix4_ide_info = { +diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c +index baba62f357..bc360347ea 100644 +--- a/hw/input/pckbd.c ++++ b/hw/input/pckbd.c +@@ -796,6 +796,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) + dc->vmsd = &vmstate_kbd_isa; + isa->build_aml = i8042_build_aml; + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); ++ /* Disabled for Red Hat Enterprise Linux: */ ++ dc->user_creatable = false; + } + + static const TypeInfo i8042_info = { +diff --git a/hw/net/e1000.c b/hw/net/e1000.c +index f5bc81296d..282d01e374 100644 +--- a/hw/net/e1000.c ++++ b/hw/net/e1000.c +@@ -1821,6 +1821,7 @@ static const E1000Info e1000_devices[] = { + .revision = 0x03, + .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, + }, ++#if 0 /* Disabled for Red Hat Enterprise Linux 7 */ + { + .name = "e1000-82544gc", + .device_id = E1000_DEV_ID_82544GC_COPPER, +@@ -1833,6 +1834,7 @@ static const E1000Info e1000_devices[] = { + .revision = 0x03, + .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, + }, ++#endif + }; + + static void e1000_register_types(void) +diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c +index 58e7341cb7..8ba34f6a1d 100644 +--- a/hw/ppc/spapr_cpu_core.c ++++ b/hw/ppc/spapr_cpu_core.c +@@ -370,10 +370,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { + .instance_size = sizeof(SpaprCpuCore), + .class_size = sizeof(SpaprCpuCoreClass), + }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_SPAPR_CPU_CORE_TYPE("970_v2.2"), + DEFINE_SPAPR_CPU_CORE_TYPE("970mp_v1.0"), + DEFINE_SPAPR_CPU_CORE_TYPE("970mp_v1.1"), + DEFINE_SPAPR_CPU_CORE_TYPE("power5+_v2.1"), ++#endif + DEFINE_SPAPR_CPU_CORE_TYPE("power7_v2.3"), + DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), + DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), +diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c +index 9520471be2..202e032524 100644 +--- a/hw/timer/hpet.c ++++ b/hw/timer/hpet.c +@@ -733,6 +733,14 @@ static void hpet_realize(DeviceState *dev, Error **errp) + int i; + HPETTimer *timer; + ++ /* Restricted for Red Hat Enterprise Linux */ ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); ++ if (strstr(mc->name, "rhel")) { ++ error_setg(errp, "Device %s is not supported with machine type %s", ++ object_get_typename(OBJECT(dev)), mc->name); ++ return; ++ } ++ + if (!s->intcap) { + warn_report("Hpet's intcap not initialized"); + } +diff --git a/hw/usb/meson.build b/hw/usb/meson.build +index de853d780d..0776ae6a20 100644 +--- a/hw/usb/meson.build ++++ b/hw/usb/meson.build +@@ -52,7 +52,7 @@ softmmu_ss.add(when: 'CONFIG_USB_SMARTCARD', if_true: files('dev-smartcard-reade + if cacard.found() + usbsmartcard_ss = ss.source_set() + usbsmartcard_ss.add(when: 'CONFIG_USB_SMARTCARD', +- if_true: [cacard, files('ccid-card-emulated.c', 'ccid-card-passthru.c')]) ++ if_true: [cacard, files('ccid-card-passthru.c')]) + hw_usb_modules += {'smartcard': usbsmartcard_ss} + endif + +diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c +index 13d0e9b195..3826fa5122 100644 +--- a/target/arm/cpu_tcg.c ++++ b/target/arm/cpu_tcg.c +@@ -22,6 +22,7 @@ + /* CPU models. These are not needed for the AArch64 linux-user build. */ + #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) + static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) + { +@@ -375,6 +376,7 @@ static void cortex_a9_initfn(Object *obj) + cpu->ccsidr[1] = 0x200fe019; /* 16k L1 icache. */ + define_arm_cp_regs(cpu, cortexa9_cp_reginfo); + } ++#endif /* disabled for RHEL */ + + #ifndef CONFIG_USER_ONLY + static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) +@@ -400,6 +402,7 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { + REGINFO_SENTINEL + }; + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void cortex_a7_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -445,6 +448,7 @@ static void cortex_a7_initfn(Object *obj) + cpu->ccsidr[2] = 0x711fe07a; /* 4096K L2 unified cache */ + define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */ + } ++#endif /* disabled for RHEL */ + + static void cortex_a15_initfn(Object *obj) + { +@@ -488,6 +492,7 @@ static void cortex_a15_initfn(Object *obj) + define_arm_cp_regs(cpu, cortexa15_cp_reginfo); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void cortex_m0_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -928,6 +933,7 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) + + cc->gdb_core_xml_file = "arm-m-profile.xml"; + } ++#endif /* disabled for RHEL */ + + #ifndef TARGET_AARCH64 + /* +@@ -1007,6 +1013,7 @@ static void arm_max_initfn(Object *obj) + #endif /* !TARGET_AARCH64 */ + + static const ARMCPUInfo arm_tcg_cpus[] = { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { .name = "arm926", .initfn = arm926_initfn }, + { .name = "arm946", .initfn = arm946_initfn }, + { .name = "arm1026", .initfn = arm1026_initfn }, +@@ -1022,7 +1029,9 @@ static const ARMCPUInfo arm_tcg_cpus[] = { + { .name = "cortex-a7", .initfn = cortex_a7_initfn }, + { .name = "cortex-a8", .initfn = cortex_a8_initfn }, + { .name = "cortex-a9", .initfn = cortex_a9_initfn }, ++#endif /* disabled for RHEL */ + { .name = "cortex-a15", .initfn = cortex_a15_initfn }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { .name = "cortex-m0", .initfn = cortex_m0_initfn, + .class_init = arm_v7m_class_init }, + { .name = "cortex-m3", .initfn = cortex_m3_initfn, +@@ -1053,6 +1062,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { + { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, + { .name = "pxa270-c0", .initfn = pxa270c0_initfn }, + { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, ++#endif /* disabled for RHEL */ + #ifndef TARGET_AARCH64 + { .name = "max", .initfn = arm_max_initfn }, + #endif +diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c +index 4baa111713..d779c4d1d5 100644 +--- a/target/ppc/cpu-models.c ++++ b/target/ppc/cpu-models.c +@@ -66,6 +66,7 @@ + #define POWERPC_DEF(_name, _pvr, _type, _desc) \ + POWERPC_DEF_SVR(_name, _desc, _pvr, POWERPC_SVR_NONE, _type) + ++#if 0 /* Embedded and 32-bit CPUs disabled for Red Hat Enterprise Linux */ + /* Embedded PowerPC */ + /* PowerPC 401 family */ + POWERPC_DEF("401", CPU_POWERPC_401, 401, +@@ -740,8 +741,10 @@ + "PowerPC 7447A v1.2 (G4)") + POWERPC_DEF("7457a_v1.2", CPU_POWERPC_74x7A_v12, 7455, + "PowerPC 7457A v1.2 (G4)") ++#endif + /* 64 bits PowerPC */ + #if defined(TARGET_PPC64) ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + POWERPC_DEF("970_v2.2", CPU_POWERPC_970_v22, 970, + "PowerPC 970 v2.2") + POWERPC_DEF("970fx_v1.0", CPU_POWERPC_970FX_v10, 970, +@@ -760,6 +763,7 @@ + "PowerPC 970MP v1.1") + POWERPC_DEF("power5+_v2.1", CPU_POWERPC_POWER5P_v21, POWER5P, + "POWER5+ v2.1") ++#endif + POWERPC_DEF("power7_v2.3", CPU_POWERPC_POWER7_v23, POWER7, + "POWER7 v2.3") + POWERPC_DEF("power7+_v2.1", CPU_POWERPC_POWER7P_v21, POWER7, +@@ -784,6 +788,7 @@ + /* PowerPC CPU aliases */ + + PowerPCCPUAlias ppc_cpu_aliases[] = { ++#if 0 /* Embedded and 32-bit CPUs disabled for Red Hat Enterprise Linux */ + { "403", "403gc" }, + { "405", "405d4" }, + { "405cr", "405crc" }, +@@ -942,12 +947,15 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { + { "7447a", "7447a_v1.2" }, + { "7457a", "7457a_v1.2" }, + { "apollo7pm", "7457a_v1.0" }, ++#endif + #if defined(TARGET_PPC64) ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "970", "970_v2.2" }, + { "970fx", "970fx_v3.1" }, + { "970mp", "970mp_v1.1" }, + { "power5+", "power5+_v2.1" }, + { "power5gs", "power5+_v2.1" }, ++#endif + { "power7", "power7_v2.3" }, + { "power7+", "power7+_v2.1" }, + { "power8e", "power8e_v2.1" }, +@@ -957,6 +965,7 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { + { "power10", "power10_v2.0" }, + #endif + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + /* Generic PowerPCs */ + #if defined(TARGET_PPC64) + { "ppc64", "970fx_v3.1" }, +@@ -964,5 +973,6 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { + { "ppc32", "604" }, + { "ppc", "604" }, + { "default", "604" }, ++#endif + { NULL, NULL } + }; +diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c +index 05c3ccaaff..6a04ccab1b 100644 +--- a/target/s390x/cpu_models_sysemu.c ++++ b/target/s390x/cpu_models_sysemu.c +@@ -36,6 +36,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, + (max_model->def->gen == model->def->gen && + max_model->def->ec_ga < model->def->ec_ga)) { + list_add_feat("type", unavailable); ++ } else if (model->def->gen < 11 && kvm_enabled()) { ++ /* Older CPU models are not supported on Red Hat Enterprise Linux */ ++ list_add_feat("type", unavailable); + } + + /* detect missing features if any to properly report them */ +diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c +index 5b1fdb55c4..c52434985b 100644 +--- a/target/s390x/kvm/kvm.c ++++ b/target/s390x/kvm/kvm.c +@@ -2508,6 +2508,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) + error_setg(errp, "KVM doesn't support CPU models"); + return; + } ++ ++ /* Older CPU models are not supported on Red Hat Enterprise Linux */ ++ if (model->def->gen < 11) { ++ error_setg(errp, "KVM: Unsupported CPU type specified: %s", ++ MACHINE(qdev_get_machine())->cpu_type); ++ return; ++ } ++ + prop.cpuid = s390_cpuid_from_cpu_model(model); + prop.ibc = s390_ibc_from_cpu_model(model); + /* configure cpu features indicated via STFL(e) */ +-- +2.27.0 + diff --git a/SOURCES/0007-Machine-type-related-general-changes.patch b/SOURCES/0007-Machine-type-related-general-changes.patch new file mode 100644 index 0000000..f7bd665 --- /dev/null +++ b/SOURCES/0007-Machine-type-related-general-changes.patch @@ -0,0 +1,1071 @@ +From adca046d9db670637b9bf2b24f7a4349a9fe2628 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 11 Jan 2019 09:54:45 +0100 +Subject: Machine type related general changes + +This patch is first part of original "Add RHEL machine types" patch we +split to allow easier review. It contains changes not related to any +architecture. + +Signed-off-by: Miroslav Rezanina + +Rebase changes (4.0.0): +- Remove e1000 device duplication changes to reflect upstream solution +- Rewrite machine compat properties to upstream solution + +Rebase changes (4.1.0): +- Removed optional flag for machine compat properties (upstream) +- Remove c3e002cb chunk from hw/net/e1000.c +- Reorder compat structures +- Use one format for compat scructures +- Added compat for virtio-balloon-pci.any_layout for rhel71 + +Rebase changes (weekly-210303): +- Added rhel 8.4.0 compat based on 5.2 compat + +Rebase changes (weekly-211103): +- Do not duplicate minimal_version_id for piix4_pm + +Merged patches (4.0.0): +- d4c0957 compat: Generic HW_COMPAT_RHEL7_6 +- cbac773 virtio: Make disable-legacy/disable-modern compat properties optional + +Merged patches (4.1.0): +- 479ad30 redhat: fix cut'n'paste garbage in hw_compat comments +- f19738e compat: Generic hw_compat_rhel_8_0 + +Merged patches (4.2.0): +- 9f2bfaa machine types: Update hw_compat_rhel_8_0 from hw_compat_4_0 +- ca4a5e8 virtio: Make disable-legacy/disable-modern compat properties optional +- compat: Generic hw_compat_rhel_8_1 (patch 93040/92956) + +Merged patches (5.1.0): +- e6c3fbf hw/smbios: set new default SMBIOS fields for Windows driver support (partialy) +- 8f9f4d8 compat: disable 'edid' for virtio-gpu-ccw + +Merged patches (5.2.0 rc0): +- 8348642 redhat: define hw_compat_8_2 +- 45b8402 redhat: define hw_compat_8_2 +- 4effa71 redhat: Update hw_compat_8_2 +- 0e84dff virtio: skip legacy support check on machine types less than 5.1 (partialy) + +Merged patches (6.0.0): +- fa0063ba67 redhat: Define hw_compat_8_3 +- d98e328c8d usb/hcd-xhci-pci: Fixup capabilities ordering (again) +- b8a2578117 virtio: move 'use-disabled-flag' property to hw_compat_4_2 +- f7940b04c8 virtio-pci: compat page aligned ATS + +Merged patches (weekly-210602): +- 26f25108c1 redhat: add missing entries in hw_compat_rhel_8_4 + +Merged patches (weekly-211006): +- 43c4b9bea6 redhat: Define hw_compat_rhel_8_5 +--- + hw/acpi/ich9.c | 15 ++ + hw/acpi/piix4.c | 6 +- + hw/arm/virt.c | 2 +- + hw/char/serial.c | 16 +++ + hw/core/machine.c | 272 +++++++++++++++++++++++++++++++++++ + hw/display/vga-isa.c | 2 +- + hw/i386/pc_piix.c | 2 + + hw/i386/pc_q35.c | 2 + + hw/net/e1000e.c | 22 +++ + hw/net/rtl8139.c | 4 +- + hw/rtc/mc146818rtc.c | 6 + + hw/smbios/smbios.c | 46 +++++- + hw/timer/i8254_common.c | 2 +- + hw/usb/hcd-uhci.c | 4 +- + hw/usb/hcd-xhci-pci.c | 59 ++++++-- + hw/usb/hcd-xhci-pci.h | 1 + + hw/usb/hcd-xhci.c | 20 +++ + hw/usb/hcd-xhci.h | 2 + + include/hw/acpi/ich9.h | 3 + + include/hw/boards.h | 36 +++++ + include/hw/firmware/smbios.h | 5 +- + include/hw/i386/pc.h | 3 + + include/hw/usb.h | 3 + + migration/migration.c | 2 + + migration/migration.h | 5 + + 25 files changed, 514 insertions(+), 26 deletions(-) + +diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c +index 381ef2ddcf..82bd805b55 100644 +--- a/hw/acpi/ich9.c ++++ b/hw/acpi/ich9.c +@@ -433,6 +433,18 @@ static void ich9_pm_set_keep_pci_slot_hpc(Object *obj, bool value, Error **errp) + s->pm.keep_pci_slot_hpc = value; + } + ++static bool ich9_pm_get_force_rev1_fadt(Object *obj, Error **errp) ++{ ++ ICH9LPCState *s = ICH9_LPC_DEVICE(obj); ++ return s->pm.force_rev1_fadt; ++} ++ ++static void ich9_pm_set_force_rev1_fadt(Object *obj, bool value, Error **errp) ++{ ++ ICH9LPCState *s = ICH9_LPC_DEVICE(obj); ++ s->pm.force_rev1_fadt = value; ++} ++ + void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) + { + static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; +@@ -457,6 +469,9 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) + object_property_add_bool(obj, "cpu-hotplug-legacy", + ich9_pm_get_cpu_hotplug_legacy, + ich9_pm_set_cpu_hotplug_legacy); ++ object_property_add_bool(obj, "__com.redhat_force-rev1-fadt", ++ ich9_pm_get_force_rev1_fadt, ++ ich9_pm_set_force_rev1_fadt); + object_property_add_uint8_ptr(obj, ACPI_PM_PROP_S3_DISABLED, + &pm->disable_s3, OBJ_PROP_FLAG_READWRITE); + object_property_add_uint8_ptr(obj, ACPI_PM_PROP_S4_DISABLED, +diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c +index f0b5fac44a..8d6011c0a3 100644 +--- a/hw/acpi/piix4.c ++++ b/hw/acpi/piix4.c +@@ -278,7 +278,7 @@ static bool piix4_vmstate_need_smbus(void *opaque, int version_id) + static const VMStateDescription vmstate_acpi = { + .name = "piix4_pm", + .version_id = 3, +- .minimum_version_id = 3, ++ .minimum_version_id = 2, + .post_load = vmstate_acpi_post_load, + .fields = (VMStateField[]) { + VMSTATE_PCI_DEVICE(parent_obj, PIIX4PMState), +@@ -644,8 +644,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) + + static Property piix4_pm_properties[] = { + DEFINE_PROP_UINT32("smb_io_base", PIIX4PMState, smb_io_base, 0), +- DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 0), +- DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 0), ++ DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 1), ++ DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 1), + DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2), + DEFINE_PROP_BOOL(ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, PIIX4PMState, + use_acpi_hotplug_bridge, true), +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 30da05dfe0..5de4d9d73b 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1590,7 +1590,7 @@ static void virt_build_smbios(VirtMachineState *vms) + + smbios_set_defaults("QEMU", product, + vmc->smbios_old_sys_ver ? "1.0" : mc->name, false, +- true, SMBIOS_ENTRY_POINT_30); ++ true, NULL, NULL, SMBIOS_ENTRY_POINT_30); + + smbios_get_tables(MACHINE(vms), NULL, 0, + &smbios_tables, &smbios_tables_len, +diff --git a/hw/char/serial.c b/hw/char/serial.c +index 7061aacbce..fe8d0afbb0 100644 +--- a/hw/char/serial.c ++++ b/hw/char/serial.c +@@ -37,6 +37,7 @@ + #include "trace.h" + #include "hw/qdev-properties.h" + #include "hw/qdev-properties-system.h" ++#include "migration/migration.h" + + #define UART_LCR_DLAB 0x80 /* Divisor latch access bit */ + +@@ -689,6 +690,9 @@ static int serial_post_load(void *opaque, int version_id) + static bool serial_thr_ipending_needed(void *opaque) + { + SerialState *s = opaque; ++ if (migrate_pre_2_2) { ++ return false; ++ } + + if (s->ier & UART_IER_THRI) { + bool expected_value = ((s->iir & UART_IIR_ID) == UART_IIR_THRI); +@@ -770,6 +774,10 @@ static const VMStateDescription vmstate_serial_xmit_fifo = { + static bool serial_fifo_timeout_timer_needed(void *opaque) + { + SerialState *s = (SerialState *)opaque; ++ if (migrate_pre_2_2) { ++ return false; ++ } ++ + return timer_pending(s->fifo_timeout_timer); + } + +@@ -787,6 +795,10 @@ static const VMStateDescription vmstate_serial_fifo_timeout_timer = { + static bool serial_timeout_ipending_needed(void *opaque) + { + SerialState *s = (SerialState *)opaque; ++ if (migrate_pre_2_2) { ++ return false; ++ } ++ + return s->timeout_ipending != 0; + } + +@@ -804,6 +816,10 @@ static const VMStateDescription vmstate_serial_timeout_ipending = { + static bool serial_poll_needed(void *opaque) + { + SerialState *s = (SerialState *)opaque; ++ if (migrate_pre_2_2) { ++ return false; ++ } ++ + return s->poll_msl >= 0; + } + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 53a99abc56..be4f9864cd 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -37,6 +37,278 @@ + #include "hw/virtio/virtio.h" + #include "hw/virtio/virtio-pci.h" + ++/* ++ * Mostly the same as hw_compat_6_0 ++ */ ++GlobalProperty hw_compat_rhel_8_5[] = { ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "gpex-pcihost", "allow-unmapped-accesses", "false" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "i8042", "extended-state", "false"}, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "nvme-ns", "eui64-default", "off"}, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "e1000", "init-vet", "off" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "e1000e", "init-vet", "off" }, ++}; ++const size_t hw_compat_rhel_8_5_len = G_N_ELEMENTS(hw_compat_rhel_8_5); ++ ++/* ++ * Mostly the same as hw_compat_5_2 ++ */ ++GlobalProperty hw_compat_rhel_8_4[] = { ++ /* hw_compat_rhel_8_4 from hw_compat_5_2 */ ++ { "ICH9-LPC", "smm-compat", "on"}, ++ /* hw_compat_rhel_8_4 from hw_compat_5_2 */ ++ { "PIIX4_PM", "smm-compat", "on"}, ++ /* hw_compat_rhel_8_4 from hw_compat_5_2 */ ++ { "virtio-blk-device", "report-discard-granularity", "off" }, ++ /* hw_compat_rhel_8_4 from hw_compat_5_2 */ ++ { "virtio-net-pci", "vectors", "3"}, ++}; ++const size_t hw_compat_rhel_8_4_len = G_N_ELEMENTS(hw_compat_rhel_8_4); ++ ++/* ++ * Mostly the same as hw_compat_5_1 ++ */ ++GlobalProperty hw_compat_rhel_8_3[] = { ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "vhost-scsi", "num_queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "vhost-user-blk", "num-queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "vhost-user-scsi", "num_queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "virtio-blk-device", "num-queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "virtio-scsi-device", "num_queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "nvme", "use-intel-id", "on"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "pvpanic", "events", "1"}, /* PVPANIC_PANICKED */ ++ /* hw_compat_rhel_8_3 bz 1912846 */ ++ { "pci-xhci", "x-rh-late-msi-cap", "off" }, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "virtio-pci", "x-ats-page-aligned", "off"}, ++}; ++const size_t hw_compat_rhel_8_3_len = G_N_ELEMENTS(hw_compat_rhel_8_3); ++ ++/* ++ * The same as hw_compat_4_2 + hw_compat_5_0 ++ */ ++GlobalProperty hw_compat_rhel_8_2[] = { ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-blk-device", "queue-size", "128"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-scsi-device", "virtqueue_size", "128"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-blk-device", "x-enable-wce-if-config-wce", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-blk-device", "seg-max-adjust", "off"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-scsi-device", "seg_max_adjust", "off"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "vhost-blk-device", "seg_max_adjust", "off"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "usb-host", "suppress-remote-wake", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "usb-redir", "suppress-remote-wake", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "qxl", "revision", "4" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "qxl-vga", "revision", "4" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "fw_cfg", "acpi-mr-restore", "false" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-device", "use-disabled-flag", "false" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "pci-host-bridge", "x-config-reg-migration-enabled", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "virtio-balloon-device", "page-poison", "false" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "vmport", "x-read-set-eax", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "vmport", "x-signal-unsupported-cmd", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "vmport", "x-report-vmx-type", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "vmport", "x-cmds-v2", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "virtio-device", "x-disable-legacy-check", "true" }, ++}; ++const size_t hw_compat_rhel_8_2_len = G_N_ELEMENTS(hw_compat_rhel_8_2); ++ ++/* ++ * The same as hw_compat_4_1 ++ */ ++GlobalProperty hw_compat_rhel_8_1[] = { ++ /* hw_compat_rhel_8_1 from hw_compat_4_1 */ ++ { "virtio-pci", "x-pcie-flr-init", "off" }, ++}; ++const size_t hw_compat_rhel_8_1_len = G_N_ELEMENTS(hw_compat_rhel_8_1); ++ ++/* The same as hw_compat_3_1 ++ * format of array has been changed by: ++ * 6c36bddf5340 ("machine: Use shorter format for GlobalProperty arrays") ++ */ ++GlobalProperty hw_compat_rhel_8_0[] = { ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "pcie-root-port", "x-speed", "2_5" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "pcie-root-port", "x-width", "1" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "memory-backend-file", "x-use-canonical-path-for-ramblock-id", "true" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "memory-backend-memfd", "x-use-canonical-path-for-ramblock-id", "true" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "tpm-crb", "ppi", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "tpm-tis", "ppi", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "usb-kbd", "serial", "42" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "usb-mouse", "serial", "42" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "usb-tablet", "serial", "42" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "virtio-blk-device", "discard", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "virtio-blk-device", "write-zeroes", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "VGA", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "secondary-vga", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "bochs-display", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "virtio-vga", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "virtio-gpu-device", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "virtio-device", "use-started", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 - that was added in 4.1 */ ++ { "pcie-root-port-base", "disable-acs", "true" }, ++}; ++const size_t hw_compat_rhel_8_0_len = G_N_ELEMENTS(hw_compat_rhel_8_0); ++ ++/* The same as hw_compat_3_0 + hw_compat_2_12 ++ * except that ++ * there's nothing in 3_0 ++ * migration.decompress-error-check=off was in 7.5 from bz 1584139 ++ */ ++GlobalProperty hw_compat_rhel_7_6[] = { ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "hda-audio", "use-timer", "false" }, ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "cirrus-vga", "global-vmstate", "true" }, ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "VGA", "global-vmstate", "true" }, ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "vmware-svga", "global-vmstate", "true" }, ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "qxl-vga", "global-vmstate", "true" }, ++}; ++const size_t hw_compat_rhel_7_6_len = G_N_ELEMENTS(hw_compat_rhel_7_6); ++ ++/* The same as hw_compat_2_11 + hw_compat_2_10 */ ++GlobalProperty hw_compat_rhel_7_5[] = { ++ /* hw_compat_rhel_7_5 from hw_compat_2_11 */ ++ { "hpet", "hpet-offset-saved", "false" }, ++ /* hw_compat_rhel_7_5 from hw_compat_2_11 */ ++ { "virtio-blk-pci", "vectors", "2" }, ++ /* hw_compat_rhel_7_5 from hw_compat_2_11 */ ++ { "vhost-user-blk-pci", "vectors", "2" }, ++ /* hw_compat_rhel_7_5 from hw_compat_2_11 ++ bz 1608778 modified for our naming */ ++ { "e1000-82540em", "migrate_tso_props", "off" }, ++ /* hw_compat_rhel_7_5 from hw_compat_2_10 */ ++ { "virtio-mouse-device", "wheel-axis", "false" }, ++ /* hw_compat_rhel_7_5 from hw_compat_2_10 */ ++ { "virtio-tablet-device", "wheel-axis", "false" }, ++ { "cirrus-vga", "vgamem_mb", "16" }, ++ { "migration", "decompress-error-check", "off" }, ++}; ++const size_t hw_compat_rhel_7_5_len = G_N_ELEMENTS(hw_compat_rhel_7_5); ++ ++/* Mostly like hw_compat_2_9 except ++ * x-mtu-bypass-backend, x-migrate-msix has already been ++ * backported to RHEL7.4. shpc was already on in 7.4. ++ */ ++GlobalProperty hw_compat_rhel_7_4[] = { ++ { "intel-iommu", "pt", "off" }, ++}; ++ ++const size_t hw_compat_rhel_7_4_len = G_N_ELEMENTS(hw_compat_rhel_7_4); ++/* Mostly like HW_COMPAT_2_6 + HW_COMPAT_2_7 + HW_COMPAT_2_8 except ++ * disable-modern, disable-legacy, page-per-vq have already been ++ * backported to RHEL7.3 ++ */ ++GlobalProperty hw_compat_rhel_7_3[] = { ++ { "virtio-mmio", "format_transport_address", "off" }, ++ { "virtio-serial-device", "emergency-write", "off" }, ++ { "ioapic", "version", "0x11" }, ++ { "intel-iommu", "x-buggy-eim", "true" }, ++ { "virtio-pci", "x-ignore-backend-features", "on" }, ++ { "fw_cfg_mem", "x-file-slots", stringify(0x10) }, ++ { "fw_cfg_io", "x-file-slots", stringify(0x10) }, ++ { "pflash_cfi01", "old-multiple-chip-handling", "on" }, ++ { TYPE_PCI_DEVICE, "x-pcie-extcap-init", "off" }, ++ { "virtio-pci", "x-pcie-deverr-init", "off" }, ++ { "virtio-pci", "x-pcie-lnkctl-init", "off" }, ++ { "virtio-pci", "x-pcie-pm-init", "off" }, ++ { "virtio-net-device", "x-mtu-bypass-backend", "off" }, ++ { "e1000e", "__redhat_e1000e_7_3_intr_state", "on" }, ++}; ++const size_t hw_compat_rhel_7_3_len = G_N_ELEMENTS(hw_compat_rhel_7_3); ++ ++/* Mostly like hw_compat_2_4 + 2_3 but: ++ * we don't need "any_layout" as it has been backported to 7.2 ++ */ ++GlobalProperty hw_compat_rhel_7_2[] = { ++ { "virtio-blk-device", "scsi", "true" }, ++ { "e1000-82540em", "extra_mac_registers", "off" }, ++ { "virtio-pci", "x-disable-pcie", "on" }, ++ { "virtio-pci", "migrate-extra", "off" }, ++ { "fw_cfg_mem", "dma_enabled", "off" }, ++ { "fw_cfg_io", "dma_enabled", "off" }, ++ { "isa-fdc", "fallback", "144" }, ++ /* Optional because not all virtio-pci devices support legacy mode */ ++ { "virtio-pci", "disable-modern", "on", .optional = true }, ++ { "virtio-pci", "disable-legacy", "off", .optional = true }, ++ { TYPE_PCI_DEVICE, "x-pcie-lnksta-dllla", "off" }, ++ { "virtio-pci", "page-per-vq", "on" }, ++ /* hw_compat_rhel_7_2 - introduced with 2.10.0 */ ++ { "migration", "send-section-footer", "off" }, ++ /* hw_compat_rhel_7_2 - introduced with 2.10.0 */ ++ { "migration", "store-global-state", "off", ++ }, ++}; ++const size_t hw_compat_rhel_7_2_len = G_N_ELEMENTS(hw_compat_rhel_7_2); ++ ++/* Mostly like hw_compat_2_1 but: ++ * we don't need virtio-scsi-pci since 7.0 already had that on ++ * ++ * RH: Note, qemu-extended-regs should have been enabled in the 7.1 ++ * machine type, but was accidentally turned off in 7.2 onwards. ++ */ ++GlobalProperty hw_compat_rhel_7_1[] = { ++ { "intel-hda-generic", "old_msi_addr", "on" }, ++ { "VGA", "qemu-extended-regs", "off" }, ++ { "secondary-vga", "qemu-extended-regs", "off" }, ++ { "usb-mouse", "usb_version", stringify(1) }, ++ { "usb-kbd", "usb_version", stringify(1) }, ++ { "virtio-pci", "virtio-pci-bus-master-bug-migration", "on" }, ++ { "virtio-blk-pci", "any_layout", "off" }, ++ { "virtio-balloon-pci", "any_layout", "off" }, ++ { "virtio-serial-pci", "any_layout", "off" }, ++ { "virtio-9p-pci", "any_layout", "off" }, ++ { "virtio-rng-pci", "any_layout", "off" }, ++ /* HW_COMPAT_RHEL7_1 - introduced with 2.10.0 */ ++ { "migration", "send-configuration", "off" }, ++}; ++const size_t hw_compat_rhel_7_1_len = G_N_ELEMENTS(hw_compat_rhel_7_1); ++ + GlobalProperty hw_compat_6_1[] = { + { "vhost-user-vsock-device", "seqpacket", "off" }, + { "nvme-ns", "shared", "off" }, +diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c +index 90851e730b..a91c5d7467 100644 +--- a/hw/display/vga-isa.c ++++ b/hw/display/vga-isa.c +@@ -85,7 +85,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) + } + + static Property vga_isa_properties[] = { +- DEFINE_PROP_UINT32("vgamem_mb", ISAVGAState, state.vram_size_mb, 8), ++ DEFINE_PROP_UINT32("vgamem_mb", ISAVGAState, state.vram_size_mb, 16), + DEFINE_PROP_END_OF_LIST(), + }; + +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 223dd3e05d..dda3f64f19 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -177,6 +177,8 @@ static void pc_init1(MachineState *machine, + smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)", + mc->name, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, ++ pcmc->smbios_stream_product, ++ pcmc->smbios_stream_version, + SMBIOS_ENTRY_POINT_21); + } + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index e1e100316d..235054a643 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -200,6 +200,8 @@ static void pc_q35_init(MachineState *machine) + smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", + mc->name, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, ++ pcmc->smbios_stream_product, ++ pcmc->smbios_stream_version, + SMBIOS_ENTRY_POINT_21); + } + +diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c +index ac96f7665a..d35bc1f0b0 100644 +--- a/hw/net/e1000e.c ++++ b/hw/net/e1000e.c +@@ -81,6 +81,12 @@ struct E1000EState { + + E1000ECore core; + bool init_vet; ++ ++ /* 7.3 had the intr_state field that was in the original e1000e code ++ * but that was removed prior to 2.7's release ++ */ ++ bool redhat_7_3_intr_state_enable; ++ uint32_t redhat_7_3_intr_state; + }; + + #define E1000E_MMIO_IDX 0 +@@ -96,6 +102,10 @@ struct E1000EState { + #define E1000E_MSIX_TABLE (0x0000) + #define E1000E_MSIX_PBA (0x2000) + ++/* Values as in RHEL 7.3 build and original upstream */ ++#define RH_E1000E_USE_MSI BIT(0) ++#define RH_E1000E_USE_MSIX BIT(1) ++ + static uint64_t + e1000e_mmio_read(void *opaque, hwaddr addr, unsigned size) + { +@@ -307,6 +317,8 @@ e1000e_init_msix(E1000EState *s) + } else { + if (!e1000e_use_msix_vectors(s, E1000E_MSIX_VEC_NUM)) { + msix_uninit(d, &s->msix, &s->msix); ++ } else { ++ s->redhat_7_3_intr_state |= RH_E1000E_USE_MSIX; + } + } + } +@@ -478,6 +490,8 @@ static void e1000e_pci_realize(PCIDevice *pci_dev, Error **errp) + ret = msi_init(PCI_DEVICE(s), 0xD0, 1, true, false, NULL); + if (ret) { + trace_e1000e_msi_init_fail(ret); ++ } else { ++ s->redhat_7_3_intr_state |= RH_E1000E_USE_MSI; + } + + if (e1000e_add_pm_capability(pci_dev, e1000e_pmrb_offset, +@@ -605,6 +619,11 @@ static const VMStateDescription e1000e_vmstate_intr_timer = { + VMSTATE_STRUCT_ARRAY(_f, _s, _num, 0, \ + e1000e_vmstate_intr_timer, E1000IntrDelayTimer) + ++static bool rhel_7_3_check(void *opaque, int version_id) ++{ ++ return ((E1000EState *)opaque)->redhat_7_3_intr_state_enable; ++} ++ + static const VMStateDescription e1000e_vmstate = { + .name = "e1000e", + .version_id = 1, +@@ -616,6 +635,7 @@ static const VMStateDescription e1000e_vmstate = { + VMSTATE_MSIX(parent_obj, E1000EState), + + VMSTATE_UINT32(ioaddr, E1000EState), ++ VMSTATE_UINT32_TEST(redhat_7_3_intr_state, E1000EState, rhel_7_3_check), + VMSTATE_UINT32(core.rxbuf_min_shift, E1000EState), + VMSTATE_UINT8(core.rx_desc_len, E1000EState), + VMSTATE_UINT32_ARRAY(core.rxbuf_sizes, E1000EState, +@@ -664,6 +684,8 @@ static PropertyInfo e1000e_prop_disable_vnet, + + static Property e1000e_properties[] = { + DEFINE_NIC_PROPERTIES(E1000EState, conf), ++ DEFINE_PROP_BOOL("__redhat_e1000e_7_3_intr_state", E1000EState, ++ redhat_7_3_intr_state_enable, false), + DEFINE_PROP_SIGNED("disable_vnet_hdr", E1000EState, disable_vnet, false, + e1000e_prop_disable_vnet, bool), + DEFINE_PROP_SIGNED("subsys_ven", E1000EState, subsys_ven, +diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c +index 90b4fc63ce..3ffb9dd22c 100644 +--- a/hw/net/rtl8139.c ++++ b/hw/net/rtl8139.c +@@ -3179,7 +3179,7 @@ static int rtl8139_pre_save(void *opaque) + + static const VMStateDescription vmstate_rtl8139 = { + .name = "rtl8139", +- .version_id = 5, ++ .version_id = 4, + .minimum_version_id = 3, + .post_load = rtl8139_post_load, + .pre_save = rtl8139_pre_save, +@@ -3260,7 +3260,9 @@ static const VMStateDescription vmstate_rtl8139 = { + VMSTATE_UINT32(tally_counters.TxMCol, RTL8139State), + VMSTATE_UINT64(tally_counters.RxOkPhy, RTL8139State), + VMSTATE_UINT64(tally_counters.RxOkBrd, RTL8139State), ++#if 0 /* Disabled for Red Hat Enterprise Linux bz 1420195 */ + VMSTATE_UINT32_V(tally_counters.RxOkMul, RTL8139State, 5), ++#endif + VMSTATE_UINT16(tally_counters.TxAbt, RTL8139State), + VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), + +diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c +index 4fbafddb22..2f120c6e70 100644 +--- a/hw/rtc/mc146818rtc.c ++++ b/hw/rtc/mc146818rtc.c +@@ -43,6 +43,7 @@ + #include "qapi/qapi-events-misc-target.h" + #include "qapi/visitor.h" + #include "hw/rtc/mc146818rtc_regs.h" ++#include "migration/migration.h" + + #ifdef TARGET_I386 + #include "qapi/qapi-commands-misc-target.h" +@@ -821,6 +822,11 @@ static int rtc_post_load(void *opaque, int version_id) + static bool rtc_irq_reinject_on_ack_count_needed(void *opaque) + { + RTCState *s = (RTCState *)opaque; ++ ++ if (migrate_pre_2_2) { ++ return false; ++ } ++ + return s->irq_reinject_on_ack_count != 0; + } + +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index 7397e56737..3a4bb894ba 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -57,6 +57,9 @@ static bool smbios_legacy = true; + static bool smbios_uuid_encoded = true; + /* end: legacy structures & constants for <= 2.0 machines */ + ++/* Set to true for modern Windows 10 HardwareID-6 compat */ ++static bool smbios_type2_required; ++ + + uint8_t *smbios_tables; + size_t smbios_tables_len; +@@ -619,7 +622,7 @@ static void smbios_build_type_1_table(void) + + static void smbios_build_type_2_table(void) + { +- SMBIOS_BUILD_TABLE_PRE(2, 0x200, false); /* optional */ ++ SMBIOS_BUILD_TABLE_PRE(2, 0x200, smbios_type2_required); + + SMBIOS_TABLE_SET_STR(2, manufacturer_str, type2.manufacturer); + SMBIOS_TABLE_SET_STR(2, product_str, type2.product); +@@ -888,7 +891,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) + + void smbios_set_defaults(const char *manufacturer, const char *product, + const char *version, bool legacy_mode, +- bool uuid_encoded, SmbiosEntryPointType ep_type) ++ bool uuid_encoded, ++ const char *stream_product, ++ const char *stream_version, ++ SmbiosEntryPointType ep_type) + { + smbios_have_defaults = true; + smbios_legacy = legacy_mode; +@@ -909,11 +915,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, + g_free(smbios_entries); + } + ++ /* ++ * If @stream_product & @stream_version are non-NULL, then ++ * we're following rules for new Windows driver support. ++ * The data we have to report is defined in this doc: ++ * ++ * https://docs.microsoft.com/en-us/windows-hardware/drivers/install/specifying-hardware-ids-for-a-computer ++ * ++ * The Windows drivers are written to expect use of the ++ * scheme documented as "HardwareID-6" against Windows 10, ++ * which uses SMBIOS System (Type 1) and Base Board (Type 2) ++ * tables and will match on ++ * ++ * System Manufacturer = Red Hat (@manufacturer) ++ * System SKU Number = 8.2.0 (@stream_version) ++ * Baseboard Manufacturer = Red Hat (@manufacturer) ++ * Baseboard Product = RHEL-AV (@stream_product) ++ * ++ * NB, SKU must be changed with each RHEL-AV release ++ * ++ * Other fields can be freely used by applications using ++ * QEMU. For example apps can use the "System product" ++ * and "System version" to identify themselves. ++ * ++ * We get 'System Manufacturer' and 'Baseboard Manufacturer' ++ */ + SMBIOS_SET_DEFAULT(type1.manufacturer, manufacturer); + SMBIOS_SET_DEFAULT(type1.product, product); + SMBIOS_SET_DEFAULT(type1.version, version); ++ SMBIOS_SET_DEFAULT(type1.family, "Red Hat Enterprise Linux"); ++ if (stream_version != NULL) { ++ SMBIOS_SET_DEFAULT(type1.sku, stream_version); ++ } + SMBIOS_SET_DEFAULT(type2.manufacturer, manufacturer); +- SMBIOS_SET_DEFAULT(type2.product, product); ++ if (stream_product != NULL) { ++ SMBIOS_SET_DEFAULT(type2.product, stream_product); ++ smbios_type2_required = true; ++ } else { ++ SMBIOS_SET_DEFAULT(type2.product, product); ++ } + SMBIOS_SET_DEFAULT(type2.version, version); + SMBIOS_SET_DEFAULT(type3.manufacturer, manufacturer); + SMBIOS_SET_DEFAULT(type3.version, version); +diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c +index 050875b497..32935da46c 100644 +--- a/hw/timer/i8254_common.c ++++ b/hw/timer/i8254_common.c +@@ -231,7 +231,7 @@ static const VMStateDescription vmstate_pit_common = { + .pre_save = pit_dispatch_pre_save, + .post_load = pit_dispatch_post_load, + .fields = (VMStateField[]) { +- VMSTATE_UINT32_V(channels[0].irq_disabled, PITCommonState, 3), ++ VMSTATE_UINT32(channels[0].irq_disabled, PITCommonState), /* qemu-kvm's v2 had 'flags' here */ + VMSTATE_STRUCT_ARRAY(channels, PITCommonState, 3, 2, + vmstate_pit_channel, PITChannelState), + VMSTATE_INT64(channels[0].next_transition_time, +diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c +index d1b5657d72..7930b868fa 100644 +--- a/hw/usb/hcd-uhci.c ++++ b/hw/usb/hcd-uhci.c +@@ -1166,11 +1166,13 @@ void usb_uhci_common_realize(PCIDevice *dev, Error **errp) + UHCIState *s = UHCI(dev); + uint8_t *pci_conf = s->dev.config; + int i; ++ int irq_pin; + + pci_conf[PCI_CLASS_PROG] = 0x00; + /* TODO: reset value should be 0. */ + pci_conf[USB_SBRN] = USB_RELEASE_1; /* release number */ +- pci_config_set_interrupt_pin(pci_conf, u->info.irq_pin + 1); ++ irq_pin = u->info.irq_pin; ++ pci_config_set_interrupt_pin(pci_conf, irq_pin + 1); + s->irq = pci_allocate_irq(dev); + + if (s->masterbus) { +diff --git a/hw/usb/hcd-xhci-pci.c b/hw/usb/hcd-xhci-pci.c +index e934b1a5b1..e18b05e528 100644 +--- a/hw/usb/hcd-xhci-pci.c ++++ b/hw/usb/hcd-xhci-pci.c +@@ -104,6 +104,33 @@ static int xhci_pci_vmstate_post_load(void *opaque, int version_id) + return 0; + } + ++/* RH bz 1912846 */ ++static bool usb_xhci_pci_add_msi(struct PCIDevice *dev, Error **errp) ++{ ++ int ret; ++ Error *err = NULL; ++ XHCIPciState *s = XHCI_PCI(dev); ++ ++ ret = msi_init(dev, 0x70, s->xhci.numintrs, true, false, &err); ++ /* ++ * Any error other than -ENOTSUP(board's MSI support is broken) ++ * is a programming error ++ */ ++ assert(!ret || ret == -ENOTSUP); ++ if (ret && s->msi == ON_OFF_AUTO_ON) { ++ /* Can't satisfy user's explicit msi=on request, fail */ ++ error_append_hint(&err, "You have to use msi=auto (default) or " ++ "msi=off with this machine type.\n"); ++ error_propagate(errp, err); ++ return true; ++ } ++ assert(!err || s->msi == ON_OFF_AUTO_AUTO); ++ /* With msi=auto, we fall back to MSI off silently */ ++ error_free(err); ++ ++ return false; ++} ++ + static void usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp) + { + int ret; +@@ -125,23 +152,12 @@ static void usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp) + s->xhci.nec_quirks = true; + } + +- if (s->msi != ON_OFF_AUTO_OFF) { +- ret = msi_init(dev, 0x70, s->xhci.numintrs, true, false, &err); +- /* +- * Any error other than -ENOTSUP(board's MSI support is broken) +- * is a programming error +- */ +- assert(!ret || ret == -ENOTSUP); +- if (ret && s->msi == ON_OFF_AUTO_ON) { +- /* Can't satisfy user's explicit msi=on request, fail */ +- error_append_hint(&err, "You have to use msi=auto (default) or " +- "msi=off with this machine type.\n"); ++ if (s->msi != ON_OFF_AUTO_OFF && s->rh_late_msi_cap) { ++ /* This gives the behaviour from 5.2.0 onwards, lspci shows 90,a0,70 */ ++ if (usb_xhci_pci_add_msi(dev, &err)) { + error_propagate(errp, err); + return; + } +- assert(!err || s->msi == ON_OFF_AUTO_AUTO); +- /* With msi=auto, we fall back to MSI off silently */ +- error_free(err); + } + pci_register_bar(dev, 0, + PCI_BASE_ADDRESS_SPACE_MEMORY | +@@ -154,6 +170,14 @@ static void usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp) + assert(ret > 0); + } + ++ /* RH bz 1912846 */ ++ if (s->msi != ON_OFF_AUTO_OFF && !s->rh_late_msi_cap) { ++ /* This gives the older RH machine behaviour, lspci shows 90,70,a0 */ ++ if (usb_xhci_pci_add_msi(dev, &err)) { ++ error_propagate(errp, err); ++ return; ++ } ++ } + if (s->msix != ON_OFF_AUTO_OFF) { + /* TODO check for errors, and should fail when msix=on */ + msix_init(dev, s->xhci.numintrs, +@@ -198,11 +222,18 @@ static void xhci_instance_init(Object *obj) + qdev_alias_all_properties(DEVICE(&s->xhci), obj); + } + ++static Property xhci_pci_properties[] = { ++ /* RH bz 1912846 */ ++ DEFINE_PROP_BOOL("x-rh-late-msi-cap", XHCIPciState, rh_late_msi_cap, true), ++ DEFINE_PROP_END_OF_LIST() ++}; ++ + static void xhci_class_init(ObjectClass *klass, void *data) + { + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + ++ device_class_set_props(dc, xhci_pci_properties); + dc->reset = xhci_pci_reset; + dc->vmsd = &vmstate_xhci_pci; + set_bit(DEVICE_CATEGORY_USB, dc->categories); +diff --git a/hw/usb/hcd-xhci-pci.h b/hw/usb/hcd-xhci-pci.h +index c193f79443..086a1feb1e 100644 +--- a/hw/usb/hcd-xhci-pci.h ++++ b/hw/usb/hcd-xhci-pci.h +@@ -39,6 +39,7 @@ typedef struct XHCIPciState { + XHCIState xhci; + OnOffAuto msi; + OnOffAuto msix; ++ bool rh_late_msi_cap; /* bz 1912846 */ + } XHCIPciState; + + #endif +diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c +index e01700039b..d5ea13356c 100644 +--- a/hw/usb/hcd-xhci.c ++++ b/hw/usb/hcd-xhci.c +@@ -3494,9 +3494,27 @@ static const VMStateDescription vmstate_xhci_slot = { + } + }; + ++static int xhci_event_pre_save(void *opaque) ++{ ++ XHCIEvent *s = opaque; ++ ++ s->cve_2014_5263_a = ((uint8_t *)&s->type)[0]; ++ s->cve_2014_5263_b = ((uint8_t *)&s->type)[1]; ++ ++ return 0; ++} ++ ++bool migrate_cve_2014_5263_xhci_fields; ++ ++static bool xhci_event_cve_2014_5263(void *opaque, int version_id) ++{ ++ return migrate_cve_2014_5263_xhci_fields; ++} ++ + static const VMStateDescription vmstate_xhci_event = { + .name = "xhci-event", + .version_id = 1, ++ .pre_save = xhci_event_pre_save, + .fields = (VMStateField[]) { + VMSTATE_UINT32(type, XHCIEvent), + VMSTATE_UINT32(ccode, XHCIEvent), +@@ -3505,6 +3523,8 @@ static const VMStateDescription vmstate_xhci_event = { + VMSTATE_UINT32(flags, XHCIEvent), + VMSTATE_UINT8(slotid, XHCIEvent), + VMSTATE_UINT8(epid, XHCIEvent), ++ VMSTATE_UINT8_TEST(cve_2014_5263_a, XHCIEvent, xhci_event_cve_2014_5263), ++ VMSTATE_UINT8_TEST(cve_2014_5263_b, XHCIEvent, xhci_event_cve_2014_5263), + VMSTATE_END_OF_LIST() + } + }; +diff --git a/hw/usb/hcd-xhci.h b/hw/usb/hcd-xhci.h +index 98f598382a..50a7b6f6c4 100644 +--- a/hw/usb/hcd-xhci.h ++++ b/hw/usb/hcd-xhci.h +@@ -149,6 +149,8 @@ typedef struct XHCIEvent { + uint32_t flags; + uint8_t slotid; + uint8_t epid; ++ uint8_t cve_2014_5263_a; ++ uint8_t cve_2014_5263_b; + } XHCIEvent; + + typedef struct XHCIInterrupter { +diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h +index 7ca92843c6..21abfd8447 100644 +--- a/include/hw/acpi/ich9.h ++++ b/include/hw/acpi/ich9.h +@@ -68,6 +68,9 @@ typedef struct ICH9LPCPMRegs { + bool smm_compat; + bool enable_tco; + TCOIORegs tco_regs; ++ ++ /* RH addition, see bz 1489800 */ ++ bool force_rev1_fadt; + } ICH9LPCPMRegs; + + #define ACPI_PM_PROP_TCO_ENABLED "enable_tco" +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 9c1c190104..8bba96ef2b 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -441,4 +441,40 @@ extern const size_t hw_compat_2_2_len; + extern GlobalProperty hw_compat_2_1[]; + extern const size_t hw_compat_2_1_len; + ++extern GlobalProperty hw_compat_rhel_8_5[]; ++extern const size_t hw_compat_rhel_8_5_len; ++ ++extern GlobalProperty hw_compat_rhel_8_4[]; ++extern const size_t hw_compat_rhel_8_4_len; ++ ++extern GlobalProperty hw_compat_rhel_8_3[]; ++extern const size_t hw_compat_rhel_8_3_len; ++ ++extern GlobalProperty hw_compat_rhel_8_2[]; ++extern const size_t hw_compat_rhel_8_2_len; ++ ++extern GlobalProperty hw_compat_rhel_8_1[]; ++extern const size_t hw_compat_rhel_8_1_len; ++ ++extern GlobalProperty hw_compat_rhel_8_0[]; ++extern const size_t hw_compat_rhel_8_0_len; ++ ++extern GlobalProperty hw_compat_rhel_7_6[]; ++extern const size_t hw_compat_rhel_7_6_len; ++ ++extern GlobalProperty hw_compat_rhel_7_5[]; ++extern const size_t hw_compat_rhel_7_5_len; ++ ++extern GlobalProperty hw_compat_rhel_7_4[]; ++extern const size_t hw_compat_rhel_7_4_len; ++ ++extern GlobalProperty hw_compat_rhel_7_3[]; ++extern const size_t hw_compat_rhel_7_3_len; ++ ++extern GlobalProperty hw_compat_rhel_7_2[]; ++extern const size_t hw_compat_rhel_7_2_len; ++ ++extern GlobalProperty hw_compat_rhel_7_1[]; ++extern const size_t hw_compat_rhel_7_1_len; ++ + #endif +diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h +index 5a0dd0c8cf..2cb1ec2bab 100644 +--- a/include/hw/firmware/smbios.h ++++ b/include/hw/firmware/smbios.h +@@ -278,7 +278,10 @@ void smbios_entry_add(QemuOpts *opts, Error **errp); + void smbios_set_cpuid(uint32_t version, uint32_t features); + void smbios_set_defaults(const char *manufacturer, const char *product, + const char *version, bool legacy_mode, +- bool uuid_encoded, SmbiosEntryPointType ep_type); ++ bool uuid_encoded, ++ const char *stream_product, ++ const char *stream_version, ++ SmbiosEntryPointType ep_type); + uint8_t *smbios_get_table_legacy(MachineState *ms, size_t *length); + void smbios_get_tables(MachineState *ms, + const struct smbios_phys_mem_area *mem_array, +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 9ab39e428f..7ccc9a1a07 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -107,6 +107,9 @@ struct PCMachineClass { + bool smbios_defaults; + bool smbios_legacy_mode; + bool smbios_uuid_encoded; ++ /* New fields needed for Windows HardwareID-6 matching */ ++ const char *smbios_stream_product; ++ const char *smbios_stream_version; + + /* RAM / address space compat: */ + bool gigabyte_align; +diff --git a/include/hw/usb.h b/include/hw/usb.h +index 33668dd0a9..e6b2fe72da 100644 +--- a/include/hw/usb.h ++++ b/include/hw/usb.h +@@ -582,4 +582,7 @@ void usb_pcap_init(FILE *fp); + void usb_pcap_ctrl(USBPacket *p, bool setup); + void usb_pcap_data(USBPacket *p, bool setup); + ++/* hcd-xhci.c -- rhel7.0.0 machine type compatibility */ ++extern bool migrate_cve_2014_5263_xhci_fields; ++ + #endif +diff --git a/migration/migration.c b/migration/migration.c +index abaf6f9e3d..a87ff01b81 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -164,6 +164,8 @@ INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, + MIGRATION_CAPABILITY_X_COLO, + MIGRATION_CAPABILITY_VALIDATE_UUID); + ++bool migrate_pre_2_2; ++ + /* When we add fault tolerance, we could have several + migrations at once. For now we don't need to add + dynamic creation of migration */ +diff --git a/migration/migration.h b/migration/migration.h +index 8130b703eb..d016cedd9d 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -381,6 +381,11 @@ bool check_dirty_bitmap_mig_alias_map(const BitmapMigrationNodeAliasList *bbm, + void migrate_add_address(SocketAddress *address); + + int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque); ++/* ++ * Disables a load of subsections that were added in 2.2/rh7.2 for backwards ++ * migration compatibility. ++ */ ++extern bool migrate_pre_2_2; + + #define qemu_ram_foreach_block \ + #warning "Use foreach_not_ignored_block in migration code" +-- +2.27.0 + diff --git a/SOURCES/0008-Add-aarch64-machine-types.patch b/SOURCES/0008-Add-aarch64-machine-types.patch new file mode 100644 index 0000000..2e8c417 --- /dev/null +++ b/SOURCES/0008-Add-aarch64-machine-types.patch @@ -0,0 +1,405 @@ +From 670e90f5cbd92189155e079b8c6e2aafdf82d162 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 19 Oct 2018 12:53:31 +0200 +Subject: Add aarch64 machine types + +Adding changes to add RHEL machine types for aarch64 architecture. + +Signed-off-by: Miroslav Rezanina + +Rebase notes (4.0.0): +- Use upstream compat handling + +Rebase notes (4.1.0-rc0): +- Removed a15memmap (upstream) +- Use virt_flash_create in rhel800_virt_instance_init + +Rebase notes (4.2.0-rc0): +- Set numa_mem_supported + +Rebase notes (4.2.0-rc3): +- aarch64: Add virt-rhel8.2.0 machine type for ARM (patch 92246) +- aarch64: virt: Allow more than 1TB of RAM (patch 92249) +- aarch64: virt: Allow PCDIMM instantiation (patch 92247) +- aarch64: virt: Enhance the comment related to gic-version (patch 92248) + +Rebase notes (5.0.0): +- Set default_ram_id in rhel_machine_class_init +- Added setting acpi properties + +Rebase notes (5.1.0): +- Added ras property +- Added to virt_machine_device_unplug_cb to machine type (upstream) +- added mte property (upstream) + +Rebase notes (weekly-210210): +- Added support for oem fields to machine type + +Rebase notes (weekly-210303): +- Use rhel-8.4.0 hw compat + +Rebase notes (6.0.0-rc2): +- renamed oem-id and oem-table-id to x-oem-id and x-oem-table-id + +Rebase notes (210623): +- Protect TPM functions by CONFIG_TPM ifdef + +Rebase notes (6.1.0-rc0): +- Add support for default_bus_bypass_iommu + +Merged patches (4.0.0): +- 7bfdb4c aarch64: Add virt-rhel8.0.0 machine type for ARM +- 3433e69 aarch64: Set virt-rhel8.0.0 max_cpus to 512 +- 4d20863 aarch64: Use 256MB ECAM region by default + +Merged patches (4.1.0): +- c3e39ef aarch64: Add virt-rhel8.1.0 machine type for ARM +- 59a46d1 aarch64: Allow ARM VIRT iommu option in RHEL8.1 machine + +Merged patches (5.2.0 rc0): +- 12990ad hw/arm: Changes to rhel820 machine +- 46d5a79 hw/arm: Introduce rhel_virt_instance_init() helper +- 098954a hw/arm: Add rhel830 machine type +- ee8e99d arm: Set correct max_cpus value on virt-rhel* machine types +- e5edd38 RHEL-only: arm/virt: Allow the TPM_TIS_SYSBUS device dynamic allocation in machvirt +- 6d7ba66 machine types/numa: set numa_mem_supported on old machine types (partialy) +- 25c5644 machine_types/numa: compatibility for auto_enable_numa_with_memdev (partialy) + +Merged patches (6.0): +- 078fadb5da AArch64 machine types cleanup +- ea7b7425fa hw/arm/virt: Add 8.4 Machine type + +Merged patches (weekly-210609): +- 73b1578882 hw/arm/virt: Add 8.5 machine type +- 5333038d11 hw/arm/virt: Disable PL011 clock migration through hw_compat_rhel_8_3 +- 63adb8ae86 arm/virt: Register highmem and gic-version as class properties + +Merged patches (weekly-211027): +- 86e3057c0a hw: arm: virt: Add hw_compat_rhel_8_5 to 8.5 machine type +--- + hw/arm/virt.c | 226 +++++++++++++++++++++++++++++++++++++++++- + hw/core/machine.c | 2 + + include/hw/arm/virt.h | 8 ++ + 3 files changed, 235 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 5de4d9d73b..c77d26ab13 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -79,6 +79,7 @@ + #include "hw/char/pl011.h" + #include "qemu/guest-random.h" + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ + static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ + void *data) \ +@@ -105,7 +106,48 @@ + DEFINE_VIRT_MACHINE_LATEST(major, minor, true) + #define DEFINE_VIRT_MACHINE(major, minor) \ + DEFINE_VIRT_MACHINE_LATEST(major, minor, false) +- ++#endif /* disabled for RHEL */ ++ ++#define DEFINE_RHEL_MACHINE_LATEST(m, n, s, latest) \ ++ static void rhel##m##n##s##_virt_class_init(ObjectClass *oc, \ ++ void *data) \ ++ { \ ++ MachineClass *mc = MACHINE_CLASS(oc); \ ++ rhel##m##n##s##_virt_options(mc); \ ++ mc->desc = "RHEL " # m "." # n "." # s " ARM Virtual Machine"; \ ++ if (latest) { \ ++ mc->alias = "virt"; \ ++ mc->is_default = 1; \ ++ } \ ++ } \ ++ static const TypeInfo rhel##m##n##s##_machvirt_info = { \ ++ .name = MACHINE_TYPE_NAME("virt-rhel" # m "." # n "." # s), \ ++ .parent = TYPE_RHEL_MACHINE, \ ++ .class_init = rhel##m##n##s##_virt_class_init, \ ++ }; \ ++ static void rhel##m##n##s##_machvirt_init(void) \ ++ { \ ++ type_register_static(&rhel##m##n##s##_machvirt_info); \ ++ } \ ++ type_init(rhel##m##n##s##_machvirt_init); ++ ++#define DEFINE_RHEL_MACHINE_AS_LATEST(major, minor, subminor) \ ++ DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, true) ++#define DEFINE_RHEL_MACHINE(major, minor, subminor) \ ++ DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, false) ++ ++/* This variable is for changes to properties that are RHEL specific, ++ * different to the current upstream and to be applied to the latest ++ * machine type. ++ */ ++GlobalProperty arm_rhel_compat[] = { ++ { ++ .driver = "virtio-net-pci", ++ .property = "romfile", ++ .value = "", ++ }, ++}; ++const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat); + + /* Number of external interrupt lines to configure the GIC with */ + #define NUM_IRQS 256 +@@ -2180,6 +2222,7 @@ static void machvirt_init(MachineState *machine) + qemu_add_machine_init_done_notifier(&vms->machine_done); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static bool virt_get_secure(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -2207,6 +2250,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) + + vms->virt = value; + } ++#endif /* disabled for RHEL */ + + static bool virt_get_highmem(Object *obj, Error **errp) + { +@@ -2304,6 +2348,7 @@ static void virt_set_acpi(Object *obj, Visitor *v, const char *name, + visit_type_OnOffAuto(v, name, &vms->acpi, errp); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static bool virt_get_ras(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -2331,6 +2376,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) + + vms->mte = value; + } ++#endif /* disabled for RHEL */ + + static char *virt_get_gic_version(Object *obj, Error **errp) + { +@@ -2666,6 +2712,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) + return fixed_ipa ? 0 : requested_pa_size; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void virt_machine_class_init(ObjectClass *oc, void *data) + { + MachineClass *mc = MACHINE_CLASS(oc); +@@ -3031,3 +3078,180 @@ static void virt_machine_2_6_options(MachineClass *mc) + vmc->no_pmu = true; + } + DEFINE_VIRT_MACHINE(2, 6) ++#endif /* disabled for RHEL */ ++ ++static void rhel_machine_class_init(ObjectClass *oc, void *data) ++{ ++ MachineClass *mc = MACHINE_CLASS(oc); ++ HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); ++ ++ mc->family = "virt-rhel-Z"; ++ mc->init = machvirt_init; ++ /* Maximum supported VCPU count for all virt-rhel* machines */ ++ mc->max_cpus = 384; ++#ifdef CONFIG_TPM ++ machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS); ++#endif ++ mc->block_default_type = IF_VIRTIO; ++ mc->no_cdrom = 1; ++ mc->pci_allow_0_address = true; ++ /* We know we will never create a pre-ARMv7 CPU which needs 1K pages */ ++ mc->minimum_page_bits = 12; ++ mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids; ++ mc->cpu_index_to_instance_props = virt_cpu_index_to_props; ++ mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a57"); ++ mc->get_default_cpu_node_id = virt_get_default_cpu_node_id; ++ mc->kvm_type = virt_kvm_type; ++ assert(!mc->get_hotplug_handler); ++ mc->get_hotplug_handler = virt_machine_get_hotplug_handler; ++ hc->pre_plug = virt_machine_device_pre_plug_cb; ++ hc->plug = virt_machine_device_plug_cb; ++ hc->unplug_request = virt_machine_device_unplug_request_cb; ++ hc->unplug = virt_machine_device_unplug_cb; ++ mc->nvdimm_supported = true; ++ mc->auto_enable_numa_with_memhp = true; ++ mc->auto_enable_numa_with_memdev = true; ++ mc->default_ram_id = "mach-virt.ram"; ++ ++ object_class_property_add(oc, "acpi", "OnOffAuto", ++ virt_get_acpi, virt_set_acpi, ++ NULL, NULL); ++ object_class_property_set_description(oc, "acpi", ++ "Enable ACPI"); ++ ++ object_class_property_add_bool(oc, "highmem", virt_get_highmem, ++ virt_set_highmem); ++ object_class_property_set_description(oc, "highmem", ++ "Set on/off to enable/disable using " ++ "physical address space above 32 bits"); ++ ++ object_class_property_add_str(oc, "gic-version", virt_get_gic_version, ++ virt_set_gic_version); ++ object_class_property_set_description(oc, "gic-version", ++ "Set GIC version. " ++ "Valid values are 2, 3, host and max"); ++ ++ object_class_property_add_str(oc, "x-oem-id", ++ virt_get_oem_id, ++ virt_set_oem_id); ++ object_class_property_set_description(oc, "x-oem-id", ++ "Override the default value of field OEMID " ++ "in ACPI table header." ++ "The string may be up to 6 bytes in size"); ++ ++ object_class_property_add_str(oc, "x-oem-table-id", ++ virt_get_oem_table_id, ++ virt_set_oem_table_id); ++ object_class_property_set_description(oc, "x-oem-table-id", ++ "Override the default value of field OEM Table ID " ++ "in ACPI table header." ++ "The string may be up to 8 bytes in size"); ++ object_class_property_add_bool(oc, "default_bus_bypass_iommu", ++ virt_get_default_bus_bypass_iommu, ++ virt_set_default_bus_bypass_iommu); ++ ++} ++ ++static void rhel_virt_instance_init(Object *obj) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(obj); ++ VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); ++ ++ /* EL3 is disabled by default and non-configurable for RHEL */ ++ vms->secure = false; ++ ++ /* EL2 is disabled by default and non-configurable for RHEL */ ++ vms->virt = false; ++ ++ /* High memory is enabled by default */ ++ vms->highmem = true; ++ vms->gic_version = VIRT_GIC_VERSION_NOSEL; ++ ++ vms->highmem_ecam = !vmc->no_highmem_ecam; ++ ++ if (vmc->no_its) { ++ vms->its = false; ++ } else { ++ /* Default allows ITS instantiation */ ++ vms->its = true; ++ object_property_add_bool(obj, "its", virt_get_its, ++ virt_set_its); ++ object_property_set_description(obj, "its", ++ "Set on/off to enable/disable " ++ "ITS instantiation"); ++ } ++ ++ /* Default disallows iommu instantiation */ ++ vms->iommu = VIRT_IOMMU_NONE; ++ object_property_add_str(obj, "iommu", virt_get_iommu, virt_set_iommu); ++ object_property_set_description(obj, "iommu", ++ "Set the IOMMU type. " ++ "Valid values are none and smmuv3"); ++ ++ /* Default disallows RAS instantiation and is non-configurable for RHEL */ ++ vms->ras = false; ++ ++ /* MTE is disabled by default and non-configurable for RHEL */ ++ vms->mte = false; ++ ++ vms->default_bus_bypass_iommu = false; ++ vms->irqmap = a15irqmap; ++ ++ virt_flash_create(vms); ++ vms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); ++ vms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); ++ ++} ++ ++static const TypeInfo rhel_machine_info = { ++ .name = TYPE_RHEL_MACHINE, ++ .parent = TYPE_MACHINE, ++ .abstract = true, ++ .instance_size = sizeof(VirtMachineState), ++ .class_size = sizeof(VirtMachineClass), ++ .class_init = rhel_machine_class_init, ++ .instance_init = rhel_virt_instance_init, ++ .interfaces = (InterfaceInfo[]) { ++ { TYPE_HOTPLUG_HANDLER }, ++ { } ++ }, ++}; ++ ++static void rhel_machine_init(void) ++{ ++ type_register_static(&rhel_machine_info); ++} ++type_init(rhel_machine_init); ++ ++static void rhel850_virt_options(MachineClass *mc) ++{ ++ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); ++} ++DEFINE_RHEL_MACHINE_AS_LATEST(8, 5, 0) ++ ++static void rhel840_virt_options(MachineClass *mc) ++{ ++ rhel850_virt_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_4, hw_compat_rhel_8_4_len); ++} ++DEFINE_RHEL_MACHINE(8, 4, 0) ++ ++static void rhel830_virt_options(MachineClass *mc) ++{ ++ VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); ++ ++ rhel840_virt_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_3, hw_compat_rhel_8_3_len); ++ vmc->no_kvm_steal_time = true; ++} ++DEFINE_RHEL_MACHINE(8, 3, 0) ++ ++static void rhel820_virt_options(MachineClass *mc) ++{ ++ rhel830_virt_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_2, hw_compat_rhel_8_2_len); ++ mc->numa_mem_supported = true; ++ mc->auto_enable_numa_with_memdev = false; ++} ++DEFINE_RHEL_MACHINE(8, 2, 0) +diff --git a/hw/core/machine.c b/hw/core/machine.c +index be4f9864cd..62febde5aa 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -87,6 +87,8 @@ GlobalProperty hw_compat_rhel_8_3[] = { + { "nvme", "use-intel-id", "on"}, + /* hw_compat_rhel_8_3 from hw_compat_5_1 */ + { "pvpanic", "events", "1"}, /* PVPANIC_PANICKED */ ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "pl011", "migrate-clk", "off" }, + /* hw_compat_rhel_8_3 bz 1912846 */ + { "pci-xhci", "x-rh-late-msi-cap", "off" }, + /* hw_compat_rhel_8_3 from hw_compat_5_1 */ +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index dc6b66ffc8..9364628847 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -175,9 +175,17 @@ struct VirtMachineState { + + #define VIRT_ECAM_ID(high) (high ? VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) + ++#if 0 /* disabled for Red Hat Enterprise Linux */ + #define TYPE_VIRT_MACHINE MACHINE_TYPE_NAME("virt") + OBJECT_DECLARE_TYPE(VirtMachineState, VirtMachineClass, VIRT_MACHINE) + ++#else ++#define TYPE_RHEL_MACHINE MACHINE_TYPE_NAME("virt-rhel") ++typedef struct VirtMachineClass VirtMachineClass; ++typedef struct VirtMachineState VirtMachineState; ++DECLARE_OBJ_CHECKERS(VirtMachineState, VirtMachineClass, VIRT_MACHINE, TYPE_RHEL_MACHINE) ++#endif ++ + void virt_acpi_setup(VirtMachineState *vms); + bool virt_is_acpi_enabled(VirtMachineState *vms); + +-- +2.27.0 + diff --git a/SOURCES/0009-Add-ppc64-machine-types.patch b/SOURCES/0009-Add-ppc64-machine-types.patch new file mode 100644 index 0000000..f5ce09a --- /dev/null +++ b/SOURCES/0009-Add-ppc64-machine-types.patch @@ -0,0 +1,714 @@ +From 3c65320ce5b8ad3bb8c0d8fd13a88c464d5c5845 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 19 Oct 2018 13:27:13 +0200 +Subject: Add ppc64 machine types + +Adding changes to add RHEL machine types for ppc64 architecture. + +Signed-off-by: Miroslav Rezanina + +Rebase changes (4.0.0): +- remove instance options and use upstream solution +- Use upstream compat handling +- Replace SPAPR_PCI_2_7_MMIO_WIN_SIZE with value (changed upstream) +- re-add handling of instance_options (removed upstream) +- Use p8 as default for rhel machine types (p9 default upstream) +- sPAPRMachineClass renamed to SpaprMachineClass (upstream) + +Rebase changes (4.1.0): +- Update format for compat structures + +Rebase notes (weekly-210303): +- Use rhel-8.4.0 hw compat + +Merged patches (4.0.0): +- 467d59a redhat: define pseries-rhel8.0.0 machine type + +Merged patches (4.1.0): +- f21757edc target/ppc/spapr: Enable mitigations by default for pseries-4.0 machine type +- 2511c63 redhat: sync pseries-rhel7.6.0 with rhel-av-8.0.1 +- 89f01da redhat: define pseries-rhel8.1.0 machine type + +Merged patches (4.2.0): +- bcba728 redhat: update pseries-rhel8.1.0 machine type +- redhat: update pseries-rhel-7.6.0 machine type (patch 93039) +- redhat: define pseries-rhel8.2.0 machine type (patch 93041) + +Merged patches (5.1.0): +- eb121ff spapr: Enable DD2.3 accelerated count cache flush in pseries-5.0 machine (partial) + +Merged patches (5.2.0 rc0): +- 311a20f redhat: define pseries-rhel8.3.0 machine type +- 1284167 ppc: Set correct max_cpus value on spapr-rhel* machine types +- 1ab8783 redhat: update pseries-rhel8.2.0 machine type +- b162af531a target/ppc: Add experimental option for enabling secure guests + +Merged patches (weekly-201216): +- 943c936df3 redhat: Add spapr_machine_rhel_default_class_options() +- 030b5e6fba redhat: Define pseries-rhel8.4.0 machine type + +Merged patches (weekly-210602): +- b7128d8ef7 redhat: Define pseries-rhel8.5.0 machine type + +Merged patches (weekly-211006): +- c8f68b47e9 redhat: Update pseries-rhel8.5.0 +--- + hw/ppc/spapr.c | 382 ++++++++++++++++++++++++++++++++++++++++ + hw/ppc/spapr_cpu_core.c | 13 ++ + include/hw/ppc/spapr.h | 4 + + target/ppc/compat.c | 13 +- + target/ppc/cpu.h | 1 + + target/ppc/kvm.c | 27 +++ + target/ppc/kvm_ppc.h | 13 ++ + 7 files changed, 452 insertions(+), 1 deletion(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index 3b5fd749be..cace86028d 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -1593,6 +1593,9 @@ static void spapr_machine_reset(MachineState *machine) + + pef_kvm_reset(machine->cgs, &error_fatal); + spapr_caps_apply(spapr); ++ if (spapr->svm_allowed) { ++ kvmppc_svm_allow(&error_fatal); ++ } + + first_ppc_cpu = POWERPC_CPU(first_cpu); + if (kvm_enabled() && kvmppc_has_cap_mmu_radix() && +@@ -3288,6 +3291,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) + spapr->host_serial = g_strdup(value); + } + ++static bool spapr_get_svm_allowed(Object *obj, Error **errp) ++{ ++ SpaprMachineState *spapr = SPAPR_MACHINE(obj); ++ ++ return spapr->svm_allowed; ++} ++ ++static void spapr_set_svm_allowed(Object *obj, bool value, Error **errp) ++{ ++ SpaprMachineState *spapr = SPAPR_MACHINE(obj); ++ ++ spapr->svm_allowed = value; ++} ++ + static void spapr_instance_init(Object *obj) + { + SpaprMachineState *spapr = SPAPR_MACHINE(obj); +@@ -3366,6 +3383,12 @@ static void spapr_instance_init(Object *obj) + spapr_get_host_serial, spapr_set_host_serial); + object_property_set_description(obj, "host-serial", + "Host serial number to advertise in guest device tree"); ++ object_property_add_bool(obj, "x-svm-allowed", ++ spapr_get_svm_allowed, ++ spapr_set_svm_allowed); ++ object_property_set_description(obj, "x-svm-allowed", ++ "Allow the guest to become a Secure Guest" ++ " (experimental only)"); + } + + static void spapr_machine_finalizefn(Object *obj) +@@ -4614,6 +4637,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) + vmc->client_architecture_support = spapr_vof_client_architecture_support; + vmc->quiesce = spapr_vof_quiesce; + vmc->setprop = spapr_vof_setprop; ++ smc->has_power9_support = true; + } + + static const TypeInfo spapr_machine_info = { +@@ -4665,6 +4689,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) + } \ + type_init(spapr_machine_register_##suffix) + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + /* + * pseries-6.2 + */ +@@ -4781,6 +4806,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) + } + + DEFINE_SPAPR_MACHINE(4_1, "4.1", false); ++#endif + + /* + * pseries-4.0 +@@ -4800,6 +4826,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, + *nv2atsd = 0; + return true; + } ++ ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void spapr_machine_4_0_class_options(MachineClass *mc) + { + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); +@@ -4958,6 +4986,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); + /* + * pseries-2.7 + */ ++#endif + + static bool phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, + uint64_t *buid, hwaddr *pio, +@@ -5013,6 +5042,7 @@ static bool phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, + return true; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void spapr_machine_2_7_class_options(MachineClass *mc) + { + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); +@@ -5127,6 +5157,358 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) + compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); + } + DEFINE_SPAPR_MACHINE(2_1, "2.1", false); ++#endif ++ ++static void spapr_machine_rhel_default_class_options(MachineClass *mc) ++{ ++ /* ++ * Defaults for the latest behaviour inherited from the base class ++ * can be overriden here for all pseries-rhel* machines. ++ */ ++ ++ /* Maximum supported VCPU count */ ++ mc->max_cpus = 384; ++} ++ ++/* ++ * pseries-rhel8.5.0 ++ * like pseries-6.0 ++ */ ++ ++static void spapr_machine_rhel850_class_options(MachineClass *mc) ++{ ++ /* The default machine type must apply the RHEL specific defaults */ ++ spapr_machine_rhel_default_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_5, ++ hw_compat_rhel_8_5_len); ++} ++ ++DEFINE_SPAPR_MACHINE(rhel850, "rhel8.5.0", true); ++ ++/* ++ * pseries-rhel8.4.0 ++ * like pseries-5.2 ++ */ ++ ++static void spapr_machine_rhel840_class_options(MachineClass *mc) ++{ ++ spapr_machine_rhel850_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_4, ++ hw_compat_rhel_8_4_len); ++} ++ ++DEFINE_SPAPR_MACHINE(rhel840, "rhel8.4.0", false); ++ ++/* ++ * pseries-rhel8.3.0 ++ * like pseries-5.1 ++ */ ++ ++static void spapr_machine_rhel830_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel840_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_3, ++ hw_compat_rhel_8_3_len); ++ ++ /* from pseries-5.1 */ ++ smc->pre_5_2_numa_associativity = true; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel830, "rhel8.3.0", false); ++ ++/* ++ * pseries-rhel8.2.0 ++ * like pseries-4.2 + pseries-5.0 ++ * except SPAPR_CAP_CCF_ASSIST that has been backported to pseries-rhel8.1.0 ++ */ ++ ++static void spapr_machine_rhel820_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ /* from pseries-5.0 */ ++ static GlobalProperty compat[] = { ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pre-5.1-associativity", "on" }, ++ }; ++ ++ spapr_machine_rhel830_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_2, ++ hw_compat_rhel_8_2_len); ++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); ++ ++ /* from pseries-4.2 */ ++ smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_OFF; ++ smc->rma_limit = 16 * GiB; ++ mc->nvdimm_supported = false; ++ ++ /* from pseries-5.0 */ ++ mc->numa_mem_supported = true; ++ smc->pre_5_1_assoc_refpoints = true; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel820, "rhel8.2.0", false); ++ ++/* ++ * pseries-rhel8.1.0 ++ * like pseries-4.1 ++ */ ++ ++static void spapr_machine_rhel810_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ static GlobalProperty compat[] = { ++ /* Only allow 4kiB and 64kiB IOMMU pagesizes */ ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pgsz", "0x11000" }, ++ }; ++ ++ spapr_machine_rhel820_class_options(mc); ++ ++ /* from pseries-4.1 */ ++ smc->linux_pci_probe = false; ++ smc->smp_threads_vsmt = false; ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_1, ++ hw_compat_rhel_8_1_len); ++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); ++ ++ /* from pseries-4.2 */ ++ smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel810, "rhel8.1.0", false); ++ ++/* ++ * pseries-rhel8.0.0 ++ * like pseries-3.1 and pseries-4.0 ++ * except SPAPR_CAP_CFPC, SPAPR_CAP_SBBC and SPAPR_CAP_IBS ++ * that have been backported to pseries-rhel8.0.0 ++ */ ++ ++static void spapr_machine_rhel800_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel810_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_0, ++ hw_compat_rhel_8_0_len); ++ ++ /* pseries-4.0 */ ++ smc->phb_placement = phb_placement_4_0; ++ smc->irq = &spapr_irq_xics; ++ smc->pre_4_1_migration = true; ++ ++ /* pseries-3.1 */ ++ mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0"); ++ smc->update_dt_enabled = false; ++ smc->dr_phb_enabled = false; ++ smc->broken_host_serial_model = true; ++ smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel800, "rhel8.0.0", false); ++ ++/* ++ * pseries-rhel7.6.0 ++ * like spapr_compat_2_12 and spapr_compat_3_0 ++ * spapr_compat_0 is empty ++ */ ++GlobalProperty spapr_compat_rhel7_6[] = { ++ { TYPE_POWERPC_CPU, "pre-3.0-migration", "on" }, ++ { TYPE_SPAPR_CPU_CORE, "pre-3.0-migration", "on" }, ++}; ++const size_t spapr_compat_rhel7_6_len = G_N_ELEMENTS(spapr_compat_rhel7_6); ++ ++ ++static void spapr_machine_rhel760_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel800_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); ++ compat_props_add(mc->compat_props, spapr_compat_rhel7_6, spapr_compat_rhel7_6_len); ++ ++ /* from spapr_machine_3_0_class_options() */ ++ smc->legacy_irq_allocation = true; ++ smc->nr_xirqs = 0x400; ++ smc->irq = &spapr_irq_xics_legacy; ++ ++ /* from spapr_machine_2_12_class_options() */ ++ /* We depend on kvm_enabled() to choose a default value for the ++ * hpt-max-page-size capability. Of course we can't do it here ++ * because this is too early and the HW accelerator isn't initialzed ++ * yet. Postpone this to machine init (see default_caps_with_cpu()). ++ */ ++ smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 0; ++ ++ /* SPAPR_CAP_WORKAROUND enabled in pseries-rhel800 by ++ * f21757edc554 ++ * "Enable mitigations by default for pseries-4.0 machine type") ++ */ ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_BROKEN; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel760, "rhel7.6.0", false); ++ ++/* ++ * pseries-rhel7.6.0-sxxm ++ * ++ * pseries-rhel7.6.0 with speculative execution exploit mitigations enabled by default ++ */ ++ ++static void spapr_machine_rhel760sxxm_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel760_class_options(mc); ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel760sxxm, "rhel7.6.0-sxxm", false); ++ ++static void spapr_machine_rhel750_class_options(MachineClass *mc) ++{ ++ spapr_machine_rhel760_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); ++ ++} ++ ++DEFINE_SPAPR_MACHINE(rhel750, "rhel7.5.0", false); ++ ++/* ++ * pseries-rhel7.5.0-sxxm ++ * ++ * pseries-rhel7.5.0 with speculative execution exploit mitigations enabled by default ++ */ ++ ++static void spapr_machine_rhel750sxxm_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel750_class_options(mc); ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel750sxxm, "rhel7.5.0-sxxm", false); ++ ++/* ++ * pseries-rhel7.4.0 ++ * like spapr_compat_2_9 ++ */ ++GlobalProperty spapr_compat_rhel7_4[] = { ++ { TYPE_POWERPC_CPU, "pre-2.10-migration", "on" }, ++}; ++const size_t spapr_compat_rhel7_4_len = G_N_ELEMENTS(spapr_compat_rhel7_4); ++ ++static void spapr_machine_rhel740_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel750_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_4, hw_compat_rhel_7_4_len); ++ compat_props_add(mc->compat_props, spapr_compat_rhel7_4, spapr_compat_rhel7_4_len); ++ smc->has_power9_support = false; ++ smc->pre_2_10_has_unused_icps = true; ++ smc->resize_hpt_default = SPAPR_RESIZE_HPT_DISABLED; ++ smc->default_caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_ON; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel740, "rhel7.4.0", false); ++ ++/* ++ * pseries-rhel7.4.0-sxxm ++ * ++ * pseries-rhel7.4.0 with speculative execution exploit mitigations enabled by default ++ */ ++ ++static void spapr_machine_rhel740sxxm_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel740_class_options(mc); ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel740sxxm, "rhel7.4.0-sxxm", false); ++ ++/* ++ * pseries-rhel7.3.0 ++ * like spapr_compat_2_6/_2_7/_2_8 but "ddw" has been backported to RHEL7_3 ++ */ ++GlobalProperty spapr_compat_rhel7_3[] = { ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "mem_win_size", "0xf80000000" }, ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "mem64_win_size", "0" }, ++ { TYPE_POWERPC_CPU, "pre-2.8-migration", "on" }, ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pre-2.8-migration", "on" }, ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pcie-extended-configuration-space", "off" }, ++}; ++const size_t spapr_compat_rhel7_3_len = G_N_ELEMENTS(spapr_compat_rhel7_3); ++ ++static void spapr_machine_rhel730_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel740_class_options(mc); ++ mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power7_v2.3"); ++ mc->default_machine_opts = "modern-hotplug-events=off"; ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_3, hw_compat_rhel_7_3_len); ++ compat_props_add(mc->compat_props, spapr_compat_rhel7_3, spapr_compat_rhel7_3_len); ++ ++ smc->phb_placement = phb_placement_2_7; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel730, "rhel7.3.0", false); ++ ++/* ++ * pseries-rhel7.3.0-sxxm ++ * ++ * pseries-rhel7.3.0 with speculative execution exploit mitigations enabled by default ++ */ ++ ++static void spapr_machine_rhel730sxxm_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel730_class_options(mc); ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel730sxxm, "rhel7.3.0-sxxm", false); ++ ++/* ++ * pseries-rhel7.2.0 ++ */ ++/* Should be like spapr_compat_2_5 + 2_4 + 2_3, but "dynamic-reconfiguration" ++ * has been backported to RHEL7_2 so we don't need it here. ++ */ ++ ++GlobalProperty spapr_compat_rhel7_2[] = { ++ { "spapr-vlan", "use-rx-buffer-pools", "off" }, ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "ddw", "off" }, ++}; ++const size_t spapr_compat_rhel7_2_len = G_N_ELEMENTS(spapr_compat_rhel7_2); ++ ++static void spapr_machine_rhel720_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel730_class_options(mc); ++ smc->use_ohci_by_default = true; ++ mc->has_hotpluggable_cpus = NULL; ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_2, hw_compat_rhel_7_2_len); ++ compat_props_add(mc->compat_props, spapr_compat_rhel7_2, spapr_compat_rhel7_2_len); ++} ++ ++DEFINE_SPAPR_MACHINE(rhel720, "rhel7.2.0", false); + + static void spapr_machine_register_types(void) + { +diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c +index 8ba34f6a1d..78eca1c04a 100644 +--- a/hw/ppc/spapr_cpu_core.c ++++ b/hw/ppc/spapr_cpu_core.c +@@ -24,6 +24,7 @@ + #include "sysemu/reset.h" + #include "sysemu/hw_accel.h" + #include "qemu/error-report.h" ++#include "cpu-models.h" + + static void spapr_reset_vcpu(PowerPCCPU *cpu) + { +@@ -250,6 +251,7 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, + { + CPUPPCState *env = &cpu->env; + CPUState *cs = CPU(cpu); ++ SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + + if (!qdev_realize(DEVICE(cpu), NULL, errp)) { + return false; +@@ -261,6 +263,17 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, + cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr)); + kvmppc_set_papr(cpu); + ++ if (!smc->has_power9_support && ++ (((spapr->max_compat_pvr && ++ ppc_compat_cmp(spapr->max_compat_pvr, ++ CPU_POWERPC_LOGICAL_3_00) >= 0)) || ++ (!spapr->max_compat_pvr && ++ ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0, 0)))) { ++ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, ++ "POWER9 CPU is not supported by this machine class"); ++ return false; ++ } ++ + if (spapr_irq_cpu_intc_create(spapr, cpu, errp) < 0) { + qdev_unrealize(DEVICE(cpu)); + return false; +diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h +index ee7504b976..37a014d59c 100644 +--- a/include/hw/ppc/spapr.h ++++ b/include/hw/ppc/spapr.h +@@ -154,6 +154,7 @@ struct SpaprMachineClass { + bool pre_5_2_numa_associativity; + bool pre_6_2_numa_affinity; + ++ bool has_power9_support; + bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index, + uint64_t *buid, hwaddr *pio, + hwaddr *mmio32, hwaddr *mmio64, +@@ -237,6 +238,9 @@ struct SpaprMachineState { + + /* Set by -boot */ + char *boot_device; ++ ++ /* Secure Guest support via x-svm-allowed */ ++ bool svm_allowed; + + /*< public >*/ + char *kvm_type; +diff --git a/target/ppc/compat.c b/target/ppc/compat.c +index 7949a24f5a..f207a9ba01 100644 +--- a/target/ppc/compat.c ++++ b/target/ppc/compat.c +@@ -114,8 +114,19 @@ static const CompatInfo *compat_by_pvr(uint32_t pvr) + return NULL; + } + ++long ppc_compat_cmp(uint32_t pvr1, uint32_t pvr2) ++{ ++ const CompatInfo *compat1 = compat_by_pvr(pvr1); ++ const CompatInfo *compat2 = compat_by_pvr(pvr2); ++ ++ g_assert(compat1); ++ g_assert(compat2); ++ ++ return compat1 - compat2; ++} ++ + static bool pcc_compat(PowerPCCPUClass *pcc, uint32_t compat_pvr, +- uint32_t min_compat_pvr, uint32_t max_compat_pvr) ++ uint32_t min_compat_pvr, uint32_t max_compat_pvr) + { + const CompatInfo *compat = compat_by_pvr(compat_pvr); + const CompatInfo *min = compat_by_pvr(min_compat_pvr); +diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h +index e946da5f3a..23e8b76c85 100644 +--- a/target/ppc/cpu.h ++++ b/target/ppc/cpu.h +@@ -1401,6 +1401,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) + + /* Compatibility modes */ + #if defined(TARGET_PPC64) ++long ppc_compat_cmp(uint32_t pvr1, uint32_t pvr2); + bool ppc_check_compat(PowerPCCPU *cpu, uint32_t compat_pvr, + uint32_t min_compat_pvr, uint32_t max_compat_pvr); + bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, +diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c +index dc93b99189..154888cce5 100644 +--- a/target/ppc/kvm.c ++++ b/target/ppc/kvm.c +@@ -90,6 +90,7 @@ static int cap_ppc_nested_kvm_hv; + static int cap_large_decr; + static int cap_fwnmi; + static int cap_rpt_invalidate; ++static int cap_ppc_secure_guest; + + static uint32_t debug_inst_opcode; + +@@ -137,6 +138,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT); + kvmppc_get_cpu_characteristics(s); + cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV); ++ cap_ppc_secure_guest = kvm_vm_check_extension(s, KVM_CAP_PPC_SECURE_GUEST); + cap_large_decr = kvmppc_get_dec_bits(); + cap_fwnmi = kvm_vm_check_extension(s, KVM_CAP_PPC_FWNMI); + /* +@@ -2563,6 +2565,16 @@ int kvmppc_has_cap_rpt_invalidate(void) + return cap_rpt_invalidate; + } + ++bool kvmppc_has_cap_secure_guest(void) ++{ ++ return !!cap_ppc_secure_guest; ++} ++ ++int kvmppc_enable_cap_secure_guest(void) ++{ ++ return kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SECURE_GUEST, 0, 1); ++} ++ + PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) + { + uint32_t host_pvr = mfpvr(); +@@ -2959,3 +2971,18 @@ bool kvm_arch_cpu_check_are_resettable(void) + { + return true; + } ++ ++void kvmppc_svm_allow(Error **errp) ++{ ++ if (!kvm_enabled()) { ++ error_setg(errp, "No PEF support in tcg, try x-svm-allowed=off"); ++ return; ++ } ++ ++ if (!kvmppc_has_cap_secure_guest()) { ++ error_setg(errp, "KVM implementation does not support secure guests, " ++ "try x-svm-allowed=off"); ++ } else if (kvmppc_enable_cap_secure_guest() < 0) { ++ error_setg(errp, "Error enabling x-svm-allowed, try x-svm-allowed=off"); ++ } ++} +diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h +index ee9325bf9a..20dbb95989 100644 +--- a/target/ppc/kvm_ppc.h ++++ b/target/ppc/kvm_ppc.h +@@ -40,6 +40,7 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu); + target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu, + bool radix, bool gtse, + uint64_t proc_tbl); ++void kvmppc_svm_allow(Error **errp); + #ifndef CONFIG_USER_ONLY + bool kvmppc_spapr_use_multitce(void); + int kvmppc_spapr_enable_inkernel_multitce(void); +@@ -74,6 +75,8 @@ int kvmppc_get_cap_large_decr(void); + int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable); + int kvmppc_has_cap_rpt_invalidate(void); + int kvmppc_enable_hwrng(void); ++bool kvmppc_has_cap_secure_guest(void); ++int kvmppc_enable_cap_secure_guest(void); + int kvmppc_put_books_sregs(PowerPCCPU *cpu); + PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void); + void kvmppc_check_papr_resize_hpt(Error **errp); +@@ -393,6 +396,16 @@ static inline int kvmppc_has_cap_rpt_invalidate(void) + return false; + } + ++static inline bool kvmppc_has_cap_secure_guest(void) ++{ ++ return false; ++} ++ ++static inline int kvmppc_enable_cap_secure_guest(void) ++{ ++ return -1; ++} ++ + static inline int kvmppc_enable_hwrng(void) + { + return -1; +-- +2.27.0 + diff --git a/SOURCES/0010-Add-s390x-machine-types.patch b/SOURCES/0010-Add-s390x-machine-types.patch new file mode 100644 index 0000000..fbb8841 --- /dev/null +++ b/SOURCES/0010-Add-s390x-machine-types.patch @@ -0,0 +1,165 @@ +From 4ad9a0d0582eef78946b47563eb2c5b7ddf0cbb0 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 19 Oct 2018 13:47:32 +0200 +Subject: Add s390x machine types + +Adding changes to add RHEL machine types for s390x architecture. + +Signed-off-by: Miroslav Rezanina + +Rebase changes (weekly-4.1.0): +- Use upstream compat handling + +Rebase notes (weekly-210303): +- Use rhel-8.4.0 hw compat + +Merged patches (3.1.0): +- 29df663 s390x/cpumodel: default enable bpb and ppa15 for z196 and later + +Merged patches (4.1.0): +- 6c200d665b hw/s390x/s390-virtio-ccw: Add machine types for RHEL8.0.0 + +Merged patches (4.2.0): +- fb192e5 redhat: s390x: Rename s390-ccw-virtio-rhel8.0.0 to s390-ccw-virtio-rhel8.1.0 +- a9b22e8 redhat: s390x: Add proper compatibility options for the -rhel7.6.0 machine +- hw/s390x: Add the s390-ccw-virtio-rhel8.2.0 machine types (patch 92954) + +Merged patches (weekly-201216): +- a6ae745cce redhat: s390x: add rhel-8.4.0 compat machine + +Merged patches (weekly-210602): +- 50835d3429 redhat: s390x: add rhel-8.5.0 compat machine + +Merged patches (weekly-211006): +- a3bcde27fe redhat: Add s390x machine type compatibility update for 6.1 rebase +--- + hw/s390x/s390-virtio-ccw.c | 99 +++++++++++++++++++++++++++++++++++++- + 1 file changed, 98 insertions(+), 1 deletion(-) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 653587ea62..181856e6cf 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -767,7 +767,7 @@ bool css_migration_enabled(void) + { \ + MachineClass *mc = MACHINE_CLASS(oc); \ + ccw_machine_##suffix##_class_options(mc); \ +- mc->desc = "VirtIO-ccw based S390 machine v" verstr; \ ++ mc->desc = "VirtIO-ccw based S390 machine " verstr; \ + if (latest) { \ + mc->alias = "s390-ccw-virtio"; \ + mc->is_default = true; \ +@@ -791,6 +791,7 @@ bool css_migration_enabled(void) + } \ + type_init(ccw_machine_register_##suffix) + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void ccw_machine_6_2_instance_options(MachineState *machine) + { + } +@@ -1100,6 +1101,102 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) + compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + } + DEFINE_CCW_MACHINE(2_4, "2.4", false); ++#endif ++ ++static void ccw_machine_rhel850_instance_options(MachineState *machine) ++{ ++} ++ ++static void ccw_machine_rhel850_class_options(MachineClass *mc) ++{ ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); ++} ++DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", true); ++ ++static void ccw_machine_rhel840_instance_options(MachineState *machine) ++{ ++ ccw_machine_rhel850_instance_options(machine); ++} ++ ++static void ccw_machine_rhel840_class_options(MachineClass *mc) ++{ ++ ccw_machine_rhel850_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_4, hw_compat_rhel_8_4_len); ++} ++DEFINE_CCW_MACHINE(rhel840, "rhel8.4.0", false); ++ ++static void ccw_machine_rhel820_instance_options(MachineState *machine) ++{ ++ ccw_machine_rhel840_instance_options(machine); ++} ++ ++static void ccw_machine_rhel820_class_options(MachineClass *mc) ++{ ++ ccw_machine_rhel840_class_options(mc); ++ mc->fixup_ram_size = s390_fixup_ram_size; ++ /* we did not publish a rhel8.3.0 machine */ ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_3, hw_compat_rhel_8_3_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_2, hw_compat_rhel_8_2_len); ++} ++DEFINE_CCW_MACHINE(rhel820, "rhel8.2.0", false); ++ ++static void ccw_machine_rhel760_instance_options(MachineState *machine) ++{ ++ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V3_1 }; ++ ++ ccw_machine_rhel820_instance_options(machine); ++ ++ s390_set_qemu_cpu_model(0x2827, 12, 2, qemu_cpu_feat); ++ ++ /* The multiple-epoch facility was not available with rhel7.6.0 on z14GA1 */ ++ s390_cpudef_featoff(14, 1, S390_FEAT_MULTIPLE_EPOCH); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_QSIE); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_QTOUE); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_STOE); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_STOUE); ++} ++ ++static void ccw_machine_rhel760_class_options(MachineClass *mc) ++{ ++ ccw_machine_rhel820_class_options(mc); ++ /* We never published the s390x version of RHEL-AV 8.0 and 8.1, so add this here */ ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); ++} ++DEFINE_CCW_MACHINE(rhel760, "rhel7.6.0", false); ++ ++static void ccw_machine_rhel750_instance_options(MachineState *machine) ++{ ++ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V2_11 }; ++ ccw_machine_rhel760_instance_options(machine); ++ ++ /* before 2.12 we emulated the very first z900, and RHEL 7.5 is ++ based on 2.10 */ ++ s390_set_qemu_cpu_model(0x2064, 7, 1, qemu_cpu_feat); ++ ++ /* bpb and ppa15 were only in the full model in RHEL 7.5 */ ++ s390_cpudef_featoff_greater(11, 1, S390_FEAT_PPA15); ++ s390_cpudef_featoff_greater(11, 1, S390_FEAT_BPB); ++} ++ ++GlobalProperty ccw_compat_rhel_7_5[] = { ++ { ++ .driver = TYPE_SCLP_EVENT_FACILITY, ++ .property = "allow_all_mask_sizes", ++ .value = "off", ++ }, ++}; ++const size_t ccw_compat_rhel_7_5_len = G_N_ELEMENTS(ccw_compat_rhel_7_5); ++ ++static void ccw_machine_rhel750_class_options(MachineClass *mc) ++{ ++ ccw_machine_rhel760_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); ++ compat_props_add(mc->compat_props, ccw_compat_rhel_7_5, ccw_compat_rhel_7_5_len); ++ S390_CCW_MACHINE_CLASS(mc)->hpage_1m_allowed = false; ++} ++DEFINE_CCW_MACHINE(rhel750, "rhel7.5.0", false); + + static void ccw_machine_register_types(void) + { +-- +2.27.0 + diff --git a/SOURCES/0011-Add-x86_64-machine-types.patch b/SOURCES/0011-Add-x86_64-machine-types.patch new file mode 100644 index 0000000..2702772 --- /dev/null +++ b/SOURCES/0011-Add-x86_64-machine-types.patch @@ -0,0 +1,1276 @@ +From c2b3564ce466bc5069bf9f5b0694025c68b0858d Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 19 Oct 2018 13:10:31 +0200 +Subject: Add x86_64 machine types + +Adding changes to add RHEL machine types for x86_64 architecture. + +Signed-off-by: Miroslav Rezanina + +Rebase changes (qemu-4.0.0): +- Use upstream compat handling + +Rebase notes (3.1.0): +- Removed xsave changes + +Rebase notes (4.1.0): +- Updated format for compat structures + +Rebase notes (4.2.0-rc2): +- Use X86MachineClass for save_tsc_khz (upstream change) + +Rebase notes (weekly-210303): +- Use rhel-8.4.0 hw compat + +Rebase notes (weekly-210519): +- kvm_default_props moved to new file (upstream) + +Rebase notes (6.2.0-rc0): +- linuxboot_dma_enabled moved to X86MachineState + +Merged patches (4.1.0): +- f4dc802 pc: 7.5 compat entries +- 456ed3e pc: PC_RHEL7_6_COMPAT +- 04119ee pc: Add compat for pc-i440fx-rhel7.6.0 machine type +- b3b3687 pc: Add pc-q35-8.0.0 machine type +- 8d46fc6 pc: Add x-migrate-smi-count=off to PC_RHEL7_6_COMPAT +- 1de7949 kvm: clear out KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT for older machine types +- 18cf0d7 target/i386: Disable MPX support on named CPU models (partialy) +- 2660667 rhel: Set host-phys-bits-limit=48 on rhel machine-types + +Merged patches (4.2.0): +- 7d5c2ef pc: Don't make die-id mandatory unless necessary +- e42808c x86 machine types: pc_rhel_8_0_compat +- 9de83a8 x86 machine types: q35: Fixup units_per_default_bus +- 6df1559 x86 machine types: Fixup dynamic sysbus entries +- 0784125 x86 machine types: add pc-q35-rhel8.1.0 +- machines/x86: Add rhel 8.2 machine type (patch 92959) + +Merged patches (5.1.0): +- 481357e RHEL: hw/i386: disable nested PERF_GLOBAL_CTRL MSR support +- e6c3fbf hw/smbios: set new default SMBIOS fields for Windows driver support (partialy) + +Merged patches (5.2.0 rc0): +- b02c9f5 x86: Add 8.3.0 x86_64 machine type +- f2edc4f q35: Set max_cpus to 512 +- 6d7ba66 machine types/numa: set numa_mem_supported on old machine types (partialy) +- 25c5644 machine_types/numa: compatibility for auto_enable_numa_with_memdev (partialy) +- e2d3209 x86: lpc9: let firmware negotiate 'CPU hotplug with SMI' features (partialy) + +Merged patches (weekly-210120): +- d0afeaa0c4 RHEL: Switch pvpanic test to q35 +- e19cdad83c 8.4 x86 machine type + +Merged patches (weekly-210203): +- 96f8781bd6 q35: Increase max_cpus to 710 on pc-q35-rhel8* machine types + +Merged patches (weekly-210224): +- 70d3924521 redhat: Add some devices for exporting upstream machine types + - machine type chunks only + +Merged patches (6.0.0 rc0): +- 031c690804 i386/acpi: restore device paths for pre-5.1 vms + +Merged patches (weekly-210623): +- 64c350696f x86: Add x86 rhel8.5 machine types +- 1c8fe5e164 redhat: x86: Enable 'kvm-asyncpf-int' by default + +Merged patches (weekly-210714): +- 618e2424ed redhat: Expose upstream machines pc-4.2 and pc-2.11 +- c4d1aa8bf2 redhat: Enable FDC device for upstream machines too +- 66882f9a32 redhat: Add hw_compat_4_2_extra and apply to upstream machines + +Fix machine type +--- + hw/block/fdc.c | 5 +- + hw/i386/acpi-build.c | 3 + + hw/i386/pc.c | 298 ++++++++++++++++++++++++++++++++++++- + hw/i386/pc_piix.c | 274 +++++++++++++++++++++++++++++++++- + hw/i386/pc_q35.c | 234 ++++++++++++++++++++++++++++- + include/hw/boards.h | 2 + + include/hw/i386/pc.h | 45 ++++++ + target/i386/kvm/kvm-cpu.c | 1 + + target/i386/kvm/kvm.c | 4 + + tests/qtest/pvpanic-test.c | 5 +- + 10 files changed, 862 insertions(+), 9 deletions(-) + +diff --git a/hw/block/fdc.c b/hw/block/fdc.c +index 97fa6de423..63042ef030 100644 +--- a/hw/block/fdc.c ++++ b/hw/block/fdc.c +@@ -2341,7 +2341,10 @@ void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, Error **errp) + + /* Restricted for Red Hat Enterprise Linux: */ + MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); +- if (!strstr(mc->name, "-rhel7.")) { ++ if (!strstr(mc->name, "-rhel7.") && ++ /* Exported two upstream machine types allows FDC too */ ++ strcmp(mc->name, "pc-i440fx-4.2") && ++ strcmp(mc->name, "pc-i440fx-2.11")) { + error_setg(errp, "Device %s is not supported with machine type %s", + object_get_typename(OBJECT(dev)), mc->name); + return; +diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c +index a99c6e4fe3..447ea35275 100644 +--- a/hw/i386/acpi-build.c ++++ b/hw/i386/acpi-build.c +@@ -230,6 +230,9 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) + pm->fadt.reset_reg = r; + pm->fadt.reset_val = 0xf; + pm->fadt.flags |= 1 << ACPI_FADT_F_RESET_REG_SUP; ++ if (object_property_get_bool(lpc, ++ "__com.redhat_force-rev1-fadt", NULL)) ++ pm->fadt.rev = 1; + pm->cpu_hp_io_base = ICH9_CPU_HOTPLUG_IO_BASE; + pm->smi_on_cpuhp = + !!(smi_features & BIT_ULL(ICH9_LPC_SMI_F_CPU_HOTPLUG_BIT)); +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index a2ef40ecbc..e8109954ca 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -371,6 +371,296 @@ GlobalProperty pc_compat_1_4[] = { + }; + const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); + ++/* This macro is for changes to properties that are RHEL specific, ++ * different to the current upstream and to be applied to the latest ++ * machine type. ++ */ ++GlobalProperty pc_rhel_compat[] = { ++ { TYPE_X86_CPU, "host-phys-bits", "on" }, ++ { TYPE_X86_CPU, "host-phys-bits-limit", "48" }, ++ { TYPE_X86_CPU, "vmx-entry-load-perf-global-ctrl", "off" }, ++ { TYPE_X86_CPU, "vmx-exit-load-perf-global-ctrl", "off" }, ++ /* bz 1508330 */ ++ { "vfio-pci", "x-no-geforce-quirks", "on" }, ++ /* bz 1941397 */ ++ { TYPE_X86_CPU, "kvm-asyncpf-int", "on" }, ++}; ++const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); ++ ++GlobalProperty pc_rhel_8_4_compat[] = { ++ /* pc_rhel_8_4_compat from pc_compat_5_2 */ ++ { "ICH9-LPC", "x-smi-cpu-hotunplug", "off" }, ++ { TYPE_X86_CPU, "kvm-asyncpf-int", "off" }, ++}; ++const size_t pc_rhel_8_4_compat_len = G_N_ELEMENTS(pc_rhel_8_4_compat); ++ ++GlobalProperty pc_rhel_8_3_compat[] = { ++ /* pc_rhel_8_3_compat from pc_compat_5_1 */ ++ { "ICH9-LPC", "x-smi-cpu-hotplug", "off" }, ++}; ++const size_t pc_rhel_8_3_compat_len = G_N_ELEMENTS(pc_rhel_8_3_compat); ++ ++GlobalProperty pc_rhel_8_2_compat[] = { ++ /* pc_rhel_8_2_compat from pc_compat_4_2 */ ++ { "mch", "smbase-smram", "off" }, ++}; ++const size_t pc_rhel_8_2_compat_len = G_N_ELEMENTS(pc_rhel_8_2_compat); ++ ++/* pc_rhel_8_1_compat is empty since pc_4_1_compat is */ ++GlobalProperty pc_rhel_8_1_compat[] = { }; ++const size_t pc_rhel_8_1_compat_len = G_N_ELEMENTS(pc_rhel_8_1_compat); ++ ++GlobalProperty pc_rhel_8_0_compat[] = { ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "intel-iommu", "dma-drain", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G4" "-" TYPE_X86_CPU, "rdtscp", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G4" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G4" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G5" "-" TYPE_X86_CPU, "rdtscp", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G5" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G5" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC-IBPB" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC-IBPB" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /** The mpx=on entries from pc_compat_3_1 are in pc_rhel_7_6_compat **/ ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Cascadelake-Server" "-" TYPE_X86_CPU, "stepping", "5" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { TYPE_X86_CPU, "x-intel-pt-auto-level", "off" }, ++}; ++const size_t pc_rhel_8_0_compat_len = G_N_ELEMENTS(pc_rhel_8_0_compat); ++ ++/* Similar to PC_COMPAT_3_0 + PC_COMPAT_2_12, but: ++ * all of the 2_12 stuff was already in 7.6 from bz 1481253 ++ * x-migrate-smi-count comes from PC_COMPAT_2_11 but ++ * is really tied to kernel version so keep it off on 7.x ++ * machine types irrespective of host. ++ */ ++GlobalProperty pc_rhel_7_6_compat[] = { ++ /* pc_rhel_7_6_compat from pc_compat_3_0 */ ++ { TYPE_X86_CPU, "x-hv-synic-kvm-only", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_3_0 */ ++ { "Skylake-Server" "-" TYPE_X86_CPU, "pku", "off" }, ++ /* pc_rhel_7_6_compat from pc_compat_3_0 */ ++ { "Skylake-Server-IBRS" "-" TYPE_X86_CPU, "pku", "off" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { TYPE_X86_CPU, "x-migrate-smi-count", "off" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Skylake-Client" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Skylake-Client-IBRS" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Skylake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Skylake-Server-IBRS" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Cascadelake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Icelake-Client" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Icelake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, ++}; ++const size_t pc_rhel_7_6_compat_len = G_N_ELEMENTS(pc_rhel_7_6_compat); ++ ++/* Similar to PC_COMPAT_2_11 + PC_COMPAT_2_10, but: ++ * - x-hv-max-vps was backported to 7.5 ++ * - x-pci-hole64-fix was backported to 7.5 ++ */ ++GlobalProperty pc_rhel_7_5_compat[] = { ++ /* pc_rhel_7_5_compat from pc_compat_2_11 */ ++ { "Skylake-Server" "-" TYPE_X86_CPU, "clflushopt", "off" }, ++ /* pc_rhel_7_5_compat from pc_compat_2_12 */ ++ { TYPE_X86_CPU, "legacy-cache", "on" }, ++ /* pc_rhel_7_5_compat from pc_compat_2_12 */ ++ { TYPE_X86_CPU, "topoext", "off" }, ++ /* pc_rhel_7_5_compat from pc_compat_2_12 */ ++ { "EPYC-" TYPE_X86_CPU, "xlevel", stringify(0x8000000a) }, ++ /* pc_rhel_7_5_compat from pc_compat_2_12 */ ++ { "EPYC-IBPB-" TYPE_X86_CPU, "xlevel", stringify(0x8000000a) }, ++}; ++const size_t pc_rhel_7_5_compat_len = G_N_ELEMENTS(pc_rhel_7_5_compat); ++ ++GlobalProperty pc_rhel_7_4_compat[] = { ++ /* pc_rhel_7_4_compat from pc_compat_2_9 */ ++ { "mch", "extended-tseg-mbytes", stringify(0) }, ++ /* bz 1489800 */ ++ { "ICH9-LPC", "__com.redhat_force-rev1-fadt", "on" }, ++ /* pc_rhel_7_4_compat from pc_compat_2_10 */ ++ { "i440FX-pcihost", "x-pci-hole64-fix", "off" }, ++ /* pc_rhel_7_4_compat from pc_compat_2_10 */ ++ { "q35-pcihost", "x-pci-hole64-fix", "off" }, ++ /* pc_rhel_7_4_compat from pc_compat_2_10 */ ++ { TYPE_X86_CPU, "x-hv-max-vps", "0x40" }, ++}; ++const size_t pc_rhel_7_4_compat_len = G_N_ELEMENTS(pc_rhel_7_4_compat); ++ ++GlobalProperty pc_rhel_7_3_compat[] = { ++ /* pc_rhel_7_3_compat from pc_compat_2_8 */ ++ { "kvmclock", "x-mach-use-reliable-get-clock", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { TYPE_X86_CPU, "l3-cache", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { TYPE_X86_CPU, "full-cpuid-auto-level", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { "Opteron_G3" "-" TYPE_X86_CPU, "family", "15" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { "Opteron_G3" "-" TYPE_X86_CPU, "model", "6" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { "Opteron_G3" "-" TYPE_X86_CPU, "stepping", "1" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { "isa-pcspk", "migrate", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_6 */ ++ { TYPE_X86_CPU, "cpuid-0xb", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_8 */ ++ { "ICH9-LPC", "x-smi-broadcast", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_8 */ ++ { TYPE_X86_CPU, "vmware-cpuid-freq", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_8 */ ++ { "Haswell-" TYPE_X86_CPU, "stepping", "1" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_3 added in 2.9*/ ++ { TYPE_X86_CPU, "kvm-no-smi-migration", "on" }, ++}; ++const size_t pc_rhel_7_3_compat_len = G_N_ELEMENTS(pc_rhel_7_3_compat); ++ ++GlobalProperty pc_rhel_7_2_compat[] = { ++ { "phenom" "-" TYPE_X86_CPU, "rdtscp", "off"}, ++ { "qemu64" "-" TYPE_X86_CPU, "sse4a", "on" }, ++ { "qemu64" "-" TYPE_X86_CPU, "abm", "on" }, ++ { "Haswell-" TYPE_X86_CPU, "abm", "off" }, ++ { "Haswell-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, ++ { "Haswell-noTSX-" TYPE_X86_CPU, "abm", "off" }, ++ { "Haswell-noTSX-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, ++ { "Broadwell-" TYPE_X86_CPU, "abm", "off" }, ++ { "Broadwell-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, ++ { "Broadwell-noTSX-" TYPE_X86_CPU, "abm", "off" }, ++ { "Broadwell-noTSX-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, ++ { "host" "-" TYPE_X86_CPU, "host-cache-info", "on" }, ++ { TYPE_X86_CPU, "check", "off" }, ++ { "qemu32" "-" TYPE_X86_CPU, "popcnt", "on" }, ++ { TYPE_X86_CPU, "arat", "off" }, ++ { "usb-redir", "streams", "off" }, ++ { TYPE_X86_CPU, "fill-mtrr-mask", "off" }, ++ { "apic-common", "legacy-instance-id", "on" }, ++}; ++const size_t pc_rhel_7_2_compat_len = G_N_ELEMENTS(pc_rhel_7_2_compat); ++ ++GlobalProperty pc_rhel_7_1_compat[] = { ++ { "kvm64" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "kvm32" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Conroe" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Penryn" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Nehalem" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Nehalem-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Westmere" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Westmere-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "SandyBridge" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "SandyBridge-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Haswell" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Haswell-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Broadwell" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Broadwell-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Opteron_G1" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Opteron_G2" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Opteron_G3" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Opteron_G4" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Opteron_G5" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Haswell" "-" TYPE_X86_CPU, "f16c", "off" }, ++ { "Haswell-IBRS" "-" TYPE_X86_CPU, "f16c", "off" }, ++ { "Haswell" "-" TYPE_X86_CPU, "rdrand", "off" }, ++ { "Haswell-IBRS" "-" TYPE_X86_CPU, "rdrand", "off" }, ++ { "Broadwell" "-" TYPE_X86_CPU, "f16c", "off" }, ++ { "Broadwell-IBRS" "-" TYPE_X86_CPU, "f16c", "off" }, ++ { "Broadwell" "-" TYPE_X86_CPU, "rdrand", "off" }, ++ { "Broadwell-IBRS" "-" TYPE_X86_CPU, "rdrand", "off" }, ++ { "coreduo" "-" TYPE_X86_CPU, "vmx", "on" }, ++ { "core2duo" "-" TYPE_X86_CPU, "vmx", "on" }, ++ { "qemu64" "-" TYPE_X86_CPU, "min-level", stringify(4) }, ++ { "kvm64" "-" TYPE_X86_CPU, "min-level", stringify(5) }, ++ { "pentium3" "-" TYPE_X86_CPU, "min-level", stringify(2) }, ++ { "n270" "-" TYPE_X86_CPU, "min-level", stringify(5) }, ++ { "Conroe" "-" TYPE_X86_CPU, "min-level", stringify(4) }, ++ { "Penryn" "-" TYPE_X86_CPU, "min-level", stringify(4) }, ++ { "Nehalem" "-" TYPE_X86_CPU, "min-level", stringify(4) }, ++ { "n270" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Penryn" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Conroe" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Nehalem" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Westmere" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "SandyBridge" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "IvyBridge" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Haswell" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Haswell-noTSX" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Broadwell" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Broadwell-noTSX" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++}; ++const size_t pc_rhel_7_1_compat_len = G_N_ELEMENTS(pc_rhel_7_1_compat); ++ ++/* ++ * The PC_RHEL_*_COMPAT serve the same purpose for RHEL-7 machine ++ * types as the PC_COMPAT_* do for upstream types. ++ * PC_RHEL_7_*_COMPAT apply both to i440fx and q35 types. ++ */ ++ ++/* ++ * RHEL-7 is based on QEMU 1.5.3, so this needs the PC_COMPAT_* ++ * between our base and 1.5, less stuff backported to RHEL-7.0 ++ * (usb-device.msos-desc), less stuff for devices we changed ++ * (qemu64-x86_64-cpu) or don't support (hpet, pci-serial-2x, ++ * pci-serial-4x) in 7.0. ++ */ ++GlobalProperty pc_rhel_7_0_compat[] = { ++ { "virtio-scsi-pci", "any_layout", "off" }, ++ { "PIIX4_PM", "memory-hotplug-support", "off" }, ++ { "apic", "version", stringify(0x11) }, ++ { "nec-usb-xhci", "superspeed-ports-first", "off" }, ++ { "nec-usb-xhci", "force-pcie-endcap", "on" }, ++ { "pci-serial", "prog_if", stringify(0) }, ++ { "virtio-net-pci", "guest_announce", "off" }, ++ { "ICH9-LPC", "memory-hotplug-support", "off" }, ++ { "xio3130-downstream", COMPAT_PROP_PCP, "off" }, ++ { "ioh3420", COMPAT_PROP_PCP, "off" }, ++ { "PIIX4_PM", "acpi-pci-hotplug-with-bridge-support", "off" }, ++ { "e1000", "mitigation", "off" }, ++ { "virtio-net-pci", "ctrl_guest_offloads", "off" }, ++ { "Conroe" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Penryn" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Nehalem" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Nehalem-IBRS" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Westmere" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Westmere-IBRS" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Opteron_G1" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Opteron_G2" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Opteron_G3" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Opteron_G4" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Opteron_G5" "-" TYPE_X86_CPU, "x2apic", "on" }, ++}; ++const size_t pc_rhel_7_0_compat_len = G_N_ELEMENTS(pc_rhel_7_0_compat); ++ ++/* ++ * RHEL: These properties only apply to the RHEL exported machine types ++ * pc-4.2/2.11 for the purpose to have a limited upstream machines support ++ * which can be migrated to RHEL. Let's avoid touching hw_compat_4_2 directly ++ * so that we can have some isolation against the upstream code. ++ */ ++GlobalProperty hw_compat_4_2_extra[] = { ++ /* By default enlarge the default virtio-net-pci ROM to 512KB. */ ++ { "virtio-net-pci", "romsize", "0x80000" }, ++}; ++const size_t hw_compat_4_2_extra_len = G_N_ELEMENTS(hw_compat_4_2_extra); ++ + GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled) + { + GSIState *s; +@@ -904,7 +1194,8 @@ void pc_memory_init(PCMachineState *pcms, + option_rom_mr = g_malloc(sizeof(*option_rom_mr)); + memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, + &error_fatal); +- if (pcmc->pci_enabled) { ++ /* RH difference: See bz 1489800, explicitly make ROM ro */ ++ if (pcmc->pc_rom_ro) { + memory_region_set_readonly(option_rom_mr, true); + } + memory_region_add_subregion_overlap(rom_memory, +@@ -1694,6 +1985,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + pcmc->pvh_enabled = true; + pcmc->kvmclock_create_always = true; + assert(!mc->get_hotplug_handler); ++ pcmc->pc_rom_ro = true; ++ mc->async_pf_vmexit_disable = false; + mc->get_hotplug_handler = pc_get_hotplug_handler; + mc->hotplug_allowed = pc_hotplug_allowed; + mc->cpu_index_to_instance_props = x86_cpu_index_to_props; +@@ -1704,7 +1997,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + mc->has_hotpluggable_cpus = true; + mc->default_boot_order = "cad"; + mc->block_default_type = IF_IDE; +- mc->max_cpus = 255; ++ /* 240: max CPU count for RHEL */ ++ mc->max_cpus = 240; + mc->reset = pc_machine_reset; + mc->wakeup = pc_machine_wakeup; + hc->pre_plug = pc_machine_device_pre_plug_cb; +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index dda3f64f19..2885edffe9 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -50,6 +50,7 @@ + #include "qapi/error.h" + #include "qemu/error-report.h" + #include "sysemu/xen.h" ++#include "migration/migration.h" + #ifdef CONFIG_XEN + #include + #include "hw/xen/xen_pt.h" +@@ -174,8 +175,8 @@ static void pc_init1(MachineState *machine, + if (pcmc->smbios_defaults) { + MachineClass *mc = MACHINE_GET_CLASS(machine); + /* These values are guest ABI, do not change */ +- smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)", +- mc->name, pcmc->smbios_legacy_mode, ++ smbios_set_defaults("Red Hat", "KVM", ++ mc->desc, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, + pcmc->smbios_stream_product, + pcmc->smbios_stream_version, +@@ -314,6 +315,15 @@ static void pc_init1(MachineState *machine, + * hw_compat_*, pc_compat_*, or * pc_*_machine_options(). + */ + ++/* ++ * NOTE! Not all the upstream machine types are disabled for RHEL. For ++ * providing a very limited support for upstream machine types, pc machines ++ * 2.11 and 4.2 are exposed explicitly. This will make the below "#if" macros ++ * a bit messed up, but please read this comment first so that we can have a ++ * rough understanding of what we're going to do. ++ */ ++ ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void pc_compat_2_3_fn(MachineState *machine) + { + X86MachineState *x86ms = X86_MACHINE(machine); +@@ -389,6 +399,8 @@ static void pc_xen_hvm_init(MachineState *machine) + } + #endif + ++#endif /* Disabled for Red Hat Enterprise Linux */ ++ + #define DEFINE_I440FX_MACHINE(suffix, name, compatfn, optionfn) \ + static void pc_init_##suffix(MachineState *machine) \ + { \ +@@ -424,8 +436,10 @@ static void pc_i440fx_6_2_machine_options(MachineClass *m) + pcmc->default_cpu_version = 1; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v6_2, "pc-i440fx-6.2", NULL, + pc_i440fx_6_2_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_6_1_machine_options(MachineClass *m) + { +@@ -437,8 +451,10 @@ static void pc_i440fx_6_1_machine_options(MachineClass *m) + m->smp_props.prefer_sockets = true; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v6_1, "pc-i440fx-6.1", NULL, + pc_i440fx_6_1_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_6_0_machine_options(MachineClass *m) + { +@@ -449,8 +465,10 @@ static void pc_i440fx_6_0_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_6_0, pc_compat_6_0_len); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v6_0, "pc-i440fx-6.0", NULL, + pc_i440fx_6_0_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_5_2_machine_options(MachineClass *m) + { +@@ -461,8 +479,10 @@ static void pc_i440fx_5_2_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_5_2, pc_compat_5_2_len); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v5_2, "pc-i440fx-5.2", NULL, + pc_i440fx_5_2_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_5_1_machine_options(MachineClass *m) + { +@@ -477,8 +497,10 @@ static void pc_i440fx_5_1_machine_options(MachineClass *m) + pcmc->pci_root_uid = 1; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v5_1, "pc-i440fx-5.1", NULL, + pc_i440fx_5_1_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_5_0_machine_options(MachineClass *m) + { +@@ -491,8 +513,10 @@ static void pc_i440fx_5_0_machine_options(MachineClass *m) + m->auto_enable_numa_with_memdev = false; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v5_0, "pc-i440fx-5.0", NULL, + pc_i440fx_5_0_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_4_2_machine_options(MachineClass *m) + { +@@ -501,8 +525,21 @@ static void pc_i440fx_4_2_machine_options(MachineClass *m) + m->is_default = false; + compat_props_add(m->compat_props, hw_compat_4_2, hw_compat_4_2_len); + compat_props_add(m->compat_props, pc_compat_4_2, pc_compat_4_2_len); ++ ++ /* ++ * RHEL: Mark all upstream machines as deprecated because they're not ++ * supported by RHEL, even if exported. ++ */ ++ m->deprecation_reason = "Not supported by RHEL"; ++ /* ++ * RHEL: Specific compat properties to have limited support for upstream ++ * machines exported. ++ */ ++ compat_props_add(m->compat_props, hw_compat_4_2_extra, ++ hw_compat_4_2_extra_len); + } + ++/* RHEL: Export pc-4.2 */ + DEFINE_I440FX_MACHINE(v4_2, "pc-i440fx-4.2", NULL, + pc_i440fx_4_2_machine_options); + +@@ -515,8 +552,10 @@ static void pc_i440fx_4_1_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_4_1, pc_compat_4_1_len); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v4_1, "pc-i440fx-4.1", NULL, + pc_i440fx_4_1_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_4_0_machine_options(MachineClass *m) + { +@@ -529,8 +568,10 @@ static void pc_i440fx_4_0_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_4_0, pc_compat_4_0_len); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v4_0, "pc-i440fx-4.0", NULL, + pc_i440fx_4_0_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_3_1_machine_options(MachineClass *m) + { +@@ -546,8 +587,10 @@ static void pc_i440fx_3_1_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_3_1, pc_compat_3_1_len); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v3_1, "pc-i440fx-3.1", NULL, + pc_i440fx_3_1_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_3_0_machine_options(MachineClass *m) + { +@@ -556,8 +599,10 @@ static void pc_i440fx_3_0_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_3_0, pc_compat_3_0_len); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v3_0, "pc-i440fx-3.0", NULL, + pc_i440fx_3_0_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_2_12_machine_options(MachineClass *m) + { +@@ -566,8 +611,10 @@ static void pc_i440fx_2_12_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_2_12, pc_compat_2_12_len); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v2_12, "pc-i440fx-2.12", NULL, + pc_i440fx_2_12_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_2_11_machine_options(MachineClass *m) + { +@@ -576,9 +623,11 @@ static void pc_i440fx_2_11_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_2_11, pc_compat_2_11_len); + } + ++/* RHEL: Export pc-2.11 */ + DEFINE_I440FX_MACHINE(v2_11, "pc-i440fx-2.11", NULL, + pc_i440fx_2_11_machine_options); + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void pc_i440fx_2_10_machine_options(MachineClass *m) + { + pc_i440fx_2_11_machine_options(m); +@@ -951,3 +1000,224 @@ static void xenfv_3_1_machine_options(MachineClass *m) + DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init, + xenfv_3_1_machine_options); + #endif ++#endif /* Disabled for Red Hat Enterprise Linux */ ++ ++/* Red Hat Enterprise Linux machine types */ ++ ++/* Options for the latest rhel7 machine type */ ++static void pc_machine_rhel7_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ m->family = "pc_piix_Y"; ++ m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; ++ pcmc->default_nic_model = "e1000"; ++ pcmc->pci_root_uid = 0; ++ m->default_display = "std"; ++ m->no_parallel = 1; ++ m->numa_mem_supported = true; ++ m->auto_enable_numa_with_memdev = false; ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); ++ compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); ++ m->alias = "pc"; ++ m->is_default = 1; ++} ++ ++static void pc_init_rhel760(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel760_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_machine_rhel7_options(m); ++ m->desc = "RHEL 7.6.0 PC (i440FX + PIIX, 1996)"; ++ m->async_pf_vmexit_disable = true; ++ m->smbus_no_migration_support = true; ++ pcmc->pvh_enabled = false; ++ pcmc->default_cpu_version = CPU_VERSION_LEGACY; ++ pcmc->kvmclock_create_always = false; ++ /* From pc_i440fx_5_1_machine_options() */ ++ pcmc->pci_root_uid = 1; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_4, ++ hw_compat_rhel_8_4_len); ++ compat_props_add(m->compat_props, pc_rhel_8_4_compat, ++ pc_rhel_8_4_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_8_3, ++ hw_compat_rhel_8_3_len); ++ compat_props_add(m->compat_props, pc_rhel_8_3_compat, ++ pc_rhel_8_3_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_8_2, ++ hw_compat_rhel_8_2_len); ++ compat_props_add(m->compat_props, pc_rhel_8_2_compat, ++ pc_rhel_8_2_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); ++ compat_props_add(m->compat_props, pc_rhel_8_1_compat, pc_rhel_8_1_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); ++ compat_props_add(m->compat_props, pc_rhel_8_0_compat, pc_rhel_8_0_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); ++ compat_props_add(m->compat_props, pc_rhel_7_6_compat, pc_rhel_7_6_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel760, "pc-i440fx-rhel7.6.0", pc_init_rhel760, ++ pc_machine_rhel760_options); ++ ++static void pc_init_rhel750(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel750_options(MachineClass *m) ++{ ++ pc_machine_rhel760_options(m); ++ m->alias = NULL; ++ m->is_default = 0; ++ m->desc = "RHEL 7.5.0 PC (i440FX + PIIX, 1996)"; ++ m->auto_enable_numa_with_memhp = false; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); ++ compat_props_add(m->compat_props, pc_rhel_7_5_compat, pc_rhel_7_5_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel750, "pc-i440fx-rhel7.5.0", pc_init_rhel750, ++ pc_machine_rhel750_options); ++ ++static void pc_init_rhel740(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel740_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_machine_rhel750_options(m); ++ m->desc = "RHEL 7.4.0 PC (i440FX + PIIX, 1996)"; ++ pcmc->pc_rom_ro = false; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_4, hw_compat_rhel_7_4_len); ++ compat_props_add(m->compat_props, pc_rhel_7_4_compat, pc_rhel_7_4_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel740, "pc-i440fx-rhel7.4.0", pc_init_rhel740, ++ pc_machine_rhel740_options); ++ ++static void pc_init_rhel730(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel730_options(MachineClass *m) ++{ ++ X86MachineClass *x86mc = X86_MACHINE_CLASS(m); ++ pc_machine_rhel740_options(m); ++ m->desc = "RHEL 7.3.0 PC (i440FX + PIIX, 1996)"; ++ x86mc->fwcfg_dma_enabled = false; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_3, hw_compat_rhel_7_3_len); ++ compat_props_add(m->compat_props, pc_rhel_7_3_compat, pc_rhel_7_3_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel730, "pc-i440fx-rhel7.3.0", pc_init_rhel730, ++ pc_machine_rhel730_options); ++ ++ ++static void pc_init_rhel720(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel720_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ X86MachineClass *x86mc = X86_MACHINE_CLASS(m); ++ pc_machine_rhel730_options(m); ++ m->desc = "RHEL 7.2.0 PC (i440FX + PIIX, 1996)"; ++ /* From pc_i440fx_2_5_machine_options */ ++ x86mc->save_tsc_khz = false; ++ m->legacy_fw_cfg_order = 1; ++ /* Note: broken_reserved_end was already in 7.2 */ ++ /* From pc_i440fx_2_6_machine_options */ ++ pcmc->legacy_cpu_hotplug = true; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_2, hw_compat_rhel_7_2_len); ++ compat_props_add(m->compat_props, pc_rhel_7_2_compat, pc_rhel_7_2_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel720, "pc-i440fx-rhel7.2.0", pc_init_rhel720, ++ pc_machine_rhel720_options); ++ ++static void pc_compat_rhel710(MachineState *machine) ++{ ++ PCMachineState *pcms = PC_MACHINE(machine); ++ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); ++ ++ /* From pc_compat_2_2 */ ++ pcmc->rsdp_in_ram = false; ++ machine->suppress_vmdesc = true; ++ ++ /* From pc_compat_2_1 */ ++ pcmc->smbios_uuid_encoded = false; ++ x86_cpu_change_kvm_default("svm", NULL); ++ pcmc->enforce_aligned_dimm = false; ++ ++ /* Disable all the extra subsections that were added in 2.2 */ ++ migrate_pre_2_2 = true; ++ ++ /* From pc_i440fx_2_4_machine_options */ ++ pcmc->broken_reserved_end = true; ++} ++ ++static void pc_init_rhel710(MachineState *machine) ++{ ++ pc_compat_rhel710(machine); ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel710_options(MachineClass *m) ++{ ++ pc_machine_rhel720_options(m); ++ m->family = "pc_piix_Y"; ++ m->desc = "RHEL 7.1.0 PC (i440FX + PIIX, 1996)"; ++ m->default_display = "cirrus"; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_1, hw_compat_rhel_7_1_len); ++ compat_props_add(m->compat_props, pc_rhel_7_1_compat, pc_rhel_7_1_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel710, "pc-i440fx-rhel7.1.0", pc_init_rhel710, ++ pc_machine_rhel710_options); ++ ++static void pc_compat_rhel700(MachineState *machine) ++{ ++ PCMachineState *pcms = PC_MACHINE(machine); ++ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); ++ ++ pc_compat_rhel710(machine); ++ ++ /* Upstream enables it for everyone, we're a little more selective */ ++ x86_cpu_change_kvm_default("x2apic", NULL); ++ x86_cpu_change_kvm_default("svm", NULL); ++ pcmc->legacy_acpi_table_size = 6418; /* see pc_compat_2_0() */ ++ pcmc->smbios_legacy_mode = true; ++ pcmc->has_reserved_memory = false; ++ migrate_cve_2014_5263_xhci_fields = true; ++} ++ ++static void pc_init_rhel700(MachineState *machine) ++{ ++ pc_compat_rhel700(machine); ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel700_options(MachineClass *m) ++{ ++ pc_machine_rhel710_options(m); ++ m->family = "pc_piix_Y"; ++ m->desc = "RHEL 7.0.0 PC (i440FX + PIIX, 1996)"; ++ compat_props_add(m->compat_props, pc_rhel_7_0_compat, pc_rhel_7_0_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel700, "pc-i440fx-rhel7.0.0", pc_init_rhel700, ++ pc_machine_rhel700_options); +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 235054a643..c67418b6a9 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -197,8 +197,8 @@ static void pc_q35_init(MachineState *machine) + + if (pcmc->smbios_defaults) { + /* These values are guest ABI, do not change */ +- smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", +- mc->name, pcmc->smbios_legacy_mode, ++ smbios_set_defaults("Red Hat", "KVM", ++ mc->desc, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, + pcmc->smbios_stream_product, + pcmc->smbios_stream_version, +@@ -342,6 +342,7 @@ static void pc_q35_init(MachineState *machine) + DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) + + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void pc_q35_machine_options(MachineClass *m) + { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); +@@ -620,3 +621,232 @@ static void pc_q35_2_4_machine_options(MachineClass *m) + + DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, + pc_q35_2_4_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ ++ ++/* Red Hat Enterprise Linux machine types */ ++ ++/* Options for the latest rhel q35 machine type */ ++static void pc_q35_machine_rhel_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pcmc->default_nic_model = "e1000e"; ++ pcmc->pci_root_uid = 0; ++ m->family = "pc_q35_Z"; ++ m->units_per_default_bus = 1; ++ m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; ++ m->default_display = "std"; ++ m->no_floppy = 1; ++ m->no_parallel = 1; ++ pcmc->default_cpu_version = 1; ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_AMD_IOMMU_DEVICE); ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE); ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); ++ m->alias = "q35"; ++ m->max_cpus = 710; ++ compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); ++} ++ ++static void pc_q35_init_rhel850(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel850_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel_options(m); ++ m->desc = "RHEL-8.5.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.5.0"; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel850, "pc-q35-rhel8.5.0", pc_q35_init_rhel850, ++ pc_q35_machine_rhel850_options); ++ ++ ++static void pc_q35_init_rhel840(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel840_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel850_options(m); ++ m->desc = "RHEL-8.4.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.4.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_4, ++ hw_compat_rhel_8_4_len); ++ compat_props_add(m->compat_props, pc_rhel_8_4_compat, ++ pc_rhel_8_4_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel840, "pc-q35-rhel8.4.0", pc_q35_init_rhel840, ++ pc_q35_machine_rhel840_options); ++ ++ ++static void pc_q35_init_rhel830(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel830_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel840_options(m); ++ m->desc = "RHEL-8.3.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.3.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_3, ++ hw_compat_rhel_8_3_len); ++ compat_props_add(m->compat_props, pc_rhel_8_3_compat, ++ pc_rhel_8_3_compat_len); ++ /* From pc_q35_5_1_machine_options() */ ++ pcmc->kvmclock_create_always = false; ++ /* From pc_q35_5_1_machine_options() */ ++ pcmc->pci_root_uid = 1; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel830, "pc-q35-rhel8.3.0", pc_q35_init_rhel830, ++ pc_q35_machine_rhel830_options); ++ ++static void pc_q35_init_rhel820(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel820_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel830_options(m); ++ m->desc = "RHEL-8.2.0 PC (Q35 + ICH9, 2009)"; ++ m->numa_mem_supported = true; ++ m->auto_enable_numa_with_memdev = false; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.2.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_2, ++ hw_compat_rhel_8_2_len); ++ compat_props_add(m->compat_props, pc_rhel_8_2_compat, ++ pc_rhel_8_2_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel820, "pc-q35-rhel8.2.0", pc_q35_init_rhel820, ++ pc_q35_machine_rhel820_options); ++ ++static void pc_q35_init_rhel810(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel810_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel820_options(m); ++ m->desc = "RHEL-8.1.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; ++ pcmc->smbios_stream_product = NULL; ++ pcmc->smbios_stream_version = NULL; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); ++ compat_props_add(m->compat_props, pc_rhel_8_1_compat, pc_rhel_8_1_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel810, "pc-q35-rhel8.1.0", pc_q35_init_rhel810, ++ pc_q35_machine_rhel810_options); ++ ++static void pc_q35_init_rhel800(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel800_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel810_options(m); ++ m->desc = "RHEL-8.0.0 PC (Q35 + ICH9, 2009)"; ++ m->smbus_no_migration_support = true; ++ m->alias = NULL; ++ pcmc->pvh_enabled = false; ++ pcmc->default_cpu_version = CPU_VERSION_LEGACY; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); ++ compat_props_add(m->compat_props, pc_rhel_8_0_compat, pc_rhel_8_0_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel800, "pc-q35-rhel8.0.0", pc_q35_init_rhel800, ++ pc_q35_machine_rhel800_options); ++ ++static void pc_q35_init_rhel760(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel760_options(MachineClass *m) ++{ ++ pc_q35_machine_rhel800_options(m); ++ m->alias = NULL; ++ m->desc = "RHEL-7.6.0 PC (Q35 + ICH9, 2009)"; ++ m->async_pf_vmexit_disable = true; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); ++ compat_props_add(m->compat_props, pc_rhel_7_6_compat, pc_rhel_7_6_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel760, "pc-q35-rhel7.6.0", pc_q35_init_rhel760, ++ pc_q35_machine_rhel760_options); ++ ++static void pc_q35_init_rhel750(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel750_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel760_options(m); ++ m->alias = NULL; ++ m->desc = "RHEL-7.5.0 PC (Q35 + ICH9, 2009)"; ++ m->auto_enable_numa_with_memhp = false; ++ pcmc->default_nic_model = "e1000"; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); ++ compat_props_add(m->compat_props, pc_rhel_7_5_compat, pc_rhel_7_5_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel750, "pc-q35-rhel7.5.0", pc_q35_init_rhel750, ++ pc_q35_machine_rhel750_options); ++ ++static void pc_q35_init_rhel740(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel740_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel750_options(m); ++ m->desc = "RHEL-7.4.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->pc_rom_ro = false; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_4, hw_compat_rhel_7_4_len); ++ compat_props_add(m->compat_props, pc_rhel_7_4_compat, pc_rhel_7_4_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel740, "pc-q35-rhel7.4.0", pc_q35_init_rhel740, ++ pc_q35_machine_rhel740_options); ++ ++static void pc_q35_init_rhel730(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel730_options(MachineClass *m) ++{ ++ X86MachineClass *x86mc = X86_MACHINE_CLASS(m); ++ pc_q35_machine_rhel740_options(m); ++ m->desc = "RHEL-7.3.0 PC (Q35 + ICH9, 2009)"; ++ m->max_cpus = 255; ++ x86mc->fwcfg_dma_enabled = false; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_3, hw_compat_rhel_7_3_len); ++ compat_props_add(m->compat_props, pc_rhel_7_3_compat, pc_rhel_7_3_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel730, "pc-q35-rhel7.3.0", pc_q35_init_rhel730, ++ pc_q35_machine_rhel730_options); +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 8bba96ef2b..04e8759815 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -263,6 +263,8 @@ struct MachineClass { + strList *allowed_dynamic_sysbus_devices; + bool auto_enable_numa_with_memhp; + bool auto_enable_numa_with_memdev; ++ /* RHEL only */ ++ bool async_pf_vmexit_disable; + bool ignore_boot_device_suffixes; + bool smbus_no_migration_support; + bool nvdimm_supported; +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 7ccc9a1a07..d0544ee119 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -125,6 +125,9 @@ struct PCMachineClass { + + /* create kvmclock device even when KVM PV features are not exposed */ + bool kvmclock_create_always; ++ ++ /* RH only, see bz 1489800 */ ++ bool pc_rom_ro; + }; + + #define TYPE_PC_MACHINE "generic-pc-machine" +@@ -280,6 +283,48 @@ extern const size_t pc_compat_1_5_len; + extern GlobalProperty pc_compat_1_4[]; + extern const size_t pc_compat_1_4_len; + ++extern GlobalProperty pc_rhel_compat[]; ++extern const size_t pc_rhel_compat_len; ++ ++extern GlobalProperty pc_rhel_8_4_compat[]; ++extern const size_t pc_rhel_8_4_compat_len; ++ ++extern GlobalProperty pc_rhel_8_3_compat[]; ++extern const size_t pc_rhel_8_3_compat_len; ++ ++extern GlobalProperty pc_rhel_8_2_compat[]; ++extern const size_t pc_rhel_8_2_compat_len; ++ ++extern GlobalProperty pc_rhel_8_1_compat[]; ++extern const size_t pc_rhel_8_1_compat_len; ++ ++extern GlobalProperty pc_rhel_8_0_compat[]; ++extern const size_t pc_rhel_8_0_compat_len; ++ ++extern GlobalProperty pc_rhel_7_6_compat[]; ++extern const size_t pc_rhel_7_6_compat_len; ++ ++extern GlobalProperty pc_rhel_7_5_compat[]; ++extern const size_t pc_rhel_7_5_compat_len; ++ ++extern GlobalProperty pc_rhel_7_4_compat[]; ++extern const size_t pc_rhel_7_4_compat_len; ++ ++extern GlobalProperty pc_rhel_7_3_compat[]; ++extern const size_t pc_rhel_7_3_compat_len; ++ ++extern GlobalProperty pc_rhel_7_2_compat[]; ++extern const size_t pc_rhel_7_2_compat_len; ++ ++extern GlobalProperty pc_rhel_7_1_compat[]; ++extern const size_t pc_rhel_7_1_compat_len; ++ ++extern GlobalProperty pc_rhel_7_0_compat[]; ++extern const size_t pc_rhel_7_0_compat_len; ++ ++extern GlobalProperty hw_compat_4_2_extra[]; ++extern const size_t hw_compat_4_2_extra_len; ++ + /* Helper for setting model-id for CPU models that changed model-id + * depending on QEMU versions up to QEMU 2.4. + */ +diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c +index d95028018e..7b004065ae 100644 +--- a/target/i386/kvm/kvm-cpu.c ++++ b/target/i386/kvm/kvm-cpu.c +@@ -131,6 +131,7 @@ static PropValue kvm_default_props[] = { + { "acpi", "off" }, + { "monitor", "off" }, + { "svm", "off" }, ++ { "kvm-pv-unhalt", "on" }, + { NULL, NULL }, + }; + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 5a698bde19..a668f521ac 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -3336,6 +3336,7 @@ static int kvm_get_msrs(X86CPU *cpu) + struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; + int ret, i; + uint64_t mtrr_top_bits; ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); + + kvm_msr_buf_reset(cpu); + +@@ -3665,6 +3666,9 @@ static int kvm_get_msrs(X86CPU *cpu) + break; + case MSR_KVM_ASYNC_PF_EN: + env->async_pf_en_msr = msrs[i].data; ++ if (mc->async_pf_vmexit_disable) { ++ env->async_pf_en_msr &= ~(1ULL << 2); ++ } + break; + case MSR_KVM_ASYNC_PF_INT: + env->async_pf_int_msr = msrs[i].data; +diff --git a/tests/qtest/pvpanic-test.c b/tests/qtest/pvpanic-test.c +index 6dcad2db49..580c2c43d2 100644 +--- a/tests/qtest/pvpanic-test.c ++++ b/tests/qtest/pvpanic-test.c +@@ -17,7 +17,7 @@ static void test_panic_nopause(void) + QDict *response, *data; + QTestState *qts; + +- qts = qtest_init("-device pvpanic -action panic=none"); ++ qts = qtest_init("-M q35 -device pvpanic -action panic=none"); + + val = qtest_inb(qts, 0x505); + g_assert_cmpuint(val, ==, 3); +@@ -40,7 +40,8 @@ static void test_panic(void) + QDict *response, *data; + QTestState *qts; + +- qts = qtest_init("-device pvpanic -action panic=pause"); ++ /* RHEL: Use q35 */ ++ qts = qtest_init("-M q35 -device pvpanic -action panic=pause"); + + val = qtest_inb(qts, 0x505); + g_assert_cmpuint(val, ==, 3); +-- +2.27.0 + diff --git a/SOURCES/0012-Enable-make-check.patch b/SOURCES/0012-Enable-make-check.patch new file mode 100644 index 0000000..b2ff35a --- /dev/null +++ b/SOURCES/0012-Enable-make-check.patch @@ -0,0 +1,407 @@ +From 740a2dd943a2e0fcd41a9cd8eb94a136f8f49fa2 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 2 Sep 2020 09:39:41 +0200 +Subject: Enable make check + +Fixing tests after device disabling and machine types changes and enabling +make check run during build. + +Signed-off-by: Miroslav Rezanina + +Rebase changes (4.0.0): +- Remove testing for pseries-2.7 in endianess test +- Disable device-plug-test on s390x as it use disabled device +- Do not run cpu-plug-tests on 7.3 and older machine types + +Rebase changes (4.1.0-rc0): +- removed iotests 068 + +Rebase changes (4.1.0-rc1): +- remove all 205 tests (unstable) + +Rebase changes (4.2.0-rc0): +- partially disable hd-geo-test (requires lsi53c895a) + +Rebase changes (5.1.0-rc1): +- Disable qtest/q35-test (uses upstream machine types) +- Do not run iotests on make checka +- Enabled iotests 071 and 099 + +Rebase changes (5.2.0 rc0): +- Disable cdrom tests (unsupported devices) on x86_64 +- disable fuzz test + +Rebase changes (6.0.0): +- Disabled xlnx-can-test +- Disable pxb-pcie subtest for bios-table-test +- Replace qtest usage of upstream q35 machine type with pc-q35-rhel8.4.0 +- Not run cdrom-test on aarch64 + +Rebase changes (6.1.0): +- Remove unnecessary test disabling changes + +Rebase changes (weekly-211006): +- New handling for bios-table-test (disabled downstream) + +Merged patches (4.0.0): +- f7ffd13 Remove 7 qcow2 and luks iotests that are taking > 25 sec to run during the fast train build proce + +Merged patches (4.1.0-rc0): +- 41288ff redhat: Remove raw iotest 205 +--- + redhat/qemu-kvm.spec.template | 2 +- + tests/qemu-iotests/051 | 8 ++++---- + tests/qtest/bios-tables-test.c | 5 ++++- + tests/qtest/boot-serial-test.c | 6 +++++- + tests/qtest/cdrom-test.c | 4 ++++ + tests/qtest/cpu-plug-test.c | 4 ++-- + tests/qtest/fuzz-e1000e-test.c | 2 +- + tests/qtest/fuzz-virtio-scsi-test.c | 2 +- + tests/qtest/hd-geo-test.c | 4 ++++ + tests/qtest/lpc-ich9-test.c | 2 +- + tests/qtest/meson.build | 13 ++++--------- + tests/qtest/prom-env-test.c | 4 ++++ + tests/qtest/test-x86-cpuid-compat.c | 2 ++ + tests/qtest/usb-hcd-xhci-test.c | 4 ++++ + 14 files changed, 41 insertions(+), 21 deletions(-) + +diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 +index 1d2fa93a11..c8a2815f54 100755 +--- a/tests/qemu-iotests/051 ++++ b/tests/qemu-iotests/051 +@@ -174,9 +174,9 @@ run_qemu -drive if=virtio + case "$QEMU_DEFAULT_MACHINE" in + pc) + run_qemu -drive if=none,id=disk -device ide-cd,drive=disk +- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-cd,drive=disk ++# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-cd,drive=disk + run_qemu -drive if=none,id=disk -device ide-hd,drive=disk +- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-hd,drive=disk ++# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-hd,drive=disk + ;; + *) + ;; +@@ -225,9 +225,9 @@ run_qemu -drive file="$TEST_IMG",if=virtio,readonly=on + case "$QEMU_DEFAULT_MACHINE" in + pc) + run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-cd,drive=disk +- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-cd,drive=disk ++# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-cd,drive=disk + run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-hd,drive=disk +- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-hd,drive=disk ++# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-hd,drive=disk + ;; + *) + ;; +diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c +index 258874167e..16d8304cde 100644 +--- a/tests/qtest/bios-tables-test.c ++++ b/tests/qtest/bios-tables-test.c +@@ -1372,6 +1372,7 @@ static void test_acpi_virt_tcg_numamem(void) + + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void test_acpi_virt_tcg_pxb(void) + { + test_data data = { +@@ -1403,6 +1404,7 @@ static void test_acpi_virt_tcg_pxb(void) + + free_test_data(&data); + } ++#endif + + static void test_acpi_tcg_acpi_hmat(const char *machine) + { +@@ -1644,7 +1646,8 @@ int main(int argc, char *argv[]) + qtest_add_func("acpi/virt", test_acpi_virt_tcg); + qtest_add_func("acpi/virt/numamem", test_acpi_virt_tcg_numamem); + qtest_add_func("acpi/virt/memhp", test_acpi_virt_tcg_memhp); +- qtest_add_func("acpi/virt/pxb", test_acpi_virt_tcg_pxb); ++ /* Disabled for Red Hat Enterprise Linux ++ qtest_add_func("acpi/virt/pxb", test_acpi_virt_tcg_pxb); */ + qtest_add_func("acpi/virt/oem-fields", test_acpi_oem_fields_virt); + } + } +diff --git a/tests/qtest/boot-serial-test.c b/tests/qtest/boot-serial-test.c +index 83828ba270..294476b959 100644 +--- a/tests/qtest/boot-serial-test.c ++++ b/tests/qtest/boot-serial-test.c +@@ -148,19 +148,23 @@ static testdef_t tests[] = { + { "ppc", "g3beige", "", "PowerPC,750" }, + { "ppc", "mac99", "", "PowerPC,G4" }, + { "ppc", "sam460ex", "-m 256", "DRAM: 256 MiB" }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "ppc64", "ppce500", "", "U-Boot" }, + { "ppc64", "40p", "-m 192", "Memory: 192M" }, + { "ppc64", "mac99", "", "PowerPC,970FX" }, ++#endif + { "ppc64", "pseries", + "-machine " PSERIES_DEFAULT_CAPABILITIES, + "Open Firmware" }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "ppc64", "powernv8", "", "OPAL" }, + { "ppc64", "powernv9", "", "OPAL" }, + { "ppc64", "sam460ex", "-device e1000", "8086 100e" }, ++#endif + { "i386", "isapc", "-cpu qemu32 -device sga", "SGABIOS" }, + { "i386", "pc", "-device sga", "SGABIOS" }, + { "i386", "q35", "-device sga", "SGABIOS" }, +- { "x86_64", "isapc", "-cpu qemu32 -device sga", "SGABIOS" }, ++ { "x86_64", "pc", "-cpu qemu32 -device sga", "SGABIOS" }, + { "x86_64", "q35", "-device sga", "SGABIOS" }, + { "sparc", "LX", "", "TMS390S10" }, + { "sparc", "SS-4", "", "MB86904" }, +diff --git a/tests/qtest/cdrom-test.c b/tests/qtest/cdrom-test.c +index 5af944a5fb..69d9bac38a 100644 +--- a/tests/qtest/cdrom-test.c ++++ b/tests/qtest/cdrom-test.c +@@ -140,6 +140,7 @@ static void add_x86_tests(void) + qtest_add_data_func("cdrom/boot/isapc", "-M isapc " + "-drive if=ide,media=cdrom,file=", test_cdboot); + } ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + qtest_add_data_func("cdrom/boot/am53c974", + "-device am53c974 -device scsi-cd,drive=cd1 " + "-drive if=none,id=cd1,format=raw,file=", test_cdboot); +@@ -155,6 +156,7 @@ static void add_x86_tests(void) + qtest_add_data_func("cdrom/boot/megasas-gen2", "-M q35 " + "-device megasas-gen2 -device scsi-cd,drive=cd1 " + "-blockdev file,node-name=cd1,filename=", test_cdboot); ++#endif + } + + static void add_s390x_tests(void) +@@ -220,6 +222,7 @@ int main(int argc, char **argv) + "magnum", "malta", "pica61", NULL + }; + add_cdrom_param_tests(mips64machines); ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + } else if (g_str_equal(arch, "arm") || g_str_equal(arch, "aarch64")) { + const char *armmachines[] = { + "realview-eb", "realview-eb-mpcore", "realview-pb-a8", +@@ -227,6 +230,7 @@ int main(int argc, char **argv) + "vexpress-a9", "virt", NULL + }; + add_cdrom_param_tests(armmachines); ++#endif + } else { + const char *nonemachine[] = { "none", NULL }; + add_cdrom_param_tests(nonemachine); +diff --git a/tests/qtest/cpu-plug-test.c b/tests/qtest/cpu-plug-test.c +index a1c689414b..a8f076711c 100644 +--- a/tests/qtest/cpu-plug-test.c ++++ b/tests/qtest/cpu-plug-test.c +@@ -110,8 +110,8 @@ static void add_pseries_test_case(const char *mname) + char *path; + PlugTestData *data; + +- if (!g_str_has_prefix(mname, "pseries-") || +- (g_str_has_prefix(mname, "pseries-2.") && atoi(&mname[10]) < 7)) { ++ if (!g_str_has_prefix(mname, "pseries-rhel") || ++ (g_str_has_prefix(mname, "pseries-rhel7.") && atoi(&mname[14]) < 4)) { + return; + } + data = g_new(PlugTestData, 1); +diff --git a/tests/qtest/fuzz-e1000e-test.c b/tests/qtest/fuzz-e1000e-test.c +index 66229e6096..947fba73b7 100644 +--- a/tests/qtest/fuzz-e1000e-test.c ++++ b/tests/qtest/fuzz-e1000e-test.c +@@ -17,7 +17,7 @@ static void test_lp1879531_eth_get_rss_ex_dst_addr(void) + { + QTestState *s; + +- s = qtest_init("-nographic -monitor none -serial none -M pc-q35-5.0"); ++ s = qtest_init("-nographic -monitor none -serial none -M pc-q35-rhel8.4.0"); + + qtest_outl(s, 0xcf8, 0x80001010); + qtest_outl(s, 0xcfc, 0xe1020000); +diff --git a/tests/qtest/fuzz-virtio-scsi-test.c b/tests/qtest/fuzz-virtio-scsi-test.c +index aaf6d10e18..43727d62ac 100644 +--- a/tests/qtest/fuzz-virtio-scsi-test.c ++++ b/tests/qtest/fuzz-virtio-scsi-test.c +@@ -19,7 +19,7 @@ static void test_mmio_oob_from_memory_region_cache(void) + { + QTestState *s; + +- s = qtest_init("-M pc-q35-5.2 -display none -m 512M " ++ s = qtest_init("-M pc-q35-rhel8.4.0 -display none -m 512M " + "-device virtio-scsi,num_queues=8,addr=03.0 "); + + qtest_outl(s, 0xcf8, 0x80001811); +diff --git a/tests/qtest/hd-geo-test.c b/tests/qtest/hd-geo-test.c +index 113126ae06..999ef2aace 100644 +--- a/tests/qtest/hd-geo-test.c ++++ b/tests/qtest/hd-geo-test.c +@@ -737,6 +737,7 @@ static void test_override_ide(void) + test_override(args, expected); + } + ++#if 0 /* Require lsi53c895a - not supported on RHEL */ + static void test_override_scsi(void) + { + TestArgs *args = create_args(); +@@ -781,6 +782,7 @@ static void test_override_scsi_2_controllers(void) + add_scsi_disk(args, 3, 1, 0, 1, 2, 0, 1, 0); + test_override(args, expected); + } ++#endif + + static void test_override_virtio_blk(void) + { +@@ -960,9 +962,11 @@ int main(int argc, char **argv) + qtest_add_func("hd-geo/ide/device/user/chst", test_ide_device_user_chst); + if (have_qemu_img()) { + qtest_add_func("hd-geo/override/ide", test_override_ide); ++#if 0 /* Require lsi53c895a - not supported on RHEL */ + qtest_add_func("hd-geo/override/scsi", test_override_scsi); + qtest_add_func("hd-geo/override/scsi_2_controllers", + test_override_scsi_2_controllers); ++#endif + qtest_add_func("hd-geo/override/virtio_blk", test_override_virtio_blk); + qtest_add_func("hd-geo/override/zero_chs", test_override_zero_chs); + qtest_add_func("hd-geo/override/scsi_hot_unplug", +diff --git a/tests/qtest/lpc-ich9-test.c b/tests/qtest/lpc-ich9-test.c +index fe0bef9980..7a9d51579b 100644 +--- a/tests/qtest/lpc-ich9-test.c ++++ b/tests/qtest/lpc-ich9-test.c +@@ -15,7 +15,7 @@ static void test_lp1878642_pci_bus_get_irq_level_assert(void) + { + QTestState *s; + +- s = qtest_init("-M pc-q35-5.0 " ++ s = qtest_init("-M pc-q35-rhel8.4.0 " + "-nographic -monitor none -serial none"); + + qtest_outl(s, 0xcf8, 0x8000f840); /* PMBASE */ +diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build +index c9d8458062..049e06c057 100644 +--- a/tests/qtest/meson.build ++++ b/tests/qtest/meson.build +@@ -68,7 +68,6 @@ qtests_i386 = \ + (config_all_devices.has_key('CONFIG_RTL8139_PCI') ? ['rtl8139-test'] : []) + \ + (config_all_devices.has_key('CONFIG_E1000E_PCI_EXPRESS') ? ['fuzz-e1000e-test'] : []) + \ + (config_all_devices.has_key('CONFIG_ESP_PCI') ? ['am53c974-test'] : []) + \ +- (unpack_edk2_blobs ? ['bios-tables-test'] : []) + \ + qtests_pci + \ + ['fdc-test', + 'ide-test', +@@ -81,7 +80,6 @@ qtests_i386 = \ + 'drive_del-test', + 'tco-test', + 'cpu-plug-test', +- 'q35-test', + 'vmgenid-test', + 'migration-test', + 'test-x86-cpuid-compat', +@@ -130,17 +128,15 @@ qtests_mips64el = \ + + qtests_ppc = \ + (config_all_devices.has_key('CONFIG_ISA_TESTDEV') ? ['endianness-test'] : []) + \ +- (config_all_devices.has_key('CONFIG_M48T59') ? ['m48t59-test'] : []) + \ +- ['boot-order-test', 'prom-env-test', 'boot-serial-test'] \ ++ (config_all_devices.has_key('CONFIG_M48T59') ? ['m48t59-test'] : []) + + qtests_ppc64 = \ + (config_all_devices.has_key('CONFIG_PSERIES') ? ['device-plug-test'] : []) + \ + (config_all_devices.has_key('CONFIG_POWERNV') ? ['pnv-xscom-test'] : []) + \ + (config_all_devices.has_key('CONFIG_PSERIES') ? ['rtas-test'] : []) + \ +- (slirp.found() ? ['pxe-test', 'test-netfilter'] : []) + \ ++ (slirp.found() ? ['pxe-test'] : []) + \ + (config_all_devices.has_key('CONFIG_USB_UHCI') ? ['usb-hcd-uhci-test'] : []) + \ + (config_all_devices.has_key('CONFIG_USB_XHCI_NEC') ? ['usb-hcd-xhci-test'] : []) + \ +- (config_host.has_key('CONFIG_POSIX') ? ['test-filter-mirror'] : []) + \ + qtests_pci + ['migration-test', 'numa-test', 'cpu-plug-test', 'drive_del-test'] + + qtests_sh4 = (config_all_devices.has_key('CONFIG_ISA_TESTDEV') ? ['endianness-test'] : []) +@@ -186,8 +182,8 @@ qtests_aarch64 = \ + ['arm-cpu-features', + 'numa-test', + 'boot-serial-test', +- 'xlnx-can-test', +- 'fuzz-xlnx-dp-test', ++# 'xlnx-can-test', ++# 'fuzz-xlnx-dp-test', + 'migration-test'] + + qtests_s390x = \ +@@ -196,7 +192,6 @@ qtests_s390x = \ + (config_host.has_key('CONFIG_POSIX') ? ['test-filter-redirector'] : []) + \ + ['boot-serial-test', + 'drive_del-test', +- 'device-plug-test', + 'virtio-ccw-test', + 'cpu-plug-test', + 'migration-test'] +diff --git a/tests/qtest/prom-env-test.c b/tests/qtest/prom-env-test.c +index f41d80154a..f8dc478ce8 100644 +--- a/tests/qtest/prom-env-test.c ++++ b/tests/qtest/prom-env-test.c +@@ -89,10 +89,14 @@ int main(int argc, char *argv[]) + if (!strcmp(arch, "ppc")) { + add_tests(ppc_machines); + } else if (!strcmp(arch, "ppc64")) { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + add_tests(ppc_machines); + if (g_test_slow()) { ++#endif + qtest_add_data_func("prom-env/pseries", "pseries", test_machine); ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + } ++#endif + } else if (!strcmp(arch, "sparc")) { + add_tests(sparc_machines); + } else if (!strcmp(arch, "sparc64")) { +diff --git a/tests/qtest/test-x86-cpuid-compat.c b/tests/qtest/test-x86-cpuid-compat.c +index f28848e06e..6b2fd398a2 100644 +--- a/tests/qtest/test-x86-cpuid-compat.c ++++ b/tests/qtest/test-x86-cpuid-compat.c +@@ -300,6 +300,7 @@ int main(int argc, char **argv) + "-cpu 486,xlevel2=0xC0000002,xstore=on", + "xlevel2", 0xC0000002); + ++#if 0 /* Disabled in Red Hat Enterprise Linux */ + /* Check compatibility of old machine-types that didn't + * auto-increase level/xlevel/xlevel2: */ + +@@ -350,6 +351,7 @@ int main(int argc, char **argv) + add_cpuid_test("x86/cpuid/xlevel-compat/pc-i440fx-2.4/npt-on", + "-machine pc-i440fx-2.4 -cpu SandyBridge,svm=on,npt=on", + "xlevel", 0x80000008); ++#endif + + /* Test feature parsing */ + add_feature_test("x86/cpuid/features/plus", +diff --git a/tests/qtest/usb-hcd-xhci-test.c b/tests/qtest/usb-hcd-xhci-test.c +index 10ef9d2a91..3855873050 100644 +--- a/tests/qtest/usb-hcd-xhci-test.c ++++ b/tests/qtest/usb-hcd-xhci-test.c +@@ -21,6 +21,7 @@ static void test_xhci_hotplug(void) + usb_test_hotplug(global_qtest, "xhci", "1", NULL); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void test_usb_uas_hotplug(void) + { + QTestState *qts = global_qtest; +@@ -36,6 +37,7 @@ static void test_usb_uas_hotplug(void) + qtest_qmp_device_del(qts, "scsihd"); + qtest_qmp_device_del(qts, "uas"); + } ++#endif + + static void test_usb_ccid_hotplug(void) + { +@@ -56,7 +58,9 @@ int main(int argc, char **argv) + + qtest_add_func("/xhci/pci/init", test_xhci_init); + qtest_add_func("/xhci/pci/hotplug", test_xhci_hotplug); ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + qtest_add_func("/xhci/pci/hotplug/usb-uas", test_usb_uas_hotplug); ++#endif + qtest_add_func("/xhci/pci/hotplug/usb-ccid", test_usb_ccid_hotplug); + + qtest_start("-device nec-usb-xhci,id=xhci" +-- +2.27.0 + diff --git a/SOURCES/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch b/SOURCES/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch new file mode 100644 index 0000000..d9c8d42 --- /dev/null +++ b/SOURCES/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -0,0 +1,110 @@ +From e9ebc159a9acf108e1ec6f622be3f256cf14aba7 Mon Sep 17 00:00:00 2001 +From: Bandan Das +Date: Tue, 3 Dec 2013 20:05:13 +0100 +Subject: vfio: cap number of devices that can be assigned + +RH-Author: Bandan Das +Message-id: <1386101113-31560-3-git-send-email-bsd@redhat.com> +Patchwork-id: 55984 +O-Subject: [PATCH RHEL7 qemu-kvm v2 2/2] vfio: cap number of devices that can be assigned +Bugzilla: 678368 +RH-Acked-by: Alex Williamson +RH-Acked-by: Marcelo Tosatti +RH-Acked-by: Michael S. Tsirkin + +Go through all groups to get count of total number of devices +active to enforce limit + +Reasoning from Alex for the limit(32) - Assuming 3 slots per +device, with 125 slots (number of memory slots for RHEL 7), +we can support almost 40 devices and still have few slots left +for other uses. Stepping down a bit, the number 32 arbitrarily +matches the number of slots on a PCI bus and is also a nice power +of two. + +Signed-off-by: Bandan Das + +Rebase notes (2.8.0): +- removed return value for vfio_realize (commit 1a22aca) + +Merged patches (2.9.0): +- 17eb774 vfio: Use error_setg when reporting max assigned device overshoot + + Merged patches (4.1.0-rc3): +- 2b89558 vfio: increase the cap on number of assigned devices to 64 +--- + hw/vfio/pci.c | 29 ++++++++++++++++++++++++++++- + hw/vfio/pci.h | 1 + + 2 files changed, 29 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 7b45353ce2..eb725a3aee 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -45,6 +45,9 @@ + + #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug" + ++/* RHEL only: Set once for the first assigned dev */ ++static uint16_t device_limit; ++ + static void vfio_disable_interrupts(VFIOPCIDevice *vdev); + static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); + +@@ -2807,9 +2810,30 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + ssize_t len; + struct stat st; + int groupid; +- int i, ret; ++ int ret, i = 0; + bool is_mdev; + ++ if (device_limit && device_limit != vdev->assigned_device_limit) { ++ error_setg(errp, "Assigned device limit has been redefined. " ++ "Old:%d, New:%d", ++ device_limit, vdev->assigned_device_limit); ++ return; ++ } else { ++ device_limit = vdev->assigned_device_limit; ++ } ++ ++ QLIST_FOREACH(group, &vfio_group_list, next) { ++ QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { ++ i++; ++ } ++ } ++ ++ if (i >= vdev->assigned_device_limit) { ++ error_setg(errp, "Maximum supported vfio devices (%d) " ++ "already attached", vdev->assigned_device_limit); ++ return; ++ } ++ + if (!vdev->vbasedev.sysfsdev) { + if (!(~vdev->host.domain || ~vdev->host.bus || + ~vdev->host.slot || ~vdev->host.function)) { +@@ -3246,6 +3270,9 @@ static Property vfio_pci_dev_properties[] = { + DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), + DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, + no_geforce_quirks, false), ++ /* RHEL only */ ++ DEFINE_PROP_UINT16("x-assigned-device-limit", VFIOPCIDevice, ++ assigned_device_limit, 64), + DEFINE_PROP_BOOL("x-no-kvm-ioeventfd", VFIOPCIDevice, no_kvm_ioeventfd, + false), + DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd, +diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h +index 64777516d1..e0fe6ca97e 100644 +--- a/hw/vfio/pci.h ++++ b/hw/vfio/pci.h +@@ -139,6 +139,7 @@ struct VFIOPCIDevice { + EventNotifier err_notifier; + EventNotifier req_notifier; + int (*resetfn)(struct VFIOPCIDevice *); ++ uint16_t assigned_device_limit; + uint32_t vendor_id; + uint32_t device_id; + uint32_t sub_vendor_id; +-- +2.27.0 + diff --git a/SOURCES/0014-Add-support-statement-to-help-output.patch b/SOURCES/0014-Add-support-statement-to-help-output.patch new file mode 100644 index 0000000..2259e13 --- /dev/null +++ b/SOURCES/0014-Add-support-statement-to-help-output.patch @@ -0,0 +1,55 @@ +From b736b0c41dd62ed6f874a7b33ca1d4f9ceab4573 Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Wed, 4 Dec 2013 18:53:17 +0100 +Subject: Add support statement to -help output + +RH-Author: Eduardo Habkost +Message-id: <1386183197-27761-1-git-send-email-ehabkost@redhat.com> +Patchwork-id: 55994 +O-Subject: [qemu-kvm RHEL7 PATCH] Add support statement to -help output +Bugzilla: 972773 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: knoel@redhat.com +RH-Acked-by: Paolo Bonzini + +Add support statement to -help output, reporting direct qemu-kvm usage +as unsupported by Red Hat, and advising users to use libvirt instead. + +Signed-off-by: Eduardo Habkost +--- + softmmu/vl.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/softmmu/vl.c b/softmmu/vl.c +index 620a1f1367..d46b8fb4ab 100644 +--- a/softmmu/vl.c ++++ b/softmmu/vl.c +@@ -827,9 +827,17 @@ static void version(void) + QEMU_COPYRIGHT "\n"); + } + ++static void print_rh_warning(void) ++{ ++ printf("\nWARNING: Direct use of qemu-kvm from the command line is not supported by Red Hat.\n" ++ "WARNING: Use libvirt as the stable management interface.\n" ++ "WARNING: Some command line options listed here may not be available in future releases.\n\n"); ++} ++ + static void help(int exitcode) + { + version(); ++ print_rh_warning(); + printf("usage: %s [options] [disk_image]\n\n" + "'disk_image' is a raw hard disk image for IDE hard disk 0\n\n", + error_get_progname()); +@@ -855,6 +863,7 @@ static void help(int exitcode) + "\n" + QEMU_HELP_BOTTOM "\n"); + ++ print_rh_warning(); + exit(exitcode); + } + +-- +2.27.0 + diff --git a/SOURCES/0015-globally-limit-the-maximum-number-of-CPUs.patch b/SOURCES/0015-globally-limit-the-maximum-number-of-CPUs.patch new file mode 100644 index 0000000..31d9643 --- /dev/null +++ b/SOURCES/0015-globally-limit-the-maximum-number-of-CPUs.patch @@ -0,0 +1,65 @@ +From 9a7621819821ee88d2f99d6b629fd87aa9a07758 Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Tue, 21 Jan 2014 10:46:52 +0100 +Subject: globally limit the maximum number of CPUs + +We now globally limit the number of VCPUs. +Especially, there is no way one can specify more than +max_cpus VCPUs for a VM. + +This allows us the restore the ppc max_cpus limitation to the upstream +default and minimize the ppc hack in kvm-all.c. + +Signed-off-by: David Hildenbrand +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo Cesar Lemes de Paula + +Rebase notes (2.11.0): +- Removed CONFIG_RHV reference +- Update commit log + +Merged patches (2.11.0): +- 92fef14623 redhat: remove manual max_cpus limitations for ppc +- bb722e9eff redhat: globally limit the maximum number of CPUs +- fdeef3c1c7 RHEL: Set vcpus hard limit to 240 for Power +- 0584216921 Match POWER max cpus to x86 + +Signed-off-by: Andrew Jones + +Merged patches (5.1.0): +- redhat: globally limit the maximum number of CPUs +- redhat: remove manual max_cpus limitations for ppc +- use recommended max vcpu count + +Merged patches (5.2.0 rc0): +- f8a4123 vl: Remove downstream-only MAX_RHEL_CPUS code +--- + accel/kvm/kvm-all.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index eecd8031cf..8f2a53438f 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2423,6 +2423,18 @@ static int kvm_init(MachineState *ms) + soft_vcpus_limit = kvm_recommended_vcpus(s); + hard_vcpus_limit = kvm_max_vcpus(s); + ++#ifdef HOST_PPC64 ++ /* ++ * On POWER, the kernel advertises a soft limit based on the ++ * number of CPU threads on the host. We want to allow exceeding ++ * this for testing purposes, so we don't want to set hard limit ++ * to soft limit as on x86. ++ */ ++#else ++ /* RHEL doesn't support nr_vcpus > soft_vcpus_limit */ ++ hard_vcpus_limit = soft_vcpus_limit; ++#endif ++ + while (nc->name) { + if (nc->num > soft_vcpus_limit) { + warn_report("Number of %s cpus requested (%d) exceeds " +-- +2.27.0 + diff --git a/SOURCES/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/SOURCES/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch new file mode 100644 index 0000000..9eda7c3 --- /dev/null +++ b/SOURCES/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -0,0 +1,126 @@ +From 0d3fc0b4c5773c6cabb0a58c064475f76eb6ac1e Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 8 Jul 2020 08:35:50 +0200 +Subject: Use qemu-kvm in documentation instead of qemu-system- + +Patchwork-id: 62380 +O-Subject: [RHEV-7.1 qemu-kvm-rhev PATCHv4] Use qemu-kvm in documentation instead of qemu-system-i386 +Bugzilla: 1140620 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Markus Armbruster +RH-Acked-by: Stefan Hajnoczi + +From: Miroslav Rezanina + +We change the name and location of qemu-kvm binaries. Update documentation +to reflect this change. Only architectures available in RHEL are updated. + +Signed-off-by: Miroslav Rezanina + +Rebase notes (5.1.0 rc0): + - qemu-block-drivers.texi converted to qemu-block-drivers.rst (upstream) + +Rebase notes (5.2.0 rc0): + - rewrite patch to new docs structure +--- + docs/defs.rst.inc | 4 ++-- + docs/tools/qemu-trace-stap.rst | 14 +++++++------- + qemu-options.hx | 10 +++++----- + 3 files changed, 14 insertions(+), 14 deletions(-) + +diff --git a/docs/defs.rst.inc b/docs/defs.rst.inc +index 52d6454b93..d74dbdeca9 100644 +--- a/docs/defs.rst.inc ++++ b/docs/defs.rst.inc +@@ -9,7 +9,7 @@ + but the manpages will end up misrendered with following normal text + incorrectly in boldface. + +-.. |qemu_system| replace:: qemu-system-x86_64 +-.. |qemu_system_x86| replace:: qemu-system-x86_64 ++.. |qemu_system| replace:: qemu-kvm ++.. |qemu_system_x86| replace:: qemu-kvm + .. |I2C| replace:: I\ :sup:`2`\ C + .. |I2S| replace:: I\ :sup:`2`\ S +diff --git a/docs/tools/qemu-trace-stap.rst b/docs/tools/qemu-trace-stap.rst +index d53073b52b..9e93df084f 100644 +--- a/docs/tools/qemu-trace-stap.rst ++++ b/docs/tools/qemu-trace-stap.rst +@@ -46,19 +46,19 @@ The following commands are valid: + any of the listed names. If no *PATTERN* is given, the all possible + probes will be listed. + +- For example, to list all probes available in the ``qemu-system-x86_64`` ++ For example, to list all probes available in the ``qemu-kvm`` + binary: + + :: + +- $ qemu-trace-stap list qemu-system-x86_64 ++ $ qemu-trace-stap list qemu-kvm + + To filter the list to only cover probes related to QEMU's cryptographic + subsystem, in a binary outside ``$PATH`` + + :: + +- $ qemu-trace-stap list /opt/qemu/4.0.0/bin/qemu-system-x86_64 'qcrypto*' ++ $ qemu-trace-stap list /opt/qemu/4.0.0/bin/qemu-kvm 'qcrypto*' + + .. option:: run OPTIONS BINARY PATTERN... + +@@ -90,18 +90,18 @@ The following commands are valid: + Restrict the tracing session so that it only triggers for the process + identified by *PID*. + +- For example, to monitor all processes executing ``qemu-system-x86_64`` ++ For example, to monitor all processes executing ``qemu-kvm`` + as found on ``$PATH``, displaying all I/O related probes: + + :: + +- $ qemu-trace-stap run qemu-system-x86_64 'qio*' ++ $ qemu-trace-stap run qemu-kvm 'qio*' + + To monitor only the QEMU process with PID 1732 + + :: + +- $ qemu-trace-stap run --pid=1732 qemu-system-x86_64 'qio*' ++ $ qemu-trace-stap run --pid=1732 qemu-kvm 'qio*' + + To monitor QEMU processes running an alternative binary outside of + ``$PATH``, displaying verbose information about setup of the +@@ -109,7 +109,7 @@ The following commands are valid: + + :: + +- $ qemu-trace-stap -v run /opt/qemu/4.0.0/qemu-system-x86_64 'qio*' ++ $ qemu-trace-stap -v run /opt/qemu/4.0.0/qemu-kvm 'qio*' + + See also + -------- +diff --git a/qemu-options.hx b/qemu-options.hx +index ae2c6dbbfc..94c4a8dbaf 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -3150,11 +3150,11 @@ SRST + + :: + +- qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ +- -numa node,memdev=mem \ +- -chardev socket,id=chr0,path=/path/to/socket \ +- -netdev type=vhost-user,id=net0,chardev=chr0 \ +- -device virtio-net-pci,netdev=net0 ++ qemu-kvm -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ ++ -numa node,memdev=mem \ ++ -chardev socket,id=chr0,path=/path/to/socket \ ++ -netdev type=vhost-user,id=net0,chardev=chr0 \ ++ -device virtio-net-pci,netdev=net0 + + ``-netdev vhost-vdpa,vhostdev=/path/to/dev`` + Establish a vhost-vdpa netdev. +-- +2.27.0 + diff --git a/SOURCES/0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/SOURCES/0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch new file mode 100644 index 0000000..6b60efc --- /dev/null +++ b/SOURCES/0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch @@ -0,0 +1,66 @@ +From d95768c039a2bf6b68422f83a8d55dad41bd3181 Mon Sep 17 00:00:00 2001 +From: Fam Zheng +Date: Wed, 14 Jun 2017 15:37:01 +0200 +Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] + +RH-Author: Fam Zheng +Message-id: <20170614153701.14757-1-famz@redhat.com> +Patchwork-id: 75613 +O-Subject: [RHV-7.4 qemu-kvm-rhev PATCH v3] virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] +Bugzilla: 1378816 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz + +We need a fix for RHEL 7.4 and 7.3.z, but unfortunately upstream isn't +ready. If it were, the changes will be too invasive. To have an idea: + +https://lists.gnu.org/archive/html/qemu-devel/2017-05/msg05400.html + +is an incomplete attempt to fix part of the issue, and the remaining +work unfortunately involve even more complex changes. + +As a band-aid, this partially reverts the effect of ef8875b +(virtio-scsi: Remove op blocker for dataplane, since v2.7). We cannot +simply revert that commit as a whole because we already shipped it in +qemu-kvm-rhev 7.3, since when, block jobs has been possible. We should +only block what has been broken. Also, faithfully reverting the above +commit means adding back the removed op blocker, but that is not enough, +because it still crashes when inserting media into an initially empty +scsi-cd. + +All in all, scsi-cd on virtio-scsi-dataplane has basically been unusable +unless the scsi-cd never enters an empty state, so, disable it +altogether. Otherwise it would be much more difficult to avoid +crashing. + +Signed-off-by: Fam Zheng +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo C. L. de Paula +--- + hw/scsi/virtio-scsi.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index 51fd09522a..a35257c35a 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -896,6 +896,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, + AioContext *old_context; + int ret; + ++ /* XXX: Remove this check once block backend is capable of handling ++ * AioContext change upon eject/insert. ++ * s->ctx is NULL if ioeventfd is off, s->ctx is qemu_get_aio_context() if ++ * data plane is not used, both cases are safe for scsi-cd. */ ++ if (s->ctx && s->ctx != qemu_get_aio_context() && ++ object_dynamic_cast(OBJECT(dev), "scsi-cd")) { ++ error_setg(errp, "scsi-cd is not supported by data plane"); ++ return; ++ } + if (s->ctx && !s->dataplane_fenced) { + if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { + return; +-- +2.27.0 + diff --git a/SOURCES/0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/SOURCES/0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch new file mode 100644 index 0000000..e07746d --- /dev/null +++ b/SOURCES/0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch @@ -0,0 +1,60 @@ +From 92bb62c47eab021f8dabecd09b5fbc1706e6a29c Mon Sep 17 00:00:00 2001 +From: David Gibson +Date: Wed, 6 Feb 2019 03:58:56 +0000 +Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts + +RH-Author: David Gibson +Message-id: <20190206035856.19058-1-dgibson@redhat.com> +Patchwork-id: 84246 +O-Subject: [RHELAV-8.0/rhel qemu-kvm PATCH] BZ1653590: Require at least 64kiB pages for downstream guests & hosts +Bugzilla: 1653590 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Serhii Popovych +RH-Acked-by: Thomas Huth + +Most current POWER guests require 64kiB page support, so that's the default +for the cap-hpt-max-pagesize option in qemu which limits available guest +page sizes. We warn if the value is set smaller than that, but don't +outright fail upstream, because we need to allow for the possibility of +guest (and/or host) kernels configured for 4kiB page sizes. + +Downstream, however, we simply don't support 4kiB pagesize configured +kernels in guest or host, so we can have qemu simply error out in this +situation. + +Testing: Attempted to start a guest with cap-hpt-max-page-size=4k and verified + it failed immediately with a qemu error + +Signed-off-by: David Gibson +Signed-off-by: Danilo C. L. de Paula +--- + hw/ppc/spapr_caps.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c +index ed7c077a0d..48a8efe678 100644 +--- a/hw/ppc/spapr_caps.c ++++ b/hw/ppc/spapr_caps.c +@@ -332,12 +332,19 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, + static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr, + uint8_t val, Error **errp) + { ++#if 0 /* disabled for RHEL */ + if (val < 12) { + error_setg(errp, "Require at least 4kiB hpt-max-page-size"); + return; + } else if (val < 16) { + warn_report("Many guests require at least 64kiB hpt-max-page-size"); + } ++#else /* Only page sizes >=64kiB supported for RHEL */ ++ if (val < 16) { ++ error_setg(errp, "Require at least 64kiB hpt-max-page-size"); ++ return; ++ } ++#endif + + spapr_check_pagesize(spapr, qemu_minrampagesize(), errp); + } +-- +2.27.0 + diff --git a/SOURCES/0019-compat-Update-hw_compat_rhel_8_5.patch b/SOURCES/0019-compat-Update-hw_compat_rhel_8_5.patch new file mode 100644 index 0000000..6d2b7c3 --- /dev/null +++ b/SOURCES/0019-compat-Update-hw_compat_rhel_8_5.patch @@ -0,0 +1,53 @@ +From a9b5da617c29f48199cbea08d6a1c083877dce10 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Mon, 15 Nov 2021 14:22:29 +0100 +Subject: compat: Update hw_compat_rhel_8_5 + +RH-Author: Laurent Vivier +RH-MergeRequest: 66: redhat: Update pseries-rhel8.5.0 machine type +RH-Commit: [1/2] 232f2ad2b29d250fbdb8fcea9d814704c575ba2b +RH-Bugzilla: 2022608 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Greg Kurz + +Add properties from hw_compat_6_1 as it already includes the ones from +hw_compat_6_0. Add a lately added property from 6.0 too. + +Signed-off-by: Laurent Vivier +-- +Rebase notes (6.2.0 rc3): +- Included compatc changes introduced in RC2 +--- + hw/core/machine.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 62febde5aa..736c765c30 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -38,7 +38,7 @@ + #include "hw/virtio/virtio-pci.h" + + /* +- * Mostly the same as hw_compat_6_0 ++ * Mostly the same as hw_compat_6_0 and hw_compat_6_1 + */ + GlobalProperty hw_compat_rhel_8_5[] = { + /* hw_compat_rhel_8_5 from hw_compat_6_0 */ +@@ -51,6 +51,12 @@ GlobalProperty hw_compat_rhel_8_5[] = { + { "e1000", "init-vet", "off" }, + /* hw_compat_rhel_8_5 from hw_compat_6_0 */ + { "e1000e", "init-vet", "off" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "vhost-vsock-device", "seqpacket", "off" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_1 */ ++ { "vhost-user-vsock-device", "seqpacket", "off" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_1 */ ++ { "nvme-ns", "shared", "off" }, + }; + const size_t hw_compat_rhel_8_5_len = G_N_ELEMENTS(hw_compat_rhel_8_5); + +-- +2.27.0 + diff --git a/SOURCES/0020-redhat-Update-pseries-rhel8.5.0-machine-type.patch b/SOURCES/0020-redhat-Update-pseries-rhel8.5.0-machine-type.patch new file mode 100644 index 0000000..af8e9dd --- /dev/null +++ b/SOURCES/0020-redhat-Update-pseries-rhel8.5.0-machine-type.patch @@ -0,0 +1,43 @@ +From 82358c35f04f026820b3907069a6c19cd95b654d Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Mon, 15 Nov 2021 14:25:33 +0100 +Subject: redhat: Update pseries-rhel8.5.0 machine type + +RH-Author: Laurent Vivier +RH-MergeRequest: 66: redhat: Update pseries-rhel8.5.0 machine type +RH-Commit: [2/2] 36f7ad1ea56baaaecb139875ad0a90a6470196be +RH-Bugzilla: 2022608 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Greg Kurz +` +We don't introduce a new machine type for rhel8.6.0 but we need +to keep compatibility with rhel8.5.0 machine type. + +Signed-off-by: Laurent Vivier +--- + hw/ppc/spapr.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index cace86028d..2f27888d8a 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -5177,10 +5177,14 @@ static void spapr_machine_rhel_default_class_options(MachineClass *mc) + + static void spapr_machine_rhel850_class_options(MachineClass *mc) + { ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ + /* The default machine type must apply the RHEL specific defaults */ + spapr_machine_rhel_default_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, + hw_compat_rhel_8_5_len); ++ smc->pre_6_2_numa_affinity = true; ++ mc->smp_props.prefer_sockets = true; + } + + DEFINE_SPAPR_MACHINE(rhel850, "rhel8.5.0", true); +-- +2.27.0 + diff --git a/SOURCES/0021-redhat-virt-rhel8.5.0-Update-machine-type-compatibil.patch b/SOURCES/0021-redhat-virt-rhel8.5.0-Update-machine-type-compatibil.patch new file mode 100644 index 0000000..3bcf4e0 --- /dev/null +++ b/SOURCES/0021-redhat-virt-rhel8.5.0-Update-machine-type-compatibil.patch @@ -0,0 +1,51 @@ +From ce73e939b993cc6be170cdb5d3f2068270593f2b Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Tue, 16 Nov 2021 17:03:07 +0100 +Subject: redhat: virt-rhel8.5.0: Update machine type compatibility for QEMU + 6.2.0 update + +RH-Author: Eric Auger +RH-MergeRequest: 75: redhat: virt-rhel8.5.0: Update machine type compatibility for QEMU 6.2.0 update +RH-Commit: [21/21] f027d13654944e3d34e3356affe7af952eec2bed +RH-Bugzilla: 2022607 +RH-Acked-by: Gavin Shan +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Andrew Jones +RH-Acked-by: Cornelia Huck +RH-Acked-by: Laurent Vivier + +To keep compatibility with 8.5-AV machine type we need to +turn few new options on by default: +smp_props.prefer_sockets, no_cpu_topology, no_tcg_its + +TESTED: migrate from rhel-av-8.5.0 to rhel-8.6.0 and vice-versa +with upstream fix: 33a0c404fb hw/intc/arm_gicv3_its: Revert version +increments in vmstate_its + +Signed-off-by: Eric Auger +Signed-off-by: Miroslav Rezanina +--- + hw/arm/virt.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index c77d26ab13..e8941afd01 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3225,8 +3225,13 @@ type_init(rhel_machine_init); + + static void rhel850_virt_options(MachineClass *mc) + { ++ VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); ++ + compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); ++ mc->smp_props.prefer_sockets = true; ++ vmc->no_cpu_topology = true; ++ vmc->no_tcg_its = true; + } + DEFINE_RHEL_MACHINE_AS_LATEST(8, 5, 0) + +-- +2.27.0 + diff --git a/SOURCES/0022-Fix-virtio-net-pci-vectors-compat.patch b/SOURCES/0022-Fix-virtio-net-pci-vectors-compat.patch new file mode 100644 index 0000000..b484ea1 --- /dev/null +++ b/SOURCES/0022-Fix-virtio-net-pci-vectors-compat.patch @@ -0,0 +1,45 @@ +From f9643b6934657292aae0b830627b1e5f9b8cbaa1 Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Tue, 19 Oct 2021 13:17:06 -0400 +Subject: Fix virtio-net-pci* "vectors" compat + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 77: 8.6/6.2 mt fixes +RH-Commit: [21/23] 8ad581932275d2698a99f31bec40b14f1dbd3d2e +RH-Bugzilla: 2026443 +RH-Acked-by: Miroslav Rezanina + +hw_compat_rhel_8_4 has an issue: it affects only "virtio-net-pci" +but not "virtio-net-pci-transitional" and +"virtio-net-pci-non-transitional". The solution is to use the +"virtio-net-pci-base" type in compat_props. + +An equivalent fix will be submitted for hw_compat_5_2 upstream. + +Signed-off-by: Eduardo Habkost +(cherry picked from commit d45823ab0d0138b2fbaf2ed1e1896d2052f3ccb3) +Signed-off-by: Miroslav Rezanina +--- + hw/core/machine.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 736c765c30..024b025fc2 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -71,7 +71,11 @@ GlobalProperty hw_compat_rhel_8_4[] = { + /* hw_compat_rhel_8_4 from hw_compat_5_2 */ + { "virtio-blk-device", "report-discard-granularity", "off" }, + /* hw_compat_rhel_8_4 from hw_compat_5_2 */ +- { "virtio-net-pci", "vectors", "3"}, ++ /* ++ * Upstream incorrectly had "virtio-net-pci" instead of "virtio-net-pci-base", ++ * (https://bugzilla.redhat.com/show_bug.cgi?id=1999141) ++ */ ++ { "virtio-net-pci-base", "vectors", "3"}, + }; + const size_t hw_compat_rhel_8_4_len = G_N_ELEMENTS(hw_compat_rhel_8_4); + +-- +2.27.0 + diff --git a/SOURCES/0023-x86-rhel-machine-types-Add-pc_rhel_8_5_compat.patch b/SOURCES/0023-x86-rhel-machine-types-Add-pc_rhel_8_5_compat.patch new file mode 100644 index 0000000..8572d61 --- /dev/null +++ b/SOURCES/0023-x86-rhel-machine-types-Add-pc_rhel_8_5_compat.patch @@ -0,0 +1,73 @@ +From 7ad8814e583dcc7dc23e3e8398570243b8f176a1 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 23 Nov 2021 17:57:42 +0000 +Subject: x86/rhel machine types: Add pc_rhel_8_5_compat + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 77: 8.6/6.2 mt fixes +RH-Commit: [22/23] 8bf555c5d78f344b97ffd5c888c7a7bed592d9d0 +RH-Bugzilla: 2026443 +RH-Acked-by: Miroslav Rezanina + +Add pc_rhel_8_5_compat as the merge of pc_compat_6_1 and pc_compat_6_0 +(since 8.5 was based on 6.0). + +Note, x-keep-pci-slot-hpc flipped back and forward, leaving it out +looks like it leaves us with the original. + +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Miroslav Rezanina +--- + hw/i386/pc.c | 21 +++++++++++++++++++++ + include/hw/i386/pc.h | 3 +++ + 2 files changed, 24 insertions(+) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index e8109954ca..4c08a1971c 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -387,6 +387,27 @@ GlobalProperty pc_rhel_compat[] = { + }; + const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + ++GlobalProperty pc_rhel_8_5_compat[] = { ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { "qemu64" "-" TYPE_X86_CPU, "family", "6" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { "qemu64" "-" TYPE_X86_CPU, "model", "6" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { "qemu64" "-" TYPE_X86_CPU, "stepping", "3" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { TYPE_X86_CPU, "x-vendor-cpuid-only", "off" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { "ICH9-LPC", ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, "off" }, ++ ++ /* pc_rhel_8_5_compat from pc_compat_6_1 */ ++ { TYPE_X86_CPU, "hv-version-id-build", "0x1bbc" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_1 */ ++ { TYPE_X86_CPU, "hv-version-id-major", "0x0006" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_1 */ ++ { TYPE_X86_CPU, "hv-version-id-minor", "0x0001" }, ++}; ++const size_t pc_rhel_8_5_compat_len = G_N_ELEMENTS(pc_rhel_8_5_compat); ++ + GlobalProperty pc_rhel_8_4_compat[] = { + /* pc_rhel_8_4_compat from pc_compat_5_2 */ + { "ICH9-LPC", "x-smi-cpu-hotunplug", "off" }, +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index d0544ee119..9e8bfb69f8 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -286,6 +286,9 @@ extern const size_t pc_compat_1_4_len; + extern GlobalProperty pc_rhel_compat[]; + extern const size_t pc_rhel_compat_len; + ++extern GlobalProperty pc_rhel_8_5_compat[]; ++extern const size_t pc_rhel_8_5_compat_len; ++ + extern GlobalProperty pc_rhel_8_4_compat[]; + extern const size_t pc_rhel_8_4_compat_len; + +-- +2.27.0 + diff --git a/SOURCES/0024-x86-rhel-machine-types-Wire-compat-into-q35-and-i440.patch b/SOURCES/0024-x86-rhel-machine-types-Wire-compat-into-q35-and-i440.patch new file mode 100644 index 0000000..4acfa88 --- /dev/null +++ b/SOURCES/0024-x86-rhel-machine-types-Wire-compat-into-q35-and-i440.patch @@ -0,0 +1,54 @@ +From 7bd99eebadfdbea6a76585b526e7cab1ee8b1fde Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 23 Nov 2021 18:07:49 +0000 +Subject: x86/rhel machine types: Wire compat into q35 and i440fx + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 77: 8.6/6.2 mt fixes +RH-Commit: [23/23] fc3861aeccc943b434231193ef45ffbc0b3cf6c6 +RH-Bugzilla: 2026443 +RH-Acked-by: Miroslav Rezanina + +Wire the pc_rhel_8_5 compat data into both piix and q35 +to keep the existing machine types compatible. + +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Miroslav Rezanina +--- + hw/i386/pc_piix.c | 4 ++++ + hw/i386/pc_q35.c | 4 ++++ + 2 files changed, 8 insertions(+) + +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 2885edffe9..37fab00733 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1040,6 +1040,10 @@ static void pc_machine_rhel760_options(MachineClass *m) + pcmc->kvmclock_create_always = false; + /* From pc_i440fx_5_1_machine_options() */ + pcmc->pci_root_uid = 1; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_5, ++ hw_compat_rhel_8_5_len); ++ compat_props_add(m->compat_props, pc_rhel_8_5_compat, ++ pc_rhel_8_5_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_8_4, + hw_compat_rhel_8_4_len); + compat_props_add(m->compat_props, pc_rhel_8_4_compat, +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index c67418b6a9..78876e1101 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -658,6 +658,10 @@ static void pc_q35_machine_rhel850_options(MachineClass *m) + m->desc = "RHEL-8.5.0 PC (Q35 + ICH9, 2009)"; + pcmc->smbios_stream_product = "RHEL-AV"; + pcmc->smbios_stream_version = "8.5.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_5, ++ hw_compat_rhel_8_5_len); ++ compat_props_add(m->compat_props, pc_rhel_8_5_compat, ++ pc_rhel_8_5_compat_len); + } + + DEFINE_PC_MACHINE(q35_rhel850, "pc-q35-rhel8.5.0", pc_q35_init_rhel850, +-- +2.27.0 + diff --git a/SOURCES/0025-redhat-Add-s390x-machine-type-compatibility-handling.patch b/SOURCES/0025-redhat-Add-s390x-machine-type-compatibility-handling.patch new file mode 100644 index 0000000..1ae8a99 --- /dev/null +++ b/SOURCES/0025-redhat-Add-s390x-machine-type-compatibility-handling.patch @@ -0,0 +1,58 @@ +From 265a57f2955b7f0b65e3f57f89aa1ff2541d3f73 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 26 Nov 2021 09:37:11 +0100 +Subject: redhat: Add s390x machine type compatibility handling for the rebase + to v6.2 + +RH-Author: Thomas Huth +RH-MergeRequest: 80: Add s390x machine type compatibility handling for the rebase to v6.2 +RH-Commit: [26/26] c45cf594604f6dd23954696b9c84d2025e328d11 +RH-Bugzilla: 2022602 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cornelia Huck + +Add compatibility handling for the rhel8.5.0 machine type (and +recursively older, of course). + +Based on the following upstream commits: + + 463e50da8b - s390x/cpumodel: Bump up QEMU model to a stripped-down IBM z14 GA2 + 30e398f796 - s390x/cpumodel: Add more feature to gen16 default model + 4a0af2930a - machine: Prefer cores over sockets in smp parsing since 6.2 + 2b52619994 - machine: Move smp_prefer_sockets to struct SMPCompatProps + +Signed-off-by: Thomas Huth +--- + hw/s390x/s390-virtio-ccw.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 181856e6cf..cf13c457d6 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1105,11 +1105,21 @@ DEFINE_CCW_MACHINE(2_4, "2.4", false); + + static void ccw_machine_rhel850_instance_options(MachineState *machine) + { ++ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_0 }; ++ ++ s390_set_qemu_cpu_model(0x2964, 13, 2, qemu_cpu_feat); ++ ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_NNPA); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_VECTOR_PACKED_DECIMAL_ENH2); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_BEAR_ENH); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_RDP); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAI); + } + + static void ccw_machine_rhel850_class_options(MachineClass *mc) + { + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); ++ mc->smp_props.prefer_sockets = true; + } + DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", true); + +-- +2.27.0 + diff --git a/SOURCES/81-kvm-rhel.rules b/SOURCES/81-kvm-rhel.rules new file mode 100644 index 0000000..787cad6 --- /dev/null +++ b/SOURCES/81-kvm-rhel.rules @@ -0,0 +1 @@ +DEVPATH=="*/kvm", ACTION=="change", RUN+="/lib/udev/udev-kvm-check $env{COUNT} $env{EVENT}" diff --git a/SOURCES/85-kvm.preset b/SOURCES/85-kvm.preset new file mode 100644 index 0000000..8024052 --- /dev/null +++ b/SOURCES/85-kvm.preset @@ -0,0 +1,5 @@ +# Enable kvm-setup by default. This can have odd side effects on +# PowerNV systems that aren't intended as KVM hosts, but at present we +# only support RHEL on PowerNV for the purpose of being a RHEV host. + +enable kvm-setup.service diff --git a/SOURCES/95-kvm-memlock.conf b/SOURCES/95-kvm-memlock.conf new file mode 100644 index 0000000..fc59dbe --- /dev/null +++ b/SOURCES/95-kvm-memlock.conf @@ -0,0 +1,10 @@ +# The KVM HV implementation on Power can require a significant amount +# of unswappable memory (about half of which also needs to be host +# physically contiguous) to hold the guest's Hash Page Table (HPT) - +# roughly 1/64th of the guest's RAM size, minimum 16MiB. +# +# These limits allow unprivileged users to start smallish VMs, such as +# those used by libguestfs. +# +* hard memlock 65536 +* soft memlock 65536 diff --git a/SOURCES/99-qemu-guest-agent.rules b/SOURCES/99-qemu-guest-agent.rules new file mode 100644 index 0000000..8a290ab --- /dev/null +++ b/SOURCES/99-qemu-guest-agent.rules @@ -0,0 +1,2 @@ +SUBSYSTEM=="virtio-ports", ATTR{name}=="org.qemu.guest_agent.0", \ + TAG+="systemd" ENV{SYSTEMD_WANTS}="qemu-guest-agent.service" diff --git a/SOURCES/README.tests b/SOURCES/README.tests new file mode 100644 index 0000000..9932773 --- /dev/null +++ b/SOURCES/README.tests @@ -0,0 +1,39 @@ +qemu-kvm-tests README +===================== + +The qemu-kvm-tests rpm contains tests that can be used to verify the +functionality of the installed qemu-kvm package + +When installed, the files from this rpm will be arranged in the following +directory structure + +tests-src/ +├── README +├── scripts +│   ├── qemu.py +│   └── qmp +└── tests + ├── acceptance + ├── Makefile.include + └── qemu-iotests + +The tests/ directory within the tests-src/ directory is setup to remain a copy +of a subset of the tests/ directory from the QEMU source tree + +The avocado_qemu tests and qemu-iotests, along with files required for the +execution of the avocado_qemu tests (scripts/qemu.py and scripts/qmp/) will be +installed in a new location - /usr/lib64/qemu-kvm/tests-src/ + +avocado_qemu tests: +The avocado_qemu tests can be executed by running the following avocado command: +avocado run -p qemu_bin=/usr/libexec/qemu-kvm /usr/lib64/qemu-kvm/tests/acceptance/ +Avocado needs to be installed separately using either pip or from source as +Avocado is not being packaged for RHEL-8. + +qemu-iotests: +symlinks to corresponding binaries need to be created for QEMU_PROG, +QEMU_IO_PROG, QEMU_IMG_PROG, and QEMU_NBD_PROG before the iotests can be +executed. + +The primary purpose of this package is to make these tests available to be +executed as gating tests for the virt module in the RHEL-8 OSCI environment. diff --git a/SOURCES/bridge.conf b/SOURCES/bridge.conf new file mode 100644 index 0000000..a573665 --- /dev/null +++ b/SOURCES/bridge.conf @@ -0,0 +1 @@ +allow virbr0 diff --git a/SOURCES/ksm.service b/SOURCES/ksm.service new file mode 100644 index 0000000..35c6f1d --- /dev/null +++ b/SOURCES/ksm.service @@ -0,0 +1,13 @@ +[Unit] +Description=Kernel Samepage Merging +ConditionPathExists=/sys/kernel/mm/ksm + +[Service] +Type=oneshot +RemainAfterExit=yes +EnvironmentFile=-/etc/sysconfig/ksm +ExecStart=/usr/libexec/ksmctl start +ExecStop=/usr/libexec/ksmctl stop + +[Install] +WantedBy=multi-user.target diff --git a/SOURCES/ksm.sysconfig b/SOURCES/ksm.sysconfig new file mode 100644 index 0000000..d99656d --- /dev/null +++ b/SOURCES/ksm.sysconfig @@ -0,0 +1,4 @@ +# The maximum number of unswappable kernel pages +# which may be allocated by ksm (0 for unlimited) +# If unset, defaults to half of total memory +# KSM_MAX_KERNEL_PAGES= diff --git a/SOURCES/ksmctl.c b/SOURCES/ksmctl.c new file mode 100644 index 0000000..af39591 --- /dev/null +++ b/SOURCES/ksmctl.c @@ -0,0 +1,77 @@ +/* Start/stop KSM, for systemd. + * Copyright (C) 2009, 2011 Red Hat, Inc. + * Written by Paolo Bonzini . + * Based on the original sysvinit script by Dan Kenigsberg + * This file is distributed under the GNU General Public License, version 2 + * or later. */ + +#include +#include +#include +#include +#include +#include + +#define KSM_MAX_KERNEL_PAGES_FILE "/sys/kernel/mm/ksm/max_kernel_pages" +#define KSM_RUN_FILE "/sys/kernel/mm/ksm/run" + +char *program_name; + +int usage(void) +{ + fprintf(stderr, "Usage: %s {start|stop}\n", program_name); + return 1; +} + +int write_value(uint64_t value, char *filename) +{ + FILE *fp; + if (!(fp = fopen(filename, "w")) || + fprintf(fp, "%llu\n", (unsigned long long) value) == EOF || + fflush(fp) == EOF || + fclose(fp) == EOF) + return 1; + + return 0; +} + +uint64_t ksm_max_kernel_pages() +{ + char *var = getenv("KSM_MAX_KERNEL_PAGES"); + char *endptr; + uint64_t value; + if (var && *var) { + value = strtoll(var, &endptr, 0); + if (value < LLONG_MAX && !*endptr) + return value; + } + /* Unless KSM_MAX_KERNEL_PAGES is set, let KSM munch up to half of + * total memory. */ + return sysconf(_SC_PHYS_PAGES) / 2; +} + +int start(void) +{ + if (access(KSM_MAX_KERNEL_PAGES_FILE, R_OK) >= 0) + write_value(ksm_max_kernel_pages(), KSM_MAX_KERNEL_PAGES_FILE); + return write_value(1, KSM_RUN_FILE); +} + +int stop(void) +{ + return write_value(0, KSM_RUN_FILE); +} + +int main(int argc, char **argv) +{ + program_name = argv[0]; + if (argc < 2) { + return usage(); + } else if (!strcmp(argv[1], "start")) { + return start(); + } else if (!strcmp(argv[1], "stop")) { + return stop(); + } else { + return usage(); + } +} diff --git a/SOURCES/ksmtuned b/SOURCES/ksmtuned new file mode 100644 index 0000000..7bc5743 --- /dev/null +++ b/SOURCES/ksmtuned @@ -0,0 +1,139 @@ +#!/bin/bash +# +# Copyright 2009 Red Hat, Inc. and/or its affiliates. +# Released under the GPL +# +# Author: Dan Kenigsberg +# +# ksmtuned - a simple script that controls whether (and with what vigor) ksm +# should search for duplicated pages. +# +# starts ksm when memory commited to qemu processes exceeds a threshold, and +# make ksm work harder and harder untill memory load falls below that +# threshold. +# +# send SIGUSR1 to this process right after a new qemu process is started, or +# following its death, to retune ksm accordingly +# +# needs testing and ironing. contact danken@redhat.com if something breaks. + +if [ -f /etc/ksmtuned.conf ]; then + . /etc/ksmtuned.conf +fi + +debug() { + if [ -n "$DEBUG" ]; then + s="`/bin/date`: $*" + [ -n "$LOGFILE" ] && echo "$s" >> "$LOGFILE" || echo "$s" + fi +} + + +KSM_MONITOR_INTERVAL=${KSM_MONITOR_INTERVAL:-60} +KSM_NPAGES_BOOST=${KSM_NPAGES_BOOST:-300} +KSM_NPAGES_DECAY=${KSM_NPAGES_DECAY:--50} + +KSM_NPAGES_MIN=${KSM_NPAGES_MIN:-64} +KSM_NPAGES_MAX=${KSM_NPAGES_MAX:-1250} +# millisecond sleep between ksm scans for 16Gb server. Smaller servers sleep +# more, bigger sleep less. +KSM_SLEEP_MSEC=${KSM_SLEEP_MSEC:-10} + +KSM_THRES_COEF=${KSM_THRES_COEF:-20} +KSM_THRES_CONST=${KSM_THRES_CONST:-2048} + +total=`awk '/^MemTotal:/ {print $2}' /proc/meminfo` +debug total $total + +npages=0 +sleep=$[KSM_SLEEP_MSEC * 16 * 1024 * 1024 / total] +[ $sleep -le 10 ] && sleep=10 +debug sleep $sleep +thres=$[total * KSM_THRES_COEF / 100] +if [ $KSM_THRES_CONST -gt $thres ]; then + thres=$KSM_THRES_CONST +fi +debug thres $thres + +KSMCTL () { + case x$1 in + xstop) + echo 0 > /sys/kernel/mm/ksm/run + ;; + xstart) + echo $2 > /sys/kernel/mm/ksm/pages_to_scan + echo $3 > /sys/kernel/mm/ksm/sleep_millisecs + echo 1 > /sys/kernel/mm/ksm/run + ;; + esac +} + +committed_memory () { + # calculate how much memory is committed to running qemu processes + local pidlist + pidlist=$(pgrep -d ' ' -- '^qemu(-(kvm|system-.+)|:.{1,11})$') + if [ -n "$pidlist" ]; then + ps -p "$pidlist" -o rsz= + fi | awk '{ sum += $1 }; END { print 0+sum }' +} + +free_memory () { + awk '/^(MemFree|Buffers|Cached):/ {free += $2}; END {print free}' \ + /proc/meminfo +} + +increase_npages() { + local delta + delta=${1:-0} + npages=$[npages + delta] + if [ $npages -lt $KSM_NPAGES_MIN ]; then + npages=$KSM_NPAGES_MIN + elif [ $npages -gt $KSM_NPAGES_MAX ]; then + npages=$KSM_NPAGES_MAX + fi + echo $npages +} + + +adjust () { + local free committed + free=`free_memory` + committed=`committed_memory` + debug committed $committed free $free + if [ $[committed + thres] -lt $total -a $free -gt $thres ]; then + KSMCTL stop + debug "$[committed + thres] < $total and free > $thres, stop ksm" + return 1 + fi + debug "$[committed + thres] > $total, start ksm" + if [ $free -lt $thres ]; then + npages=`increase_npages $KSM_NPAGES_BOOST` + debug "$free < $thres, boost" + else + npages=`increase_npages $KSM_NPAGES_DECAY` + debug "$free > $thres, decay" + fi + KSMCTL start $npages $sleep + debug "KSMCTL start $npages $sleep" + return 0 +} + +function nothing () { + : +} + +loop () { + trap nothing SIGUSR1 + while true + do + sleep $KSM_MONITOR_INTERVAL & + wait $! + adjust + done +} + +PIDFILE=${PIDFILE-/var/run/ksmtune.pid} +if touch "$PIDFILE"; then + loop & + echo $! > "$PIDFILE" +fi diff --git a/SOURCES/ksmtuned.conf b/SOURCES/ksmtuned.conf new file mode 100644 index 0000000..fc4518c --- /dev/null +++ b/SOURCES/ksmtuned.conf @@ -0,0 +1,21 @@ +# Configuration file for ksmtuned. + +# How long ksmtuned should sleep between tuning adjustments +# KSM_MONITOR_INTERVAL=60 + +# Millisecond sleep between ksm scans for 16Gb server. +# Smaller servers sleep more, bigger sleep less. +# KSM_SLEEP_MSEC=10 + +# KSM_NPAGES_BOOST=300 +# KSM_NPAGES_DECAY=-50 +# KSM_NPAGES_MIN=64 +# KSM_NPAGES_MAX=1250 + +# KSM_THRES_COEF=20 +# KSM_THRES_CONST=2048 + +# uncomment the following if you want ksmtuned debug info + +# LOGFILE=/var/log/ksmtuned +# DEBUG=1 diff --git a/SOURCES/ksmtuned.service b/SOURCES/ksmtuned.service new file mode 100644 index 0000000..39febcc --- /dev/null +++ b/SOURCES/ksmtuned.service @@ -0,0 +1,12 @@ +[Unit] +Description=Kernel Samepage Merging (KSM) Tuning Daemon +After=ksm.service +Requires=ksm.service + +[Service] +ExecStart=/usr/sbin/ksmtuned +ExecReload=/bin/kill -USR1 $MAINPID +Type=forking + +[Install] +WantedBy=multi-user.target diff --git a/SOURCES/kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch b/SOURCES/kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch new file mode 100644 index 0000000..ad2b261 --- /dev/null +++ b/SOURCES/kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch @@ -0,0 +1,87 @@ +From cd49a32e9c9e33efc51652b68180a07683814b4d Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Mon, 11 Jul 2022 18:11:12 -0300 +Subject: [PATCH 4/9] Add dirty-sync-missed-zero-copy migration stat +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 201: Zero-copy-send fixes + improvements +RH-Commit: [4/8] 56cce61cf95aafc8dafae7531b43c166084abfec +RH-Bugzilla: 2110203 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu +RH-Acked-by: Miroslav Rezanina + +Signed-off-by: Leonardo Bras +Acked-by: Markus Armbruster +Acked-by: Peter Xu +Reviewed-by: Daniel P. Berrangé +Message-Id: <20220711211112.18951-3-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit cf20c897338067ab4b70a4596fdccaf90c7e29a1) +Signed-off-by: Leonardo Bras +--- + migration/migration.c | 2 ++ + monitor/hmp-cmds.c | 5 +++++ + qapi/migration.json | 7 ++++++- + 3 files changed, 13 insertions(+), 1 deletion(-) + +diff --git a/migration/migration.c b/migration/migration.c +index e100b30f00..952a26c5c2 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1012,6 +1012,8 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + info->ram->normal_bytes = ram_counters.normal * page_size; + info->ram->mbps = s->mbps; + info->ram->dirty_sync_count = ram_counters.dirty_sync_count; ++ info->ram->dirty_sync_missed_zero_copy = ++ ram_counters.dirty_sync_missed_zero_copy; + info->ram->postcopy_requests = ram_counters.postcopy_requests; + info->ram->page_size = page_size; + info->ram->multifd_bytes = ram_counters.multifd_bytes; +diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c +index 8c384dc1b2..f7216ab5d0 100644 +--- a/monitor/hmp-cmds.c ++++ b/monitor/hmp-cmds.c +@@ -305,6 +305,11 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) + monitor_printf(mon, "postcopy ram: %" PRIu64 " kbytes\n", + info->ram->postcopy_bytes >> 10); + } ++ if (info->ram->dirty_sync_missed_zero_copy) { ++ monitor_printf(mon, ++ "Zero-copy-send fallbacks happened: %" PRIu64 " times\n", ++ info->ram->dirty_sync_missed_zero_copy); ++ } + } + + if (info->has_disk) { +diff --git a/qapi/migration.json b/qapi/migration.json +index c8ec260ab0..94bc5c69db 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -55,6 +55,10 @@ + # @postcopy-bytes: The number of bytes sent during the post-copy phase + # (since 7.0). + # ++# @dirty-sync-missed-zero-copy: Number of times dirty RAM synchronization could ++# not avoid copying dirty pages. This is between ++# 0 and @dirty-sync-count * @multifd-channels. ++# (since 7.1) + # Since: 0.14 + ## + { 'struct': 'MigrationStats', +@@ -65,7 +69,8 @@ + 'postcopy-requests' : 'int', 'page-size' : 'int', + 'multifd-bytes' : 'uint64', 'pages-per-second' : 'uint64', + 'precopy-bytes' : 'uint64', 'downtime-bytes' : 'uint64', +- 'postcopy-bytes' : 'uint64' } } ++ 'postcopy-bytes' : 'uint64', ++ 'dirty-sync-missed-zero-copy' : 'uint64' } } + + ## + # @XBZRLECacheStats: +-- +2.31.1 + diff --git a/SOURCES/kvm-Enable-SGX-RH-Only.patch b/SOURCES/kvm-Enable-SGX-RH-Only.patch new file mode 100644 index 0000000..efc8cac --- /dev/null +++ b/SOURCES/kvm-Enable-SGX-RH-Only.patch @@ -0,0 +1,28 @@ +From db6e042fe4fdc1a1bbf562a46b15d4d8e33e2fa6 Mon Sep 17 00:00:00 2001 +From: Paul Lai +Date: Tue, 25 Jan 2022 15:16:22 -0500 +Subject: [PATCH 4/7] Enable SGX -- RH Only + +RH-Author: Paul Lai +RH-MergeRequest: 111: numa: Enable numa for SGX EPC sections +RH-Commit: [4/5] cea874f29984897ef1232fb7749c13203c888034 +RH-Bugzilla: 1518984 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Bandan Das +RH-Acked-by: Cornelia Huck +--- + configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +index ddf036f042..fdbbdf9742 100644 +--- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak ++++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +@@ -102,3 +102,4 @@ CONFIG_TPM_CRB=y + CONFIG_TPM_TIS_ISA=y + CONFIG_TPM_EMULATOR=y + CONFIG_TPM_PASSTHROUGH=y ++CONFIG_SGX=y +-- +2.27.0 + diff --git a/SOURCES/kvm-KVM-keep-track-of-running-ioctls.patch b/SOURCES/kvm-KVM-keep-track-of-running-ioctls.patch new file mode 100644 index 0000000..8fad887 --- /dev/null +++ b/SOURCES/kvm-KVM-keep-track-of-running-ioctls.patch @@ -0,0 +1,82 @@ +From 9bacf8c4104ff3cff2e0e2c2179ec4fda633167f Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Mon, 16 Jan 2023 07:51:08 -0500 +Subject: [PATCH 05/11] KVM: keep track of running ioctls + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 247: accel: introduce accelerator blocker API +RH-Bugzilla: 2161188 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/3] 357508389e2a0fd996206b406e9e235e50b5f0b6 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2161188 + +commit a27dd2de68f37ba96fe164a42121daa5f0750afc +Author: Emanuele Giuseppe Esposito +Date: Fri Nov 11 10:47:57 2022 -0500 + + KVM: keep track of running ioctls + + Using the new accel-blocker API, mark where ioctls are being called + in KVM. Next, we will implement the critical section that will take + care of performing memslots modifications atomically, therefore + preventing any new ioctl from running and allowing the running ones + to finish. + + Signed-off-by: David Hildenbrand + Signed-off-by: Emanuele Giuseppe Esposito + Message-Id: <20221111154758.1372674-3-eesposit@redhat.com> + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + accel/kvm/kvm-all.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 8f2a53438f..221aadfda7 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2337,6 +2337,7 @@ static int kvm_init(MachineState *ms) + assert(TARGET_PAGE_SIZE <= qemu_real_host_page_size); + + s->sigmask_len = 8; ++ accel_blocker_init(); + + #ifdef KVM_CAP_SET_GUEST_DEBUG + QTAILQ_INIT(&s->kvm_sw_breakpoints); +@@ -3018,7 +3019,9 @@ int kvm_vm_ioctl(KVMState *s, int type, ...) + va_end(ap); + + trace_kvm_vm_ioctl(type, arg); ++ accel_ioctl_begin(); + ret = ioctl(s->vmfd, type, arg); ++ accel_ioctl_end(); + if (ret == -1) { + ret = -errno; + } +@@ -3036,7 +3039,9 @@ int kvm_vcpu_ioctl(CPUState *cpu, int type, ...) + va_end(ap); + + trace_kvm_vcpu_ioctl(cpu->cpu_index, type, arg); ++ accel_cpu_ioctl_begin(cpu); + ret = ioctl(cpu->kvm_fd, type, arg); ++ accel_cpu_ioctl_end(cpu); + if (ret == -1) { + ret = -errno; + } +@@ -3054,7 +3059,9 @@ int kvm_device_ioctl(int fd, int type, ...) + va_end(ap); + + trace_kvm_device_ioctl(fd, type, arg); ++ accel_ioctl_begin(); + ret = ioctl(fd, type, arg); ++ accel_ioctl_end(); + if (ret == -1) { + ret = -errno; + } +-- +2.37.3 + diff --git a/SOURCES/kvm-KVM-x86-workaround-invalid-CPUID-0xD-9-info-on-some-.patch b/SOURCES/kvm-KVM-x86-workaround-invalid-CPUID-0xD-9-info-on-some-.patch new file mode 100644 index 0000000..1a0beb2 --- /dev/null +++ b/SOURCES/kvm-KVM-x86-workaround-invalid-CPUID-0xD-9-info-on-some-.patch @@ -0,0 +1,109 @@ +From ea5299b5dde7d0b6b2f93cb646e6a24c9f105466 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Wed, 23 Mar 2022 12:33:25 +0100 +Subject: [PATCH 13/24] KVM: x86: workaround invalid CPUID[0xD,9] info on some + AMD processors +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paul Lai +RH-MergeRequest: 176: Enable KVM AMX support +RH-Commit: [13/13] 38f147c911258e84e01336271ebd23a1c24371fc +RH-Bugzilla: 1916415 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Igor Mammedov +RH-Acked-by: Paolo Bonzini + +Some AMD processors expose the PKRU extended save state even if they do not have +the related PKU feature in CPUID. Worse, when they do they report a size of +64, whereas the expected size of the PKRU extended save state is 8, therefore +the esa->size == eax assertion does not hold. + +The state is already ignored by KVM_GET_SUPPORTED_CPUID because it +was not enabled in the host XCR0. However, QEMU kvm_cpu_xsave_init() +runs before QEMU invokes arch_prctl() to enable dynamically-enabled +save states such as XTILEDATA, and KVM_GET_SUPPORTED_CPUID hides save +states that have yet to be enabled. Therefore, kvm_cpu_xsave_init() +needs to consult the host CPUID instead of KVM_GET_SUPPORTED_CPUID, +and dies with an assertion failure. + +When setting up the ExtSaveArea array to match the host, ignore features that +KVM does not report as supported. This will cause QEMU to skip the incorrect +CPUID leaf instead of tripping the assertion. + +Closes: https://gitlab.com/qemu-project/qemu/-/issues/916 +Reported-by: Daniel P. Berrangé +Analyzed-by: Yang Zhong +Reported-by: Peter Krempa +Tested-by: Daniel P. Berrangé +Signed-off-by: Paolo Bonzini +(cherry picked from commit 58f7db26f21c690cf9a669c314cfd7371506084a) +Signed-off-by: Paul Lai +--- + target/i386/cpu.c | 4 ++-- + target/i386/cpu.h | 2 ++ + target/i386/kvm/kvm-cpu.c | 19 ++++++++++++------- + 3 files changed, 16 insertions(+), 9 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 09e08f7f38..0543b846ff 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -4980,8 +4980,8 @@ CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp) + return cpu_list; + } + +-static uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, +- bool migratable_only) ++uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, ++ bool migratable_only) + { + FeatureWordInfo *wi = &feature_word_info[w]; + uint64_t r = 0; +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 8ab2a4042a..006b735fe4 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -604,6 +604,8 @@ typedef enum FeatureWord { + } FeatureWord; + + typedef uint64_t FeatureWordArray[FEATURE_WORDS]; ++uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, ++ bool migratable_only); + + /* cpuid_features bits */ + #define CPUID_FP87 (1U << 0) +diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c +index bdc967c484..74c1396a93 100644 +--- a/target/i386/kvm/kvm-cpu.c ++++ b/target/i386/kvm/kvm-cpu.c +@@ -99,13 +99,18 @@ static void kvm_cpu_xsave_init(void) + for (i = XSTATE_SSE_BIT + 1; i < XSAVE_STATE_AREA_COUNT; i++) { + ExtSaveArea *esa = &x86_ext_save_areas[i]; + +- if (esa->size) { +- host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx); +- if (eax != 0) { +- assert(esa->size == eax); +- esa->offset = ebx; +- esa->ecx = ecx; +- } ++ if (!esa->size) { ++ continue; ++ } ++ if ((x86_cpu_get_supported_feature_word(esa->feature, false) & esa->bits) ++ != esa->bits) { ++ continue; ++ } ++ host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx); ++ if (eax != 0) { ++ assert(esa->size == eax); ++ esa->offset = ebx; ++ esa->ecx = ecx; + } + } + } +-- +2.35.3 + diff --git a/SOURCES/kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch b/SOURCES/kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch new file mode 100644 index 0000000..81ae532 --- /dev/null +++ b/SOURCES/kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch @@ -0,0 +1,420 @@ +From 7eeec7c008e947bc3e1fed682791092b408852c6 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Wed, 18 May 2022 02:52:24 -0300 +Subject: [PATCH 17/37] QIOChannel: Add flags on io_writev and introduce + io_flush callback +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [17/26] 7bde4e79fd3f76a6cc84d9cacf50420584ddd35c +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +Add flags to io_writev and introduce io_flush as optional callback to +QIOChannelClass, allowing the implementation of zero copy writes by +subclasses. + +How to use them: +- Write data using qio_channel_writev*(...,QIO_CHANNEL_WRITE_FLAG_ZERO_COPY), +- Wait write completion with qio_channel_flush(). + +Notes: +As some zero copy write implementations work asynchronously, it's +recommended to keep the write buffer untouched until the return of +qio_channel_flush(), to avoid the risk of sending an updated buffer +instead of the buffer state during write. + +As io_flush callback is optional, if a subclass does not implement it, then: +- io_flush will return 0 without changing anything. + +Also, some functions like qio_channel_writev_full_all() were adapted to +receive a flag parameter. That allows shared code between zero copy and +non-zero copy writev, and also an easier implementation on new flags. + +Signed-off-by: Leonardo Bras +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Peter Xu +Reviewed-by: Juan Quintela +Message-Id: <20220513062836.965425-3-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit b88651cb4d4fa416fdbb6afaf5b26ec8c035eaad) +Signed-off-by: Leonardo Bras +--- + chardev/char-io.c | 2 +- + hw/remote/mpqemu-link.c | 2 +- + include/io/channel.h | 38 +++++++++++++++++++++- + io/channel-buffer.c | 1 + + io/channel-command.c | 1 + + io/channel-file.c | 1 + + io/channel-socket.c | 2 ++ + io/channel-tls.c | 1 + + io/channel-websock.c | 1 + + io/channel.c | 49 +++++++++++++++++++++++------ + migration/rdma.c | 1 + + scsi/pr-manager-helper.c | 2 +- + tests/unit/test-io-channel-socket.c | 1 + + 13 files changed, 88 insertions(+), 14 deletions(-) + +diff --git a/chardev/char-io.c b/chardev/char-io.c +index 8ced184160..4451128cba 100644 +--- a/chardev/char-io.c ++++ b/chardev/char-io.c +@@ -122,7 +122,7 @@ int io_channel_send_full(QIOChannel *ioc, + + ret = qio_channel_writev_full( + ioc, &iov, 1, +- fds, nfds, NULL); ++ fds, nfds, 0, NULL); + if (ret == QIO_CHANNEL_ERR_BLOCK) { + if (offset) { + return offset; +diff --git a/hw/remote/mpqemu-link.c b/hw/remote/mpqemu-link.c +index 7e841820e5..e8f556bd27 100644 +--- a/hw/remote/mpqemu-link.c ++++ b/hw/remote/mpqemu-link.c +@@ -69,7 +69,7 @@ bool mpqemu_msg_send(MPQemuMsg *msg, QIOChannel *ioc, Error **errp) + } + + if (!qio_channel_writev_full_all(ioc, send, G_N_ELEMENTS(send), +- fds, nfds, errp)) { ++ fds, nfds, 0, errp)) { + ret = true; + } else { + trace_mpqemu_send_io_error(msg->cmd, msg->size, nfds); +diff --git a/include/io/channel.h b/include/io/channel.h +index 88988979f8..c680ee7480 100644 +--- a/include/io/channel.h ++++ b/include/io/channel.h +@@ -32,12 +32,15 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass, + + #define QIO_CHANNEL_ERR_BLOCK -2 + ++#define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1 ++ + typedef enum QIOChannelFeature QIOChannelFeature; + + enum QIOChannelFeature { + QIO_CHANNEL_FEATURE_FD_PASS, + QIO_CHANNEL_FEATURE_SHUTDOWN, + QIO_CHANNEL_FEATURE_LISTEN, ++ QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY, + }; + + +@@ -104,6 +107,7 @@ struct QIOChannelClass { + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp); + ssize_t (*io_readv)(QIOChannel *ioc, + const struct iovec *iov, +@@ -136,6 +140,8 @@ struct QIOChannelClass { + IOHandler *io_read, + IOHandler *io_write, + void *opaque); ++ int (*io_flush)(QIOChannel *ioc, ++ Error **errp); + }; + + /* General I/O handling functions */ +@@ -228,6 +234,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, + * @niov: the length of the @iov array + * @fds: an array of file handles to send + * @nfds: number of file handles in @fds ++ * @flags: write flags (QIO_CHANNEL_WRITE_FLAG_*) + * @errp: pointer to a NULL-initialized error object + * + * Write data to the IO channel, reading it from the +@@ -260,6 +267,7 @@ ssize_t qio_channel_writev_full(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp); + + /** +@@ -837,6 +845,7 @@ int qio_channel_readv_full_all(QIOChannel *ioc, + * @niov: the length of the @iov array + * @fds: an array of file handles to send + * @nfds: number of file handles in @fds ++ * @flags: write flags (QIO_CHANNEL_WRITE_FLAG_*) + * @errp: pointer to a NULL-initialized error object + * + * +@@ -846,6 +855,14 @@ int qio_channel_readv_full_all(QIOChannel *ioc, + * to be written, yielding from the current coroutine + * if required. + * ++ * If QIO_CHANNEL_WRITE_FLAG_ZERO_COPY is passed in flags, ++ * instead of waiting for all requested data to be written, ++ * this function will wait until it's all queued for writing. ++ * In this case, if the buffer gets changed between queueing and ++ * sending, the updated buffer will be sent. If this is not a ++ * desired behavior, it's suggested to call qio_channel_flush() ++ * before reusing the buffer. ++ * + * Returns: 0 if all bytes were written, or -1 on error + */ + +@@ -853,6 +870,25 @@ int qio_channel_writev_full_all(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int *fds, size_t nfds, +- Error **errp); ++ int flags, Error **errp); ++ ++/** ++ * qio_channel_flush: ++ * @ioc: the channel object ++ * @errp: pointer to a NULL-initialized error object ++ * ++ * Will block until every packet queued with ++ * qio_channel_writev_full() + QIO_CHANNEL_WRITE_FLAG_ZERO_COPY ++ * is sent, or return in case of any error. ++ * ++ * If not implemented, acts as a no-op, and returns 0. ++ * ++ * Returns -1 if any error is found, ++ * 1 if every send failed to use zero copy. ++ * 0 otherwise. ++ */ ++ ++int qio_channel_flush(QIOChannel *ioc, ++ Error **errp); + + #endif /* QIO_CHANNEL_H */ +diff --git a/io/channel-buffer.c b/io/channel-buffer.c +index baa4e2b089..bf52011be2 100644 +--- a/io/channel-buffer.c ++++ b/io/channel-buffer.c +@@ -81,6 +81,7 @@ static ssize_t qio_channel_buffer_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc); +diff --git a/io/channel-command.c b/io/channel-command.c +index b2a9e27138..5ff1691bad 100644 +--- a/io/channel-command.c ++++ b/io/channel-command.c +@@ -258,6 +258,7 @@ static ssize_t qio_channel_command_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc); +diff --git a/io/channel-file.c b/io/channel-file.c +index c4bf799a80..348a48545e 100644 +--- a/io/channel-file.c ++++ b/io/channel-file.c +@@ -114,6 +114,7 @@ static ssize_t qio_channel_file_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc); +diff --git a/io/channel-socket.c b/io/channel-socket.c +index 606ec97cf7..bfbd64787e 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -525,6 +525,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); +@@ -620,6 +621,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); +diff --git a/io/channel-tls.c b/io/channel-tls.c +index 2ae1b92fc0..4ce890a538 100644 +--- a/io/channel-tls.c ++++ b/io/channel-tls.c +@@ -301,6 +301,7 @@ static ssize_t qio_channel_tls_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc); +diff --git a/io/channel-websock.c b/io/channel-websock.c +index 70889bb54d..035dd6075b 100644 +--- a/io/channel-websock.c ++++ b/io/channel-websock.c +@@ -1127,6 +1127,7 @@ static ssize_t qio_channel_websock_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc); +diff --git a/io/channel.c b/io/channel.c +index e8b019dc36..0640941ac5 100644 +--- a/io/channel.c ++++ b/io/channel.c +@@ -72,18 +72,32 @@ ssize_t qio_channel_writev_full(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); + +- if ((fds || nfds) && +- !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS)) { ++ if (fds || nfds) { ++ if (!qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS)) { ++ error_setg_errno(errp, EINVAL, ++ "Channel does not support file descriptor passing"); ++ return -1; ++ } ++ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { ++ error_setg_errno(errp, EINVAL, ++ "Zero Copy does not support file descriptor passing"); ++ return -1; ++ } ++ } ++ ++ if ((flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) && ++ !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) { + error_setg_errno(errp, EINVAL, +- "Channel does not support file descriptor passing"); ++ "Requested Zero Copy feature is not available"); + return -1; + } + +- return klass->io_writev(ioc, iov, niov, fds, nfds, errp); ++ return klass->io_writev(ioc, iov, niov, fds, nfds, flags, errp); + } + + +@@ -217,14 +231,14 @@ int qio_channel_writev_all(QIOChannel *ioc, + size_t niov, + Error **errp) + { +- return qio_channel_writev_full_all(ioc, iov, niov, NULL, 0, errp); ++ return qio_channel_writev_full_all(ioc, iov, niov, NULL, 0, 0, errp); + } + + int qio_channel_writev_full_all(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int *fds, size_t nfds, +- Error **errp) ++ int flags, Error **errp) + { + int ret = -1; + struct iovec *local_iov = g_new(struct iovec, niov); +@@ -237,8 +251,10 @@ int qio_channel_writev_full_all(QIOChannel *ioc, + + while (nlocal_iov > 0) { + ssize_t len; +- len = qio_channel_writev_full(ioc, local_iov, nlocal_iov, fds, nfds, +- errp); ++ ++ len = qio_channel_writev_full(ioc, local_iov, nlocal_iov, fds, ++ nfds, flags, errp); ++ + if (len == QIO_CHANNEL_ERR_BLOCK) { + if (qemu_in_coroutine()) { + qio_channel_yield(ioc, G_IO_OUT); +@@ -277,7 +293,7 @@ ssize_t qio_channel_writev(QIOChannel *ioc, + size_t niov, + Error **errp) + { +- return qio_channel_writev_full(ioc, iov, niov, NULL, 0, errp); ++ return qio_channel_writev_full(ioc, iov, niov, NULL, 0, 0, errp); + } + + +@@ -297,7 +313,7 @@ ssize_t qio_channel_write(QIOChannel *ioc, + Error **errp) + { + struct iovec iov = { .iov_base = (char *)buf, .iov_len = buflen }; +- return qio_channel_writev_full(ioc, &iov, 1, NULL, 0, errp); ++ return qio_channel_writev_full(ioc, &iov, 1, NULL, 0, 0, errp); + } + + +@@ -473,6 +489,19 @@ off_t qio_channel_io_seek(QIOChannel *ioc, + return klass->io_seek(ioc, offset, whence, errp); + } + ++int qio_channel_flush(QIOChannel *ioc, ++ Error **errp) ++{ ++ QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); ++ ++ if (!klass->io_flush || ++ !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) { ++ return 0; ++ } ++ ++ return klass->io_flush(ioc, errp); ++} ++ + + static void qio_channel_restart_read(void *opaque) + { +diff --git a/migration/rdma.c b/migration/rdma.c +index f5d3bbe7e9..54acd2000e 100644 +--- a/migration/rdma.c ++++ b/migration/rdma.c +@@ -2833,6 +2833,7 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc); +diff --git a/scsi/pr-manager-helper.c b/scsi/pr-manager-helper.c +index 451c7631b7..3be52a98d5 100644 +--- a/scsi/pr-manager-helper.c ++++ b/scsi/pr-manager-helper.c +@@ -77,7 +77,7 @@ static int pr_manager_helper_write(PRManagerHelper *pr_mgr, + iov.iov_base = (void *)buf; + iov.iov_len = sz; + n_written = qio_channel_writev_full(QIO_CHANNEL(pr_mgr->ioc), &iov, 1, +- nfds ? &fd : NULL, nfds, errp); ++ nfds ? &fd : NULL, nfds, 0, errp); + + if (n_written <= 0) { + assert(n_written != QIO_CHANNEL_ERR_BLOCK); +diff --git a/tests/unit/test-io-channel-socket.c b/tests/unit/test-io-channel-socket.c +index c49eec1f03..6713886d02 100644 +--- a/tests/unit/test-io-channel-socket.c ++++ b/tests/unit/test-io-channel-socket.c +@@ -444,6 +444,7 @@ static void test_io_channel_unix_fd_pass(void) + G_N_ELEMENTS(iosend), + fdsend, + G_N_ELEMENTS(fdsend), ++ 0, + &error_abort); + + qio_channel_readv_full(dst, +-- +2.35.3 + diff --git a/SOURCES/kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch b/SOURCES/kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch new file mode 100644 index 0000000..98f1ac4 --- /dev/null +++ b/SOURCES/kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch @@ -0,0 +1,56 @@ +From a6c4aed18a027ce8e107fdf9184e9ea43a86f843 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Thu, 4 Aug 2022 04:10:43 -0300 +Subject: [PATCH 8/9] QIOChannelSocket: Add support for MSG_ZEROCOPY + IPV6 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 201: Zero-copy-send fixes + improvements +RH-Commit: [8/8] 6e26ee7c9ebaedb07623313cb0678816867751dd +RH-Bugzilla: 2110203 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu +RH-Acked-by: Miroslav Rezanina + +For using MSG_ZEROCOPY, there are two steps: +1 - io_writev() the packet, which enqueues the packet for sending, and +2 - io_flush(), which gets confirmation that all packets got correctly sent + +Currently, if MSG_ZEROCOPY is used to send packets over IPV6, no error will +be reported in (1), but it will fail in the first time (2) happens. + +This happens because (2) currently checks for cmsg_level & cmsg_type +associated with IPV4 only, before reporting any error. + +Add checks for cmsg_level & cmsg_type associated with IPV6, and thus enable +support for MSG_ZEROCOPY + IPV6 + +Fixes: 2bc58ffc29 ("QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX") +Signed-off-by: Leonardo Bras +Signed-off-by: Daniel P. Berrangé +(cherry picked from commit 5258a7e2c0677d16e9e1d06845f60171adf0b290) +Signed-off-by: Leonardo Bras +--- + io/channel-socket.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/io/channel-socket.c b/io/channel-socket.c +index cf0d67c51b..6010ad7017 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -747,8 +747,8 @@ static int qio_channel_socket_flush(QIOChannel *ioc, + } + + cm = CMSG_FIRSTHDR(&msg); +- if (cm->cmsg_level != SOL_IP && +- cm->cmsg_type != IP_RECVERR) { ++ if (cm->cmsg_level != SOL_IP && cm->cmsg_type != IP_RECVERR && ++ cm->cmsg_level != SOL_IPV6 && cm->cmsg_type != IPV6_RECVERR) { + error_setg_errno(errp, EPROTOTYPE, + "Wrong cmsg in errqueue"); + return -1; +-- +2.31.1 + diff --git a/SOURCES/kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch b/SOURCES/kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch new file mode 100644 index 0000000..5806062 --- /dev/null +++ b/SOURCES/kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch @@ -0,0 +1,65 @@ +From 905cc8032fc63619efb3f0a8c9754b7190bcc43a Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Mon, 11 Jul 2022 18:11:11 -0300 +Subject: [PATCH 3/9] QIOChannelSocket: Fix zero-copy flush returning code 1 + when nothing sent +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 201: Zero-copy-send fixes + improvements +RH-Commit: [3/8] 1ad707702fa26cd4d0fa1870c21f5f26ae93ff97 +RH-Bugzilla: 2110203 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu +RH-Acked-by: Miroslav Rezanina + +If flush is called when no buffer was sent with MSG_ZEROCOPY, it currently +returns 1. This return code should be used only when Linux fails to use +MSG_ZEROCOPY on a lot of sendmsg(). + +Fix this by returning early from flush if no sendmsg(...,MSG_ZEROCOPY) +was attempted. + +Fixes: 2bc58ffc2926 ("QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX") +Signed-off-by: Leonardo Bras +Reviewed-by: Daniel P. Berrangé +Acked-by: Daniel P. Berrangé +Reviewed-by: Juan Quintela +Reviewed-by: Peter Xu +Message-Id: <20220711211112.18951-2-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 927f93e099c4f9184e60a1bc61624ac2d04d0223) +Signed-off-by: Leonardo Bras +--- + io/channel-socket.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/io/channel-socket.c b/io/channel-socket.c +index df858da924..cf0d67c51b 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -717,12 +717,18 @@ static int qio_channel_socket_flush(QIOChannel *ioc, + struct cmsghdr *cm; + char control[CMSG_SPACE(sizeof(*serr))]; + int received; +- int ret = 1; ++ int ret; ++ ++ if (sioc->zero_copy_queued == sioc->zero_copy_sent) { ++ return 0; ++ } + + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + memset(control, 0, sizeof(control)); + ++ ret = 1; ++ + while (sioc->zero_copy_sent < sioc->zero_copy_queued) { + received = recvmsg(sioc->fd, &msg, MSG_ERRQUEUE); + if (received < 0) { +-- +2.31.1 + diff --git a/SOURCES/kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch b/SOURCES/kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch new file mode 100644 index 0000000..685478f --- /dev/null +++ b/SOURCES/kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch @@ -0,0 +1,58 @@ +From c1fd32d93ae42fcf3c1a25f4d56e669f251087d8 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Mon, 20 Jun 2022 02:39:43 -0300 +Subject: [PATCH 25/37] QIOChannelSocket: Fix zero-copy send so socket flush + works +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [25/26] 3ede94f3269e21c3ace073ed1a6f24696315bcbb +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +Somewhere between v6 and v7 the of the zero-copy-send patchset a crucial +part of the flushing mechanism got missing: incrementing zero_copy_queued. + +Without that, the flushing interface becomes a no-op, and there is no +guarantee the buffer is really sent. + +This can go as bad as causing a corruption in RAM during migration. + +Fixes: 2bc58ffc2926 ("QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX") +Reported-by: 徐闯 +Signed-off-by: Leonardo Bras +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Peter Xu +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 4f5a09714c983a3471fd12e3c7f3196e95c650c1) +Signed-off-by: Leonardo Bras +--- + io/channel-socket.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/io/channel-socket.c b/io/channel-socket.c +index 7d37b39de7..df858da924 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -612,6 +612,11 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + "Unable to write to socket"); + return -1; + } ++ ++ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { ++ sioc->zero_copy_queued++; ++ } ++ + return ret; + } + #else /* WIN32 */ +-- +2.35.3 + diff --git a/SOURCES/kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch b/SOURCES/kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch new file mode 100644 index 0000000..4b272ee --- /dev/null +++ b/SOURCES/kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch @@ -0,0 +1,249 @@ +From 5fd7af93a06adaddbae719aabbaf912159f4fb28 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Wed, 18 May 2022 02:52:25 -0300 +Subject: [PATCH 18/37] QIOChannelSocket: Implement io_writev zero copy flag & + io_flush for CONFIG_LINUX +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [18/26] 6f65c8c879a5df57213b541d58285b65178f8547 +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +For CONFIG_LINUX, implement the new zero copy flag and the optional callback +io_flush on QIOChannelSocket, but enables it only when MSG_ZEROCOPY +feature is available in the host kernel, which is checked on +qio_channel_socket_connect_sync() + +qio_channel_socket_flush() was implemented by counting how many times +sendmsg(...,MSG_ZEROCOPY) was successfully called, and then reading the +socket's error queue, in order to find how many of them finished sending. +Flush will loop until those counters are the same, or until some error occurs. + +Notes on using writev() with QIO_CHANNEL_WRITE_FLAG_ZERO_COPY: +1: Buffer +- As MSG_ZEROCOPY tells the kernel to use the same user buffer to avoid copying, +some caution is necessary to avoid overwriting any buffer before it's sent. +If something like this happen, a newer version of the buffer may be sent instead. +- If this is a problem, it's recommended to call qio_channel_flush() before freeing +or re-using the buffer. + +2: Locked memory +- When using MSG_ZERCOCOPY, the buffer memory will be locked after queued, and +unlocked after it's sent. +- Depending on the size of each buffer, and how often it's sent, it may require +a larger amount of locked memory than usually available to non-root user. +- If the required amount of locked memory is not available, writev_zero_copy +will return an error, which can abort an operation like migration, +- Because of this, when an user code wants to add zero copy as a feature, it +requires a mechanism to disable it, so it can still be accessible to less +privileged users. + +Signed-off-by: Leonardo Bras +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Juan Quintela +Message-Id: <20220513062836.965425-4-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 2bc58ffc2926a4efdd03edfb5909861fefc68c3d) +Signed-off-by: Leonardo Bras +--- + include/io/channel-socket.h | 2 + + io/channel-socket.c | 116 ++++++++++++++++++++++++++++++++++-- + 2 files changed, 114 insertions(+), 4 deletions(-) + +diff --git a/include/io/channel-socket.h b/include/io/channel-socket.h +index e747e63514..513c428fe4 100644 +--- a/include/io/channel-socket.h ++++ b/include/io/channel-socket.h +@@ -47,6 +47,8 @@ struct QIOChannelSocket { + socklen_t localAddrLen; + struct sockaddr_storage remoteAddr; + socklen_t remoteAddrLen; ++ ssize_t zero_copy_queued; ++ ssize_t zero_copy_sent; + }; + + +diff --git a/io/channel-socket.c b/io/channel-socket.c +index bfbd64787e..38a46ba213 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -26,6 +26,14 @@ + #include "io/channel-watch.h" + #include "trace.h" + #include "qapi/clone-visitor.h" ++#ifdef CONFIG_LINUX ++#include ++#include ++ ++#if (defined(MSG_ZEROCOPY) && defined(SO_ZEROCOPY)) ++#define QEMU_MSG_ZEROCOPY ++#endif ++#endif + + #define SOCKET_MAX_FDS 16 + +@@ -55,6 +63,8 @@ qio_channel_socket_new(void) + + sioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET)); + sioc->fd = -1; ++ sioc->zero_copy_queued = 0; ++ sioc->zero_copy_sent = 0; + + ioc = QIO_CHANNEL(sioc); + qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN); +@@ -154,6 +164,16 @@ int qio_channel_socket_connect_sync(QIOChannelSocket *ioc, + return -1; + } + ++#ifdef QEMU_MSG_ZEROCOPY ++ int ret, v = 1; ++ ret = setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &v, sizeof(v)); ++ if (ret == 0) { ++ /* Zero copy available on host */ ++ qio_channel_set_feature(QIO_CHANNEL(ioc), ++ QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY); ++ } ++#endif ++ + return 0; + } + +@@ -534,6 +554,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + char control[CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)]; + size_t fdsize = sizeof(int) * nfds; + struct cmsghdr *cmsg; ++ int sflags = 0; + + memset(control, 0, CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)); + +@@ -558,15 +579,31 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + memcpy(CMSG_DATA(cmsg), fds, fdsize); + } + ++#ifdef QEMU_MSG_ZEROCOPY ++ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { ++ sflags = MSG_ZEROCOPY; ++ } ++#endif ++ + retry: +- ret = sendmsg(sioc->fd, &msg, 0); ++ ret = sendmsg(sioc->fd, &msg, sflags); + if (ret <= 0) { +- if (errno == EAGAIN) { ++ switch (errno) { ++ case EAGAIN: + return QIO_CHANNEL_ERR_BLOCK; +- } +- if (errno == EINTR) { ++ case EINTR: + goto retry; ++#ifdef QEMU_MSG_ZEROCOPY ++ case ENOBUFS: ++ if (sflags & MSG_ZEROCOPY) { ++ error_setg_errno(errp, errno, ++ "Process can't lock enough memory for using MSG_ZEROCOPY"); ++ return -1; ++ } ++ break; ++#endif + } ++ + error_setg_errno(errp, errno, + "Unable to write to socket"); + return -1; +@@ -660,6 +697,74 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + } + #endif /* WIN32 */ + ++ ++#ifdef QEMU_MSG_ZEROCOPY ++static int qio_channel_socket_flush(QIOChannel *ioc, ++ Error **errp) ++{ ++ QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); ++ struct msghdr msg = {}; ++ struct sock_extended_err *serr; ++ struct cmsghdr *cm; ++ char control[CMSG_SPACE(sizeof(*serr))]; ++ int received; ++ int ret = 1; ++ ++ msg.msg_control = control; ++ msg.msg_controllen = sizeof(control); ++ memset(control, 0, sizeof(control)); ++ ++ while (sioc->zero_copy_sent < sioc->zero_copy_queued) { ++ received = recvmsg(sioc->fd, &msg, MSG_ERRQUEUE); ++ if (received < 0) { ++ switch (errno) { ++ case EAGAIN: ++ /* Nothing on errqueue, wait until something is available */ ++ qio_channel_wait(ioc, G_IO_ERR); ++ continue; ++ case EINTR: ++ continue; ++ default: ++ error_setg_errno(errp, errno, ++ "Unable to read errqueue"); ++ return -1; ++ } ++ } ++ ++ cm = CMSG_FIRSTHDR(&msg); ++ if (cm->cmsg_level != SOL_IP && ++ cm->cmsg_type != IP_RECVERR) { ++ error_setg_errno(errp, EPROTOTYPE, ++ "Wrong cmsg in errqueue"); ++ return -1; ++ } ++ ++ serr = (void *) CMSG_DATA(cm); ++ if (serr->ee_errno != SO_EE_ORIGIN_NONE) { ++ error_setg_errno(errp, serr->ee_errno, ++ "Error on socket"); ++ return -1; ++ } ++ if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) { ++ error_setg_errno(errp, serr->ee_origin, ++ "Error not from zero copy"); ++ return -1; ++ } ++ ++ /* No errors, count successfully finished sendmsg()*/ ++ sioc->zero_copy_sent += serr->ee_data - serr->ee_info + 1; ++ ++ /* If any sendmsg() succeeded using zero copy, return 0 at the end */ ++ if (serr->ee_code != SO_EE_CODE_ZEROCOPY_COPIED) { ++ ret = 0; ++ } ++ } ++ ++ return ret; ++} ++ ++#endif /* QEMU_MSG_ZEROCOPY */ ++ + static int + qio_channel_socket_set_blocking(QIOChannel *ioc, + bool enabled, +@@ -789,6 +894,9 @@ static void qio_channel_socket_class_init(ObjectClass *klass, + ioc_klass->io_set_delay = qio_channel_socket_set_delay; + ioc_klass->io_create_watch = qio_channel_socket_create_watch; + ioc_klass->io_set_aio_fd_handler = qio_channel_socket_set_aio_fd_handler; ++#ifdef QEMU_MSG_ZEROCOPY ++ ioc_klass->io_flush = qio_channel_socket_flush; ++#endif + } + + static const TypeInfo qio_channel_socket_info = { +-- +2.35.3 + diff --git a/SOURCES/kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch b/SOURCES/kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch new file mode 100644 index 0000000..2575f64 --- /dev/null +++ b/SOURCES/kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch @@ -0,0 +1,82 @@ +From cbfaf86331c2b2e01a2083303b7554672bf991b7 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Mon, 20 Jun 2022 02:39:42 -0300 +Subject: [PATCH 24/37] QIOChannelSocket: Introduce assert and reduce ifdefs to + improve readability +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [24/26] b50e2e65307149f247155a7f7a032dc99e57718d +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +During implementation of MSG_ZEROCOPY feature, a lot of #ifdefs were +introduced, particularly at qio_channel_socket_writev(). + +Rewrite some of those changes so it's easier to read. + +Also, introduce an assert to help detect incorrect zero-copy usage is when +it's disabled on build. + +Signed-off-by: Leonardo Bras +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Juan Quintela +Reviewed-by: Peter Xu +Signed-off-by: Juan Quintela +Signed-off-by: Dr. David Alan Gilbert + dgilbert: Fixed up thinko'd g_assert_unreachable->g_assert_not_reached +(cherry picked from commit 803ca43e4c7fcf32f9f68c118301ccd0c83ece3f) +Signed-off-by: Leonardo Bras +--- + io/channel-socket.c | 14 +++++++++----- + 1 file changed, 9 insertions(+), 5 deletions(-) + +diff --git a/io/channel-socket.c b/io/channel-socket.c +index 38a46ba213..7d37b39de7 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -579,11 +579,17 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + memcpy(CMSG_DATA(cmsg), fds, fdsize); + } + +-#ifdef QEMU_MSG_ZEROCOPY + if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { ++#ifdef QEMU_MSG_ZEROCOPY + sflags = MSG_ZEROCOPY; +- } ++#else ++ /* ++ * We expect QIOChannel class entry point to have ++ * blocked this code path already ++ */ ++ g_assert_not_reached(); + #endif ++ } + + retry: + ret = sendmsg(sioc->fd, &msg, sflags); +@@ -593,15 +599,13 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + return QIO_CHANNEL_ERR_BLOCK; + case EINTR: + goto retry; +-#ifdef QEMU_MSG_ZEROCOPY + case ENOBUFS: +- if (sflags & MSG_ZEROCOPY) { ++ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { + error_setg_errno(errp, errno, + "Process can't lock enough memory for using MSG_ZEROCOPY"); + return -1; + } + break; +-#endif + } + + error_setg_errno(errp, errno, +-- +2.35.3 + diff --git a/SOURCES/kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch b/SOURCES/kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch new file mode 100644 index 0000000..27cc557 --- /dev/null +++ b/SOURCES/kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch @@ -0,0 +1,107 @@ +From e0e4f01c6f4fb5881960f72ae4e80951b711131e Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Thu, 24 Mar 2022 16:04:57 +0100 +Subject: [PATCH 1/5] RHEL: disable "seqpacket" for "vhost-vsock-device" in + rhel8.6.0 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Stefano Garzarella +RH-MergeRequest: 136: RHEL: disable "seqpacket" for "vhost-vsock-device" in rhel8.6.0 [rhel-8.7.0] +RH-Commit: [1/1] d82ea09e123679521503689f7d9af1c03dc71bfc +RH-Bugzilla: 2068202 +RH-Acked-by: Jason Wang +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Stefan Hajnoczi + +vhost-vsock device in RHEL 8 kernels doesn't support seqpacket. +To avoid problems when migrating a VM from RHEL 9 host, we need to +disable it in rhel8-* machine types. + +Signed-off-by: Stefano Garzarella +--- + hw/core/machine.c | 10 ++++++++++ + hw/i386/pc_piix.c | 2 ++ + hw/i386/pc_q35.c | 2 ++ + hw/s390x/s390-virtio-ccw.c | 1 + + include/hw/boards.h | 3 +++ + 5 files changed, 18 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 024b025fc2..76fcabec7a 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -37,6 +37,16 @@ + #include "hw/virtio/virtio.h" + #include "hw/virtio/virtio-pci.h" + ++GlobalProperty hw_compat_rhel_8_6[] = { ++ /* hw_compat_rhel_8_6 bz 2068202 */ ++ /* ++ * vhost-vsock device in RHEL 8 kernels doesn't support seqpacket, so ++ * we need do disable it downstream on the latest hw_compat_rhel_8. ++ */ ++ { "vhost-vsock-device", "seqpacket", "off" }, ++}; ++const size_t hw_compat_rhel_8_6_len = G_N_ELEMENTS(hw_compat_rhel_8_6); ++ + /* + * Mostly the same as hw_compat_6_0 and hw_compat_6_1 + */ +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index f03a8f0db8..ab6d03e07a 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -998,6 +998,8 @@ static void pc_machine_rhel760_options(MachineClass *m) + pcmc->kvmclock_create_always = false; + /* From pc_i440fx_5_1_machine_options() */ + pcmc->pci_root_uid = 1; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_6, ++ hw_compat_rhel_8_6_len); + compat_props_add(m->compat_props, hw_compat_rhel_8_5, + hw_compat_rhel_8_5_len); + compat_props_add(m->compat_props, pc_rhel_8_5_compat, +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 5559261d9e..882fe7a68d 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -658,6 +658,8 @@ static void pc_q35_machine_rhel860_options(MachineClass *m) + m->desc = "RHEL-8.6.0 PC (Q35 + ICH9, 2009)"; + pcmc->smbios_stream_product = "RHEL-AV"; + pcmc->smbios_stream_version = "8.6.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_6, ++ hw_compat_rhel_8_6_len); + } + + DEFINE_PC_MACHINE(q35_rhel860, "pc-q35-rhel8.6.0", pc_q35_init_rhel860, +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 9795eb9406..bec270598b 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1109,6 +1109,7 @@ static void ccw_machine_rhel860_instance_options(MachineState *machine) + + static void ccw_machine_rhel860_class_options(MachineClass *mc) + { ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_6, hw_compat_rhel_8_6_len); + } + DEFINE_CCW_MACHINE(rhel860, "rhel8.6.0", true); + +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 04e8759815..4ddb798144 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -443,6 +443,9 @@ extern const size_t hw_compat_2_2_len; + extern GlobalProperty hw_compat_2_1[]; + extern const size_t hw_compat_2_1_len; + ++extern GlobalProperty hw_compat_rhel_8_6[]; ++extern const size_t hw_compat_rhel_8_6_len; ++ + extern GlobalProperty hw_compat_rhel_8_5[]; + extern const size_t hw_compat_rhel_8_5_len; + +-- +2.27.0 + diff --git a/SOURCES/kvm-Revert-redhat-Add-hw_compat_4_2_extra-and-apply-to-u.patch b/SOURCES/kvm-Revert-redhat-Add-hw_compat_4_2_extra-and-apply-to-u.patch new file mode 100644 index 0000000..56af50f --- /dev/null +++ b/SOURCES/kvm-Revert-redhat-Add-hw_compat_4_2_extra-and-apply-to-u.patch @@ -0,0 +1,93 @@ +From e626dc16d130c724c400b99a93daad0a9abeae59 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 22 Mar 2022 19:23:36 -0400 +Subject: [PATCH 01/18] Revert "redhat: Add hw_compat_4_2_extra and apply to + upstream machines" + +RH-Author: Jon Maloy +RH-MergeRequest: 131: Revert "redhat: Add hw_compat_4_2_extra and apply to upstream machines" +RH-Commit: [1/3] 47b7d9e5062f5e215d5ed1a3ecdc1a87ac3fa630 (jmaloy/qemu-kvm) +RH-Bugzilla: 2062613 +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +BZ: https://bugzilla.redhat.com/2062613 +UPSTREAM: no +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=44038000 + +commit dc2e9ec1e014950c7918e23a3e9b0096b34a4a92 +Author: Dr. David Alan Gilbert +Date: Wed Mar 9 10:31:53 2022 +0000 + + Revert "redhat: Add hw_compat_4_2_extra and apply to upstream machines" + + This reverts commit 66882f9a3230246409f3918424aca26add5c034a. + We no longer need these compat machines it was added for. + + Signed-off-by: Dr. David Alan Gilbert + +(cherry picked from commit dc2e9ec1e014950c7918e23a3e9b0096b34a4a92) +Signed-off-by: Jon Maloy +--- + hw/i386/pc.c | 12 ------------ + hw/i386/pc_piix.c | 6 ------ + include/hw/i386/pc.h | 3 --- + 3 files changed, 21 deletions(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 4c08a1971c..357257349b 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -670,18 +670,6 @@ GlobalProperty pc_rhel_7_0_compat[] = { + }; + const size_t pc_rhel_7_0_compat_len = G_N_ELEMENTS(pc_rhel_7_0_compat); + +-/* +- * RHEL: These properties only apply to the RHEL exported machine types +- * pc-4.2/2.11 for the purpose to have a limited upstream machines support +- * which can be migrated to RHEL. Let's avoid touching hw_compat_4_2 directly +- * so that we can have some isolation against the upstream code. +- */ +-GlobalProperty hw_compat_4_2_extra[] = { +- /* By default enlarge the default virtio-net-pci ROM to 512KB. */ +- { "virtio-net-pci", "romsize", "0x80000" }, +-}; +-const size_t hw_compat_4_2_extra_len = G_N_ELEMENTS(hw_compat_4_2_extra); +- + GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled) + { + GSIState *s; +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index c30057c443..7b7076cbc7 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -531,12 +531,6 @@ static void pc_i440fx_4_2_machine_options(MachineClass *m) + * supported by RHEL, even if exported. + */ + m->deprecation_reason = "Not supported by RHEL"; +- /* +- * RHEL: Specific compat properties to have limited support for upstream +- * machines exported. +- */ +- compat_props_add(m->compat_props, hw_compat_4_2_extra, +- hw_compat_4_2_extra_len); + } + + /* RHEL: Export pc-4.2 */ +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 9e8bfb69f8..4a593acb50 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -325,9 +325,6 @@ extern const size_t pc_rhel_7_1_compat_len; + extern GlobalProperty pc_rhel_7_0_compat[]; + extern const size_t pc_rhel_7_0_compat_len; + +-extern GlobalProperty hw_compat_4_2_extra[]; +-extern const size_t hw_compat_4_2_extra_len; +- + /* Helper for setting model-id for CPU models that changed model-id + * depending on QEMU versions up to QEMU 2.4. + */ +-- +2.27.0 + diff --git a/SOURCES/kvm-Revert-redhat-Add-some-devices-for-exporting-upstrea.patch b/SOURCES/kvm-Revert-redhat-Add-some-devices-for-exporting-upstrea.patch new file mode 100644 index 0000000..2aaef33 --- /dev/null +++ b/SOURCES/kvm-Revert-redhat-Add-some-devices-for-exporting-upstrea.patch @@ -0,0 +1,128 @@ +From 96edd15df257f1d1496397a6fac24b4316570d7e Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Thu, 14 Apr 2022 16:45:30 -0400 +Subject: [PATCH 1/3] Revert redhat: Add some devices for exporting upstream + machine types + +RH-Author: Jon Maloy +RH-MergeRequest: 156: Revert redhat: Add some devices for exporting upstream machine types +RH-Commit: [1/1] f25d0da3a181136917ead82f5a5c59efe3fa445a (jmaloy/qemu-kvm) +RH-Bugzilla: 2065043 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Thomas Huth +RH-Acked-by: Peter Xu + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2065043 +Upstream: no + +Manual revert of commit 70d3924521c9bfd912bcf1a1fc76f49eb377de46, since +the directory structure looks different from rhel-av-8.4.0.z where +this commit is taken from. Besides, x86_64-softmmu.mak looks totally +different and should not be affected by this reversal. + +Signed-off-by: Jon Maloy +--- + configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 - + .../devices/x86_64-softmmu/x86_64-upstream-devices.mak | 4 ---- + hw/char/parallel.c | 9 --------- + hw/i386/pc_piix.c | 2 +- + hw/i386/pc_q35.c | 2 +- + hw/timer/hpet.c | 8 -------- + 6 files changed, 2 insertions(+), 24 deletions(-) + delete mode 100644 configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak + +diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +index fdbbdf9742..31ce08edab 100644 +--- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak ++++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +@@ -1,5 +1,4 @@ + include ../rh-virtio.mak +-include x86_64-upstream-devices.mak + + CONFIG_AC97=y + CONFIG_ACPI=y +diff --git a/configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak b/configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak +deleted file mode 100644 +index 2cd20f54d2..0000000000 +--- a/configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak ++++ /dev/null +@@ -1,4 +0,0 @@ +-# We need "isa-parallel" +-CONFIG_PARALLEL=y +-# We need "hpet" +-CONFIG_HPET=y +diff --git a/hw/char/parallel.c b/hw/char/parallel.c +index e5f108211b..b45e67bfbb 100644 +--- a/hw/char/parallel.c ++++ b/hw/char/parallel.c +@@ -29,7 +29,6 @@ + #include "chardev/char-parallel.h" + #include "chardev/char-fe.h" + #include "hw/acpi/aml-build.h" +-#include "hw/boards.h" + #include "hw/irq.h" + #include "hw/isa/isa.h" + #include "hw/qdev-properties.h" +@@ -535,14 +534,6 @@ static void parallel_isa_realizefn(DeviceState *dev, Error **errp) + int base; + uint8_t dummy; + +- /* Restricted for Red Hat Enterprise Linux */ +- MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); +- if (strstr(mc->name, "rhel")) { +- error_setg(errp, "Device %s is not supported with machine type %s", +- object_get_typename(OBJECT(dev)), mc->name); +- return; +- } +- + if (!qemu_chr_fe_backend_connected(&s->chr)) { + error_setg(errp, "Can't create parallel device, empty char device"); + return; +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index ab6d03e07a..5f101c8748 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -966,7 +966,7 @@ static void pc_machine_rhel7_options(MachineClass *m) + { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + m->family = "pc_piix_Y"; +- m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; ++ m->default_machine_opts = "firmware=bios-256k.bin"; + pcmc->default_nic_model = "e1000"; + pcmc->pci_root_uid = 0; + m->default_display = "std"; +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 882fe7a68d..73b0d0d317 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -633,7 +633,7 @@ static void pc_q35_machine_rhel_options(MachineClass *m) + pcmc->pci_root_uid = 0; + m->family = "pc_q35_Z"; + m->units_per_default_bus = 1; +- m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; ++ m->default_machine_opts = "firmware=bios-256k.bin"; + m->default_display = "std"; + m->no_floppy = 1; + m->no_parallel = 1; +diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c +index 202e032524..9520471be2 100644 +--- a/hw/timer/hpet.c ++++ b/hw/timer/hpet.c +@@ -733,14 +733,6 @@ static void hpet_realize(DeviceState *dev, Error **errp) + int i; + HPETTimer *timer; + +- /* Restricted for Red Hat Enterprise Linux */ +- MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); +- if (strstr(mc->name, "rhel")) { +- error_setg(errp, "Device %s is not supported with machine type %s", +- object_get_typename(OBJECT(dev)), mc->name); +- return; +- } +- + if (!s->intcap) { + warn_report("Hpet's intcap not initialized"); + } +-- +2.35.1 + diff --git a/SOURCES/kvm-Revert-redhat-Enable-FDC-device-for-upstream-machine.patch b/SOURCES/kvm-Revert-redhat-Enable-FDC-device-for-upstream-machine.patch new file mode 100644 index 0000000..1b2051a --- /dev/null +++ b/SOURCES/kvm-Revert-redhat-Enable-FDC-device-for-upstream-machine.patch @@ -0,0 +1,53 @@ +From 5bf8f1d69fea1225e927fbb3efe549a2a9d47d92 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 22 Mar 2022 19:23:36 -0400 +Subject: [PATCH 02/18] Revert "redhat: Enable FDC device for upstream machines + too" + +RH-Author: Jon Maloy +RH-MergeRequest: 131: Revert "redhat: Add hw_compat_4_2_extra and apply to upstream machines" +RH-Commit: [2/3] 4e3c945e3de9bb9d9a6d24115f0719168c9669fe (jmaloy/qemu-kvm) +RH-Bugzilla: 2062613 +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +BZ: https://bugzilla.redhat.com/2062613 +UPSTREAM: no +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=44038000 + +commit 597cb6ca1da4a3eea77c1e4928f55203a1d5c70c +Author: Dr. David Alan Gilbert +Date: Wed Mar 9 10:32:39 2022 +0000 + + Revert "redhat: Enable FDC device for upstream machines too" + + This reverts commit c4d1aa8bf21fe98da94a9cff30b7c25bed12c17f. + We no longer need these compat machines it was added for. + + Signed-off-by: Dr. David Alan Gilbert + +(cherry picked from commit 597cb6ca1da4a3eea77c1e4928f55203a1d5c70c) +Signed-off-by: Jon Maloy +--- + hw/block/fdc.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +diff --git a/hw/block/fdc.c b/hw/block/fdc.c +index 63042ef030..97fa6de423 100644 +--- a/hw/block/fdc.c ++++ b/hw/block/fdc.c +@@ -2341,10 +2341,7 @@ void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, Error **errp) + + /* Restricted for Red Hat Enterprise Linux: */ + MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); +- if (!strstr(mc->name, "-rhel7.") && +- /* Exported two upstream machine types allows FDC too */ +- strcmp(mc->name, "pc-i440fx-4.2") && +- strcmp(mc->name, "pc-i440fx-2.11")) { ++ if (!strstr(mc->name, "-rhel7.")) { + error_setg(errp, "Device %s is not supported with machine type %s", + object_get_typename(OBJECT(dev)), mc->name); + return; +-- +2.27.0 + diff --git a/SOURCES/kvm-Revert-redhat-Expose-upstream-machines-pc-4.2-and-pc.patch b/SOURCES/kvm-Revert-redhat-Expose-upstream-machines-pc-4.2-and-pc.patch new file mode 100644 index 0000000..27e3dc9 --- /dev/null +++ b/SOURCES/kvm-Revert-redhat-Expose-upstream-machines-pc-4.2-and-pc.patch @@ -0,0 +1,191 @@ +From ee3cae3bb349469edcf725a1c5161521e95dcb9f Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 22 Mar 2022 19:23:36 -0400 +Subject: [PATCH 03/18] Revert "redhat: Expose upstream machines pc-4.2 and + pc-2.11" + +RH-Author: Jon Maloy +RH-MergeRequest: 131: Revert "redhat: Add hw_compat_4_2_extra and apply to upstream machines" +RH-Commit: [3/3] 35cee68034580f81b3aa916921eecd2fdfa7dd15 (jmaloy/qemu-kvm) +RH-Bugzilla: 2062613 +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +BZ: https://bugzilla.redhat.com/2062613 +UPSTREAM: no +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=44038000 + +commit f3b50d6d4ae0be9e64aafe6a15f5423bab4899e9 +Author: Dr. David Alan Gilbert +Date: Wed Mar 9 10:34:58 2022 +0000 + + Revert "redhat: Expose upstream machines pc-4.2 and pc-2.11" + This reverts commit 618e2424edba499d52cd26cf8363bc2dd85ef149. + We no longer need these compat machines. + + Signed-off-by: Dr. David Alan Gilbert + +(cherry picked from commit f3b50d6d4ae0be9e64aafe6a15f5423bab4899e9) +Signed-off-by: Jon Maloy +--- + hw/i386/pc_piix.c | 37 ------------------------------------- + 1 file changed, 37 deletions(-) + +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 7b7076cbc7..f03a8f0db8 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -315,14 +315,6 @@ static void pc_init1(MachineState *machine, + * hw_compat_*, pc_compat_*, or * pc_*_machine_options(). + */ + +-/* +- * NOTE! Not all the upstream machine types are disabled for RHEL. For +- * providing a very limited support for upstream machine types, pc machines +- * 2.11 and 4.2 are exposed explicitly. This will make the below "#if" macros +- * a bit messed up, but please read this comment first so that we can have a +- * rough understanding of what we're going to do. +- */ +- + #if 0 /* Disabled for Red Hat Enterprise Linux */ + static void pc_compat_2_3_fn(MachineState *machine) + { +@@ -399,8 +391,6 @@ static void pc_xen_hvm_init(MachineState *machine) + } + #endif + +-#endif /* Disabled for Red Hat Enterprise Linux */ +- + #define DEFINE_I440FX_MACHINE(suffix, name, compatfn, optionfn) \ + static void pc_init_##suffix(MachineState *machine) \ + { \ +@@ -465,10 +455,8 @@ static void pc_i440fx_6_0_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_6_0, pc_compat_6_0_len); + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v6_0, "pc-i440fx-6.0", NULL, + pc_i440fx_6_0_machine_options); +-#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_5_2_machine_options(MachineClass *m) + { +@@ -479,10 +467,8 @@ static void pc_i440fx_5_2_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_5_2, pc_compat_5_2_len); + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v5_2, "pc-i440fx-5.2", NULL, + pc_i440fx_5_2_machine_options); +-#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_5_1_machine_options(MachineClass *m) + { +@@ -497,10 +483,8 @@ static void pc_i440fx_5_1_machine_options(MachineClass *m) + pcmc->pci_root_uid = 1; + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v5_1, "pc-i440fx-5.1", NULL, + pc_i440fx_5_1_machine_options); +-#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_5_0_machine_options(MachineClass *m) + { +@@ -513,10 +497,8 @@ static void pc_i440fx_5_0_machine_options(MachineClass *m) + m->auto_enable_numa_with_memdev = false; + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v5_0, "pc-i440fx-5.0", NULL, + pc_i440fx_5_0_machine_options); +-#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_4_2_machine_options(MachineClass *m) + { +@@ -525,15 +507,8 @@ static void pc_i440fx_4_2_machine_options(MachineClass *m) + m->is_default = false; + compat_props_add(m->compat_props, hw_compat_4_2, hw_compat_4_2_len); + compat_props_add(m->compat_props, pc_compat_4_2, pc_compat_4_2_len); +- +- /* +- * RHEL: Mark all upstream machines as deprecated because they're not +- * supported by RHEL, even if exported. +- */ +- m->deprecation_reason = "Not supported by RHEL"; + } + +-/* RHEL: Export pc-4.2 */ + DEFINE_I440FX_MACHINE(v4_2, "pc-i440fx-4.2", NULL, + pc_i440fx_4_2_machine_options); + +@@ -546,10 +521,8 @@ static void pc_i440fx_4_1_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_4_1, pc_compat_4_1_len); + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v4_1, "pc-i440fx-4.1", NULL, + pc_i440fx_4_1_machine_options); +-#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_4_0_machine_options(MachineClass *m) + { +@@ -562,10 +535,8 @@ static void pc_i440fx_4_0_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_4_0, pc_compat_4_0_len); + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v4_0, "pc-i440fx-4.0", NULL, + pc_i440fx_4_0_machine_options); +-#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_3_1_machine_options(MachineClass *m) + { +@@ -581,10 +552,8 @@ static void pc_i440fx_3_1_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_3_1, pc_compat_3_1_len); + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v3_1, "pc-i440fx-3.1", NULL, + pc_i440fx_3_1_machine_options); +-#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_3_0_machine_options(MachineClass *m) + { +@@ -593,10 +562,8 @@ static void pc_i440fx_3_0_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_3_0, pc_compat_3_0_len); + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v3_0, "pc-i440fx-3.0", NULL, + pc_i440fx_3_0_machine_options); +-#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_2_12_machine_options(MachineClass *m) + { +@@ -605,10 +572,8 @@ static void pc_i440fx_2_12_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_2_12, pc_compat_2_12_len); + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v2_12, "pc-i440fx-2.12", NULL, + pc_i440fx_2_12_machine_options); +-#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_2_11_machine_options(MachineClass *m) + { +@@ -617,11 +582,9 @@ static void pc_i440fx_2_11_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_2_11, pc_compat_2_11_len); + } + +-/* RHEL: Export pc-2.11 */ + DEFINE_I440FX_MACHINE(v2_11, "pc-i440fx-2.11", NULL, + pc_i440fx_2_11_machine_options); + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void pc_i440fx_2_10_machine_options(MachineClass *m) + { + pc_i440fx_2_11_machine_options(m); +-- +2.27.0 + diff --git a/SOURCES/kvm-Update-linux-headers-to-v6.0-rc4.patch b/SOURCES/kvm-Update-linux-headers-to-v6.0-rc4.patch new file mode 100644 index 0000000..39e152b --- /dev/null +++ b/SOURCES/kvm-Update-linux-headers-to-v6.0-rc4.patch @@ -0,0 +1,171 @@ +From 10fc28b61a6fba1e6dc44fd544cf31c7f313c622 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Fri, 28 Oct 2022 17:48:00 +0100 +Subject: [PATCH 05/42] Update linux headers to v6.0-rc4 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [5/41] ca55f497d1bf1e72179330f8f613781bf999d898 + +Based on upstream commit d525f73f9186a5bc641b8caf0b2c9bb94e5aa963 +("Update linux headers to v6.0-rc4"), but this is focusing only on the +ZPCI and protected dump changes. + +Signed-off-by: Cédric Le Goater +--- + linux-headers/linux/kvm.h | 87 +++++++++++++++++++++++++++++++++ + linux-headers/linux/vfio_zdev.h | 7 +++ + 2 files changed, 94 insertions(+) + +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 0d05d02ee4..c65930288c 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -1150,6 +1150,9 @@ struct kvm_ppc_resize_hpt { + #define KVM_CAP_DISABLE_QUIRKS2 213 + /* #define KVM_CAP_VM_TSC_CONTROL 214 */ + #define KVM_CAP_SYSTEM_EVENT_DATA 215 ++#define KVM_CAP_S390_PROTECTED_DUMP 217 ++#define KVM_CAP_S390_ZPCI_OP 221 ++#define KVM_CAP_S390_CPU_TOPOLOGY 222 + + #ifdef KVM_CAP_IRQ_ROUTING + +@@ -1651,6 +1654,55 @@ struct kvm_s390_pv_unp { + __u64 tweak; + }; + ++enum pv_cmd_dmp_id { ++ KVM_PV_DUMP_INIT, ++ KVM_PV_DUMP_CONFIG_STOR_STATE, ++ KVM_PV_DUMP_COMPLETE, ++ KVM_PV_DUMP_CPU, ++}; ++ ++struct kvm_s390_pv_dmp { ++ __u64 subcmd; ++ __u64 buff_addr; ++ __u64 buff_len; ++ __u64 gaddr; /* For dump storage state */ ++ __u64 reserved[4]; ++}; ++ ++enum pv_cmd_info_id { ++ KVM_PV_INFO_VM, ++ KVM_PV_INFO_DUMP, ++}; ++ ++struct kvm_s390_pv_info_dump { ++ __u64 dump_cpu_buffer_len; ++ __u64 dump_config_mem_buffer_per_1m; ++ __u64 dump_config_finalize_len; ++}; ++ ++struct kvm_s390_pv_info_vm { ++ __u64 inst_calls_list[4]; ++ __u64 max_cpus; ++ __u64 max_guests; ++ __u64 max_guest_addr; ++ __u64 feature_indication; ++}; ++ ++struct kvm_s390_pv_info_header { ++ __u32 id; ++ __u32 len_max; ++ __u32 len_written; ++ __u32 reserved; ++}; ++ ++struct kvm_s390_pv_info { ++ struct kvm_s390_pv_info_header header; ++ union { ++ struct kvm_s390_pv_info_dump dump; ++ struct kvm_s390_pv_info_vm vm; ++ }; ++}; ++ + enum pv_cmd_id { + KVM_PV_ENABLE, + KVM_PV_DISABLE, +@@ -1659,6 +1711,8 @@ enum pv_cmd_id { + KVM_PV_VERIFY, + KVM_PV_PREP_RESET, + KVM_PV_UNSHARE_ALL, ++ KVM_PV_INFO, ++ KVM_PV_DUMP, + }; + + struct kvm_pv_cmd { +@@ -2066,4 +2120,37 @@ struct kvm_stats_desc { + /* Available with KVM_CAP_XSAVE2 */ + #define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) + ++/* Available with KVM_CAP_S390_PROTECTED_DUMP */ ++#define KVM_S390_PV_CPU_COMMAND _IOWR(KVMIO, 0xd0, struct kvm_pv_cmd) ++ ++/* Available with KVM_CAP_S390_ZPCI_OP */ ++#define KVM_S390_ZPCI_OP _IOW(KVMIO, 0xd1, struct kvm_s390_zpci_op) ++ ++struct kvm_s390_zpci_op { ++ /* in */ ++ __u32 fh; /* target device */ ++ __u8 op; /* operation to perform */ ++ __u8 pad[3]; ++ union { ++ /* for KVM_S390_ZPCIOP_REG_AEN */ ++ struct { ++ __u64 ibv; /* Guest addr of interrupt bit vector */ ++ __u64 sb; /* Guest addr of summary bit */ ++ __u32 flags; ++ __u32 noi; /* Number of interrupts */ ++ __u8 isc; /* Guest interrupt subclass */ ++ __u8 sbo; /* Offset of guest summary bit vector */ ++ __u16 pad; ++ } reg_aen; ++ __u64 reserved[8]; ++ } u; ++}; ++ ++/* types for kvm_s390_zpci_op->op */ ++#define KVM_S390_ZPCIOP_REG_AEN 0 ++#define KVM_S390_ZPCIOP_DEREG_AEN 1 ++ ++/* flags for kvm_s390_zpci_op->u.reg_aen.flags */ ++#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0) ++ + #endif /* __LINUX_KVM_H */ +diff --git a/linux-headers/linux/vfio_zdev.h b/linux-headers/linux/vfio_zdev.h +index b4309397b6..77f2aff1f2 100644 +--- a/linux-headers/linux/vfio_zdev.h ++++ b/linux-headers/linux/vfio_zdev.h +@@ -29,6 +29,9 @@ struct vfio_device_info_cap_zpci_base { + __u16 fmb_length; /* Measurement Block Length (in bytes) */ + __u8 pft; /* PCI Function Type */ + __u8 gid; /* PCI function group ID */ ++ /* End of version 1 */ ++ __u32 fh; /* PCI function handle */ ++ /* End of version 2 */ + }; + + /** +@@ -47,6 +50,10 @@ struct vfio_device_info_cap_zpci_group { + __u16 noi; /* Maximum number of MSIs */ + __u16 maxstbl; /* Maximum Store Block Length */ + __u8 version; /* Supported PCI Version */ ++ /* End of version 1 */ ++ __u8 reserved; ++ __u16 imaxstbl; /* Maximum Interpreted Store Block Length */ ++ /* End of version 2 */ + }; + + /** +-- +2.37.3 + diff --git a/SOURCES/kvm-accel-introduce-accelerator-blocker-API.patch b/SOURCES/kvm-accel-introduce-accelerator-blocker-API.patch new file mode 100644 index 0000000..7db7fd2 --- /dev/null +++ b/SOURCES/kvm-accel-introduce-accelerator-blocker-API.patch @@ -0,0 +1,349 @@ +From a5e7bb1f7a88efb5574266a76e80fd7604d19921 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Mon, 16 Jan 2023 07:49:59 -0500 +Subject: [PATCH 04/11] accel: introduce accelerator blocker API +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 247: accel: introduce accelerator blocker API +RH-Bugzilla: 2161188 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/3] 9d3d7f9554974a79042c915763288cce07aef135 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2161188 + +commit bd688fc93120fb3e28aa70e3dfdf567ccc1e0bc1 +Author: Emanuele Giuseppe Esposito +Date: Fri Nov 11 10:47:56 2022 -0500 + + accel: introduce accelerator blocker API + + This API allows the accelerators to prevent vcpus from issuing + new ioctls while execting a critical section marked with the + accel_ioctl_inhibit_begin/end functions. + + Note that all functions submitting ioctls must mark where the + ioctl is being called with accel_{cpu_}ioctl_begin/end(). + + This API requires the caller to always hold the BQL. + API documentation is in sysemu/accel-blocker.h + + Internally, it uses a QemuLockCnt together with a per-CPU QemuLockCnt + (to minimize cache line bouncing) to keep avoid that new ioctls + run when the critical section starts, and a QemuEvent to wait + that all running ioctls finish. + + Signed-off-by: Emanuele Giuseppe Esposito + Reviewed-by: Philippe Mathieu-Daudé + Message-Id: <20221111154758.1372674-2-eesposit@redhat.com> + Signed-off-by: Paolo Bonzini + +Conflicts: + util/meson.build: files are missing in rhel 8.8.0 + namely int128.c, memalign.c and interval-tree.c + +Signed-off-by: Emanuele Giuseppe Esposito +--- + accel/accel-blocker.c | 154 +++++++++++++++++++++++++++++++++ + accel/meson.build | 2 +- + hw/core/cpu-common.c | 2 + + include/hw/core/cpu.h | 3 + + include/sysemu/accel-blocker.h | 56 ++++++++++++ + util/meson.build | 2 +- + 6 files changed, 217 insertions(+), 2 deletions(-) + create mode 100644 accel/accel-blocker.c + create mode 100644 include/sysemu/accel-blocker.h + +diff --git a/accel/accel-blocker.c b/accel/accel-blocker.c +new file mode 100644 +index 0000000000..1e7f423462 +--- /dev/null ++++ b/accel/accel-blocker.c +@@ -0,0 +1,154 @@ ++/* ++ * Lock to inhibit accelerator ioctls ++ * ++ * Copyright (c) 2022 Red Hat Inc. ++ * ++ * Author: Emanuele Giuseppe Esposito ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. ++ */ ++ ++#include "qemu/osdep.h" ++#include "qemu/thread.h" ++#include "qemu/main-loop.h" ++#include "hw/core/cpu.h" ++#include "sysemu/accel-blocker.h" ++ ++static QemuLockCnt accel_in_ioctl_lock; ++static QemuEvent accel_in_ioctl_event; ++ ++void accel_blocker_init(void) ++{ ++ qemu_lockcnt_init(&accel_in_ioctl_lock); ++ qemu_event_init(&accel_in_ioctl_event, false); ++} ++ ++void accel_ioctl_begin(void) ++{ ++ if (likely(qemu_mutex_iothread_locked())) { ++ return; ++ } ++ ++ /* block if lock is taken in kvm_ioctl_inhibit_begin() */ ++ qemu_lockcnt_inc(&accel_in_ioctl_lock); ++} ++ ++void accel_ioctl_end(void) ++{ ++ if (likely(qemu_mutex_iothread_locked())) { ++ return; ++ } ++ ++ qemu_lockcnt_dec(&accel_in_ioctl_lock); ++ /* change event to SET. If event was BUSY, wake up all waiters */ ++ qemu_event_set(&accel_in_ioctl_event); ++} ++ ++void accel_cpu_ioctl_begin(CPUState *cpu) ++{ ++ if (unlikely(qemu_mutex_iothread_locked())) { ++ return; ++ } ++ ++ /* block if lock is taken in kvm_ioctl_inhibit_begin() */ ++ qemu_lockcnt_inc(&cpu->in_ioctl_lock); ++} ++ ++void accel_cpu_ioctl_end(CPUState *cpu) ++{ ++ if (unlikely(qemu_mutex_iothread_locked())) { ++ return; ++ } ++ ++ qemu_lockcnt_dec(&cpu->in_ioctl_lock); ++ /* change event to SET. If event was BUSY, wake up all waiters */ ++ qemu_event_set(&accel_in_ioctl_event); ++} ++ ++static bool accel_has_to_wait(void) ++{ ++ CPUState *cpu; ++ bool needs_to_wait = false; ++ ++ CPU_FOREACH(cpu) { ++ if (qemu_lockcnt_count(&cpu->in_ioctl_lock)) { ++ /* exit the ioctl, if vcpu is running it */ ++ qemu_cpu_kick(cpu); ++ needs_to_wait = true; ++ } ++ } ++ ++ return needs_to_wait || qemu_lockcnt_count(&accel_in_ioctl_lock); ++} ++ ++void accel_ioctl_inhibit_begin(void) ++{ ++ CPUState *cpu; ++ ++ /* ++ * We allow to inhibit only when holding the BQL, so we can identify ++ * when an inhibitor wants to issue an ioctl easily. ++ */ ++ g_assert(qemu_mutex_iothread_locked()); ++ ++ /* Block further invocations of the ioctls outside the BQL. */ ++ CPU_FOREACH(cpu) { ++ qemu_lockcnt_lock(&cpu->in_ioctl_lock); ++ } ++ qemu_lockcnt_lock(&accel_in_ioctl_lock); ++ ++ /* Keep waiting until there are running ioctls */ ++ while (true) { ++ ++ /* Reset event to FREE. */ ++ qemu_event_reset(&accel_in_ioctl_event); ++ ++ if (accel_has_to_wait()) { ++ /* ++ * If event is still FREE, and there are ioctls still in progress, ++ * wait. ++ * ++ * If an ioctl finishes before qemu_event_wait(), it will change ++ * the event state to SET. This will prevent qemu_event_wait() from ++ * blocking, but it's not a problem because if other ioctls are ++ * still running the loop will iterate once more and reset the event ++ * status to FREE so that it can wait properly. ++ * ++ * If an ioctls finishes while qemu_event_wait() is blocking, then ++ * it will be waken up, but also here the while loop makes sure ++ * to re-enter the wait if there are other running ioctls. ++ */ ++ qemu_event_wait(&accel_in_ioctl_event); ++ } else { ++ /* No ioctl is running */ ++ return; ++ } ++ } ++} ++ ++void accel_ioctl_inhibit_end(void) ++{ ++ CPUState *cpu; ++ ++ qemu_lockcnt_unlock(&accel_in_ioctl_lock); ++ CPU_FOREACH(cpu) { ++ qemu_lockcnt_unlock(&cpu->in_ioctl_lock); ++ } ++} ++ +diff --git a/accel/meson.build b/accel/meson.build +index dfd808d2c8..801b4d44e8 100644 +--- a/accel/meson.build ++++ b/accel/meson.build +@@ -1,4 +1,4 @@ +-specific_ss.add(files('accel-common.c')) ++specific_ss.add(files('accel-common.c', 'accel-blocker.c')) + softmmu_ss.add(files('accel-softmmu.c')) + user_ss.add(files('accel-user.c')) + +diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c +index 9e3241b430..b6e83acf0a 100644 +--- a/hw/core/cpu-common.c ++++ b/hw/core/cpu-common.c +@@ -238,6 +238,7 @@ static void cpu_common_initfn(Object *obj) + cpu->nr_threads = 1; + + qemu_mutex_init(&cpu->work_mutex); ++ qemu_lockcnt_init(&cpu->in_ioctl_lock); + QSIMPLEQ_INIT(&cpu->work_list); + QTAILQ_INIT(&cpu->breakpoints); + QTAILQ_INIT(&cpu->watchpoints); +@@ -249,6 +250,7 @@ static void cpu_common_finalize(Object *obj) + { + CPUState *cpu = CPU(obj); + ++ qemu_lockcnt_destroy(&cpu->in_ioctl_lock); + qemu_mutex_destroy(&cpu->work_mutex); + } + +diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h +index e948e81f1a..49d9c73f97 100644 +--- a/include/hw/core/cpu.h ++++ b/include/hw/core/cpu.h +@@ -383,6 +383,9 @@ struct CPUState { + uint32_t kvm_fetch_index; + uint64_t dirty_pages; + ++ /* Use by accel-block: CPU is executing an ioctl() */ ++ QemuLockCnt in_ioctl_lock; ++ + /* Used for events with 'vcpu' and *without* the 'disabled' properties */ + DECLARE_BITMAP(trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS); + DECLARE_BITMAP(trace_dstate, CPU_TRACE_DSTATE_MAX_EVENTS); +diff --git a/include/sysemu/accel-blocker.h b/include/sysemu/accel-blocker.h +new file mode 100644 +index 0000000000..72020529ef +--- /dev/null ++++ b/include/sysemu/accel-blocker.h +@@ -0,0 +1,56 @@ ++/* ++ * Accelerator blocking API, to prevent new ioctls from starting and wait the ++ * running ones finish. ++ * This mechanism differs from pause/resume_all_vcpus() in that it does not ++ * release the BQL. ++ * ++ * Copyright (c) 2022 Red Hat Inc. ++ * ++ * Author: Emanuele Giuseppe Esposito ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++#ifndef ACCEL_BLOCKER_H ++#define ACCEL_BLOCKER_H ++ ++#include "qemu/osdep.h" ++#include "sysemu/cpus.h" ++ ++extern void accel_blocker_init(void); ++ ++/* ++ * accel_{cpu_}ioctl_begin/end: ++ * Mark when ioctl is about to run or just finished. ++ * ++ * accel_{cpu_}ioctl_begin will block after accel_ioctl_inhibit_begin() is ++ * called, preventing new ioctls to run. They will continue only after ++ * accel_ioctl_inibith_end(). ++ */ ++extern void accel_ioctl_begin(void); ++extern void accel_ioctl_end(void); ++extern void accel_cpu_ioctl_begin(CPUState *cpu); ++extern void accel_cpu_ioctl_end(CPUState *cpu); ++ ++/* ++ * accel_ioctl_inhibit_begin: start critical section ++ * ++ * This function makes sure that: ++ * 1) incoming accel_{cpu_}ioctl_begin() calls block ++ * 2) wait that all ioctls that were already running reach ++ * accel_{cpu_}ioctl_end(), kicking vcpus if necessary. ++ * ++ * This allows the caller to access shared data or perform operations without ++ * worrying of concurrent vcpus accesses. ++ */ ++extern void accel_ioctl_inhibit_begin(void); ++ ++/* ++ * accel_ioctl_inhibit_end: end critical section started by ++ * accel_ioctl_inhibit_begin() ++ * ++ * This function allows blocked accel_{cpu_}ioctl_begin() to continue. ++ */ ++extern void accel_ioctl_inhibit_end(void); ++ ++#endif /* ACCEL_BLOCKER_H */ +diff --git a/util/meson.build b/util/meson.build +index 05b593055a..b5f153b0e8 100644 +--- a/util/meson.build ++++ b/util/meson.build +@@ -48,6 +48,7 @@ util_ss.add(files('transactions.c')) + util_ss.add(when: 'CONFIG_POSIX', if_true: files('drm.c')) + util_ss.add(files('guest-random.c')) + util_ss.add(files('yank.c')) ++util_ss.add(files('lockcnt.c')) + + if have_user + util_ss.add(files('selfmap.c')) +@@ -69,7 +70,6 @@ if have_block + util_ss.add(files('hexdump.c')) + util_ss.add(files('iova-tree.c')) + util_ss.add(files('iov.c', 'qemu-sockets.c', 'uri.c')) +- util_ss.add(files('lockcnt.c')) + util_ss.add(files('main-loop.c')) + util_ss.add(files('nvdimm-utils.c')) + util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 'qemu-coroutine-io.c')) +-- +2.37.3 + diff --git a/SOURCES/kvm-acpi-fix-OEM-ID-OEM-Table-ID-padding.patch b/SOURCES/kvm-acpi-fix-OEM-ID-OEM-Table-ID-padding.patch new file mode 100644 index 0000000..9d2594f --- /dev/null +++ b/SOURCES/kvm-acpi-fix-OEM-ID-OEM-Table-ID-padding.patch @@ -0,0 +1,78 @@ +From af082f3499de265d123157d097b5c84981e0aa63 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 30 Mar 2022 14:52:34 -0400 +Subject: [PATCH 15/18] acpi: fix OEM ID/OEM Table ID padding + +RH-Author: Jon Maloy +RH-MergeRequest: 141: acpi: fix QEMU crash when started with SLIC table +RH-Commit: [7/10] 51ea859cbe12b5a902d529ab589d18757d98f71d (jmaloy/qemu-kvm) +RH-Bugzilla: 2062611 +RH-Acked-by: Igor Mammedov + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2062611 +Upstream: Merged + +commit 748c030f360a940fe0c9382c8ca1649096c3a80d +Author: Igor Mammedov +Date: Wed Jan 12 08:03:31 2022 -0500 + + acpi: fix OEM ID/OEM Table ID padding + + Commit [2] broke original '\0' padding of OEM ID and OEM Table ID + fields in headers of ACPI tables. While it doesn't have impact on + default values since QEMU uses 6 and 8 characters long values + respectively, it broke usecase where IDs are provided on QEMU CLI. + It shouldn't affect guest (but may cause licensing verification + issues in guest OS). + One of the broken usecases is user supplied SLIC table with IDs + shorter than max possible length, where [2] mangles IDs with extra + spaces in RSDT and FADT tables whereas guest OS expects those to + mirror the respective values of the used SLIC table. + + Fix it by replacing whitespace padding with '\0' padding in + accordance with [1] and expectations of guest OS + + 1) ACPI spec, v2.0b + 17.2 AML Grammar Definition + ... + //OEM ID of up to 6 characters. If the OEM ID is + //shorter than 6 characters, it can be terminated + //with a NULL character. + + 2) + Fixes: 602b458201 ("acpi: Permit OEM ID and OEM table ID fields to be changed") + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/707 + Reported-by: Dmitry V. Orekhov + Signed-off-by: Igor Mammedov + Cc: qemu-stable@nongnu.org + Message-Id: <20220112130332.1648664-4-imammedo@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. Tsirkin + Reviewed-by: Ani Sinha + Tested-by: Dmitry V. Orekhov dima.orekhov@gmail.com + +(cherry picked from commit 748c030f360a940fe0c9382c8ca1649096c3a80d) +Signed-off-by: Jon Maloy +--- + hw/acpi/aml-build.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c +index b3b3310df3..65148d5b9d 100644 +--- a/hw/acpi/aml-build.c ++++ b/hw/acpi/aml-build.c +@@ -1724,9 +1724,9 @@ void acpi_table_begin(AcpiTable *desc, GArray *array) + build_append_int_noprefix(array, 0, 4); /* Length */ + build_append_int_noprefix(array, desc->rev, 1); /* Revision */ + build_append_int_noprefix(array, 0, 1); /* Checksum */ +- build_append_padded_str(array, desc->oem_id, 6, ' '); /* OEMID */ ++ build_append_padded_str(array, desc->oem_id, 6, '\0'); /* OEMID */ + /* OEM Table ID */ +- build_append_padded_str(array, desc->oem_table_id, 8, ' '); ++ build_append_padded_str(array, desc->oem_table_id, 8, '\0'); + build_append_int_noprefix(array, 1, 4); /* OEM Revision */ + g_array_append_vals(array, ACPI_BUILD_APPNAME8, 4); /* Creator ID */ + build_append_int_noprefix(array, 1, 4); /* Creator Revision */ +-- +2.27.0 + diff --git a/SOURCES/kvm-acpi-fix-QEMU-crash-when-started-with-SLIC-table.patch b/SOURCES/kvm-acpi-fix-QEMU-crash-when-started-with-SLIC-table.patch new file mode 100644 index 0000000..a6b1151 --- /dev/null +++ b/SOURCES/kvm-acpi-fix-QEMU-crash-when-started-with-SLIC-table.patch @@ -0,0 +1,108 @@ +From 4e8fb957a349558648d5cddb80a89460bc97439e Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 30 Mar 2022 14:52:34 -0400 +Subject: [PATCH 09/18] acpi: fix QEMU crash when started with SLIC table +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 141: acpi: fix QEMU crash when started with SLIC table +RH-Commit: [1/10] 0c34e80346c33da4f220d9c486b120c35005144e (jmaloy/qemu-kvm) +RH-Bugzilla: 2062611 +RH-Acked-by: Igor Mammedov + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2062611 +Upstream: Merged + +commit 8cdb99af45365727ac17f45239a9b8c1d5155c6d) +Author: Igor Mammedov +Date: Mon Dec 27 14:31:17 2021 -0500 + + acpi: fix QEMU crash when started with SLIC table + + if QEMU is started with used provided SLIC table blob, + + -acpitable sig=SLIC,oem_id='CRASH ',oem_table_id="ME",oem_rev=00002210,asl_compiler_id="",asl_compiler_rev=00000000,data=/dev/null + it will assert with: + + hw/acpi/aml-build.c:61:build_append_padded_str: assertion failed: (len <= maxlen) + + and following backtrace: + + ... + build_append_padded_str (array=0x555556afe320, str=0x555556afdb2e "CRASH ME", maxlen=0x6, pad=0x20) at hw/acpi/aml-build.c:61 + acpi_table_begin (desc=0x7fffffffd1b0, array=0x555556afe320) at hw/acpi/aml-build.c:1727 + build_fadt (tbl=0x555556afe320, linker=0x555557ca3830, f=0x7fffffffd318, oem_id=0x555556afdb2e "CRASH ME", oem_table_id=0x555556afdb34 "ME") at hw/acpi/aml-build.c:2064 + ... + + which happens due to acpi_table_begin() expecting NULL terminated + oem_id and oem_table_id strings, which is normally the case, but + in case of user provided SLIC table, oem_id points to table's blob + directly and as result oem_id became longer than expected. + + Fix issue by handling oem_id consistently and make acpi_get_slic_oem() + return NULL terminated strings. + + PS: + After [1] refactoring, oem_id semantics became inconsistent, where + NULL terminated string was coming from machine and old way pointer + into byte array coming from -acpitable option. That used to work + since build_header() wasn't expecting NULL terminated string and + blindly copied the 1st 6 bytes only. + + However commit [2] broke that by replacing build_header() with + acpi_table_begin(), which was expecting NULL terminated string + and was checking oem_id size. + + 1) 602b45820 ("acpi: Permit OEM ID and OEM table ID fields to be changed") + 2) + Fixes: 4b56e1e4eb08 ("acpi: build_fadt: use acpi_table_begin()/acpi_table_end() instead of build_header()") + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/786 + Signed-off-by: Igor Mammedov + Message-Id: <20211227193120.1084176-2-imammedo@redhat.com> + Reviewed-by: Philippe Mathieu-Daudé + Tested-by: Denis Lisov + Tested-by: Alexander Tsoy + Cc: qemu-stable@nongnu.org + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. Tsirkin + +(cherry picked from commit 8cdb99af45365727ac17f45239a9b8c1d5155c6d) +Signed-off-by: Jon Maloy +--- + hw/acpi/core.c | 4 ++-- + hw/i386/acpi-build.c | 2 ++ + 2 files changed, 4 insertions(+), 2 deletions(-) + +diff --git a/hw/acpi/core.c b/hw/acpi/core.c +index 1e004d0078..3e811bf03c 100644 +--- a/hw/acpi/core.c ++++ b/hw/acpi/core.c +@@ -345,8 +345,8 @@ int acpi_get_slic_oem(AcpiSlicOem *oem) + struct acpi_table_header *hdr = (void *)(u - sizeof(hdr->_length)); + + if (memcmp(hdr->sig, "SLIC", 4) == 0) { +- oem->id = hdr->oem_id; +- oem->table_id = hdr->oem_table_id; ++ oem->id = g_strndup(hdr->oem_id, 6); ++ oem->table_id = g_strndup(hdr->oem_table_id, 8); + return 0; + } + } +diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c +index a4478e77b7..acc4869db0 100644 +--- a/hw/i386/acpi-build.c ++++ b/hw/i386/acpi-build.c +@@ -2726,6 +2726,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) + + /* Cleanup memory that's no longer used. */ + g_array_free(table_offsets, true); ++ g_free(slic_oem.id); ++ g_free(slic_oem.table_id); + } + + static void acpi_ram_update(MemoryRegion *mr, GArray *data) +-- +2.27.0 + diff --git a/SOURCES/kvm-acpi-pcihp-pcie-set-power-on-cap-on-parent-slot.patch b/SOURCES/kvm-acpi-pcihp-pcie-set-power-on-cap-on-parent-slot.patch new file mode 100644 index 0000000..2be41b6 --- /dev/null +++ b/SOURCES/kvm-acpi-pcihp-pcie-set-power-on-cap-on-parent-slot.patch @@ -0,0 +1,140 @@ +From c9ceb175667cdeead59384a97a812367ae19c570 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 23 Mar 2022 13:21:40 -0400 +Subject: [PATCH 06/18] acpi: pcihp: pcie: set power on cap on parent slot + +RH-Author: Jon Maloy +RH-MergeRequest: 134: pci: expose TYPE_XIO3130_DOWNSTREAM name +RH-Commit: [2/2] d883872647a6e90ec573140b2c171f3f53b600ab (jmaloy/qemu-kvm) +RH-Bugzilla: 2062610 +RH-Acked-by: Igor Mammedov +RH-Acked-by: Gerd Hoffmann + +BZ: https://bugzilla.redhat.com/2062610 +UPSTREAM: merged +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=44038138 + +commit 6b0969f1ec825984cd74619f0730be421b0c46fb +Author: Igor Mammedov +Date: Tue Mar 1 10:11:59 2022 -0500 + + acpi: pcihp: pcie: set power on cap on parent slot + + on creation a PCIDevice has power turned on at the end of pci_qdev_realize() + however later on if PCIe slot isn't populated with any children + it's power is turned off. It's fine if native hotplug is used + as plug callback will power slot on among other things. + However when ACPI hotplug is enabled it replaces native PCIe plug + callbacks with ACPI specific ones (acpi_pcihp_device_*plug_cb) and + as result slot stays powered off. It works fine as ACPI hotplug + on guest side takes care of enumerating/initializing hotplugged + device. But when later guest is migrated, call chain introduced by] + commit d5daff7d312 (pcie: implement slot power control for pcie root ports) + + pcie_cap_slot_post_load() + -> pcie_cap_update_power() + -> pcie_set_power_device() + -> pci_set_power() + -> pci_update_mappings() + + will disable earlier initialized BARs for the hotplugged device + in powered off slot due to commit 23786d13441 (pci: implement power state) + which disables BARs if power is off. + + Fix it by setting PCI_EXP_SLTCTL_PCC to PCI_EXP_SLTCTL_PWR_ON + on slot (root port/downstream port) at the time a device + hotplugged into it. As result PCI_EXP_SLTCTL_PWR_ON is migrated + to target and above call chain keeps device plugged into it + powered on. + + Fixes: d5daff7d312 ("pcie: implement slot power control for pcie root ports") + Fixes: 23786d13441 ("pci: implement power state") + Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2053584 + Suggested-by: "Michael S. Tsirkin" + Signed-off-by: Igor Mammedov + Message-Id: <20220301151200.3507298-3-imammedo@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. Tsirkin + +(cherry picked from commit 6b0969f1ec825984cd74619f0730be421b0c46fb) +Signed-off-by: Jon Maloy +--- + hw/acpi/pcihp.c | 12 +++++++++++- + hw/pci/pcie.c | 11 +++++++++++ + include/hw/pci/pcie.h | 1 + + 3 files changed, 23 insertions(+), 1 deletion(-) + +diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c +index a5e182dd3a..be0e846b34 100644 +--- a/hw/acpi/pcihp.c ++++ b/hw/acpi/pcihp.c +@@ -32,6 +32,7 @@ + #include "hw/pci/pci_bridge.h" + #include "hw/pci/pci_host.h" + #include "hw/pci/pcie_port.h" ++#include "hw/pci-bridge/xio3130_downstream.h" + #include "hw/i386/acpi-build.h" + #include "hw/acpi/acpi.h" + #include "hw/pci/pci_bus.h" +@@ -341,6 +342,8 @@ void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, + { + PCIDevice *pdev = PCI_DEVICE(dev); + int slot = PCI_SLOT(pdev->devfn); ++ PCIDevice *bridge; ++ PCIBus *bus; + int bsel; + + /* Don't send event when device is enabled during qemu machine creation: +@@ -370,7 +373,14 @@ void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, + return; + } + +- bsel = acpi_pcihp_get_bsel(pci_get_bus(pdev)); ++ bus = pci_get_bus(pdev); ++ bridge = pci_bridge_get_device(bus); ++ if (object_dynamic_cast(OBJECT(bridge), TYPE_PCIE_ROOT_PORT) || ++ object_dynamic_cast(OBJECT(bridge), TYPE_XIO3130_DOWNSTREAM)) { ++ pcie_cap_slot_enable_power(bridge); ++ } ++ ++ bsel = acpi_pcihp_get_bsel(bus); + g_assert(bsel >= 0); + s->acpi_pcihp_pci_status[bsel].up |= (1U << slot); + acpi_send_event(DEVICE(hotplug_dev), ACPI_PCI_HOTPLUG_STATUS); +diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c +index d7d73a31e4..996f0e24fe 100644 +--- a/hw/pci/pcie.c ++++ b/hw/pci/pcie.c +@@ -366,6 +366,17 @@ static void hotplug_event_clear(PCIDevice *dev) + } + } + ++void pcie_cap_slot_enable_power(PCIDevice *dev) ++{ ++ uint8_t *exp_cap = dev->config + dev->exp.exp_cap; ++ uint32_t sltcap = pci_get_long(exp_cap + PCI_EXP_SLTCAP); ++ ++ if (sltcap & PCI_EXP_SLTCAP_PCP) { ++ pci_set_word_by_mask(exp_cap + PCI_EXP_SLTCTL, ++ PCI_EXP_SLTCTL_PCC, PCI_EXP_SLTCTL_PWR_ON); ++ } ++} ++ + static void pcie_set_power_device(PCIBus *bus, PCIDevice *dev, void *opaque) + { + bool *power = opaque; +diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h +index 6063bee0ec..c27368d077 100644 +--- a/include/hw/pci/pcie.h ++++ b/include/hw/pci/pcie.h +@@ -112,6 +112,7 @@ void pcie_cap_slot_write_config(PCIDevice *dev, + uint32_t addr, uint32_t val, int len); + int pcie_cap_slot_post_load(void *opaque, int version_id); + void pcie_cap_slot_push_attention_button(PCIDevice *dev); ++void pcie_cap_slot_enable_power(PCIDevice *dev); + + void pcie_cap_root_init(PCIDevice *dev); + void pcie_cap_root_reset(PCIDevice *dev); +-- +2.27.0 + diff --git a/SOURCES/kvm-acpi-validate-hotplug-selector-on-access.patch b/SOURCES/kvm-acpi-validate-hotplug-selector-on-access.patch new file mode 100644 index 0000000..d18989a --- /dev/null +++ b/SOURCES/kvm-acpi-validate-hotplug-selector-on-access.patch @@ -0,0 +1,51 @@ +From 529a5d908f5d16714b8ae0a51eaaaa84994dfae8 Mon Sep 17 00:00:00 2001 +From: "Michael S. Tsirkin" +Date: Tue, 21 Dec 2021 09:45:44 -0500 +Subject: [PATCH 1/2] acpi: validate hotplug selector on access +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 97: acpi: validate hotplug selector on access +RH-Commit: [1/1] 79bcfb0df0091e2b716d2e1c545f047b3409c26c (jmaloy/qemu-kvm) +RH-Bugzilla: 2036580 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Igor Mammedov + +When bus is looked up on a pci write, we didn't +validate that the lookup succeeded. +Fuzzers thus can trigger QEMU crash by dereferencing the NULL +bus pointer. + +Fixes: b32bd763a1 ("pci: introduce acpi-index property for PCI device") +Fixes: CVE-2021-4158 +Cc: "Igor Mammedov" +Fixes: https://gitlab.com/qemu-project/qemu/-/issues/770 +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Ani Sinha +(cherry picked from commit 9bd6565ccee68f72d5012e24646e12a1c662827e) +Signed-off-by: Jon Maloy +--- + hw/acpi/pcihp.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c +index 30405b5113..a5e182dd3a 100644 +--- a/hw/acpi/pcihp.c ++++ b/hw/acpi/pcihp.c +@@ -491,6 +491,9 @@ static void pci_write(void *opaque, hwaddr addr, uint64_t data, + } + + bus = acpi_pcihp_find_hotplug_bus(s, s->hotplug_select); ++ if (!bus) { ++ break; ++ } + QTAILQ_FOREACH_SAFE(kid, &bus->qbus.children, sibling, next) { + Object *o = OBJECT(kid->child); + PCIDevice *dev = PCI_DEVICE(o); +-- +2.27.0 + diff --git a/SOURCES/kvm-aio-wait-switch-to-smp_mb__after_rmw.patch b/SOURCES/kvm-aio-wait-switch-to-smp_mb__after_rmw.patch new file mode 100644 index 0000000..e3b79cf --- /dev/null +++ b/SOURCES/kvm-aio-wait-switch-to-smp_mb__after_rmw.patch @@ -0,0 +1,50 @@ +From 953c5c0982b61b0a3f8f03452844b5487eb22fc7 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:13:17 -0500 +Subject: [PATCH 06/13] aio-wait: switch to smp_mb__after_rmw() + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2168472 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Eric Auger +RH-Acked-by: Paolo Bonzini +RH-Acked-by: David Hildenbrand +RH-Commit: [6/10] 9f30f97754139ffd18d36b2350f9ed4e59ac496e + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472 + +commit b532526a07ef3b903ead2e055fe6cc87b41057a3 +Author: Paolo Bonzini +Date: Fri Mar 3 11:03:52 2023 +0100 + + aio-wait: switch to smp_mb__after_rmw() + + The barrier comes after an atomic increment, so it is enough to use + smp_mb__after_rmw(); this avoids a double barrier on x86 systems. + + Reviewed-by: Richard Henderson + Reviewed-by: David Hildenbrand + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + include/block/aio-wait.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h +index 54840f8622..03b6394c78 100644 +--- a/include/block/aio-wait.h ++++ b/include/block/aio-wait.h +@@ -82,7 +82,7 @@ extern AioWait global_aio_wait; + /* Increment wait_->num_waiters before evaluating cond. */ \ + qatomic_inc(&wait_->num_waiters); \ + /* Paired with smp_mb in aio_wait_kick(). */ \ +- smp_mb(); \ ++ smp_mb__after_rmw(); \ + if (ctx_ && in_aio_context_home_thread(ctx_)) { \ + while ((cond)) { \ + aio_poll(ctx_, true); \ +-- +2.37.3 + diff --git a/SOURCES/kvm-aio_wait_kick-add-missing-memory-barrier.patch b/SOURCES/kvm-aio_wait_kick-add-missing-memory-barrier.patch new file mode 100644 index 0000000..9a9ae00 --- /dev/null +++ b/SOURCES/kvm-aio_wait_kick-add-missing-memory-barrier.patch @@ -0,0 +1,86 @@ +From d7eae0ff4c7f7f7bf10f10272adf7c6971c0db9b Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 09:26:35 -0500 +Subject: [PATCH 01/13] aio_wait_kick: add missing memory barrier + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2168472 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Eric Auger +RH-Acked-by: Paolo Bonzini +RH-Acked-by: David Hildenbrand +RH-Commit: [1/10] eb774aee79864052e14e706d931e52e7bd1162c8 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472 + +commit 7455ff1aa01564cc175db5b2373e610503ad4411 +Author: Emanuele Giuseppe Esposito +Date: Tue May 24 13:30:54 2022 -0400 + + aio_wait_kick: add missing memory barrier + + It seems that aio_wait_kick always required a memory barrier + or atomic operation in the caller, but nobody actually + took care of doing it. + + Let's put the barrier in the function instead, and pair it + with another one in AIO_WAIT_WHILE. Read aio_wait_kick() + comment for further explanation. + + Suggested-by: Paolo Bonzini + Signed-off-by: Emanuele Giuseppe Esposito + Message-Id: <20220524173054.12651-1-eesposit@redhat.com> + Reviewed-by: Vladimir Sementsov-Ogievskiy + Reviewed-by: Stefan Hajnoczi + Signed-off-by: Kevin Wolf + +Signed-off-by: Emanuele Giuseppe Esposito +--- + include/block/aio-wait.h | 2 ++ + util/aio-wait.c | 16 +++++++++++++++- + 2 files changed, 17 insertions(+), 1 deletion(-) + +diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h +index b39eefb38d..54840f8622 100644 +--- a/include/block/aio-wait.h ++++ b/include/block/aio-wait.h +@@ -81,6 +81,8 @@ extern AioWait global_aio_wait; + AioContext *ctx_ = (ctx); \ + /* Increment wait_->num_waiters before evaluating cond. */ \ + qatomic_inc(&wait_->num_waiters); \ ++ /* Paired with smp_mb in aio_wait_kick(). */ \ ++ smp_mb(); \ + if (ctx_ && in_aio_context_home_thread(ctx_)) { \ + while ((cond)) { \ + aio_poll(ctx_, true); \ +diff --git a/util/aio-wait.c b/util/aio-wait.c +index bdb3d3af22..98c5accd29 100644 +--- a/util/aio-wait.c ++++ b/util/aio-wait.c +@@ -35,7 +35,21 @@ static void dummy_bh_cb(void *opaque) + + void aio_wait_kick(void) + { +- /* The barrier (or an atomic op) is in the caller. */ ++ /* ++ * Paired with smp_mb in AIO_WAIT_WHILE. Here we have: ++ * write(condition); ++ * aio_wait_kick() { ++ * smp_mb(); ++ * read(num_waiters); ++ * } ++ * ++ * And in AIO_WAIT_WHILE: ++ * write(num_waiters); ++ * smp_mb(); ++ * read(condition); ++ */ ++ smp_mb(); ++ + if (qatomic_read(&global_aio_wait.num_waiters)) { + aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL); + } +-- +2.37.3 + diff --git a/SOURCES/kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch b/SOURCES/kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch new file mode 100644 index 0000000..5338a8d --- /dev/null +++ b/SOURCES/kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch @@ -0,0 +1,66 @@ +From 187eb7a418af93375e42298d06e231e2bec3cf00 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:15:42 -0500 +Subject: [PATCH 10/13] async: clarify usage of barriers in the polling case + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2168472 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Eric Auger +RH-Acked-by: Paolo Bonzini +RH-Acked-by: David Hildenbrand +RH-Commit: [10/10] 3be07ccc6137a0336becfe63a818d9cbadb38e9c + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472 + +commit 6229438cca037d42f44a96d38feb15cb102a444f +Author: Paolo Bonzini +Date: Mon Mar 6 10:43:52 2023 +0100 + + async: clarify usage of barriers in the polling case + + Explain that aio_context_notifier_poll() relies on + aio_notify_accept() to catch all the memory writes that were + done before ctx->notified was set to true. + + Reviewed-by: Richard Henderson + Reviewed-by: Stefan Hajnoczi + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + util/async.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/util/async.c b/util/async.c +index 795fe699b6..2a63bf90f2 100644 +--- a/util/async.c ++++ b/util/async.c +@@ -463,8 +463,9 @@ void aio_notify_accept(AioContext *ctx) + qatomic_set(&ctx->notified, false); + + /* +- * Write ctx->notified before reading e.g. bh->flags. Pairs with smp_wmb +- * in aio_notify. ++ * Order reads of ctx->notified (in aio_context_notifier_poll()) and the ++ * above clearing of ctx->notified before reads of e.g. bh->flags. Pairs ++ * with smp_wmb() in aio_notify. + */ + smp_mb(); + } +@@ -487,6 +488,11 @@ static bool aio_context_notifier_poll(void *opaque) + EventNotifier *e = opaque; + AioContext *ctx = container_of(e, AioContext, notifier); + ++ /* ++ * No need for load-acquire because we just want to kick the ++ * event loop. aio_notify_accept() takes care of synchronizing ++ * the event loop with the producers. ++ */ + return qatomic_read(&ctx->notified); + } + +-- +2.37.3 + diff --git a/SOURCES/kvm-async-update-documentation-of-the-memory-barriers.patch b/SOURCES/kvm-async-update-documentation-of-the-memory-barriers.patch new file mode 100644 index 0000000..aea20ea --- /dev/null +++ b/SOURCES/kvm-async-update-documentation-of-the-memory-barriers.patch @@ -0,0 +1,111 @@ +From ea3856bb545d19499602830cdc3076d83a981e7a Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:15:36 -0500 +Subject: [PATCH 09/13] async: update documentation of the memory barriers + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2168472 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Eric Auger +RH-Acked-by: Paolo Bonzini +RH-Acked-by: David Hildenbrand +RH-Commit: [9/10] d471da2acf7a107cf75f3327c5e8d7456307160e + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472 + +commit 8dd48650b43dfde4ebea34191ac267e474bcc29e +Author: Paolo Bonzini +Date: Mon Mar 6 10:15:06 2023 +0100 + + async: update documentation of the memory barriers + + Ever since commit 8c6b0356b539 ("util/async: make bh_aio_poll() O(1)", + 2020-02-22), synchronization between qemu_bh_schedule() and aio_bh_poll() + is happening when the bottom half is enqueued in the bh_list; not + when the flags are set. Update the documentation to match. + + Reviewed-by: Stefan Hajnoczi + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + util/async.c | 33 +++++++++++++++++++-------------- + 1 file changed, 19 insertions(+), 14 deletions(-) + +diff --git a/util/async.c b/util/async.c +index 6f6717a34b..795fe699b6 100644 +--- a/util/async.c ++++ b/util/async.c +@@ -71,14 +71,21 @@ static void aio_bh_enqueue(QEMUBH *bh, unsigned new_flags) + unsigned old_flags; + + /* +- * The memory barrier implicit in qatomic_fetch_or makes sure that: +- * 1. idle & any writes needed by the callback are done before the +- * locations are read in the aio_bh_poll. +- * 2. ctx is loaded before the callback has a chance to execute and bh +- * could be freed. ++ * Synchronizes with atomic_fetch_and() in aio_bh_dequeue(), ensuring that ++ * insertion starts after BH_PENDING is set. + */ + old_flags = qatomic_fetch_or(&bh->flags, BH_PENDING | new_flags); ++ + if (!(old_flags & BH_PENDING)) { ++ /* ++ * At this point the bottom half becomes visible to aio_bh_poll(). ++ * This insertion thus synchronizes with QSLIST_MOVE_ATOMIC in ++ * aio_bh_poll(), ensuring that: ++ * 1. any writes needed by the callback are visible from the callback ++ * after aio_bh_dequeue() returns bh. ++ * 2. ctx is loaded before the callback has a chance to execute and bh ++ * could be freed. ++ */ + QSLIST_INSERT_HEAD_ATOMIC(&ctx->bh_list, bh, next); + } + +@@ -97,11 +104,8 @@ static QEMUBH *aio_bh_dequeue(BHList *head, unsigned *flags) + QSLIST_REMOVE_HEAD(head, next); + + /* +- * The qatomic_and is paired with aio_bh_enqueue(). The implicit memory +- * barrier ensures that the callback sees all writes done by the scheduling +- * thread. It also ensures that the scheduling thread sees the cleared +- * flag before bh->cb has run, and thus will call aio_notify again if +- * necessary. ++ * Synchronizes with qatomic_fetch_or() in aio_bh_enqueue(), ensuring that ++ * the removal finishes before BH_PENDING is reset. + */ + *flags = qatomic_fetch_and(&bh->flags, + ~(BH_PENDING | BH_SCHEDULED | BH_IDLE)); +@@ -148,6 +152,7 @@ int aio_bh_poll(AioContext *ctx) + BHListSlice *s; + int ret = 0; + ++ /* Synchronizes with QSLIST_INSERT_HEAD_ATOMIC in aio_bh_enqueue(). */ + QSLIST_MOVE_ATOMIC(&slice.bh_list, &ctx->bh_list); + QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next); + +@@ -437,15 +442,15 @@ LuringState *aio_get_linux_io_uring(AioContext *ctx) + void aio_notify(AioContext *ctx) + { + /* +- * Write e.g. bh->flags before writing ctx->notified. Pairs with smp_mb in +- * aio_notify_accept. ++ * Write e.g. ctx->bh_list before writing ctx->notified. Pairs with ++ * smp_mb() in aio_notify_accept(). + */ + smp_wmb(); + qatomic_set(&ctx->notified, true); + + /* +- * Write ctx->notified before reading ctx->notify_me. Pairs +- * with smp_mb in aio_ctx_prepare or aio_poll. ++ * Write ctx->notified (and also ctx->bh_list) before reading ctx->notify_me. ++ * Pairs with smp_mb() in aio_ctx_prepare or aio_poll. + */ + smp_mb(); + if (qatomic_read(&ctx->notify_me)) { +-- +2.37.3 + diff --git a/SOURCES/kvm-backends-hostmem-Fix-support-of-memory-backend-memfd.patch b/SOURCES/kvm-backends-hostmem-Fix-support-of-memory-backend-memfd.patch new file mode 100644 index 0000000..b29289b --- /dev/null +++ b/SOURCES/kvm-backends-hostmem-Fix-support-of-memory-backend-memfd.patch @@ -0,0 +1,71 @@ +From 60da56e3685969493ae483c3cc2c66af13d00baf Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Wed, 10 Aug 2022 14:57:18 +0200 +Subject: [PATCH 1/3] backends/hostmem: Fix support of memory-backend-memfd in + qemu_maxrampagesize() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 221: backends/hostmem: Fix support of memory-backend-memfd in qemu_maxrampagesize() +RH-Bugzilla: 2117149 +RH-Acked-by: Thomas Huth +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck +RH-Commit: [1/1] b5a1047750af32c0a261b8385ea0e819eb16681a + +It is currently not possible yet to use "memory-backend-memfd" on s390x +with hugepages enabled. This problem is caused by qemu_maxrampagesize() +not taking memory-backend-memfd objects into account yet, so the code +in s390_memory_init() fails to enable the huge page support there via +s390_set_max_pagesize(). Fix it by generalizing the code, so that it +looks at qemu_ram_pagesize(memdev->mr.ram_block) instead of re-trying +to get the information from the filesystem. + +Suggested-by: David Hildenbrand +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2116496 +Message-Id: <20220810125720.3849835-2-thuth@redhat.com> +Reviewed-by: David Hildenbrand +Reviewed-by: Claudio Imbrenda +Signed-off-by: Thomas Huth +(cherry picked from commit 8be934b70e923104da883b990dee18f02552d40e) +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2117149 +[clg: Resolved conflict on qemu_real_host_page_size() ] +Signed-off-by: Cédric Le Goater +--- + backends/hostmem.c | 14 ++------------ + 1 file changed, 2 insertions(+), 12 deletions(-) + +diff --git a/backends/hostmem.c b/backends/hostmem.c +index 4c05862ed5..0c4654ea85 100644 +--- a/backends/hostmem.c ++++ b/backends/hostmem.c +@@ -305,22 +305,12 @@ bool host_memory_backend_is_mapped(HostMemoryBackend *backend) + return backend->is_mapped; + } + +-#ifdef __linux__ + size_t host_memory_backend_pagesize(HostMemoryBackend *memdev) + { +- Object *obj = OBJECT(memdev); +- char *path = object_property_get_str(obj, "mem-path", NULL); +- size_t pagesize = qemu_mempath_getpagesize(path); +- +- g_free(path); ++ size_t pagesize = qemu_ram_pagesize(memdev->mr.ram_block); ++ g_assert(pagesize >= qemu_real_host_page_size); + return pagesize; + } +-#else +-size_t host_memory_backend_pagesize(HostMemoryBackend *memdev) +-{ +- return qemu_real_host_page_size; +-} +-#endif + + static void + host_memory_backend_memory_complete(UserCreatable *uc, Error **errp) +-- +2.35.3 + diff --git a/SOURCES/kvm-block-Lock-AioContext-for-drain_end-in-blockdev-reop.patch b/SOURCES/kvm-block-Lock-AioContext-for-drain_end-in-blockdev-reop.patch new file mode 100644 index 0000000..eb0f3cf --- /dev/null +++ b/SOURCES/kvm-block-Lock-AioContext-for-drain_end-in-blockdev-reop.patch @@ -0,0 +1,63 @@ +From b21fa5ecd9acf2b91839a2915fb4bb39dac4c803 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 3 Feb 2022 15:05:33 +0100 +Subject: [PATCH 2/5] block: Lock AioContext for drain_end in blockdev-reopen + +RH-Author: Kevin Wolf +RH-MergeRequest: 142: block: Lock AioContext for drain_end in blockdev-reopen +RH-Commit: [1/2] 98de3b5987f88ea6b4b503f623d6c4475574e037 +RH-Bugzilla: 2067118 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Hanna Reitz + +bdrv_subtree_drained_end() requires the caller to hold the AioContext +lock for the drained node. Not doing this for nodes outside of the main +AioContext leads to crashes when AIO_WAIT_WHILE() needs to wait and +tries to temporarily release the lock. + +Fixes: 3908b7a8994fa5ef7a89aa58cd5a02fc58141592 +Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2046659 +Reported-by: Qing Wang +Signed-off-by: Kevin Wolf +Message-Id: <20220203140534.36522-2-kwolf@redhat.com> +Reviewed-by: Hanna Reitz +Signed-off-by: Kevin Wolf +(cherry picked from commit aba8205be0707b9d108e32254e186ba88107a869) +Signed-off-by: Kevin Wolf +--- + blockdev.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +diff --git a/blockdev.c b/blockdev.c +index b35072644e..565f6a81fd 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -3562,6 +3562,7 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) + { + BlockReopenQueue *queue = NULL; + GSList *drained = NULL; ++ GSList *p; + + /* Add each one of the BDS that we want to reopen to the queue */ + for (; reopen_list != NULL; reopen_list = reopen_list->next) { +@@ -3611,7 +3612,15 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) + + fail: + bdrv_reopen_queue_free(queue); +- g_slist_free_full(drained, (GDestroyNotify) bdrv_subtree_drained_end); ++ for (p = drained; p; p = p->next) { ++ BlockDriverState *bs = p->data; ++ AioContext *ctx = bdrv_get_aio_context(bs); ++ ++ aio_context_acquire(ctx); ++ bdrv_subtree_drained_end(bs); ++ aio_context_release(ctx); ++ } ++ g_slist_free(drained); + } + + void qmp_blockdev_del(const char *node_name, Error **errp) +-- +2.27.0 + diff --git a/SOURCES/kvm-block-Make-bdrv_refresh_limits-non-recursive.patch b/SOURCES/kvm-block-Make-bdrv_refresh_limits-non-recursive.patch new file mode 100644 index 0000000..7ff8e7e --- /dev/null +++ b/SOURCES/kvm-block-Make-bdrv_refresh_limits-non-recursive.patch @@ -0,0 +1,78 @@ +From 6348063b91b2370cc27153fd58fd11a6681631f6 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Wed, 16 Feb 2022 11:53:53 +0100 +Subject: [PATCH 22/24] block: Make bdrv_refresh_limits() non-recursive +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Hanna Reitz +RH-MergeRequest: 189: block: Make bdrv_refresh_limits() non-recursive +RH-Commit: [1/3] 1a1fe37f8d8f0344dd8639d6cc9d884d1aff9096 +RH-Bugzilla: 2072932 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf + +bdrv_refresh_limits() recurses down to the node's children. That does +not seem necessary: When we refresh limits on some node, and then +recurse down and were to change one of its children's BlockLimits, then +that would mean we noticed the changed limits by pure chance. The fact +that we refresh the parent's limits has nothing to do with it, so the +reason for the change probably happened before this point in time, and +we should have refreshed the limits then. + +Consequently, we should actually propagate block limits changes upwards, +not downwards.  That is a separate and pre-existing issue, though, and +so will not be addressed in this patch. + +The problem with recursing is that bdrv_refresh_limits() is not atomic. +It begins with zeroing BDS.bl, and only then sets proper, valid limits. +If we do not drain all nodes whose limits are refreshed, then concurrent +I/O requests can encounter invalid request_alignment values and crash +qemu. Therefore, a recursing bdrv_refresh_limits() requires the whole +subtree to be drained, which is currently not ensured by most callers. + +A non-recursive bdrv_refresh_limits() only requires the node in question +to not receive I/O requests, and this is done by most callers in some +way or another: +- bdrv_open_driver() deals with a new node with no parents yet +- bdrv_set_file_or_backing_noperm() acts on a drained node +- bdrv_reopen_commit() acts only on drained nodes +- bdrv_append() should in theory require the node to be drained; in + practice most callers just lock the AioContext, which should at least + be enough to prevent concurrent I/O requests from accessing invalid + limits + +So we can resolve the bug by making bdrv_refresh_limits() non-recursive. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1879437 +Signed-off-by: Hanna Reitz +Reviewed-by: Eric Blake +Message-Id: <20220216105355.30729-2-hreitz@redhat.com> +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Kevin Wolf +(cherry picked from commit 4d378bbd831bdd2f6e6adcd4ea5b77b6effaa627) +Signed-off-by: Hanna Reitz +--- + block/io.c | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/block/io.c b/block/io.c +index 4e4cb556c5..c3e7301613 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -189,10 +189,6 @@ void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp) + QLIST_FOREACH(c, &bs->children, next) { + if (c->role & (BDRV_CHILD_DATA | BDRV_CHILD_FILTERED | BDRV_CHILD_COW)) + { +- bdrv_refresh_limits(c->bs, tran, errp); +- if (*errp) { +- return; +- } + bdrv_merge_limits(&bs->bl, &c->bs->bl); + have_limits = true; + } +-- +2.35.3 + diff --git a/SOURCES/kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch b/SOURCES/kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch new file mode 100644 index 0000000..52d37d8 --- /dev/null +++ b/SOURCES/kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch @@ -0,0 +1,129 @@ +From bf4c15a3debbe68b6eb25c52174843470a9c014f Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 11 Jan 2022 15:36:12 +0000 +Subject: [PATCH 3/6] block-backend: prevent dangling BDS pointers across + aio_poll() + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 109: block-backend: prevent dangling BDS pointers across aio_poll() +RH-Commit: [1/2] da5a59eddff0dc10be7de8e291fa675143d11d73 +RH-Bugzilla: 2021778 2036178 +RH-Acked-by: Hanna Reitz +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Kevin Wolf + +The BlockBackend root child can change when aio_poll() is invoked. This +happens when a temporary filter node is removed upon blockjob +completion, for example. + +Functions in block/block-backend.c must be aware of this when using a +blk_bs() pointer across aio_poll() because the BlockDriverState refcnt +may reach 0, resulting in a stale pointer. + +One example is scsi_device_purge_requests(), which calls blk_drain() to +wait for in-flight requests to cancel. If the backup blockjob is active, +then the BlockBackend root child is a temporary filter BDS owned by the +blockjob. The blockjob can complete during bdrv_drained_begin() and the +last reference to the BDS is released when the temporary filter node is +removed. This results in a use-after-free when blk_drain() calls +bdrv_drained_end(bs) on the dangling pointer. + +Explicitly hold a reference to bs across block APIs that invoke +aio_poll(). + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2021778 +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2036178 +Signed-off-by: Stefan Hajnoczi +Message-Id: <20220111153613.25453-2-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 1e3552dbd28359d35967b7c28dc86cde1bc29205) +Signed-off-by: Stefan Hajnoczi +--- + block/block-backend.c | 19 +++++++++++++++++-- + 1 file changed, 17 insertions(+), 2 deletions(-) + +diff --git a/block/block-backend.c b/block/block-backend.c +index 12ef80ea17..23e727199b 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -822,16 +822,22 @@ BlockBackend *blk_by_public(BlockBackendPublic *public) + void blk_remove_bs(BlockBackend *blk) + { + ThrottleGroupMember *tgm = &blk->public.throttle_group_member; +- BlockDriverState *bs; + BdrvChild *root; + + notifier_list_notify(&blk->remove_bs_notifiers, blk); + if (tgm->throttle_state) { +- bs = blk_bs(blk); ++ BlockDriverState *bs = blk_bs(blk); ++ ++ /* ++ * Take a ref in case blk_bs() changes across bdrv_drained_begin(), for ++ * example, if a temporary filter node is removed by a blockjob. ++ */ ++ bdrv_ref(bs); + bdrv_drained_begin(bs); + throttle_group_detach_aio_context(tgm); + throttle_group_attach_aio_context(tgm, qemu_get_aio_context()); + bdrv_drained_end(bs); ++ bdrv_unref(bs); + } + + blk_update_root_state(blk); +@@ -1705,6 +1711,7 @@ void blk_drain(BlockBackend *blk) + BlockDriverState *bs = blk_bs(blk); + + if (bs) { ++ bdrv_ref(bs); + bdrv_drained_begin(bs); + } + +@@ -1714,6 +1721,7 @@ void blk_drain(BlockBackend *blk) + + if (bs) { + bdrv_drained_end(bs); ++ bdrv_unref(bs); + } + } + +@@ -2044,10 +2052,13 @@ static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context, + int ret; + + if (bs) { ++ bdrv_ref(bs); ++ + if (update_root_node) { + ret = bdrv_child_try_set_aio_context(bs, new_context, blk->root, + errp); + if (ret < 0) { ++ bdrv_unref(bs); + return ret; + } + } +@@ -2057,6 +2068,8 @@ static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context, + throttle_group_attach_aio_context(tgm, new_context); + bdrv_drained_end(bs); + } ++ ++ bdrv_unref(bs); + } + + blk->ctx = new_context; +@@ -2326,11 +2339,13 @@ void blk_io_limits_disable(BlockBackend *blk) + ThrottleGroupMember *tgm = &blk->public.throttle_group_member; + assert(tgm->throttle_state); + if (bs) { ++ bdrv_ref(bs); + bdrv_drained_begin(bs); + } + throttle_group_unregister_tgm(tgm); + if (bs) { + bdrv_drained_end(bs); ++ bdrv_unref(bs); + } + } + +-- +2.27.0 + diff --git a/SOURCES/kvm-block-io-Update-BSC-only-if-want_zero-is-true.patch b/SOURCES/kvm-block-io-Update-BSC-only-if-want_zero-is-true.patch new file mode 100644 index 0000000..c1ee128 --- /dev/null +++ b/SOURCES/kvm-block-io-Update-BSC-only-if-want_zero-is-true.patch @@ -0,0 +1,56 @@ +From 4c6eff78f4b31ec4bd7b42440396760d19fde63e Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Tue, 18 Jan 2022 17:59:59 +0100 +Subject: [PATCH 6/7] block/io: Update BSC only if want_zero is true + +RH-Author: Hanna Reitz +RH-MergeRequest: 112: block/io: Update BSC only if want_zero is true +RH-Commit: [1/2] a202de1f52110d1e871c3b5b58f2d9e9b5d17570 +RH-Bugzilla: 2041480 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf + +We update the block-status cache whenever we get new information from a +bdrv_co_block_status() call to the block driver. However, if we have +passed want_zero=false to that call, it may flag areas containing zeroes +as data, and so we would update the block-status cache with wrong +information. + +Therefore, we should not update the cache with want_zero=false. + +Reported-by: Nir Soffer +Fixes: 0bc329fbb00 ("block: block-status cache for data regions") +Reviewed-by: Nir Soffer +Cc: qemu-stable@nongnu.org +Signed-off-by: Hanna Reitz +Message-Id: <20220118170000.49423-2-hreitz@redhat.com> +Reviewed-by: Eric Blake +Signed-off-by: Eric Blake +(cherry picked from commit 113b727ce788335cf76f65355d670c9bc130fd75) +Signed-off-by: Hanna Reitz +--- + block/io.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/block/io.c b/block/io.c +index bb0a254def..4e4cb556c5 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -2497,8 +2497,12 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs, + * non-protocol nodes, and then it is never used. However, filling + * the cache requires an RCU update, so double check here to avoid + * such an update if possible. ++ * ++ * Check want_zero, because we only want to update the cache when we ++ * have accurate information about what is zero and what is data. + */ +- if (ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) && ++ if (want_zero && ++ ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) && + QLIST_EMPTY(&bs->children)) + { + /* +-- +2.27.0 + diff --git a/SOURCES/kvm-block-mirror-Do-not-wait-for-active-writes.patch b/SOURCES/kvm-block-mirror-Do-not-wait-for-active-writes.patch new file mode 100644 index 0000000..1756d88 --- /dev/null +++ b/SOURCES/kvm-block-mirror-Do-not-wait-for-active-writes.patch @@ -0,0 +1,153 @@ +From 192f956f2b0761f270070555f8feb1f0544e5558 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Wed, 9 Nov 2022 17:54:48 +0100 +Subject: [PATCH 01/11] block/mirror: Do not wait for active writes + +RH-Author: Hanna Czenczek +RH-MergeRequest: 246: block/mirror: Make active mirror progress even under full load +RH-Bugzilla: 2125119 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Kevin Wolf +RH-Commit: [1/3] 652d1e55b954f13eaec2c86f58735d4942837e16 + +Waiting for all active writes to settle before daring to create a +background copying operation means that we will never do background +operations while the guest does anything (in write-blocking mode), and +therefore cannot converge. Yes, we also will not diverge, but actually +converging would be even nicer. + +It is unclear why we did decide to wait for all active writes to settle +before creating a background operation, but it just does not seem +necessary. Active writes will put themselves into the in_flight bitmap +and thus properly block actually conflicting background requests. + +It is important for active requests to wait on overlapping background +requests, which we do in active_write_prepare(). However, so far it was +not documented why it is important. Add such documentation now, and +also to the other call of mirror_wait_on_conflicts(), so that it becomes +more clear why and when requests need to actively wait for other +requests to settle. + +Another thing to note is that of course we need to ensure that there are +no active requests when the job completes, but that is done by virtue of +the BDS being drained anyway, so there cannot be any active requests at +that point. + +With this change, we will need to explicitly keep track of how many +bytes are in flight in active requests so that +job_progress_set_remaining() in mirror_run() can set the correct number +of remaining bytes. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2123297 +Signed-off-by: Hanna Reitz +Message-Id: <20221109165452.67927-2-hreitz@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit d69a879bdf1aed586478eaa161ee064fe1b92f1a) +Signed-off-by: Hanna Czenczek +--- + block/mirror.c | 37 ++++++++++++++++++++++++++++++------- + 1 file changed, 30 insertions(+), 7 deletions(-) + +diff --git a/block/mirror.c b/block/mirror.c +index efec2c7674..282f428cb7 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -81,6 +81,7 @@ typedef struct MirrorBlockJob { + int max_iov; + bool initial_zeroing_ongoing; + int in_active_write_counter; ++ int64_t active_write_bytes_in_flight; + bool prepared; + bool in_drain; + } MirrorBlockJob; +@@ -493,6 +494,13 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) + } + bdrv_dirty_bitmap_unlock(s->dirty_bitmap); + ++ /* ++ * Wait for concurrent requests to @offset. The next loop will limit the ++ * copied area based on in_flight_bitmap so we only copy an area that does ++ * not overlap with concurrent in-flight requests. Still, we would like to ++ * copy something, so wait until there are at least no more requests to the ++ * very beginning of the area. ++ */ + mirror_wait_on_conflicts(NULL, s, offset, 1); + + job_pause_point(&s->common.job); +@@ -993,12 +1001,6 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) + int64_t cnt, delta; + bool should_complete; + +- /* Do not start passive operations while there are active +- * writes in progress */ +- while (s->in_active_write_counter) { +- mirror_wait_for_any_operation(s, true); +- } +- + if (s->ret < 0) { + ret = s->ret; + goto immediate_exit; +@@ -1015,7 +1017,9 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) + /* cnt is the number of dirty bytes remaining and s->bytes_in_flight is + * the number of bytes currently being processed; together those are + * the current remaining operation length */ +- job_progress_set_remaining(&s->common.job, s->bytes_in_flight + cnt); ++ job_progress_set_remaining(&s->common.job, ++ s->bytes_in_flight + cnt + ++ s->active_write_bytes_in_flight); + + /* Note that even when no rate limit is applied we need to yield + * periodically with no pending I/O so that bdrv_drain_all() returns. +@@ -1073,6 +1077,10 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) + + s->in_drain = true; + bdrv_drained_begin(bs); ++ ++ /* Must be zero because we are drained */ ++ assert(s->in_active_write_counter == 0); ++ + cnt = bdrv_get_dirty_count(s->dirty_bitmap); + if (cnt > 0 || mirror_flush(s) < 0) { + bdrv_drained_end(bs); +@@ -1306,6 +1314,7 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod method, + } + + job_progress_increase_remaining(&job->common.job, bytes); ++ job->active_write_bytes_in_flight += bytes; + + switch (method) { + case MIRROR_METHOD_COPY: +@@ -1327,6 +1336,7 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod method, + abort(); + } + ++ job->active_write_bytes_in_flight -= bytes; + if (ret >= 0) { + job_progress_update(&job->common.job, bytes); + } else { +@@ -1375,6 +1385,19 @@ static MirrorOp *coroutine_fn active_write_prepare(MirrorBlockJob *s, + + s->in_active_write_counter++; + ++ /* ++ * Wait for concurrent requests affecting the area. If there are already ++ * running requests that are copying off now-to-be stale data in the area, ++ * we must wait for them to finish before we begin writing fresh data to the ++ * target so that the write operations appear in the correct order. ++ * Note that background requests (see mirror_iteration()) in contrast only ++ * wait for conflicting requests at the start of the dirty area, and then ++ * (based on the in_flight_bitmap) truncate the area to copy so it will not ++ * conflict with any requests beyond that. For active writes, however, we ++ * cannot truncate that area. The request from our parent must be blocked ++ * until the area is copied in full. Therefore, we must wait for the whole ++ * area to become free of concurrent requests. ++ */ + mirror_wait_on_conflicts(op, s, offset, bytes); + + bitmap_set(s->in_flight_bitmap, start_chunk, end_chunk - start_chunk); +-- +2.37.3 + diff --git a/SOURCES/kvm-block-mirror-Drop-mirror_wait_for_any_operation.patch b/SOURCES/kvm-block-mirror-Drop-mirror_wait_for_any_operation.patch new file mode 100644 index 0000000..457788e --- /dev/null +++ b/SOURCES/kvm-block-mirror-Drop-mirror_wait_for_any_operation.patch @@ -0,0 +1,76 @@ +From 57c79ed20cb73aa9aa4dd7487379b85ea3f936f6 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Wed, 9 Nov 2022 17:54:49 +0100 +Subject: [PATCH 02/11] block/mirror: Drop mirror_wait_for_any_operation() + +RH-Author: Hanna Czenczek +RH-MergeRequest: 246: block/mirror: Make active mirror progress even under full load +RH-Bugzilla: 2125119 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Kevin Wolf +RH-Commit: [2/3] dec37883bcc491441ae08d9592d1ec26a47765c0 + +mirror_wait_for_free_in_flight_slot() is the only remaining user of +mirror_wait_for_any_operation(), so inline the latter into the former. + +Signed-off-by: Hanna Reitz +Message-Id: <20221109165452.67927-3-hreitz@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit eb994912993077f178ccb43b20e422ecf9ae4ac7) +Signed-off-by: Hanna Czenczek +--- + block/mirror.c | 21 ++++++++------------- + 1 file changed, 8 insertions(+), 13 deletions(-) + +diff --git a/block/mirror.c b/block/mirror.c +index 282f428cb7..6b02555ad7 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -304,19 +304,21 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset, + } + + static inline void coroutine_fn +-mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) ++mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s) + { + MirrorOp *op; + + QTAILQ_FOREACH(op, &s->ops_in_flight, next) { +- /* Do not wait on pseudo ops, because it may in turn wait on ++ /* ++ * Do not wait on pseudo ops, because it may in turn wait on + * some other operation to start, which may in fact be the + * caller of this function. Since there is only one pseudo op + * at any given time, we will always find some real operation +- * to wait on. */ +- if (!op->is_pseudo_op && op->is_in_flight && +- op->is_active_write == active) +- { ++ * to wait on. ++ * Also, do not wait on active operations, because they do not ++ * use up in-flight slots. ++ */ ++ if (!op->is_pseudo_op && op->is_in_flight && !op->is_active_write) { + qemu_co_queue_wait(&op->waiting_requests, NULL); + return; + } +@@ -324,13 +326,6 @@ mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) + abort(); + } + +-static inline void coroutine_fn +-mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s) +-{ +- /* Only non-active operations use up in-flight slots */ +- mirror_wait_for_any_operation(s, false); +-} +- + /* Perform a mirror copy operation. + * + * *op->bytes_handled is set to the number of bytes copied after and +-- +2.37.3 + diff --git a/SOURCES/kvm-block-mirror-Fix-NULL-s-job-in-active-writes.patch b/SOURCES/kvm-block-mirror-Fix-NULL-s-job-in-active-writes.patch new file mode 100644 index 0000000..b353bd1 --- /dev/null +++ b/SOURCES/kvm-block-mirror-Fix-NULL-s-job-in-active-writes.patch @@ -0,0 +1,75 @@ +From b1f5aa5a342a25dc558ee9d435fed0643fe5155f Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Wed, 9 Nov 2022 17:54:50 +0100 +Subject: [PATCH 03/11] block/mirror: Fix NULL s->job in active writes + +RH-Author: Hanna Czenczek +RH-MergeRequest: 246: block/mirror: Make active mirror progress even under full load +RH-Bugzilla: 2125119 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Kevin Wolf +RH-Commit: [3/3] 49d7ebd15667151a6e14228a8260cfdd0aa27a78 + +There is a small gap in mirror_start_job() before putting the mirror +filter node into the block graph (bdrv_append() call) and the actual job +being created. Before the job is created, MirrorBDSOpaque.job is NULL. + +It is possible that requests come in when bdrv_drained_end() is called, +and those requests would see MirrorBDSOpaque.job == NULL. Have our +filter node handle that case gracefully. + +Signed-off-by: Hanna Reitz +Message-Id: <20221109165452.67927-4-hreitz@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit da93d5c84e56e6b4e84aa8e98b6b984c9b6bb528) +Signed-off-by: Hanna Czenczek +--- + block/mirror.c | 20 ++++++++++++-------- + 1 file changed, 12 insertions(+), 8 deletions(-) + +diff --git a/block/mirror.c b/block/mirror.c +index 6b02555ad7..50289fca49 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -1438,11 +1438,13 @@ static int coroutine_fn bdrv_mirror_top_do_write(BlockDriverState *bs, + MirrorOp *op = NULL; + MirrorBDSOpaque *s = bs->opaque; + int ret = 0; +- bool copy_to_target; ++ bool copy_to_target = false; + +- copy_to_target = s->job->ret >= 0 && +- !job_is_cancelled(&s->job->common.job) && +- s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING; ++ if (s->job) { ++ copy_to_target = s->job->ret >= 0 && ++ !job_is_cancelled(&s->job->common.job) && ++ s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING; ++ } + + if (copy_to_target) { + op = active_write_prepare(s->job, offset, bytes); +@@ -1487,11 +1489,13 @@ static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs, + QEMUIOVector bounce_qiov; + void *bounce_buf; + int ret = 0; +- bool copy_to_target; ++ bool copy_to_target = false; + +- copy_to_target = s->job->ret >= 0 && +- !job_is_cancelled(&s->job->common.job) && +- s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING; ++ if (s->job) { ++ copy_to_target = s->job->ret >= 0 && ++ !job_is_cancelled(&s->job->common.job) && ++ s->job->copy_mode == MIRROR_COPY_MODE_WRITE_BLOCKING; ++ } + + if (copy_to_target) { + /* The guest might concurrently modify the data to write; but +-- +2.37.3 + diff --git a/SOURCES/kvm-block-nbd-Assert-there-are-no-timers-when-closed.patch b/SOURCES/kvm-block-nbd-Assert-there-are-no-timers-when-closed.patch new file mode 100644 index 0000000..324021b --- /dev/null +++ b/SOURCES/kvm-block-nbd-Assert-there-are-no-timers-when-closed.patch @@ -0,0 +1,52 @@ +From d5a85fcf996948d1154e88e9ee3b4e8c64ec2694 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Fri, 4 Feb 2022 12:10:08 +0100 +Subject: [PATCH 2/6] block/nbd: Assert there are no timers when closed + +RH-Author: Hanna Reitz +RH-MergeRequest: 117: block/nbd: Handle AioContext changes +RH-Commit: [2/6] 995795ae9844a7d2b28cb1e57fd7fe81482d0205 +RH-Bugzilla: 2035185 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +Our two timers must not remain armed beyond nbd_clear_bdrvstate(), or +they will access freed data when they fire. + +This patch is separate from the patches that actually fix the issue +(HEAD^^ and HEAD^) so that you can run the associated regression iotest +(281) on a configuration that reproducibly exposes the bug. + +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Reitz +Signed-off-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 8a39c381e5e407d2fe5500324323f90a8540fa90) + +Conflict: +- block/nbd.c: open_timer was introduced after the 6.2 release (for + nbd's @open-timeout parameter), and has not been backported, so drop + the assertion that it is NULL + +Signed-off-by: Hanna Reitz +--- + block/nbd.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/block/nbd.c b/block/nbd.c +index b8e5a9b4cc..aab20125d8 100644 +--- a/block/nbd.c ++++ b/block/nbd.c +@@ -108,6 +108,9 @@ static void nbd_clear_bdrvstate(BlockDriverState *bs) + + yank_unregister_instance(BLOCKDEV_YANK_INSTANCE(bs->node_name)); + ++ /* Must not leave timers behind that would access freed data */ ++ assert(!s->reconnect_delay_timer); ++ + object_unref(OBJECT(s->tlscreds)); + qapi_free_SocketAddress(s->saddr); + s->saddr = NULL; +-- +2.27.0 + diff --git a/SOURCES/kvm-block-nbd-Delete-reconnect-delay-timer-when-done.patch b/SOURCES/kvm-block-nbd-Delete-reconnect-delay-timer-when-done.patch new file mode 100644 index 0000000..7d1c000 --- /dev/null +++ b/SOURCES/kvm-block-nbd-Delete-reconnect-delay-timer-when-done.patch @@ -0,0 +1,54 @@ +From 8e23c0f208c6bd5bb64c4f6e4863b93fa6f4e9de Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Fri, 4 Feb 2022 12:10:06 +0100 +Subject: [PATCH 1/6] block/nbd: Delete reconnect delay timer when done + +RH-Author: Hanna Reitz +RH-MergeRequest: 117: block/nbd: Handle AioContext changes +RH-Commit: [1/6] 70814602a8a43a7c14857d76266d82b1aa5174a9 +RH-Bugzilla: 2035185 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +We start the reconnect delay timer to cancel the reconnection attempt +after a while. Once nbd_co_do_establish_connection() has returned, this +attempt is over, and we no longer need the timer. + +Delete it before returning from nbd_reconnect_attempt(), so that it does +not persist beyond the I/O request that was paused for reconnecting; we +do not want it to fire in a drained section, because all sort of things +can happen in such a section (e.g. the AioContext might be changed, and +we do not want the timer to fire in the wrong context; or the BDS might +even be deleted, and so the timer CB would access already-freed data). + +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Reitz +Signed-off-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 3ce1fc16bad9c3f8b7b10b451a224d6d76e5c551) +Signed-off-by: Hanna Reitz +--- + block/nbd.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/block/nbd.c b/block/nbd.c +index 5ef462db1b..b8e5a9b4cc 100644 +--- a/block/nbd.c ++++ b/block/nbd.c +@@ -353,6 +353,13 @@ static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s) + } + + nbd_co_do_establish_connection(s->bs, NULL); ++ ++ /* ++ * The reconnect attempt is done (maybe successfully, maybe not), so ++ * we no longer need this timer. Delete it so it will not outlive ++ * this I/O request (so draining removes all timers). ++ */ ++ reconnect_delay_timer_del(s); + } + + static coroutine_fn int nbd_receive_replies(BDRVNBDState *s, uint64_t handle) +-- +2.27.0 + diff --git a/SOURCES/kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch b/SOURCES/kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch new file mode 100644 index 0000000..4cd3cce --- /dev/null +++ b/SOURCES/kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch @@ -0,0 +1,107 @@ +From c7f63e7bbc5119d92775e20d1ebbf8280c78b732 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Fri, 4 Feb 2022 12:10:11 +0100 +Subject: [PATCH 5/6] block/nbd: Move s->ioc on AioContext change + +RH-Author: Hanna Reitz +RH-MergeRequest: 117: block/nbd: Handle AioContext changes +RH-Commit: [5/6] 107757b9fbadfb832c75521317108525daa4174e +RH-Bugzilla: 2035185 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +s->ioc must always be attached to the NBD node's AioContext. If that +context changes, s->ioc must be attached to the new context. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2033626 +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Reitz +Signed-off-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit e15f3a66c830e3fce99c9d56c493c2f7078a1225) + +Conflict: +- block/nbd.c: open_timer was added after the 6.2 release, so we need + not (and cannot) assert it is NULL here. + +Signed-off-by: Hanna Reitz +--- + block/nbd.c | 41 +++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 41 insertions(+) + +diff --git a/block/nbd.c b/block/nbd.c +index aab20125d8..a3896c7f5f 100644 +--- a/block/nbd.c ++++ b/block/nbd.c +@@ -2003,6 +2003,38 @@ static void nbd_cancel_in_flight(BlockDriverState *bs) + nbd_co_establish_connection_cancel(s->conn); + } + ++static void nbd_attach_aio_context(BlockDriverState *bs, ++ AioContext *new_context) ++{ ++ BDRVNBDState *s = bs->opaque; ++ ++ /* ++ * The reconnect_delay_timer is scheduled in I/O paths when the ++ * connection is lost, to cancel the reconnection attempt after a ++ * given time. Once this attempt is done (successfully or not), ++ * nbd_reconnect_attempt() ensures the timer is deleted before the ++ * respective I/O request is resumed. ++ * Since the AioContext can only be changed when a node is drained, ++ * the reconnect_delay_timer cannot be active here. ++ */ ++ assert(!s->reconnect_delay_timer); ++ ++ if (s->ioc) { ++ qio_channel_attach_aio_context(s->ioc, new_context); ++ } ++} ++ ++static void nbd_detach_aio_context(BlockDriverState *bs) ++{ ++ BDRVNBDState *s = bs->opaque; ++ ++ assert(!s->reconnect_delay_timer); ++ ++ if (s->ioc) { ++ qio_channel_detach_aio_context(s->ioc); ++ } ++} ++ + static BlockDriver bdrv_nbd = { + .format_name = "nbd", + .protocol_name = "nbd", +@@ -2026,6 +2058,9 @@ static BlockDriver bdrv_nbd = { + .bdrv_dirname = nbd_dirname, + .strong_runtime_opts = nbd_strong_runtime_opts, + .bdrv_cancel_in_flight = nbd_cancel_in_flight, ++ ++ .bdrv_attach_aio_context = nbd_attach_aio_context, ++ .bdrv_detach_aio_context = nbd_detach_aio_context, + }; + + static BlockDriver bdrv_nbd_tcp = { +@@ -2051,6 +2086,9 @@ static BlockDriver bdrv_nbd_tcp = { + .bdrv_dirname = nbd_dirname, + .strong_runtime_opts = nbd_strong_runtime_opts, + .bdrv_cancel_in_flight = nbd_cancel_in_flight, ++ ++ .bdrv_attach_aio_context = nbd_attach_aio_context, ++ .bdrv_detach_aio_context = nbd_detach_aio_context, + }; + + static BlockDriver bdrv_nbd_unix = { +@@ -2076,6 +2114,9 @@ static BlockDriver bdrv_nbd_unix = { + .bdrv_dirname = nbd_dirname, + .strong_runtime_opts = nbd_strong_runtime_opts, + .bdrv_cancel_in_flight = nbd_cancel_in_flight, ++ ++ .bdrv_attach_aio_context = nbd_attach_aio_context, ++ .bdrv_detach_aio_context = nbd_detach_aio_context, + }; + + static void bdrv_nbd_init(void) +-- +2.27.0 + diff --git a/SOURCES/kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch b/SOURCES/kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch new file mode 100644 index 0000000..2d8f3b4 --- /dev/null +++ b/SOURCES/kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch @@ -0,0 +1,59 @@ +From f4b7133d7aeb1d0b9115d01b5cff4df7f6b24e78 Mon Sep 17 00:00:00 2001 +From: Peter Lieven +Date: Thu, 13 Jan 2022 15:44:25 +0100 +Subject: [PATCH 5/6] block/rbd: fix handling of holes in .bdrv_co_block_status + +RH-Author: Stefano Garzarella +RH-MergeRequest: 110: block/rbd: fix handling of holes in .bdrv_co_block_status +RH-Commit: [1/2] 352656a5c77cc7855b476c3559a10c6aa64a4f58 +RH-Bugzilla: 2037135 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Reitz + +the assumption that we can't hit a hole if we do not diff against a snapshot was wrong. + +We can see a hole in an image if we diff against base if there exists an older snapshot +of the image and we have discarded blocks in the image where the snapshot has data. + +Fix this by simply handling a hole like an unallocated area. There are no callbacks +for unallocated areas so just bail out if we hit a hole. + +Fixes: 0347a8fd4c3faaedf119be04c197804be40a384b +Suggested-by: Ilya Dryomov +Cc: qemu-stable@nongnu.org +Signed-off-by: Peter Lieven +Message-Id: <20220113144426.4036493-2-pl@kamp.de> +Reviewed-by: Ilya Dryomov +Reviewed-by: Stefano Garzarella +Signed-off-by: Kevin Wolf +(cherry picked from commit 9e302f64bb407a9bb097b626da97228c2654cfee) +Signed-off-by: Stefano Garzarella +--- + block/rbd.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/block/rbd.c b/block/rbd.c +index def96292e0..20bb896c4a 100644 +--- a/block/rbd.c ++++ b/block/rbd.c +@@ -1279,11 +1279,11 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len, + RBDDiffIterateReq *req = opaque; + + assert(req->offs + req->bytes <= offs); +- /* +- * we do not diff against a snapshot so we should never receive a callback +- * for a hole. +- */ +- assert(exists); ++ ++ /* treat a hole like an unallocated area and bail out */ ++ if (!exists) { ++ return 0; ++ } + + if (!req->exists && offs > req->offs) { + /* +-- +2.27.0 + diff --git a/SOURCES/kvm-block-rbd-workaround-for-ceph-issue-53784.patch b/SOURCES/kvm-block-rbd-workaround-for-ceph-issue-53784.patch new file mode 100644 index 0000000..7e052f2 --- /dev/null +++ b/SOURCES/kvm-block-rbd-workaround-for-ceph-issue-53784.patch @@ -0,0 +1,103 @@ +From 8c50eedf03d8e62acd387b9aa9369dadcea9324c Mon Sep 17 00:00:00 2001 +From: Peter Lieven +Date: Thu, 13 Jan 2022 15:44:26 +0100 +Subject: [PATCH 6/6] block/rbd: workaround for ceph issue #53784 + +RH-Author: Stefano Garzarella +RH-MergeRequest: 110: block/rbd: fix handling of holes in .bdrv_co_block_status +RH-Commit: [2/2] 1384557462e89bb539d0d25a1a471ad738fb9e89 +RH-Bugzilla: 2037135 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Reitz + +librbd had a bug until early 2022 that affected all versions of ceph that +supported fast-diff. This bug results in reporting of incorrect offsets +if the offset parameter to rbd_diff_iterate2 is not object aligned. + +This patch works around this bug for pre Quincy versions of librbd. + +Fixes: 0347a8fd4c3faaedf119be04c197804be40a384b +Cc: qemu-stable@nongnu.org +Signed-off-by: Peter Lieven +Message-Id: <20220113144426.4036493-3-pl@kamp.de> +Reviewed-by: Ilya Dryomov +Reviewed-by: Stefano Garzarella +Tested-by: Stefano Garzarella +Signed-off-by: Kevin Wolf +(cherry picked from commit fc176116cdea816ceb8dd969080b2b95f58edbc0) +Signed-off-by: Stefano Garzarella +--- + block/rbd.c | 42 ++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 40 insertions(+), 2 deletions(-) + +diff --git a/block/rbd.c b/block/rbd.c +index 20bb896c4a..8f183eba2a 100644 +--- a/block/rbd.c ++++ b/block/rbd.c +@@ -1320,6 +1320,7 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, + int status, r; + RBDDiffIterateReq req = { .offs = offset }; + uint64_t features, flags; ++ uint64_t head = 0; + + assert(offset + bytes <= s->image_size); + +@@ -1347,7 +1348,43 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, + return status; + } + +- r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true, ++#if LIBRBD_VERSION_CODE < LIBRBD_VERSION(1, 17, 0) ++ /* ++ * librbd had a bug until early 2022 that affected all versions of ceph that ++ * supported fast-diff. This bug results in reporting of incorrect offsets ++ * if the offset parameter to rbd_diff_iterate2 is not object aligned. ++ * Work around this bug by rounding down the offset to object boundaries. ++ * This is OK because we call rbd_diff_iterate2 with whole_object = true. ++ * However, this workaround only works for non cloned images with default ++ * striping. ++ * ++ * See: https://tracker.ceph.com/issues/53784 ++ */ ++ ++ /* check if RBD image has non-default striping enabled */ ++ if (features & RBD_FEATURE_STRIPINGV2) { ++ return status; ++ } ++ ++#pragma GCC diagnostic push ++#pragma GCC diagnostic ignored "-Wdeprecated-declarations" ++ /* ++ * check if RBD image is a clone (= has a parent). ++ * ++ * rbd_get_parent_info is deprecated from Nautilus onwards, but the ++ * replacement rbd_get_parent is not present in Luminous and Mimic. ++ */ ++ if (rbd_get_parent_info(s->image, NULL, 0, NULL, 0, NULL, 0) != -ENOENT) { ++ return status; ++ } ++#pragma GCC diagnostic pop ++ ++ head = req.offs & (s->object_size - 1); ++ req.offs -= head; ++ bytes += head; ++#endif ++ ++ r = rbd_diff_iterate2(s->image, NULL, req.offs, bytes, true, true, + qemu_rbd_diff_iterate_cb, &req); + if (r < 0 && r != QEMU_RBD_EXIT_DIFF_ITERATE2) { + return status; +@@ -1366,7 +1403,8 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, + status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID; + } + +- *pnum = req.bytes; ++ assert(req.bytes > head); ++ *pnum = req.bytes - head; + return status; + } + +-- +2.27.0 + diff --git a/SOURCES/kvm-display-qxl-render-fix-race-condition-in-qxl_cursor-.patch b/SOURCES/kvm-display-qxl-render-fix-race-condition-in-qxl_cursor-.patch new file mode 100644 index 0000000..040cfe1 --- /dev/null +++ b/SOURCES/kvm-display-qxl-render-fix-race-condition-in-qxl_cursor-.patch @@ -0,0 +1,58 @@ +From abd84f26e0fe0bc9952d91fbd35fb3a7253cfecf Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 13 Apr 2022 20:54:45 -0400 +Subject: [PATCH 1/2] display/qxl-render: fix race condition in qxl_cursor + (CVE-2021-4207) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 152: display/qxl-render: fix race condition in qxl_cursor (CVE-2021-4207) +RH-Commit: [1/1] f05b9a956f2e0ca522b5be127beff813d04b5588 (jmaloy/qemu-kvm) +RH-Bugzilla: 2040738 +RH-Acked-by: Gerd Hoffmann +RH-Acked-by: Mauro Matteo Cascella + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2040738 +Upstream: Merged +CVE: CVE-2021-4207 + +commit 9569f5cb5b4bffa9d3ebc8ba7da1e03830a9a895 +Author: Mauro Matteo Cascella +Date: Thu Apr 7 10:11:06 2022 +0200 + + display/qxl-render: fix race condition in qxl_cursor (CVE-2021-4207) + + Avoid fetching 'width' and 'height' a second time to prevent possible + race condition. Refer to security advisory + https://starlabs.sg/advisories/22-4207/ for more information. + + Fixes: CVE-2021-4207 + Signed-off-by: Mauro Matteo Cascella + Reviewed-by: Marc-André Lureau + Message-Id: <20220407081106.343235-1-mcascell@redhat.com> + Signed-off-by: Gerd Hoffmann + +(cherry picked from commit 9569f5cb5b4bffa9d3ebc8ba7da1e03830a9a895) +Signed-off-by: Jon Maloy +--- + hw/display/qxl-render.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/display/qxl-render.c b/hw/display/qxl-render.c +index d28849b121..237ed293ba 100644 +--- a/hw/display/qxl-render.c ++++ b/hw/display/qxl-render.c +@@ -266,7 +266,7 @@ static QEMUCursor *qxl_cursor(PCIQXLDevice *qxl, QXLCursor *cursor, + } + break; + case SPICE_CURSOR_TYPE_ALPHA: +- size = sizeof(uint32_t) * cursor->header.width * cursor->header.height; ++ size = sizeof(uint32_t) * c->width * c->height; + qxl_unpack_chunks(c->data, size, qxl, &cursor->chunk, group_id); + if (qxl->debug > 2) { + cursor_print_ascii_art(c, "qxl/alpha"); +-- +2.27.0 + diff --git a/SOURCES/kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch b/SOURCES/kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch new file mode 100644 index 0000000..4be5d14 --- /dev/null +++ b/SOURCES/kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch @@ -0,0 +1,127 @@ +From 103608465b8bd2edf7f9aaef5c3c93309ccf9ec2 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 21 Feb 2023 16:22:17 -0500 +Subject: [PATCH 12/13] dma-helpers: prevent dma_blk_cb() vs dma_aio_cancel() + race + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 264: scsi: protect req->aiocb with AioContext lock +RH-Bugzilla: 2090990 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Commit: [2/3] 14f5835093ba8c5111f3ada2fe87730371aca733 + +dma_blk_cb() only takes the AioContext lock around ->io_func(). That +means the rest of dma_blk_cb() is not protected. In particular, the +DMAAIOCB field accesses happen outside the lock. + +There is a race when the main loop thread holds the AioContext lock and +invokes scsi_device_purge_requests() -> bdrv_aio_cancel() -> +dma_aio_cancel() while an IOThread executes dma_blk_cb(). The dbs->acb +field determines how cancellation proceeds. If dma_aio_cancel() sees +dbs->acb == NULL while dma_blk_cb() is still running, the request can be +completed twice (-ECANCELED and the actual return value). + +The following assertion can occur with virtio-scsi when an IOThread is +used: + + ../hw/scsi/scsi-disk.c:368: scsi_dma_complete: Assertion `r->req.aiocb != NULL' failed. + +Fix the race by holding the AioContext across dma_blk_cb(). Now +dma_aio_cancel() under the AioContext lock will not see +inconsistent/intermediate states. + +Cc: Paolo Bonzini +Reviewed-by: Eric Blake +Signed-off-by: Stefan Hajnoczi +Message-Id: <20230221212218.1378734-3-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit abfcd2760b3e70727bbc0792221b8b98a733dc32) +Signed-off-by: Stefan Hajnoczi +--- + hw/scsi/scsi-disk.c | 4 +--- + softmmu/dma-helpers.c | 12 +++++++----- + 2 files changed, 8 insertions(+), 8 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index 179ce22c4a..c8109a673e 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -351,13 +351,12 @@ done: + scsi_req_unref(&r->req); + } + ++/* Called with AioContext lock held */ + static void scsi_dma_complete(void *opaque, int ret) + { + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); +- + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +@@ -367,7 +366,6 @@ static void scsi_dma_complete(void *opaque, int ret) + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + } + scsi_dma_complete_noio(r, ret); +- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); + } + + static void scsi_read_complete_noio(SCSIDiskReq *r, int ret) +diff --git a/softmmu/dma-helpers.c b/softmmu/dma-helpers.c +index 7d766a5e89..42af18719a 100644 +--- a/softmmu/dma-helpers.c ++++ b/softmmu/dma-helpers.c +@@ -127,17 +127,19 @@ static void dma_complete(DMAAIOCB *dbs, int ret) + static void dma_blk_cb(void *opaque, int ret) + { + DMAAIOCB *dbs = (DMAAIOCB *)opaque; ++ AioContext *ctx = dbs->ctx; + dma_addr_t cur_addr, cur_len; + void *mem; + + trace_dma_blk_cb(dbs, ret); + ++ aio_context_acquire(ctx); + dbs->acb = NULL; + dbs->offset += dbs->iov.size; + + if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) { + dma_complete(dbs, ret); +- return; ++ goto out; + } + dma_blk_unmap(dbs); + +@@ -177,9 +179,9 @@ static void dma_blk_cb(void *opaque, int ret) + + if (dbs->iov.size == 0) { + trace_dma_map_wait(dbs); +- dbs->bh = aio_bh_new(dbs->ctx, reschedule_dma, dbs); ++ dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs); + cpu_register_map_client(dbs->bh); +- return; ++ goto out; + } + + if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) { +@@ -187,11 +189,11 @@ static void dma_blk_cb(void *opaque, int ret) + QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align)); + } + +- aio_context_acquire(dbs->ctx); + dbs->acb = dbs->io_func(dbs->offset, &dbs->iov, + dma_blk_cb, dbs, dbs->io_func_opaque); +- aio_context_release(dbs->ctx); + assert(dbs->acb); ++out: ++ aio_context_release(ctx); + } + + static void dma_aio_cancel(BlockAIOCB *acb) +-- +2.37.3 + diff --git a/SOURCES/kvm-doc-Add-the-SGX-numa-description.patch b/SOURCES/kvm-doc-Add-the-SGX-numa-description.patch new file mode 100644 index 0000000..0bed8a6 --- /dev/null +++ b/SOURCES/kvm-doc-Add-the-SGX-numa-description.patch @@ -0,0 +1,77 @@ +From e8377e3f4d540e2594a50985523e87d1f3cabbc7 Mon Sep 17 00:00:00 2001 +From: Yang Zhong +Date: Mon, 1 Nov 2021 12:20:08 -0400 +Subject: [PATCH 3/7] doc: Add the SGX numa description + +RH-Author: Paul Lai +RH-MergeRequest: 111: numa: Enable numa for SGX EPC sections +RH-Commit: [3/5] 41c74688c9662b966c243566a837135ff52341c4 +RH-Bugzilla: 1518984 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Bandan Das +RH-Acked-by: Cornelia Huck + +Add the SGX numa reference command and how to check if +SGX numa is support or not with multiple EPC sections. + +Signed-off-by: Yang Zhong +Message-Id: <20211101162009.62161-5-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit d1889b36098c79e2e6ac90faf3d0dc5ec0057677) +Signed-off-by: Paul Lai +--- + docs/system/i386/sgx.rst | 31 +++++++++++++++++++++++++++---- + 1 file changed, 27 insertions(+), 4 deletions(-) + +diff --git a/docs/system/i386/sgx.rst b/docs/system/i386/sgx.rst +index f8fade5ac2..0f0a73f758 100644 +--- a/docs/system/i386/sgx.rst ++++ b/docs/system/i386/sgx.rst +@@ -141,8 +141,7 @@ To launch a SGX guest: + |qemu_system_x86| \\ + -cpu host,+sgx-provisionkey \\ + -object memory-backend-epc,id=mem1,size=64M,prealloc=on \\ +- -object memory-backend-epc,id=mem2,size=28M \\ +- -M sgx-epc.0.memdev=mem1,sgx-epc.1.memdev=mem2 ++ -M sgx-epc.0.memdev=mem1,sgx-epc.0.node=0 + + Utilizing SGX in the guest requires a kernel/OS with SGX support. + The support can be determined in guest by:: +@@ -152,8 +151,32 @@ The support can be determined in guest by:: + and SGX epc info by:: + + $ dmesg | grep sgx +- [ 1.242142] sgx: EPC section 0x180000000-0x181bfffff +- [ 1.242319] sgx: EPC section 0x181c00000-0x1837fffff ++ [ 0.182807] sgx: EPC section 0x140000000-0x143ffffff ++ [ 0.183695] sgx: [Firmware Bug]: Unable to map EPC section to online node. Fallback to the NUMA node 0. ++ ++To launch a SGX numa guest: ++ ++.. parsed-literal:: ++ ++ |qemu_system_x86| \\ ++ -cpu host,+sgx-provisionkey \\ ++ -object memory-backend-ram,size=2G,host-nodes=0,policy=bind,id=node0 \\ ++ -object memory-backend-epc,id=mem0,size=64M,prealloc=on,host-nodes=0,policy=bind \\ ++ -numa node,nodeid=0,cpus=0-1,memdev=node0 \\ ++ -object memory-backend-ram,size=2G,host-nodes=1,policy=bind,id=node1 \\ ++ -object memory-backend-epc,id=mem1,size=28M,prealloc=on,host-nodes=1,policy=bind \\ ++ -numa node,nodeid=1,cpus=2-3,memdev=node1 \\ ++ -M sgx-epc.0.memdev=mem0,sgx-epc.0.node=0,sgx-epc.1.memdev=mem1,sgx-epc.1.node=1 ++ ++and SGX epc numa info by:: ++ ++ $ dmesg | grep sgx ++ [ 0.369937] sgx: EPC section 0x180000000-0x183ffffff ++ [ 0.370259] sgx: EPC section 0x184000000-0x185bfffff ++ ++ $ dmesg | grep SRAT ++ [ 0.009981] ACPI: SRAT: Node 0 PXM 0 [mem 0x180000000-0x183ffffff] ++ [ 0.009982] ACPI: SRAT: Node 1 PXM 1 [mem 0x184000000-0x185bfffff] + + References + ---------- +-- +2.27.0 + diff --git a/SOURCES/kvm-docs-system-s390x-Document-the-loadparm-machine-prop.patch b/SOURCES/kvm-docs-system-s390x-Document-the-loadparm-machine-prop.patch new file mode 100644 index 0000000..4b6c3fe --- /dev/null +++ b/SOURCES/kvm-docs-system-s390x-Document-the-loadparm-machine-prop.patch @@ -0,0 +1,70 @@ +From 407e23d7f0c9020404247afe7d4df98505222bbb Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Mon, 14 Nov 2022 14:25:02 +0100 +Subject: [PATCH 1/3] docs/system/s390x: Document the "loadparm" machine + property +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 233: s390x: Document the "loadparm" machine property +RH-Bugzilla: 2128225 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Jon Maloy +RH-Commit: [1/2] e9589ea32d2a8f82971476b644e1063fa14cf822 + +The "loadparm" machine property is useful for selecting alternative +kernels on the disk of the guest, but so far we do not tell the users +yet how to use it. Add some documentation to fill this gap. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2128235 +Message-Id: <20221114132502.110213-1-thuth@redhat.com> +Reviewed-by: Claudio Imbrenda +Signed-off-by: Thomas Huth +(cherry picked from commit be5df2edb5d69ff3107c5616aa035a9ba8d0422e) +--- + docs/system/s390x/bootdevices.rst | 26 ++++++++++++++++++++++++++ + 1 file changed, 26 insertions(+) + +diff --git a/docs/system/s390x/bootdevices.rst b/docs/system/s390x/bootdevices.rst +index 9e591cb9dc..d4bf3b9f0b 100644 +--- a/docs/system/s390x/bootdevices.rst ++++ b/docs/system/s390x/bootdevices.rst +@@ -53,6 +53,32 @@ recommended to specify a CD-ROM device via ``-device scsi-cd`` (as mentioned + above) instead. + + ++Selecting kernels with the ``loadparm`` property ++------------------------------------------------ ++ ++The ``s390-ccw-virtio`` machine supports the so-called ``loadparm`` parameter ++which can be used to select the kernel on the disk of the guest that the ++s390-ccw bios should boot. When starting QEMU, it can be specified like this:: ++ ++ qemu-system-s390x -machine s390-ccw-virtio,loadparm= ++ ++The first way to use this parameter is to use the word ``PROMPT`` as the ++```` here. In that case the s390-ccw bios will show a list of ++installed kernels on the disk of the guest and ask the user to enter a number ++to chose which kernel should be booted -- similar to what can be achieved by ++specifying the ``-boot menu=on`` option when starting QEMU. Note that the menu ++list will only show the names of the installed kernels when using a DASD-like ++disk image with 4k byte sectors. On normal SCSI-style disks with 512-byte ++sectors, there is not enough space for the zipl loader on the disk to store ++the kernel names, so you only get a list without names here. ++ ++The second way to use this parameter is to use a number in the range from 0 ++to 31. The numbers that can be used here correspond to the numbers that are ++shown when using the ``PROMPT`` option, and the s390-ccw bios will then try ++to automatically boot the kernel that is associated with the given number. ++Note that ``0`` can be used to boot the default entry. ++ ++ + Booting from a network device + ----------------------------- + +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Add-architecture-section-and-section-string-tab.patch b/SOURCES/kvm-dump-Add-architecture-section-and-section-string-tab.patch new file mode 100644 index 0000000..bc06fa8 --- /dev/null +++ b/SOURCES/kvm-dump-Add-architecture-section-and-section-string-tab.patch @@ -0,0 +1,356 @@ +From f2f3efff83dddd38a97699cd2701f46f61a732e3 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Mon, 17 Oct 2022 11:32:10 +0000 +Subject: [PATCH 36/42] dump: Add architecture section and section string table + support +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [36/41] 83b98ff185e93e62703f686b65546d60c783d783 + +Add hooks which architectures can use to add arbitrary data to custom +sections. + +Also add a section name string table in order to identify section +contents + +Signed-off-by: Janosch Frank +Reviewed-by: Marc-André Lureau +Message-Id: <20221017113210.41674-1-frankja@linux.ibm.com> +(cherry picked from commit 9b72224f44612ddd5b434a1bccf79346946d11da) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 186 +++++++++++++++++++++++++++++++------ + include/sysemu/dump-arch.h | 3 + + include/sysemu/dump.h | 3 + + 3 files changed, 166 insertions(+), 26 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 7a42401790..4aa8fb64d2 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -104,6 +104,7 @@ static int dump_cleanup(DumpState *s) + memory_mapping_list_free(&s->list); + close(s->fd); + g_free(s->guest_note); ++ g_array_unref(s->string_table_buf); + s->guest_note = NULL; + if (s->resume) { + if (s->detached) { +@@ -153,11 +154,10 @@ static void prepare_elf64_header(DumpState *s, Elf64_Ehdr *elf_header) + elf_header->e_phoff = cpu_to_dump64(s, s->phdr_offset); + elf_header->e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr)); + elf_header->e_phnum = cpu_to_dump16(s, phnum); +- if (s->shdr_num) { +- elf_header->e_shoff = cpu_to_dump64(s, s->shdr_offset); +- elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr)); +- elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num); +- } ++ elf_header->e_shoff = cpu_to_dump64(s, s->shdr_offset); ++ elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr)); ++ elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num); ++ elf_header->e_shstrndx = cpu_to_dump16(s, s->shdr_num - 1); + } + + static void prepare_elf32_header(DumpState *s, Elf32_Ehdr *elf_header) +@@ -181,11 +181,10 @@ static void prepare_elf32_header(DumpState *s, Elf32_Ehdr *elf_header) + elf_header->e_phoff = cpu_to_dump32(s, s->phdr_offset); + elf_header->e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr)); + elf_header->e_phnum = cpu_to_dump16(s, phnum); +- if (s->shdr_num) { +- elf_header->e_shoff = cpu_to_dump32(s, s->shdr_offset); +- elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr)); +- elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num); +- } ++ elf_header->e_shoff = cpu_to_dump32(s, s->shdr_offset); ++ elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr)); ++ elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num); ++ elf_header->e_shstrndx = cpu_to_dump16(s, s->shdr_num - 1); + } + + static void write_elf_header(DumpState *s, Error **errp) +@@ -196,6 +195,8 @@ static void write_elf_header(DumpState *s, Error **errp) + void *header_ptr; + int ret; + ++ /* The NULL header and the shstrtab are always defined */ ++ assert(s->shdr_num >= 2); + if (dump_is_64bit(s)) { + prepare_elf64_header(s, &elf64_header); + header_size = sizeof(elf64_header); +@@ -394,17 +395,49 @@ static void prepare_elf_section_hdr_zero(DumpState *s) + } + } + +-static void prepare_elf_section_hdrs(DumpState *s) ++static void prepare_elf_section_hdr_string(DumpState *s, void *buff) ++{ ++ uint64_t index = s->string_table_buf->len; ++ const char strtab[] = ".shstrtab"; ++ Elf32_Shdr shdr32 = {}; ++ Elf64_Shdr shdr64 = {}; ++ int shdr_size; ++ void *shdr; ++ ++ g_array_append_vals(s->string_table_buf, strtab, sizeof(strtab)); ++ if (dump_is_64bit(s)) { ++ shdr_size = sizeof(Elf64_Shdr); ++ shdr64.sh_type = SHT_STRTAB; ++ shdr64.sh_offset = s->section_offset + s->elf_section_data_size; ++ shdr64.sh_name = index; ++ shdr64.sh_size = s->string_table_buf->len; ++ shdr = &shdr64; ++ } else { ++ shdr_size = sizeof(Elf32_Shdr); ++ shdr32.sh_type = SHT_STRTAB; ++ shdr32.sh_offset = s->section_offset + s->elf_section_data_size; ++ shdr32.sh_name = index; ++ shdr32.sh_size = s->string_table_buf->len; ++ shdr = &shdr32; ++ } ++ memcpy(buff, shdr, shdr_size); ++} ++ ++static bool prepare_elf_section_hdrs(DumpState *s, Error **errp) + { + size_t len, sizeof_shdr; ++ void *buff_hdr; + + /* + * Section ordering: + * - HDR zero ++ * - Arch section hdrs ++ * - String table hdr + */ + sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr); + len = sizeof_shdr * s->shdr_num; + s->elf_section_hdrs = g_malloc0(len); ++ buff_hdr = s->elf_section_hdrs; + + /* + * The first section header is ALWAYS a special initial section +@@ -420,6 +453,26 @@ static void prepare_elf_section_hdrs(DumpState *s) + if (s->phdr_num >= PN_XNUM) { + prepare_elf_section_hdr_zero(s); + } ++ buff_hdr += sizeof_shdr; ++ ++ /* Add architecture defined section headers */ ++ if (s->dump_info.arch_sections_write_hdr_fn ++ && s->shdr_num > 2) { ++ buff_hdr += s->dump_info.arch_sections_write_hdr_fn(s, buff_hdr); ++ ++ if (s->shdr_num >= SHN_LORESERVE) { ++ error_setg_errno(errp, EINVAL, ++ "dump: too many architecture defined sections"); ++ return false; ++ } ++ } ++ ++ /* ++ * String table is the last section since strings are added via ++ * arch_sections_write_hdr(). ++ */ ++ prepare_elf_section_hdr_string(s, buff_hdr); ++ return true; + } + + static void write_elf_section_headers(DumpState *s, Error **errp) +@@ -427,7 +480,9 @@ static void write_elf_section_headers(DumpState *s, Error **errp) + size_t sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr); + int ret; + +- prepare_elf_section_hdrs(s); ++ if (!prepare_elf_section_hdrs(s, errp)) { ++ return; ++ } + + ret = fd_write_vmcore(s->elf_section_hdrs, s->shdr_num * sizeof_shdr, s); + if (ret < 0) { +@@ -437,6 +492,29 @@ static void write_elf_section_headers(DumpState *s, Error **errp) + g_free(s->elf_section_hdrs); + } + ++static void write_elf_sections(DumpState *s, Error **errp) ++{ ++ int ret; ++ ++ if (s->elf_section_data_size) { ++ /* Write architecture section data */ ++ ret = fd_write_vmcore(s->elf_section_data, ++ s->elf_section_data_size, s); ++ if (ret < 0) { ++ error_setg_errno(errp, -ret, ++ "dump: failed to write architecture section data"); ++ return; ++ } ++ } ++ ++ /* Write string table */ ++ ret = fd_write_vmcore(s->string_table_buf->data, ++ s->string_table_buf->len, s); ++ if (ret < 0) { ++ error_setg_errno(errp, -ret, "dump: failed to write string table data"); ++ } ++} ++ + static void write_data(DumpState *s, void *buf, int length, Error **errp) + { + int ret; +@@ -693,6 +771,31 @@ static void dump_iterate(DumpState *s, Error **errp) + } + } + ++static void dump_end(DumpState *s, Error **errp) ++{ ++ int rc; ++ ERRP_GUARD(); ++ ++ if (s->elf_section_data_size) { ++ s->elf_section_data = g_malloc0(s->elf_section_data_size); ++ } ++ ++ /* Adds the architecture defined section data to s->elf_section_data */ ++ if (s->dump_info.arch_sections_write_fn && ++ s->elf_section_data_size) { ++ rc = s->dump_info.arch_sections_write_fn(s, s->elf_section_data); ++ if (rc) { ++ error_setg_errno(errp, rc, ++ "dump: failed to get arch section data"); ++ g_free(s->elf_section_data); ++ return; ++ } ++ } ++ ++ /* write sections to vmcore */ ++ write_elf_sections(s, errp); ++} ++ + static void create_vmcore(DumpState *s, Error **errp) + { + ERRP_GUARD(); +@@ -702,7 +805,14 @@ static void create_vmcore(DumpState *s, Error **errp) + return; + } + ++ /* Iterate over memory and dump it to file */ + dump_iterate(s, errp); ++ if (*errp) { ++ return; ++ } ++ ++ /* Write the section data */ ++ dump_end(s, errp); + } + + static int write_start_flat_header(int fd) +@@ -1720,6 +1830,14 @@ static void dump_init(DumpState *s, int fd, bool has_format, + s->filter_area_begin = begin; + s->filter_area_length = length; + ++ /* First index is 0, it's the special null name */ ++ s->string_table_buf = g_array_new(FALSE, TRUE, 1); ++ /* ++ * Allocate the null name, due to the clearing option set to true ++ * it will be 0. ++ */ ++ g_array_set_size(s->string_table_buf, 1); ++ + memory_mapping_list_init(&s->list); + + guest_phys_blocks_init(&s->guest_phys_blocks); +@@ -1856,26 +1974,42 @@ static void dump_init(DumpState *s, int fd, bool has_format, + } + + /* +- * calculate phdr_num ++ * The first section header is always a special one in which most ++ * fields are 0. The section header string table is also always ++ * set. ++ */ ++ s->shdr_num = 2; ++ ++ /* ++ * Adds the number of architecture sections to shdr_num and sets ++ * elf_section_data_size so we know the offsets and sizes of all ++ * parts. ++ */ ++ if (s->dump_info.arch_sections_add_fn) { ++ s->dump_info.arch_sections_add_fn(s); ++ } ++ ++ /* ++ * calculate shdr_num so we know the offsets and sizes of all ++ * parts. ++ * Calculate phdr_num + * +- * the type of ehdr->e_phnum is uint16_t, so we should avoid overflow ++ * The absolute maximum amount of phdrs is UINT32_MAX - 1 as ++ * sh_info is 32 bit. There's special handling once we go over ++ * UINT16_MAX - 1 but that is handled in the ehdr and section ++ * code. + */ +- s->phdr_num = 1; /* PT_NOTE */ +- if (s->list.num < UINT16_MAX - 2) { +- s->shdr_num = 0; ++ s->phdr_num = 1; /* Reserve PT_NOTE */ ++ if (s->list.num <= UINT32_MAX - 1) { + s->phdr_num += s->list.num; + } else { +- /* sh_info of section 0 holds the real number of phdrs */ +- s->shdr_num = 1; +- +- /* the type of shdr->sh_info is uint32_t, so we should avoid overflow */ +- if (s->list.num <= UINT32_MAX - 1) { +- s->phdr_num += s->list.num; +- } else { +- s->phdr_num = UINT32_MAX; +- } ++ s->phdr_num = UINT32_MAX; + } + ++ /* ++ * Now that the number of section and program headers is known we ++ * can calculate the offsets of the headers and data. ++ */ + if (dump_is_64bit(s)) { + s->shdr_offset = sizeof(Elf64_Ehdr); + s->phdr_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num; +diff --git a/include/sysemu/dump-arch.h b/include/sysemu/dump-arch.h +index e25b02e990..59bbc9be38 100644 +--- a/include/sysemu/dump-arch.h ++++ b/include/sysemu/dump-arch.h +@@ -21,6 +21,9 @@ typedef struct ArchDumpInfo { + uint32_t page_size; /* The target's page size. If it's variable and + * unknown, then this should be the maximum. */ + uint64_t phys_base; /* The target's physmem base. */ ++ void (*arch_sections_add_fn)(DumpState *s); ++ uint64_t (*arch_sections_write_hdr_fn)(DumpState *s, uint8_t *buff); ++ int (*arch_sections_write_fn)(DumpState *s, uint8_t *buff); + } ArchDumpInfo; + + struct GuestPhysBlockList; /* memory_mapping.h */ +diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h +index 9ed811b313..38ccac7190 100644 +--- a/include/sysemu/dump.h ++++ b/include/sysemu/dump.h +@@ -180,6 +180,9 @@ typedef struct DumpState { + hwaddr note_offset; + + void *elf_section_hdrs; /* Pointer to section header buffer */ ++ void *elf_section_data; /* Pointer to section data buffer */ ++ uint64_t elf_section_data_size; /* Size of section data */ ++ GArray *string_table_buf; /* String table data buffer */ + + uint8_t *note_buf; /* buffer for notes */ + size_t note_buf_offset; /* the writing place in note_buf */ +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Add-more-offset-variables.patch b/SOURCES/kvm-dump-Add-more-offset-variables.patch new file mode 100644 index 0000000..373f814 --- /dev/null +++ b/SOURCES/kvm-dump-Add-more-offset-variables.patch @@ -0,0 +1,138 @@ +From bee31226b87d0b05faae84e88cce3af1b8dabbfd Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Wed, 30 Mar 2022 12:35:59 +0000 +Subject: [PATCH 17/42] dump: Add more offset variables +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [17/41] fbe629e1476e8a0e039f989af6e1f4707075ba01 + +Offset calculations are easy enough to get wrong. Let's add a few +variables to make moving around elf headers and data sections easier. + +Signed-off-by: Janosch Frank +Reviewed-by: Marc-André Lureau +Reviewed-by: Richard Henderson +Message-Id: <20220330123603.107120-6-frankja@linux.ibm.com> +(cherry picked from commit e71d353360bb09a8e784e35d78370c691f6ea185) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 35 +++++++++++++++-------------------- + include/sysemu/dump.h | 4 ++++ + 2 files changed, 19 insertions(+), 20 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 5cc2322325..85a402b38c 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -142,13 +142,11 @@ static void write_elf64_header(DumpState *s, Error **errp) + elf_header.e_machine = cpu_to_dump16(s, s->dump_info.d_machine); + elf_header.e_version = cpu_to_dump32(s, EV_CURRENT); + elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header)); +- elf_header.e_phoff = cpu_to_dump64(s, sizeof(Elf64_Ehdr)); ++ elf_header.e_phoff = cpu_to_dump64(s, s->phdr_offset); + elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr)); + elf_header.e_phnum = cpu_to_dump16(s, phnum); + if (s->shdr_num) { +- uint64_t shoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->phdr_num; +- +- elf_header.e_shoff = cpu_to_dump64(s, shoff); ++ elf_header.e_shoff = cpu_to_dump64(s, s->shdr_offset); + elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr)); + elf_header.e_shnum = cpu_to_dump16(s, s->shdr_num); + } +@@ -179,13 +177,11 @@ static void write_elf32_header(DumpState *s, Error **errp) + elf_header.e_machine = cpu_to_dump16(s, s->dump_info.d_machine); + elf_header.e_version = cpu_to_dump32(s, EV_CURRENT); + elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header)); +- elf_header.e_phoff = cpu_to_dump32(s, sizeof(Elf32_Ehdr)); ++ elf_header.e_phoff = cpu_to_dump32(s, s->phdr_offset); + elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr)); + elf_header.e_phnum = cpu_to_dump16(s, phnum); + if (s->shdr_num) { +- uint32_t shoff = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->phdr_num; +- +- elf_header.e_shoff = cpu_to_dump32(s, shoff); ++ elf_header.e_shoff = cpu_to_dump32(s, s->shdr_offset); + elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr)); + elf_header.e_shnum = cpu_to_dump16(s, s->shdr_num); + } +@@ -248,12 +244,11 @@ static void write_elf32_load(DumpState *s, MemoryMapping *memory_mapping, + static void write_elf64_note(DumpState *s, Error **errp) + { + Elf64_Phdr phdr; +- hwaddr begin = s->memory_offset - s->note_size; + int ret; + + memset(&phdr, 0, sizeof(Elf64_Phdr)); + phdr.p_type = cpu_to_dump32(s, PT_NOTE); +- phdr.p_offset = cpu_to_dump64(s, begin); ++ phdr.p_offset = cpu_to_dump64(s, s->note_offset); + phdr.p_paddr = 0; + phdr.p_filesz = cpu_to_dump64(s, s->note_size); + phdr.p_memsz = cpu_to_dump64(s, s->note_size); +@@ -313,13 +308,12 @@ static void write_elf64_notes(WriteCoreDumpFunction f, DumpState *s, + + static void write_elf32_note(DumpState *s, Error **errp) + { +- hwaddr begin = s->memory_offset - s->note_size; + Elf32_Phdr phdr; + int ret; + + memset(&phdr, 0, sizeof(Elf32_Phdr)); + phdr.p_type = cpu_to_dump32(s, PT_NOTE); +- phdr.p_offset = cpu_to_dump32(s, begin); ++ phdr.p_offset = cpu_to_dump32(s, s->note_offset); + phdr.p_paddr = 0; + phdr.p_filesz = cpu_to_dump32(s, s->note_size); + phdr.p_memsz = cpu_to_dump32(s, s->note_size); +@@ -1826,15 +1820,16 @@ static void dump_init(DumpState *s, int fd, bool has_format, + } + + if (s->dump_info.d_class == ELFCLASS64) { +- s->memory_offset = sizeof(Elf64_Ehdr) + +- sizeof(Elf64_Phdr) * s->phdr_num + +- sizeof(Elf64_Shdr) * s->shdr_num + +- s->note_size; ++ s->phdr_offset = sizeof(Elf64_Ehdr); ++ s->shdr_offset = s->phdr_offset + sizeof(Elf64_Phdr) * s->phdr_num; ++ s->note_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num; ++ s->memory_offset = s->note_offset + s->note_size; + } else { +- s->memory_offset = sizeof(Elf32_Ehdr) + +- sizeof(Elf32_Phdr) * s->phdr_num + +- sizeof(Elf32_Shdr) * s->shdr_num + +- s->note_size; ++ ++ s->phdr_offset = sizeof(Elf32_Ehdr); ++ s->shdr_offset = s->phdr_offset + sizeof(Elf32_Phdr) * s->phdr_num; ++ s->note_offset = s->shdr_offset + sizeof(Elf32_Shdr) * s->shdr_num; ++ s->memory_offset = s->note_offset + s->note_size; + } + + return; +diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h +index 19458bffbd..ffc2ea1072 100644 +--- a/include/sysemu/dump.h ++++ b/include/sysemu/dump.h +@@ -159,6 +159,10 @@ typedef struct DumpState { + bool resume; + bool detached; + ssize_t note_size; ++ hwaddr shdr_offset; ++ hwaddr phdr_offset; ++ hwaddr section_offset; ++ hwaddr note_offset; + hwaddr memory_offset; + int fd; + +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Cleanup-dump_begin-write-functions.patch b/SOURCES/kvm-dump-Cleanup-dump_begin-write-functions.patch new file mode 100644 index 0000000..449aab4 --- /dev/null +++ b/SOURCES/kvm-dump-Cleanup-dump_begin-write-functions.patch @@ -0,0 +1,94 @@ +From cbb653d73e32513ccd46b293a52384eed6a5f84f Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Wed, 30 Mar 2022 12:36:02 +0000 +Subject: [PATCH 20/42] dump: Cleanup dump_begin write functions +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [20/41] 18ea1457a3e54fd368e556d96c3be50c6ad0a6bd + +There's no need to have a gigantic if in there let's move the elf +32/64 bit logic into the section, segment or note code. + +Signed-off-by: Janosch Frank +Reviewed-by: Richard Henderson +Reviewed-by: Marc-André Lureau +Message-Id: <20220330123603.107120-9-frankja@linux.ibm.com> +(cherry picked from commit 5ff2e5a3e1e67930e523486e39549a33fcf97227) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 42 +++++++++++------------------------------- + 1 file changed, 11 insertions(+), 31 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 823ca32883..88abde355a 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -565,46 +565,26 @@ static void dump_begin(DumpState *s, Error **errp) + return; + } + +- if (dump_is_64bit(s)) { +- /* write all PT_LOAD to vmcore */ +- write_elf_loads(s, errp); ++ /* write all PT_LOAD to vmcore */ ++ write_elf_loads(s, errp); ++ if (*errp) { ++ return; ++ } ++ ++ /* write section to vmcore */ ++ if (s->shdr_num) { ++ write_elf_section(s, 1, errp); + if (*errp) { + return; + } ++ } + +- /* write section to vmcore */ +- if (s->shdr_num) { +- write_elf_section(s, 1, errp); +- if (*errp) { +- return; +- } +- } +- ++ if (dump_is_64bit(s)) { + /* write notes to vmcore */ + write_elf64_notes(fd_write_vmcore, s, errp); +- if (*errp) { +- return; +- } + } else { +- /* write all PT_LOAD to vmcore */ +- write_elf_loads(s, errp); +- if (*errp) { +- return; +- } +- +- /* write section to vmcore */ +- if (s->shdr_num) { +- write_elf_section(s, 0, errp); +- if (*errp) { +- return; +- } +- } +- + /* write notes to vmcore */ + write_elf32_notes(fd_write_vmcore, s, errp); +- if (*errp) { +- return; +- } + } + } + +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Consolidate-elf-note-function.patch b/SOURCES/kvm-dump-Consolidate-elf-note-function.patch new file mode 100644 index 0000000..3353e4a --- /dev/null +++ b/SOURCES/kvm-dump-Consolidate-elf-note-function.patch @@ -0,0 +1,67 @@ +From 0547599cf507930f91943f22d5f917ebacf69484 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Wed, 30 Mar 2022 12:36:03 +0000 +Subject: [PATCH 21/42] dump: Consolidate elf note function +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [21/41] 52298c098c116aea75ad15894731ff412c2c4e73 + +Just like with the other write functions let's move the 32/64 bit elf +handling to a function to improve readability. + +Signed-off-by: Janosch Frank +Reviewed-by: Marc-André Lureau +Message-Id: <20220330123603.107120-10-frankja@linux.ibm.com> +(cherry picked from commit c68124738bc29017e4254c898bc40be7be477af7) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 18 +++++++++++------- + 1 file changed, 11 insertions(+), 7 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 88abde355a..a451abc590 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -520,6 +520,15 @@ static void write_elf_loads(DumpState *s, Error **errp) + } + } + ++static void write_elf_notes(DumpState *s, Error **errp) ++{ ++ if (dump_is_64bit(s)) { ++ write_elf64_notes(fd_write_vmcore, s, errp); ++ } else { ++ write_elf32_notes(fd_write_vmcore, s, errp); ++ } ++} ++ + /* write elf header, PT_NOTE and elf note to vmcore. */ + static void dump_begin(DumpState *s, Error **errp) + { +@@ -579,13 +588,8 @@ static void dump_begin(DumpState *s, Error **errp) + } + } + +- if (dump_is_64bit(s)) { +- /* write notes to vmcore */ +- write_elf64_notes(fd_write_vmcore, s, errp); +- } else { +- /* write notes to vmcore */ +- write_elf32_notes(fd_write_vmcore, s, errp); +- } ++ /* write notes to vmcore */ ++ write_elf_notes(s, errp); + } + + static int get_next_block(DumpState *s, GuestPhysBlock *block) +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Consolidate-phdr-note-writes.patch b/SOURCES/kvm-dump-Consolidate-phdr-note-writes.patch new file mode 100644 index 0000000..700927a --- /dev/null +++ b/SOURCES/kvm-dump-Consolidate-phdr-note-writes.patch @@ -0,0 +1,169 @@ +From f87abe1ef14e80731249ebe9fe1bea569a68e9b4 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Wed, 30 Mar 2022 12:36:01 +0000 +Subject: [PATCH 19/42] dump: Consolidate phdr note writes +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [19/41] 180c4c0ab4941a0bf366dc7f32ee035e03daa6c0 + +There's no need to have two write functions. Let's rather have two +functions that set the data for elf 32/64 and then write it in a +common function. + +Signed-off-by: Janosch Frank +Reviewed-by: Richard Henderson +Reviewed-by: Marc-André Lureau +Message-Id: <20220330123603.107120-8-frankja@linux.ibm.com> +(cherry picked from commit bc7d558017e6700f9a05c61b0b638a8994945f0d) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 94 +++++++++++++++++++++++++++-------------------------- + 1 file changed, 48 insertions(+), 46 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 6394e94023..823ca32883 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -246,24 +246,15 @@ static void write_elf32_load(DumpState *s, MemoryMapping *memory_mapping, + } + } + +-static void write_elf64_note(DumpState *s, Error **errp) ++static void write_elf64_phdr_note(DumpState *s, Elf64_Phdr *phdr) + { +- Elf64_Phdr phdr; +- int ret; +- +- memset(&phdr, 0, sizeof(Elf64_Phdr)); +- phdr.p_type = cpu_to_dump32(s, PT_NOTE); +- phdr.p_offset = cpu_to_dump64(s, s->note_offset); +- phdr.p_paddr = 0; +- phdr.p_filesz = cpu_to_dump64(s, s->note_size); +- phdr.p_memsz = cpu_to_dump64(s, s->note_size); +- phdr.p_vaddr = 0; +- +- ret = fd_write_vmcore(&phdr, sizeof(Elf64_Phdr), s); +- if (ret < 0) { +- error_setg_errno(errp, -ret, +- "dump: failed to write program header table"); +- } ++ memset(phdr, 0, sizeof(*phdr)); ++ phdr->p_type = cpu_to_dump32(s, PT_NOTE); ++ phdr->p_offset = cpu_to_dump64(s, s->note_offset); ++ phdr->p_paddr = 0; ++ phdr->p_filesz = cpu_to_dump64(s, s->note_size); ++ phdr->p_memsz = cpu_to_dump64(s, s->note_size); ++ phdr->p_vaddr = 0; + } + + static inline int cpu_index(CPUState *cpu) +@@ -311,24 +302,15 @@ static void write_elf64_notes(WriteCoreDumpFunction f, DumpState *s, + write_guest_note(f, s, errp); + } + +-static void write_elf32_note(DumpState *s, Error **errp) ++static void write_elf32_phdr_note(DumpState *s, Elf32_Phdr *phdr) + { +- Elf32_Phdr phdr; +- int ret; +- +- memset(&phdr, 0, sizeof(Elf32_Phdr)); +- phdr.p_type = cpu_to_dump32(s, PT_NOTE); +- phdr.p_offset = cpu_to_dump32(s, s->note_offset); +- phdr.p_paddr = 0; +- phdr.p_filesz = cpu_to_dump32(s, s->note_size); +- phdr.p_memsz = cpu_to_dump32(s, s->note_size); +- phdr.p_vaddr = 0; +- +- ret = fd_write_vmcore(&phdr, sizeof(Elf32_Phdr), s); +- if (ret < 0) { +- error_setg_errno(errp, -ret, +- "dump: failed to write program header table"); +- } ++ memset(phdr, 0, sizeof(*phdr)); ++ phdr->p_type = cpu_to_dump32(s, PT_NOTE); ++ phdr->p_offset = cpu_to_dump32(s, s->note_offset); ++ phdr->p_paddr = 0; ++ phdr->p_filesz = cpu_to_dump32(s, s->note_size); ++ phdr->p_memsz = cpu_to_dump32(s, s->note_size); ++ phdr->p_vaddr = 0; + } + + static void write_elf32_notes(WriteCoreDumpFunction f, DumpState *s, +@@ -358,6 +340,32 @@ static void write_elf32_notes(WriteCoreDumpFunction f, DumpState *s, + write_guest_note(f, s, errp); + } + ++static void write_elf_phdr_note(DumpState *s, Error **errp) ++{ ++ ERRP_GUARD(); ++ Elf32_Phdr phdr32; ++ Elf64_Phdr phdr64; ++ void *phdr; ++ size_t size; ++ int ret; ++ ++ if (dump_is_64bit(s)) { ++ write_elf64_phdr_note(s, &phdr64); ++ size = sizeof(phdr64); ++ phdr = &phdr64; ++ } else { ++ write_elf32_phdr_note(s, &phdr32); ++ size = sizeof(phdr32); ++ phdr = &phdr32; ++ } ++ ++ ret = fd_write_vmcore(phdr, size, s); ++ if (ret < 0) { ++ error_setg_errno(errp, -ret, ++ "dump: failed to write program header table"); ++ } ++} ++ + static void write_elf_section(DumpState *s, int type, Error **errp) + { + Elf32_Shdr shdr32; +@@ -551,13 +559,13 @@ static void dump_begin(DumpState *s, Error **errp) + return; + } + +- if (dump_is_64bit(s)) { +- /* write PT_NOTE to vmcore */ +- write_elf64_note(s, errp); +- if (*errp) { +- return; +- } ++ /* write PT_NOTE to vmcore */ ++ write_elf_phdr_note(s, errp); ++ if (*errp) { ++ return; ++ } + ++ if (dump_is_64bit(s)) { + /* write all PT_LOAD to vmcore */ + write_elf_loads(s, errp); + if (*errp) { +@@ -578,12 +586,6 @@ static void dump_begin(DumpState *s, Error **errp) + return; + } + } else { +- /* write PT_NOTE to vmcore */ +- write_elf32_note(s, errp); +- if (*errp) { +- return; +- } +- + /* write all PT_LOAD to vmcore */ + write_elf_loads(s, errp); + if (*errp) { +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Introduce-dump_is_64bit-helper-function.patch b/SOURCES/kvm-dump-Introduce-dump_is_64bit-helper-function.patch new file mode 100644 index 0000000..ac5d269 --- /dev/null +++ b/SOURCES/kvm-dump-Introduce-dump_is_64bit-helper-function.patch @@ -0,0 +1,118 @@ +From c851676d202b5b76962529f3b6d433936becbd8a Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Wed, 30 Mar 2022 12:36:00 +0000 +Subject: [PATCH 18/42] dump: Introduce dump_is_64bit() helper function +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [18/41] a0fd2d1985c61b8e50d4a7ca26bc0ee6fcaa6196 + +Checking d_class in dump_info leads to lengthy conditionals so let's +shorten things a bit by introducing a helper function. + +Signed-off-by: Janosch Frank +Reviewed-by: Richard Henderson +Reviewed-by: Marc-André Lureau +Message-Id: <20220330123603.107120-7-frankja@linux.ibm.com> +(cherry picked from commit 05bbaa5040ccb3419e8b93af8040485430e2db42) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 25 +++++++++++++++---------- + 1 file changed, 15 insertions(+), 10 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 85a402b38c..6394e94023 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -55,6 +55,11 @@ static Error *dump_migration_blocker; + DIV_ROUND_UP((name_size), 4) + \ + DIV_ROUND_UP((desc_size), 4)) * 4) + ++static inline bool dump_is_64bit(DumpState *s) ++{ ++ return s->dump_info.d_class == ELFCLASS64; ++} ++ + uint16_t cpu_to_dump16(DumpState *s, uint16_t val) + { + if (s->dump_info.d_endian == ELFDATA2LSB) { +@@ -489,7 +494,7 @@ static void write_elf_loads(DumpState *s, Error **errp) + get_offset_range(memory_mapping->phys_addr, + memory_mapping->length, + s, &offset, &filesz); +- if (s->dump_info.d_class == ELFCLASS64) { ++ if (dump_is_64bit(s)) { + write_elf64_load(s, memory_mapping, phdr_index++, offset, + filesz, errp); + } else { +@@ -537,7 +542,7 @@ static void dump_begin(DumpState *s, Error **errp) + */ + + /* write elf header to vmcore */ +- if (s->dump_info.d_class == ELFCLASS64) { ++ if (dump_is_64bit(s)) { + write_elf64_header(s, errp); + } else { + write_elf32_header(s, errp); +@@ -546,7 +551,7 @@ static void dump_begin(DumpState *s, Error **errp) + return; + } + +- if (s->dump_info.d_class == ELFCLASS64) { ++ if (dump_is_64bit(s)) { + /* write PT_NOTE to vmcore */ + write_elf64_note(s, errp); + if (*errp) { +@@ -757,7 +762,7 @@ static void get_note_sizes(DumpState *s, const void *note, + uint64_t name_sz; + uint64_t desc_sz; + +- if (s->dump_info.d_class == ELFCLASS64) { ++ if (dump_is_64bit(s)) { + const Elf64_Nhdr *hdr = note; + note_head_sz = sizeof(Elf64_Nhdr); + name_sz = tswap64(hdr->n_namesz); +@@ -1017,10 +1022,10 @@ out: + + static void write_dump_header(DumpState *s, Error **errp) + { +- if (s->dump_info.d_class == ELFCLASS32) { +- create_header32(s, errp); +- } else { ++ if (dump_is_64bit(s)) { + create_header64(s, errp); ++ } else { ++ create_header32(s, errp); + } + } + +@@ -1715,8 +1720,8 @@ static void dump_init(DumpState *s, int fd, bool has_format, + uint32_t size; + uint16_t format; + +- note_head_size = s->dump_info.d_class == ELFCLASS32 ? +- sizeof(Elf32_Nhdr) : sizeof(Elf64_Nhdr); ++ note_head_size = dump_is_64bit(s) ? ++ sizeof(Elf64_Nhdr) : sizeof(Elf32_Nhdr); + + format = le16_to_cpu(vmci->vmcoreinfo.guest_format); + size = le32_to_cpu(vmci->vmcoreinfo.size); +@@ -1819,7 +1824,7 @@ static void dump_init(DumpState *s, int fd, bool has_format, + } + } + +- if (s->dump_info.d_class == ELFCLASS64) { ++ if (dump_is_64bit(s)) { + s->phdr_offset = sizeof(Elf64_Ehdr); + s->shdr_offset = s->phdr_offset + sizeof(Elf64_Phdr) * s->phdr_num; + s->note_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num; +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Introduce-shdr_num-to-decrease-complexity.patch b/SOURCES/kvm-dump-Introduce-shdr_num-to-decrease-complexity.patch new file mode 100644 index 0000000..71414ed --- /dev/null +++ b/SOURCES/kvm-dump-Introduce-shdr_num-to-decrease-complexity.patch @@ -0,0 +1,136 @@ +From 255722667a4fa4d522bb0b7e0825cbbe635abb8d Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Wed, 30 Mar 2022 12:35:57 +0000 +Subject: [PATCH 15/42] dump: Introduce shdr_num to decrease complexity +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [15/41] b0215ea5d381ef7f6abfe3f3bafea51ce933da56 + +Let's move from a boolean to a int variable which will later enable us +to store the number of sections that are in the dump file. + +Signed-off-by: Janosch Frank +Reviewed-by: Richard Henderson +Reviewed-by: Marc-André Lureau +Message-Id: <20220330123603.107120-4-frankja@linux.ibm.com> +(cherry picked from commit 862a395858e5a302ed5921487777acdc95a3a31b) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 24 ++++++++++++------------ + include/sysemu/dump.h | 2 +- + 2 files changed, 13 insertions(+), 13 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 7236b167cc..972e28b089 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -145,12 +145,12 @@ static void write_elf64_header(DumpState *s, Error **errp) + elf_header.e_phoff = cpu_to_dump64(s, sizeof(Elf64_Ehdr)); + elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr)); + elf_header.e_phnum = cpu_to_dump16(s, phnum); +- if (s->have_section) { ++ if (s->shdr_num) { + uint64_t shoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->phdr_num; + + elf_header.e_shoff = cpu_to_dump64(s, shoff); + elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr)); +- elf_header.e_shnum = cpu_to_dump16(s, 1); ++ elf_header.e_shnum = cpu_to_dump16(s, s->shdr_num); + } + + ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s); +@@ -182,12 +182,12 @@ static void write_elf32_header(DumpState *s, Error **errp) + elf_header.e_phoff = cpu_to_dump32(s, sizeof(Elf32_Ehdr)); + elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr)); + elf_header.e_phnum = cpu_to_dump16(s, phnum); +- if (s->have_section) { ++ if (s->shdr_num) { + uint32_t shoff = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->phdr_num; + + elf_header.e_shoff = cpu_to_dump32(s, shoff); + elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr)); +- elf_header.e_shnum = cpu_to_dump16(s, 1); ++ elf_header.e_shnum = cpu_to_dump16(s, s->shdr_num); + } + + ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s); +@@ -566,7 +566,7 @@ static void dump_begin(DumpState *s, Error **errp) + } + + /* write section to vmcore */ +- if (s->have_section) { ++ if (s->shdr_num) { + write_elf_section(s, 1, errp); + if (*errp) { + return; +@@ -592,7 +592,7 @@ static void dump_begin(DumpState *s, Error **errp) + } + + /* write section to vmcore */ +- if (s->have_section) { ++ if (s->shdr_num) { + write_elf_section(s, 0, errp); + if (*errp) { + return; +@@ -1811,11 +1811,11 @@ static void dump_init(DumpState *s, int fd, bool has_format, + */ + s->phdr_num = 1; /* PT_NOTE */ + if (s->list.num < UINT16_MAX - 2) { ++ s->shdr_num = 0; + s->phdr_num += s->list.num; +- s->have_section = false; + } else { + /* sh_info of section 0 holds the real number of phdrs */ +- s->have_section = true; ++ s->shdr_num = 1; + + /* the type of shdr->sh_info is uint32_t, so we should avoid overflow */ + if (s->list.num <= UINT32_MAX - 1) { +@@ -1826,19 +1826,19 @@ static void dump_init(DumpState *s, int fd, bool has_format, + } + + if (s->dump_info.d_class == ELFCLASS64) { +- if (s->have_section) { ++ if (s->shdr_num) { + s->memory_offset = sizeof(Elf64_Ehdr) + + sizeof(Elf64_Phdr) * s->phdr_num + +- sizeof(Elf64_Shdr) + s->note_size; ++ sizeof(Elf64_Shdr) * s->shdr_num + s->note_size; + } else { + s->memory_offset = sizeof(Elf64_Ehdr) + + sizeof(Elf64_Phdr) * s->phdr_num + s->note_size; + } + } else { +- if (s->have_section) { ++ if (s->shdr_num) { + s->memory_offset = sizeof(Elf32_Ehdr) + + sizeof(Elf32_Phdr) * s->phdr_num + +- sizeof(Elf32_Shdr) + s->note_size; ++ sizeof(Elf32_Shdr) * s->shdr_num + s->note_size; + } else { + s->memory_offset = sizeof(Elf32_Ehdr) + + sizeof(Elf32_Phdr) * s->phdr_num + s->note_size; +diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h +index b463fc9c02..19458bffbd 100644 +--- a/include/sysemu/dump.h ++++ b/include/sysemu/dump.h +@@ -155,7 +155,7 @@ typedef struct DumpState { + ArchDumpInfo dump_info; + MemoryMappingList list; + uint32_t phdr_num; +- bool have_section; ++ uint32_t shdr_num; + bool resume; + bool detached; + ssize_t note_size; +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Refactor-dump_iterate-and-introduce-dump_filter.patch b/SOURCES/kvm-dump-Refactor-dump_iterate-and-introduce-dump_filter.patch new file mode 100644 index 0000000..5d66c34 --- /dev/null +++ b/SOURCES/kvm-dump-Refactor-dump_iterate-and-introduce-dump_filter.patch @@ -0,0 +1,142 @@ +From a18ba2fbaf132724e81be92da42b36d8f365e66c Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Thu, 11 Aug 2022 12:10:56 +0000 +Subject: [PATCH 24/42] dump: Refactor dump_iterate and introduce + dump_filter_memblock_*() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [24/41] 74ef470f24d9d98093c4d63730a99474587033fd + +The iteration over the memblocks in dump_iterate() is hard to +understand so it's about time to clean it up. Instead of manually +grabbing the next memblock we can use QTAILQ_FOREACH to iterate over +all memblocks. + +Additionally we move the calculation of the offset and length out by +introducing and using the dump_filter_memblock_*() functions. These +functions will later be used to cleanup other parts of dump.c. + +Signed-off-by: Janosch Frank +Reviewed-by: Janis Schoetterl-Glausch +Reviewed-by: Marc-André Lureau +Message-Id: <20220811121111.9878-4-frankja@linux.ibm.com> +(cherry picked from commit 1e8113032f5b1efc5da66382470ce4809c76f8f2) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 74 ++++++++++++++++++++++++++++++----------------------- + 1 file changed, 42 insertions(+), 32 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index fa787f379f..d981e843dd 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -592,31 +592,43 @@ static void dump_begin(DumpState *s, Error **errp) + write_elf_notes(s, errp); + } + +-static int get_next_block(DumpState *s, GuestPhysBlock *block) ++static int64_t dump_filtered_memblock_size(GuestPhysBlock *block, ++ int64_t filter_area_start, ++ int64_t filter_area_length) + { +- while (1) { +- block = QTAILQ_NEXT(block, next); +- if (!block) { +- /* no more block */ +- return 1; +- } ++ int64_t size, left, right; + +- s->start = 0; +- s->next_block = block; +- if (s->has_filter) { +- if (block->target_start >= s->begin + s->length || +- block->target_end <= s->begin) { +- /* This block is out of the range */ +- continue; +- } ++ /* No filter, return full size */ ++ if (!filter_area_length) { ++ return block->target_end - block->target_start; ++ } + +- if (s->begin > block->target_start) { +- s->start = s->begin - block->target_start; +- } ++ /* calculate the overlapped region. */ ++ left = MAX(filter_area_start, block->target_start); ++ right = MIN(filter_area_start + filter_area_length, block->target_end); ++ size = right - left; ++ size = size > 0 ? size : 0; ++ ++ return size; ++} ++ ++static int64_t dump_filtered_memblock_start(GuestPhysBlock *block, ++ int64_t filter_area_start, ++ int64_t filter_area_length) ++{ ++ if (filter_area_length) { ++ /* return -1 if the block is not within filter area */ ++ if (block->target_start >= filter_area_start + filter_area_length || ++ block->target_end <= filter_area_start) { ++ return -1; + } + +- return 0; ++ if (filter_area_start > block->target_start) { ++ return filter_area_start - block->target_start; ++ } + } ++ ++ return 0; + } + + /* write all memory to vmcore */ +@@ -624,24 +636,22 @@ static void dump_iterate(DumpState *s, Error **errp) + { + ERRP_GUARD(); + GuestPhysBlock *block; +- int64_t size; +- +- do { +- block = s->next_block; ++ int64_t memblock_size, memblock_start; + +- size = block->target_end - block->target_start; +- if (s->has_filter) { +- size -= s->start; +- if (s->begin + s->length < block->target_end) { +- size -= block->target_end - (s->begin + s->length); +- } ++ QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { ++ memblock_start = dump_filtered_memblock_start(block, s->begin, s->length); ++ if (memblock_start == -1) { ++ continue; + } +- write_memory(s, block, s->start, size, errp); ++ ++ memblock_size = dump_filtered_memblock_size(block, s->begin, s->length); ++ ++ /* Write the memory to file */ ++ write_memory(s, block, memblock_start, memblock_size, errp); + if (*errp) { + return; + } +- +- } while (!get_next_block(s, block)); ++ } + } + + static void create_vmcore(DumpState *s, Error **errp) +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Reintroduce-memory_offset-and-section_offset.patch b/SOURCES/kvm-dump-Reintroduce-memory_offset-and-section_offset.patch new file mode 100644 index 0000000..0bb95f9 --- /dev/null +++ b/SOURCES/kvm-dump-Reintroduce-memory_offset-and-section_offset.patch @@ -0,0 +1,45 @@ +From 6932fe3afbec443bbf6acff5b707536254e1bc37 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Mon, 17 Oct 2022 08:38:16 +0000 +Subject: [PATCH 35/42] dump: Reintroduce memory_offset and section_offset +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [35/41] e60c0d066aeeedb42e724712bc3aa7b7591c6c79 + +section_offset will later be used to store the offset to the section +data which will be stored last. For now memory_offset is only needed +to make section_offset look nicer. + +Signed-off-by: Janosch Frank +Reviewed-by: Marc-André Lureau +Message-Id: <20221017083822.43118-5-frankja@linux.ibm.com> +(cherry picked from commit 13fd417ddc81a1685c6a8f4e1c80bbfe7150f164) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/dump/dump.c b/dump/dump.c +index d17537d4e9..7a42401790 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -1885,6 +1885,8 @@ static void dump_init(DumpState *s, int fd, bool has_format, + s->phdr_offset = s->shdr_offset + sizeof(Elf32_Shdr) * s->shdr_num; + s->note_offset = s->phdr_offset + sizeof(Elf32_Phdr) * s->phdr_num; + } ++ s->memory_offset = s->note_offset + s->note_size; ++ s->section_offset = s->memory_offset + s->total_size; + + return; + +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Remove-the-section-if-when-calculating-the-memo.patch b/SOURCES/kvm-dump-Remove-the-section-if-when-calculating-the-memo.patch new file mode 100644 index 0000000..b4a1f10 --- /dev/null +++ b/SOURCES/kvm-dump-Remove-the-section-if-when-calculating-the-memo.patch @@ -0,0 +1,70 @@ +From a8eeab6936a2bd27b33b63aed7e2ef96034f7772 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Wed, 30 Mar 2022 12:35:58 +0000 +Subject: [PATCH 16/42] dump: Remove the section if when calculating the memory + offset +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [16/41] ff214d2c23b9cb16fd49d22d976829267df43133 + +When s->shdr_num is 0 we'll add 0 bytes of section headers which is +equivalent to not adding section headers but with the multiplication +we can remove a if/else. + +Signed-off-by: Janosch Frank +Reviewed-by: Richard Henderson +Reviewed-by: Marc-André Lureau +Message-Id: <20220330123603.107120-5-frankja@linux.ibm.com> +(cherry picked from commit 344107e07bd81546474a54ab83800158ca953059) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 24 ++++++++---------------- + 1 file changed, 8 insertions(+), 16 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 972e28b089..5cc2322325 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -1826,23 +1826,15 @@ static void dump_init(DumpState *s, int fd, bool has_format, + } + + if (s->dump_info.d_class == ELFCLASS64) { +- if (s->shdr_num) { +- s->memory_offset = sizeof(Elf64_Ehdr) + +- sizeof(Elf64_Phdr) * s->phdr_num + +- sizeof(Elf64_Shdr) * s->shdr_num + s->note_size; +- } else { +- s->memory_offset = sizeof(Elf64_Ehdr) + +- sizeof(Elf64_Phdr) * s->phdr_num + s->note_size; +- } ++ s->memory_offset = sizeof(Elf64_Ehdr) + ++ sizeof(Elf64_Phdr) * s->phdr_num + ++ sizeof(Elf64_Shdr) * s->shdr_num + ++ s->note_size; + } else { +- if (s->shdr_num) { +- s->memory_offset = sizeof(Elf32_Ehdr) + +- sizeof(Elf32_Phdr) * s->phdr_num + +- sizeof(Elf32_Shdr) * s->shdr_num + s->note_size; +- } else { +- s->memory_offset = sizeof(Elf32_Ehdr) + +- sizeof(Elf32_Phdr) * s->phdr_num + s->note_size; +- } ++ s->memory_offset = sizeof(Elf32_Ehdr) + ++ sizeof(Elf32_Phdr) * s->phdr_num + ++ sizeof(Elf32_Shdr) * s->shdr_num + ++ s->note_size; + } + + return; +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Remove-the-sh_info-variable.patch b/SOURCES/kvm-dump-Remove-the-sh_info-variable.patch new file mode 100644 index 0000000..3c9fe51 --- /dev/null +++ b/SOURCES/kvm-dump-Remove-the-sh_info-variable.patch @@ -0,0 +1,176 @@ +From eb763bec53d6b9aea7a6b60b0cf8c5d8b5f1b35c Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Thu, 7 Apr 2022 09:48:24 +0000 +Subject: [PATCH 14/42] dump: Remove the sh_info variable +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [14/41] 24af12b78c8f5a02cf85df2f6b1d64249f9499c9 + +There's no need to have phdr_num and sh_info at the same time. We can +make phdr_num 32 bit and set PN_XNUM when we write the header if +phdr_num >= PN_XNUM. + +Signed-off-by: Janosch Frank +Reviewed-by: Richard Henderson +Reviewed-by: Marc-André Lureau +Message-Id: <20220407094824.5074-1-frankja@linux.ibm.com> +(cherry picked from commit 046bc4160bc780eaacc2d702a2589f1a7a01188d) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 44 +++++++++++++++++++++++-------------------- + include/sysemu/dump.h | 3 +-- + 2 files changed, 25 insertions(+), 22 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 9876123f2e..7236b167cc 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -124,6 +124,12 @@ static int fd_write_vmcore(const void *buf, size_t size, void *opaque) + + static void write_elf64_header(DumpState *s, Error **errp) + { ++ /* ++ * phnum in the elf header is 16 bit, if we have more segments we ++ * set phnum to PN_XNUM and write the real number of segments to a ++ * special section. ++ */ ++ uint16_t phnum = MIN(s->phdr_num, PN_XNUM); + Elf64_Ehdr elf_header; + int ret; + +@@ -138,9 +144,9 @@ static void write_elf64_header(DumpState *s, Error **errp) + elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header)); + elf_header.e_phoff = cpu_to_dump64(s, sizeof(Elf64_Ehdr)); + elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr)); +- elf_header.e_phnum = cpu_to_dump16(s, s->phdr_num); ++ elf_header.e_phnum = cpu_to_dump16(s, phnum); + if (s->have_section) { +- uint64_t shoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->sh_info; ++ uint64_t shoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->phdr_num; + + elf_header.e_shoff = cpu_to_dump64(s, shoff); + elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr)); +@@ -155,6 +161,12 @@ static void write_elf64_header(DumpState *s, Error **errp) + + static void write_elf32_header(DumpState *s, Error **errp) + { ++ /* ++ * phnum in the elf header is 16 bit, if we have more segments we ++ * set phnum to PN_XNUM and write the real number of segments to a ++ * special section. ++ */ ++ uint16_t phnum = MIN(s->phdr_num, PN_XNUM); + Elf32_Ehdr elf_header; + int ret; + +@@ -169,9 +181,9 @@ static void write_elf32_header(DumpState *s, Error **errp) + elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header)); + elf_header.e_phoff = cpu_to_dump32(s, sizeof(Elf32_Ehdr)); + elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr)); +- elf_header.e_phnum = cpu_to_dump16(s, s->phdr_num); ++ elf_header.e_phnum = cpu_to_dump16(s, phnum); + if (s->have_section) { +- uint32_t shoff = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->sh_info; ++ uint32_t shoff = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->phdr_num; + + elf_header.e_shoff = cpu_to_dump32(s, shoff); + elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr)); +@@ -358,12 +370,12 @@ static void write_elf_section(DumpState *s, int type, Error **errp) + if (type == 0) { + shdr_size = sizeof(Elf32_Shdr); + memset(&shdr32, 0, shdr_size); +- shdr32.sh_info = cpu_to_dump32(s, s->sh_info); ++ shdr32.sh_info = cpu_to_dump32(s, s->phdr_num); + shdr = &shdr32; + } else { + shdr_size = sizeof(Elf64_Shdr); + memset(&shdr64, 0, shdr_size); +- shdr64.sh_info = cpu_to_dump32(s, s->sh_info); ++ shdr64.sh_info = cpu_to_dump32(s, s->phdr_num); + shdr = &shdr64; + } + +@@ -478,13 +490,6 @@ static void write_elf_loads(DumpState *s, Error **errp) + hwaddr offset, filesz; + MemoryMapping *memory_mapping; + uint32_t phdr_index = 1; +- uint32_t max_index; +- +- if (s->have_section) { +- max_index = s->sh_info; +- } else { +- max_index = s->phdr_num; +- } + + QTAILQ_FOREACH(memory_mapping, &s->list.head, next) { + get_offset_range(memory_mapping->phys_addr, +@@ -502,7 +507,7 @@ static void write_elf_loads(DumpState *s, Error **errp) + return; + } + +- if (phdr_index >= max_index) { ++ if (phdr_index >= s->phdr_num) { + break; + } + } +@@ -1809,22 +1814,21 @@ static void dump_init(DumpState *s, int fd, bool has_format, + s->phdr_num += s->list.num; + s->have_section = false; + } else { ++ /* sh_info of section 0 holds the real number of phdrs */ + s->have_section = true; +- s->phdr_num = PN_XNUM; +- s->sh_info = 1; /* PT_NOTE */ + + /* the type of shdr->sh_info is uint32_t, so we should avoid overflow */ + if (s->list.num <= UINT32_MAX - 1) { +- s->sh_info += s->list.num; ++ s->phdr_num += s->list.num; + } else { +- s->sh_info = UINT32_MAX; ++ s->phdr_num = UINT32_MAX; + } + } + + if (s->dump_info.d_class == ELFCLASS64) { + if (s->have_section) { + s->memory_offset = sizeof(Elf64_Ehdr) + +- sizeof(Elf64_Phdr) * s->sh_info + ++ sizeof(Elf64_Phdr) * s->phdr_num + + sizeof(Elf64_Shdr) + s->note_size; + } else { + s->memory_offset = sizeof(Elf64_Ehdr) + +@@ -1833,7 +1837,7 @@ static void dump_init(DumpState *s, int fd, bool has_format, + } else { + if (s->have_section) { + s->memory_offset = sizeof(Elf32_Ehdr) + +- sizeof(Elf32_Phdr) * s->sh_info + ++ sizeof(Elf32_Phdr) * s->phdr_num + + sizeof(Elf32_Shdr) + s->note_size; + } else { + s->memory_offset = sizeof(Elf32_Ehdr) + +diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h +index 250143cb5a..b463fc9c02 100644 +--- a/include/sysemu/dump.h ++++ b/include/sysemu/dump.h +@@ -154,8 +154,7 @@ typedef struct DumpState { + GuestPhysBlockList guest_phys_blocks; + ArchDumpInfo dump_info; + MemoryMappingList list; +- uint16_t phdr_num; +- uint32_t sh_info; ++ uint32_t phdr_num; + bool have_section; + bool resume; + bool detached; +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Rename-write_elf-_phdr_note-to-prepare_elf-_phd.patch b/SOURCES/kvm-dump-Rename-write_elf-_phdr_note-to-prepare_elf-_phd.patch new file mode 100644 index 0000000..bdcaccd --- /dev/null +++ b/SOURCES/kvm-dump-Rename-write_elf-_phdr_note-to-prepare_elf-_phd.patch @@ -0,0 +1,69 @@ +From 18fef7f02801d51207d67b8f8ec5f0d828889c78 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Thu, 11 Aug 2022 12:11:01 +0000 +Subject: [PATCH 29/42] dump: Rename write_elf*_phdr_note to + prepare_elf*_phdr_note +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [29/41] 876cea6f6e51be8df2763f56d0daef99d11fdd49 + +The functions in question do not actually write to the file descriptor +they set up a buffer which is later written to the fd. + +Signed-off-by: Janosch Frank +Reviewed-by: Janis Schoetterl-Glausch +Reviewed-by: Marc-André Lureau +Message-Id: <20220811121111.9878-9-frankja@linux.ibm.com> +(cherry picked from commit 2341a94d3a0a8a93a5a977e642da1807b8edaab8) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 8d5226f861..c2c1341ad7 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -261,7 +261,7 @@ static void write_elf32_load(DumpState *s, MemoryMapping *memory_mapping, + } + } + +-static void write_elf64_phdr_note(DumpState *s, Elf64_Phdr *phdr) ++static void prepare_elf64_phdr_note(DumpState *s, Elf64_Phdr *phdr) + { + memset(phdr, 0, sizeof(*phdr)); + phdr->p_type = cpu_to_dump32(s, PT_NOTE); +@@ -317,7 +317,7 @@ static void write_elf64_notes(WriteCoreDumpFunction f, DumpState *s, + write_guest_note(f, s, errp); + } + +-static void write_elf32_phdr_note(DumpState *s, Elf32_Phdr *phdr) ++static void prepare_elf32_phdr_note(DumpState *s, Elf32_Phdr *phdr) + { + memset(phdr, 0, sizeof(*phdr)); + phdr->p_type = cpu_to_dump32(s, PT_NOTE); +@@ -365,11 +365,11 @@ static void write_elf_phdr_note(DumpState *s, Error **errp) + int ret; + + if (dump_is_64bit(s)) { +- write_elf64_phdr_note(s, &phdr64); ++ prepare_elf64_phdr_note(s, &phdr64); + size = sizeof(phdr64); + phdr = &phdr64; + } else { +- write_elf32_phdr_note(s, &phdr32); ++ prepare_elf32_phdr_note(s, &phdr32); + size = sizeof(phdr32); + phdr = &phdr32; + } +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Rename-write_elf_loads-to-write_elf_phdr_loads.patch b/SOURCES/kvm-dump-Rename-write_elf_loads-to-write_elf_phdr_loads.patch new file mode 100644 index 0000000..b5758cf --- /dev/null +++ b/SOURCES/kvm-dump-Rename-write_elf_loads-to-write_elf_phdr_loads.patch @@ -0,0 +1,57 @@ +From 04d4947a22fe3192384ff486d0a979d799ded98e Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Thu, 11 Aug 2022 12:10:55 +0000 +Subject: [PATCH 23/42] dump: Rename write_elf_loads to write_elf_phdr_loads +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [23/41] 18e3ef70b97c525b7c43cf12143204bdb1060e4f + +Let's make it a bit clearer that we write the program headers of the +PT_LOAD type. + +Signed-off-by: Janosch Frank +Reviewed-by: Marc-André Lureau +Reviewed-by: Janis Schoetterl-Glausch +Reviewed-by: Steffen Eiden +Message-Id: <20220811121111.9878-3-frankja@linux.ibm.com> +(cherry picked from commit afae6056ea79e2d89fd90867de3a01732eae724f) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index a451abc590..fa787f379f 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -491,7 +491,7 @@ static void get_offset_range(hwaddr phys_addr, + } + } + +-static void write_elf_loads(DumpState *s, Error **errp) ++static void write_elf_phdr_loads(DumpState *s, Error **errp) + { + ERRP_GUARD(); + hwaddr offset, filesz; +@@ -574,8 +574,8 @@ static void dump_begin(DumpState *s, Error **errp) + return; + } + +- /* write all PT_LOAD to vmcore */ +- write_elf_loads(s, errp); ++ /* write all PT_LOADs to vmcore */ ++ write_elf_phdr_loads(s, errp); + if (*errp) { + return; + } +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Reorder-struct-DumpState.patch b/SOURCES/kvm-dump-Reorder-struct-DumpState.patch new file mode 100644 index 0000000..2ca3f2a --- /dev/null +++ b/SOURCES/kvm-dump-Reorder-struct-DumpState.patch @@ -0,0 +1,68 @@ +From 7e8d6290099b33f88621b45e62652a97704c9573 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Mon, 17 Oct 2022 08:38:15 +0000 +Subject: [PATCH 34/42] dump: Reorder struct DumpState +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [34/41] 8d44e5e8c86ea5b33644eba141046cd657d0071e + +Let's move ELF related members into one block and guest memory related +ones into another to improve readability. + +Signed-off-by: Janosch Frank +Reviewed-by: Richard Henderson +Reviewed-by: Marc-André Lureau +Message-Id: <20221017083822.43118-4-frankja@linux.ibm.com> +(cherry picked from commit 8384b73c46fd474847d7e74d121318e344edc3c4) +Signed-off-by: Cédric Le Goater +--- + include/sysemu/dump.h | 16 +++++++++------- + 1 file changed, 9 insertions(+), 7 deletions(-) + +diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h +index 9995f65dc8..9ed811b313 100644 +--- a/include/sysemu/dump.h ++++ b/include/sysemu/dump.h +@@ -154,15 +154,8 @@ typedef struct DumpState { + GuestPhysBlockList guest_phys_blocks; + ArchDumpInfo dump_info; + MemoryMappingList list; +- uint32_t phdr_num; +- uint32_t shdr_num; + bool resume; + bool detached; +- ssize_t note_size; +- hwaddr shdr_offset; +- hwaddr phdr_offset; +- hwaddr section_offset; +- hwaddr note_offset; + hwaddr memory_offset; + int fd; + +@@ -177,6 +170,15 @@ typedef struct DumpState { + int64_t filter_area_begin; /* Start address of partial guest memory area */ + int64_t filter_area_length; /* Length of partial guest memory area */ + ++ /* Elf dump related data */ ++ uint32_t phdr_num; ++ uint32_t shdr_num; ++ ssize_t note_size; ++ hwaddr shdr_offset; ++ hwaddr phdr_offset; ++ hwaddr section_offset; ++ hwaddr note_offset; ++ + void *elf_section_hdrs; /* Pointer to section header buffer */ + + uint8_t *note_buf; /* buffer for notes */ +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Replace-opaque-DumpState-pointer-with-a-typed-o.patch b/SOURCES/kvm-dump-Replace-opaque-DumpState-pointer-with-a-typed-o.patch new file mode 100644 index 0000000..421a98e --- /dev/null +++ b/SOURCES/kvm-dump-Replace-opaque-DumpState-pointer-with-a-typed-o.patch @@ -0,0 +1,467 @@ +From 8f674e0e12e4b88fc035948612a0b0949e0ad892 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Thu, 11 Aug 2022 12:10:54 +0000 +Subject: [PATCH 22/42] dump: Replace opaque DumpState pointer with a typed one +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [22/41] 5f071d7ef441ae6f5da70eb56018c4657deee3d7 + +It's always better to convey the type of a pointer if at all +possible. So let's add the DumpState typedef to typedefs.h and move +the dump note functions from the opaque pointers to DumpState +pointers. + +Signed-off-by: Janosch Frank +CC: Peter Maydell +CC: Cédric Le Goater +CC: Daniel Henrique Barboza +CC: David Gibson +CC: Greg Kurz +CC: Palmer Dabbelt +CC: Alistair Francis +CC: Bin Meng +CC: Cornelia Huck +CC: Thomas Huth +CC: Richard Henderson +CC: David Hildenbrand +Acked-by: Daniel Henrique Barboza +Reviewed-by: Marc-André Lureau +Message-Id: <20220811121111.9878-2-frankja@linux.ibm.com> +(cherry picked from commit 1af0006ab959864dfa2f59e9136c5fb93000b61f) +Signed-off-by: Cédric Le Goater +--- + include/hw/core/sysemu-cpu-ops.h | 8 ++++---- + include/qemu/typedefs.h | 1 + + target/arm/arch_dump.c | 6 ++---- + target/arm/cpu.h | 4 ++-- + target/i386/arch_dump.c | 30 +++++++++++++++--------------- + target/i386/cpu.h | 8 ++++---- + target/ppc/arch_dump.c | 18 +++++++++--------- + target/ppc/cpu.h | 4 ++-- + target/riscv/arch_dump.c | 6 ++---- + target/riscv/cpu.h | 4 ++-- + target/s390x/arch_dump.c | 10 +++++----- + target/s390x/s390x-internal.h | 2 +- + 12 files changed, 49 insertions(+), 52 deletions(-) + +diff --git a/include/hw/core/sysemu-cpu-ops.h b/include/hw/core/sysemu-cpu-ops.h +index a9ba39e5f2..ee169b872c 100644 +--- a/include/hw/core/sysemu-cpu-ops.h ++++ b/include/hw/core/sysemu-cpu-ops.h +@@ -53,25 +53,25 @@ typedef struct SysemuCPUOps { + * 32-bit VM coredump. + */ + int (*write_elf32_note)(WriteCoreDumpFunction f, CPUState *cpu, +- int cpuid, void *opaque); ++ int cpuid, DumpState *s); + /** + * @write_elf64_note: Callback for writing a CPU-specific ELF note to a + * 64-bit VM coredump. + */ + int (*write_elf64_note)(WriteCoreDumpFunction f, CPUState *cpu, +- int cpuid, void *opaque); ++ int cpuid, DumpState *s); + /** + * @write_elf32_qemunote: Callback for writing a CPU- and QEMU-specific ELF + * note to a 32-bit VM coredump. + */ + int (*write_elf32_qemunote)(WriteCoreDumpFunction f, CPUState *cpu, +- void *opaque); ++ DumpState *s); + /** + * @write_elf64_qemunote: Callback for writing a CPU- and QEMU-specific ELF + * note to a 64-bit VM coredump. + */ + int (*write_elf64_qemunote)(WriteCoreDumpFunction f, CPUState *cpu, +- void *opaque); ++ DumpState *s); + /** + * @virtio_is_big_endian: Callback to return %true if a CPU which supports + * runtime configurable endianness is currently big-endian. +diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h +index ee60eb3de4..ac9d031be6 100644 +--- a/include/qemu/typedefs.h ++++ b/include/qemu/typedefs.h +@@ -125,6 +125,7 @@ typedef struct VirtIODevice VirtIODevice; + typedef struct Visitor Visitor; + typedef struct VMChangeStateEntry VMChangeStateEntry; + typedef struct VMStateDescription VMStateDescription; ++typedef struct DumpState DumpState; + + /* + * Pointer types +diff --git a/target/arm/arch_dump.c b/target/arm/arch_dump.c +index 0184845310..3a824e0aa6 100644 +--- a/target/arm/arch_dump.c ++++ b/target/arm/arch_dump.c +@@ -232,12 +232,11 @@ static int aarch64_write_elf64_sve(WriteCoreDumpFunction f, + #endif + + int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque) ++ int cpuid, DumpState *s) + { + struct aarch64_note note; + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; +- DumpState *s = opaque; + uint64_t pstate, sp; + int ret, i; + +@@ -360,12 +359,11 @@ static int arm_write_elf32_vfp(WriteCoreDumpFunction f, CPUARMState *env, + } + + int arm_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque) ++ int cpuid, DumpState *s) + { + struct arm_note note; + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; +- DumpState *s = opaque; + int ret, i; + bool fpvalid = cpu_isar_feature(aa32_vfp_simd, cpu); + +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index e33f37b70a..8d2f496ef9 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -1065,9 +1065,9 @@ int arm_gen_dynamic_svereg_xml(CPUState *cpu, int base_reg); + const char *arm_gdb_get_dynamic_xml(CPUState *cpu, const char *xmlname); + + int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque); ++ int cpuid, DumpState *s); + int arm_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque); ++ int cpuid, DumpState *s); + + #ifdef TARGET_AARCH64 + int aarch64_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg); +diff --git a/target/i386/arch_dump.c b/target/i386/arch_dump.c +index 004141fc04..c290910a04 100644 +--- a/target/i386/arch_dump.c ++++ b/target/i386/arch_dump.c +@@ -42,7 +42,7 @@ typedef struct { + + static int x86_64_write_elf64_note(WriteCoreDumpFunction f, + CPUX86State *env, int id, +- void *opaque) ++ DumpState *s) + { + x86_64_user_regs_struct regs; + Elf64_Nhdr *note; +@@ -94,7 +94,7 @@ static int x86_64_write_elf64_note(WriteCoreDumpFunction f, + buf += descsz - sizeof(x86_64_user_regs_struct)-sizeof(target_ulong); + memcpy(buf, ®s, sizeof(x86_64_user_regs_struct)); + +- ret = f(note, note_size, opaque); ++ ret = f(note, note_size, s); + g_free(note); + if (ret < 0) { + return -1; +@@ -148,7 +148,7 @@ static void x86_fill_elf_prstatus(x86_elf_prstatus *prstatus, CPUX86State *env, + } + + static int x86_write_elf64_note(WriteCoreDumpFunction f, CPUX86State *env, +- int id, void *opaque) ++ int id, DumpState *s) + { + x86_elf_prstatus prstatus; + Elf64_Nhdr *note; +@@ -170,7 +170,7 @@ static int x86_write_elf64_note(WriteCoreDumpFunction f, CPUX86State *env, + buf += ROUND_UP(name_size, 4); + memcpy(buf, &prstatus, sizeof(prstatus)); + +- ret = f(note, note_size, opaque); ++ ret = f(note, note_size, s); + g_free(note); + if (ret < 0) { + return -1; +@@ -180,7 +180,7 @@ static int x86_write_elf64_note(WriteCoreDumpFunction f, CPUX86State *env, + } + + int x86_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque) ++ int cpuid, DumpState *s) + { + X86CPU *cpu = X86_CPU(cs); + int ret; +@@ -189,10 +189,10 @@ int x86_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, + bool lma = !!(first_x86_cpu->env.hflags & HF_LMA_MASK); + + if (lma) { +- ret = x86_64_write_elf64_note(f, &cpu->env, cpuid, opaque); ++ ret = x86_64_write_elf64_note(f, &cpu->env, cpuid, s); + } else { + #endif +- ret = x86_write_elf64_note(f, &cpu->env, cpuid, opaque); ++ ret = x86_write_elf64_note(f, &cpu->env, cpuid, s); + #ifdef TARGET_X86_64 + } + #endif +@@ -201,7 +201,7 @@ int x86_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, + } + + int x86_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque) ++ int cpuid, DumpState *s) + { + X86CPU *cpu = X86_CPU(cs); + x86_elf_prstatus prstatus; +@@ -224,7 +224,7 @@ int x86_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, + buf += ROUND_UP(name_size, 4); + memcpy(buf, &prstatus, sizeof(prstatus)); + +- ret = f(note, note_size, opaque); ++ ret = f(note, note_size, s); + g_free(note); + if (ret < 0) { + return -1; +@@ -329,7 +329,7 @@ static void qemu_get_cpustate(QEMUCPUState *s, CPUX86State *env) + + static inline int cpu_write_qemu_note(WriteCoreDumpFunction f, + CPUX86State *env, +- void *opaque, ++ DumpState *s, + int type) + { + QEMUCPUState state; +@@ -369,7 +369,7 @@ static inline int cpu_write_qemu_note(WriteCoreDumpFunction f, + buf += ROUND_UP(name_size, 4); + memcpy(buf, &state, sizeof(state)); + +- ret = f(note, note_size, opaque); ++ ret = f(note, note_size, s); + g_free(note); + if (ret < 0) { + return -1; +@@ -379,19 +379,19 @@ static inline int cpu_write_qemu_note(WriteCoreDumpFunction f, + } + + int x86_cpu_write_elf64_qemunote(WriteCoreDumpFunction f, CPUState *cs, +- void *opaque) ++ DumpState *s) + { + X86CPU *cpu = X86_CPU(cs); + +- return cpu_write_qemu_note(f, &cpu->env, opaque, 1); ++ return cpu_write_qemu_note(f, &cpu->env, s, 1); + } + + int x86_cpu_write_elf32_qemunote(WriteCoreDumpFunction f, CPUState *cs, +- void *opaque) ++ DumpState *s) + { + X86CPU *cpu = X86_CPU(cs); + +- return cpu_write_qemu_note(f, &cpu->env, opaque, 0); ++ return cpu_write_qemu_note(f, &cpu->env, s, 0); + } + + int cpu_get_dump_info(ArchDumpInfo *info, +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 006b735fe4..5d2ddd81b9 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -1887,13 +1887,13 @@ extern const VMStateDescription vmstate_x86_cpu; + int x86_cpu_pending_interrupt(CPUState *cs, int interrupt_request); + + int x86_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cpu, +- int cpuid, void *opaque); ++ int cpuid, DumpState *s); + int x86_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cpu, +- int cpuid, void *opaque); ++ int cpuid, DumpState *s); + int x86_cpu_write_elf64_qemunote(WriteCoreDumpFunction f, CPUState *cpu, +- void *opaque); ++ DumpState *s); + int x86_cpu_write_elf32_qemunote(WriteCoreDumpFunction f, CPUState *cpu, +- void *opaque); ++ DumpState *s); + + void x86_cpu_get_memory_mapping(CPUState *cpu, MemoryMappingList *list, + Error **errp); +diff --git a/target/ppc/arch_dump.c b/target/ppc/arch_dump.c +index bb392f6d88..e9f512bcd4 100644 +--- a/target/ppc/arch_dump.c ++++ b/target/ppc/arch_dump.c +@@ -270,23 +270,23 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus) + static int ppc_write_all_elf_notes(const char *note_name, + WriteCoreDumpFunction f, + PowerPCCPU *cpu, int id, +- void *opaque) ++ DumpState *s) + { +- NoteFuncArg arg = { .state = opaque }; ++ NoteFuncArg arg = { .state = s }; + int ret = -1; + int note_size; + const NoteFuncDesc *nf; + + for (nf = note_func; nf->note_contents_func; nf++) { +- arg.note.hdr.n_namesz = cpu_to_dump32(opaque, sizeof(arg.note.name)); +- arg.note.hdr.n_descsz = cpu_to_dump32(opaque, nf->contents_size); ++ arg.note.hdr.n_namesz = cpu_to_dump32(s, sizeof(arg.note.name)); ++ arg.note.hdr.n_descsz = cpu_to_dump32(s, nf->contents_size); + strncpy(arg.note.name, note_name, sizeof(arg.note.name)); + + (*nf->note_contents_func)(&arg, cpu); + + note_size = + sizeof(arg.note) - sizeof(arg.note.contents) + nf->contents_size; +- ret = f(&arg.note, note_size, opaque); ++ ret = f(&arg.note, note_size, s); + if (ret < 0) { + return -1; + } +@@ -295,15 +295,15 @@ static int ppc_write_all_elf_notes(const char *note_name, + } + + int ppc64_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque) ++ int cpuid, DumpState *s) + { + PowerPCCPU *cpu = POWERPC_CPU(cs); +- return ppc_write_all_elf_notes("CORE", f, cpu, cpuid, opaque); ++ return ppc_write_all_elf_notes("CORE", f, cpu, cpuid, s); + } + + int ppc32_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque) ++ int cpuid, DumpState *s) + { + PowerPCCPU *cpu = POWERPC_CPU(cs); +- return ppc_write_all_elf_notes("CORE", f, cpu, cpuid, opaque); ++ return ppc_write_all_elf_notes("CORE", f, cpu, cpuid, s); + } +diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h +index 23e8b76c85..f5fb284706 100644 +--- a/target/ppc/cpu.h ++++ b/target/ppc/cpu.h +@@ -1289,9 +1289,9 @@ void ppc_gdb_gen_spr_xml(PowerPCCPU *cpu); + const char *ppc_gdb_get_dynamic_xml(CPUState *cs, const char *xml_name); + #endif + int ppc64_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque); ++ int cpuid, DumpState *s); + int ppc32_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque); ++ int cpuid, DumpState *s); + #ifndef CONFIG_USER_ONLY + void ppc_cpu_do_interrupt(CPUState *cpu); + bool ppc_cpu_exec_interrupt(CPUState *cpu, int int_req); +diff --git a/target/riscv/arch_dump.c b/target/riscv/arch_dump.c +index 709f621d82..736a232956 100644 +--- a/target/riscv/arch_dump.c ++++ b/target/riscv/arch_dump.c +@@ -64,12 +64,11 @@ static void riscv64_note_init(struct riscv64_note *note, DumpState *s, + } + + int riscv_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque) ++ int cpuid, DumpState *s) + { + struct riscv64_note note; + RISCVCPU *cpu = RISCV_CPU(cs); + CPURISCVState *env = &cpu->env; +- DumpState *s = opaque; + int ret, i = 0; + const char name[] = "CORE"; + +@@ -134,12 +133,11 @@ static void riscv32_note_init(struct riscv32_note *note, DumpState *s, + } + + int riscv_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque) ++ int cpuid, DumpState *s) + { + struct riscv32_note note; + RISCVCPU *cpu = RISCV_CPU(cs); + CPURISCVState *env = &cpu->env; +- DumpState *s = opaque; + int ret, i; + const char name[] = "CORE"; + +diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h +index 0760c0af93..4cce524b2c 100644 +--- a/target/riscv/cpu.h ++++ b/target/riscv/cpu.h +@@ -344,9 +344,9 @@ extern const char * const riscv_fpr_regnames[]; + const char *riscv_cpu_get_trap_name(target_ulong cause, bool async); + void riscv_cpu_do_interrupt(CPUState *cpu); + int riscv_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque); ++ int cpuid, DumpState *s); + int riscv_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque); ++ int cpuid, DumpState *s); + int riscv_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg); + int riscv_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); + bool riscv_cpu_fp_enabled(CPURISCVState *env); +diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c +index 08daf93ae1..f60a14920d 100644 +--- a/target/s390x/arch_dump.c ++++ b/target/s390x/arch_dump.c +@@ -204,7 +204,7 @@ static const NoteFuncDesc note_linux[] = { + static int s390x_write_elf64_notes(const char *note_name, + WriteCoreDumpFunction f, + S390CPU *cpu, int id, +- void *opaque, ++ DumpState *s, + const NoteFuncDesc *funcs) + { + Note note; +@@ -222,7 +222,7 @@ static int s390x_write_elf64_notes(const char *note_name, + (*nf->note_contents_func)(¬e, cpu, id); + + note_size = sizeof(note) - sizeof(note.contents) + nf->contents_size; +- ret = f(¬e, note_size, opaque); ++ ret = f(¬e, note_size, s); + + if (ret < 0) { + return -1; +@@ -235,16 +235,16 @@ static int s390x_write_elf64_notes(const char *note_name, + + + int s390_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque) ++ int cpuid, DumpState *s) + { + S390CPU *cpu = S390_CPU(cs); + int r; + +- r = s390x_write_elf64_notes("CORE", f, cpu, cpuid, opaque, note_core); ++ r = s390x_write_elf64_notes("CORE", f, cpu, cpuid, s, note_core); + if (r) { + return r; + } +- return s390x_write_elf64_notes("LINUX", f, cpu, cpuid, opaque, note_linux); ++ return s390x_write_elf64_notes("LINUX", f, cpu, cpuid, s, note_linux); + } + + int cpu_get_dump_info(ArchDumpInfo *info, +diff --git a/target/s390x/s390x-internal.h b/target/s390x/s390x-internal.h +index 1a178aed41..02cf6c3f43 100644 +--- a/target/s390x/s390x-internal.h ++++ b/target/s390x/s390x-internal.h +@@ -228,7 +228,7 @@ static inline hwaddr decode_basedisp_s(CPUS390XState *env, uint32_t ipb, + + /* arch_dump.c */ + int s390_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque); ++ int cpuid, DumpState *s); + + + /* cc_helper.c */ +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Rework-dump_calculate_size-function.patch b/SOURCES/kvm-dump-Rework-dump_calculate_size-function.patch new file mode 100644 index 0000000..e077bea --- /dev/null +++ b/SOURCES/kvm-dump-Rework-dump_calculate_size-function.patch @@ -0,0 +1,73 @@ +From 1f7cb73592a1922b3a981eb3232098281e07679f Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Thu, 11 Aug 2022 12:10:59 +0000 +Subject: [PATCH 27/42] dump: Rework dump_calculate_size function +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [27/41] eaa05c39109b57a119752ad3df66f4c2ace2cbe4 + +dump_calculate_size() sums up all the sizes of the guest memory +blocks. Since we already have a function that calculates the size of a +single memory block (dump_get_memblock_size()) we can simply iterate +over the blocks and use the function instead of calculating the size +ourselves. + +Signed-off-by: Janosch Frank +Reviewed-by: Marc-André Lureau +Reviewed-by: Janis Schoetterl-Glausch +Message-Id: <20220811121111.9878-7-frankja@linux.ibm.com> +(cherry picked from commit c370d5300f9ac1f90f8158082d22262b904fe30e) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 22 ++++++++-------------- + 1 file changed, 8 insertions(+), 14 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index f6fe13e258..902a85ef8e 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -1557,25 +1557,19 @@ bool dump_in_progress(void) + return (qatomic_read(&state->status) == DUMP_STATUS_ACTIVE); + } + +-/* calculate total size of memory to be dumped (taking filter into +- * acoount.) */ ++/* ++ * calculate total size of memory to be dumped (taking filter into ++ * account.) ++ */ + static int64_t dump_calculate_size(DumpState *s) + { + GuestPhysBlock *block; +- int64_t size = 0, total = 0, left = 0, right = 0; ++ int64_t total = 0; + + QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { +- if (dump_has_filter(s)) { +- /* calculate the overlapped region. */ +- left = MAX(s->filter_area_begin, block->target_start); +- right = MIN(s->filter_area_begin + s->filter_area_length, block->target_end); +- size = right - left; +- size = size > 0 ? size : 0; +- } else { +- /* count the whole region in */ +- size = (block->target_end - block->target_start); +- } +- total += size; ++ total += dump_filtered_memblock_size(block, ++ s->filter_area_begin, ++ s->filter_area_length); + } + + return total; +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Rework-filter-area-variables.patch b/SOURCES/kvm-dump-Rework-filter-area-variables.patch new file mode 100644 index 0000000..4e22f41 --- /dev/null +++ b/SOURCES/kvm-dump-Rework-filter-area-variables.patch @@ -0,0 +1,187 @@ +From 411f5354b809f6b783946e58d7655135814fb809 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Thu, 11 Aug 2022 12:10:58 +0000 +Subject: [PATCH 26/42] dump: Rework filter area variables +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [26/41] f10a5523dfd2724f7a8637fca3ed68ba6df659a5 + +While the DumpState begin and length variables directly mirror the API +variable names they are not very descriptive. So let's add a +"filter_area_" prefix and make has_filter a function checking length > 0. + +Signed-off-by: Janosch Frank +Reviewed-by: Marc-André Lureau +Message-Id: <20220811121111.9878-6-frankja@linux.ibm.com> +(cherry picked from commit dddf725f70bfe7f5adb41fa31dbd06e767271bda) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 53 +++++++++++++++++++++++++------------------ + include/sysemu/dump.h | 13 ++++++++--- + 2 files changed, 41 insertions(+), 25 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index e6aa037f59..f6fe13e258 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -60,6 +60,11 @@ static inline bool dump_is_64bit(DumpState *s) + return s->dump_info.d_class == ELFCLASS64; + } + ++static inline bool dump_has_filter(DumpState *s) ++{ ++ return s->filter_area_length > 0; ++} ++ + uint16_t cpu_to_dump16(DumpState *s, uint16_t val) + { + if (s->dump_info.d_endian == ELFDATA2LSB) { +@@ -444,29 +449,30 @@ static void get_offset_range(hwaddr phys_addr, + *p_offset = -1; + *p_filesz = 0; + +- if (s->has_filter) { +- if (phys_addr < s->begin || phys_addr >= s->begin + s->length) { ++ if (dump_has_filter(s)) { ++ if (phys_addr < s->filter_area_begin || ++ phys_addr >= s->filter_area_begin + s->filter_area_length) { + return; + } + } + + QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { +- if (s->has_filter) { +- if (block->target_start >= s->begin + s->length || +- block->target_end <= s->begin) { ++ if (dump_has_filter(s)) { ++ if (block->target_start >= s->filter_area_begin + s->filter_area_length || ++ block->target_end <= s->filter_area_begin) { + /* This block is out of the range */ + continue; + } + +- if (s->begin <= block->target_start) { ++ if (s->filter_area_begin <= block->target_start) { + start = block->target_start; + } else { +- start = s->begin; ++ start = s->filter_area_begin; + } + + size_in_block = block->target_end - start; +- if (s->begin + s->length < block->target_end) { +- size_in_block -= block->target_end - (s->begin + s->length); ++ if (s->filter_area_begin + s->filter_area_length < block->target_end) { ++ size_in_block -= block->target_end - (s->filter_area_begin + s->filter_area_length); + } + } else { + start = block->target_start; +@@ -639,12 +645,12 @@ static void dump_iterate(DumpState *s, Error **errp) + int64_t memblock_size, memblock_start; + + QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { +- memblock_start = dump_filtered_memblock_start(block, s->begin, s->length); ++ memblock_start = dump_filtered_memblock_start(block, s->filter_area_begin, s->filter_area_length); + if (memblock_start == -1) { + continue; + } + +- memblock_size = dump_filtered_memblock_size(block, s->begin, s->length); ++ memblock_size = dump_filtered_memblock_size(block, s->filter_area_begin, s->filter_area_length); + + /* Write the memory to file */ + write_memory(s, block, memblock_start, memblock_size, errp); +@@ -1513,14 +1519,14 @@ static int validate_start_block(DumpState *s) + { + GuestPhysBlock *block; + +- if (!s->has_filter) { ++ if (!dump_has_filter(s)) { + return 0; + } + + QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { + /* This block is out of the range */ +- if (block->target_start >= s->begin + s->length || +- block->target_end <= s->begin) { ++ if (block->target_start >= s->filter_area_begin + s->filter_area_length || ++ block->target_end <= s->filter_area_begin) { + continue; + } + return 0; +@@ -1559,10 +1565,10 @@ static int64_t dump_calculate_size(DumpState *s) + int64_t size = 0, total = 0, left = 0, right = 0; + + QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { +- if (s->has_filter) { ++ if (dump_has_filter(s)) { + /* calculate the overlapped region. */ +- left = MAX(s->begin, block->target_start); +- right = MIN(s->begin + s->length, block->target_end); ++ left = MAX(s->filter_area_begin, block->target_start); ++ right = MIN(s->filter_area_begin + s->filter_area_length, block->target_end); + size = right - left; + size = size > 0 ? size : 0; + } else { +@@ -1652,9 +1658,12 @@ static void dump_init(DumpState *s, int fd, bool has_format, + } + + s->fd = fd; +- s->has_filter = has_filter; +- s->begin = begin; +- s->length = length; ++ if (has_filter && !length) { ++ error_setg(errp, QERR_INVALID_PARAMETER, "length"); ++ goto cleanup; ++ } ++ s->filter_area_begin = begin; ++ s->filter_area_length = length; + + memory_mapping_list_init(&s->list); + +@@ -1787,8 +1796,8 @@ static void dump_init(DumpState *s, int fd, bool has_format, + return; + } + +- if (s->has_filter) { +- memory_mapping_filter(&s->list, s->begin, s->length); ++ if (dump_has_filter(s)) { ++ memory_mapping_filter(&s->list, s->filter_area_begin, s->filter_area_length); + } + + /* +diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h +index 7fce1d4af6..b62513d87d 100644 +--- a/include/sysemu/dump.h ++++ b/include/sysemu/dump.h +@@ -166,9 +166,16 @@ typedef struct DumpState { + hwaddr memory_offset; + int fd; + +- bool has_filter; +- int64_t begin; +- int64_t length; ++ /* ++ * Dump filter area variables ++ * ++ * A filtered dump only contains the guest memory designated by ++ * the start address and length variables defined below. ++ * ++ * If length is 0, no filtering is applied. ++ */ ++ int64_t filter_area_begin; /* Start address of partial guest memory area */ ++ int64_t filter_area_length; /* Length of partial guest memory area */ + + uint8_t *note_buf; /* buffer for notes */ + size_t note_buf_offset; /* the writing place in note_buf */ +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Rework-get_start_block.patch b/SOURCES/kvm-dump-Rework-get_start_block.patch new file mode 100644 index 0000000..f6bdde2 --- /dev/null +++ b/SOURCES/kvm-dump-Rework-get_start_block.patch @@ -0,0 +1,102 @@ +From b56c362132baef40cc25d910c1e0d217d83cfe44 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Thu, 11 Aug 2022 12:10:57 +0000 +Subject: [PATCH 25/42] dump: Rework get_start_block +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [25/41] c93842a1aaeadcc11e91c194452fcd05d163b3ca + +get_start_block() returns the start address of the first memory block +or -1. + +With the GuestPhysBlock iterator conversion we don't need to set the +start address and can therefore remove that code and the "start" +DumpState struct member. The only functionality left is the validation +of the start block so it only makes sense to re-name the function to +validate_start_block() + +Signed-off-by: Janosch Frank +Reviewed-by: Marc-André Lureau +Reviewed-by: Janis Schoetterl-Glausch +Message-Id: <20220811121111.9878-5-frankja@linux.ibm.com> +(cherry picked from commit 0c2994ac9009577b967529ce18e269da5b280351) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 20 ++++++-------------- + include/sysemu/dump.h | 2 -- + 2 files changed, 6 insertions(+), 16 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index d981e843dd..e6aa037f59 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -1509,30 +1509,22 @@ static void create_kdump_vmcore(DumpState *s, Error **errp) + } + } + +-static ram_addr_t get_start_block(DumpState *s) ++static int validate_start_block(DumpState *s) + { + GuestPhysBlock *block; + + if (!s->has_filter) { +- s->next_block = QTAILQ_FIRST(&s->guest_phys_blocks.head); + return 0; + } + + QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { ++ /* This block is out of the range */ + if (block->target_start >= s->begin + s->length || + block->target_end <= s->begin) { +- /* This block is out of the range */ + continue; + } +- +- s->next_block = block; +- if (s->begin > block->target_start) { +- s->start = s->begin - block->target_start; +- } else { +- s->start = 0; +- } +- return s->start; +- } ++ return 0; ++ } + + return -1; + } +@@ -1679,8 +1671,8 @@ static void dump_init(DumpState *s, int fd, bool has_format, + goto cleanup; + } + +- s->start = get_start_block(s); +- if (s->start == -1) { ++ /* Is the filter filtering everything? */ ++ if (validate_start_block(s) == -1) { + error_setg(errp, QERR_INVALID_PARAMETER, "begin"); + goto cleanup; + } +diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h +index ffc2ea1072..7fce1d4af6 100644 +--- a/include/sysemu/dump.h ++++ b/include/sysemu/dump.h +@@ -166,8 +166,6 @@ typedef struct DumpState { + hwaddr memory_offset; + int fd; + +- GuestPhysBlock *next_block; +- ram_addr_t start; + bool has_filter; + int64_t begin; + int64_t length; +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Split-elf-header-functions-into-prepare-and-wri.patch b/SOURCES/kvm-dump-Split-elf-header-functions-into-prepare-and-wri.patch new file mode 100644 index 0000000..1f53426 --- /dev/null +++ b/SOURCES/kvm-dump-Split-elf-header-functions-into-prepare-and-wri.patch @@ -0,0 +1,173 @@ +From d1e147a3133d4d31d4b0c02c05916366fadd9c30 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Thu, 11 Aug 2022 12:11:00 +0000 +Subject: [PATCH 28/42] dump: Split elf header functions into prepare and write +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [28/41] f70a13ad443835e7f46b7c5e176e372d370ac797 + +Let's split the write from the modification of the elf header so we +can consolidate the write of the data in one function. + +Signed-off-by: Janosch Frank +Reviewed-by: Janis Schoetterl-Glausch +Reviewed-by: Marc-André Lureau +Message-Id: <20220811121111.9878-8-frankja@linux.ibm.com> +(cherry picked from commit 670e76998a61ca171200fcded3865b294a2d1243) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 100 ++++++++++++++++++++++++++++------------------------ + 1 file changed, 53 insertions(+), 47 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 902a85ef8e..8d5226f861 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -132,7 +132,7 @@ static int fd_write_vmcore(const void *buf, size_t size, void *opaque) + return 0; + } + +-static void write_elf64_header(DumpState *s, Error **errp) ++static void prepare_elf64_header(DumpState *s, Elf64_Ehdr *elf_header) + { + /* + * phnum in the elf header is 16 bit, if we have more segments we +@@ -140,34 +140,27 @@ static void write_elf64_header(DumpState *s, Error **errp) + * special section. + */ + uint16_t phnum = MIN(s->phdr_num, PN_XNUM); +- Elf64_Ehdr elf_header; +- int ret; + +- memset(&elf_header, 0, sizeof(Elf64_Ehdr)); +- memcpy(&elf_header, ELFMAG, SELFMAG); +- elf_header.e_ident[EI_CLASS] = ELFCLASS64; +- elf_header.e_ident[EI_DATA] = s->dump_info.d_endian; +- elf_header.e_ident[EI_VERSION] = EV_CURRENT; +- elf_header.e_type = cpu_to_dump16(s, ET_CORE); +- elf_header.e_machine = cpu_to_dump16(s, s->dump_info.d_machine); +- elf_header.e_version = cpu_to_dump32(s, EV_CURRENT); +- elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header)); +- elf_header.e_phoff = cpu_to_dump64(s, s->phdr_offset); +- elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr)); +- elf_header.e_phnum = cpu_to_dump16(s, phnum); ++ memset(elf_header, 0, sizeof(Elf64_Ehdr)); ++ memcpy(elf_header, ELFMAG, SELFMAG); ++ elf_header->e_ident[EI_CLASS] = ELFCLASS64; ++ elf_header->e_ident[EI_DATA] = s->dump_info.d_endian; ++ elf_header->e_ident[EI_VERSION] = EV_CURRENT; ++ elf_header->e_type = cpu_to_dump16(s, ET_CORE); ++ elf_header->e_machine = cpu_to_dump16(s, s->dump_info.d_machine); ++ elf_header->e_version = cpu_to_dump32(s, EV_CURRENT); ++ elf_header->e_ehsize = cpu_to_dump16(s, sizeof(elf_header)); ++ elf_header->e_phoff = cpu_to_dump64(s, s->phdr_offset); ++ elf_header->e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr)); ++ elf_header->e_phnum = cpu_to_dump16(s, phnum); + if (s->shdr_num) { +- elf_header.e_shoff = cpu_to_dump64(s, s->shdr_offset); +- elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr)); +- elf_header.e_shnum = cpu_to_dump16(s, s->shdr_num); +- } +- +- ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s); +- if (ret < 0) { +- error_setg_errno(errp, -ret, "dump: failed to write elf header"); ++ elf_header->e_shoff = cpu_to_dump64(s, s->shdr_offset); ++ elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr)); ++ elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num); + } + } + +-static void write_elf32_header(DumpState *s, Error **errp) ++static void prepare_elf32_header(DumpState *s, Elf32_Ehdr *elf_header) + { + /* + * phnum in the elf header is 16 bit, if we have more segments we +@@ -175,28 +168,45 @@ static void write_elf32_header(DumpState *s, Error **errp) + * special section. + */ + uint16_t phnum = MIN(s->phdr_num, PN_XNUM); +- Elf32_Ehdr elf_header; +- int ret; + +- memset(&elf_header, 0, sizeof(Elf32_Ehdr)); +- memcpy(&elf_header, ELFMAG, SELFMAG); +- elf_header.e_ident[EI_CLASS] = ELFCLASS32; +- elf_header.e_ident[EI_DATA] = s->dump_info.d_endian; +- elf_header.e_ident[EI_VERSION] = EV_CURRENT; +- elf_header.e_type = cpu_to_dump16(s, ET_CORE); +- elf_header.e_machine = cpu_to_dump16(s, s->dump_info.d_machine); +- elf_header.e_version = cpu_to_dump32(s, EV_CURRENT); +- elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header)); +- elf_header.e_phoff = cpu_to_dump32(s, s->phdr_offset); +- elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr)); +- elf_header.e_phnum = cpu_to_dump16(s, phnum); ++ memset(elf_header, 0, sizeof(Elf32_Ehdr)); ++ memcpy(elf_header, ELFMAG, SELFMAG); ++ elf_header->e_ident[EI_CLASS] = ELFCLASS32; ++ elf_header->e_ident[EI_DATA] = s->dump_info.d_endian; ++ elf_header->e_ident[EI_VERSION] = EV_CURRENT; ++ elf_header->e_type = cpu_to_dump16(s, ET_CORE); ++ elf_header->e_machine = cpu_to_dump16(s, s->dump_info.d_machine); ++ elf_header->e_version = cpu_to_dump32(s, EV_CURRENT); ++ elf_header->e_ehsize = cpu_to_dump16(s, sizeof(elf_header)); ++ elf_header->e_phoff = cpu_to_dump32(s, s->phdr_offset); ++ elf_header->e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr)); ++ elf_header->e_phnum = cpu_to_dump16(s, phnum); + if (s->shdr_num) { +- elf_header.e_shoff = cpu_to_dump32(s, s->shdr_offset); +- elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr)); +- elf_header.e_shnum = cpu_to_dump16(s, s->shdr_num); ++ elf_header->e_shoff = cpu_to_dump32(s, s->shdr_offset); ++ elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr)); ++ elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num); + } ++} + +- ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s); ++static void write_elf_header(DumpState *s, Error **errp) ++{ ++ Elf32_Ehdr elf32_header; ++ Elf64_Ehdr elf64_header; ++ size_t header_size; ++ void *header_ptr; ++ int ret; ++ ++ if (dump_is_64bit(s)) { ++ prepare_elf64_header(s, &elf64_header); ++ header_size = sizeof(elf64_header); ++ header_ptr = &elf64_header; ++ } else { ++ prepare_elf32_header(s, &elf32_header); ++ header_size = sizeof(elf32_header); ++ header_ptr = &elf32_header; ++ } ++ ++ ret = fd_write_vmcore(header_ptr, header_size, s); + if (ret < 0) { + error_setg_errno(errp, -ret, "dump: failed to write elf header"); + } +@@ -565,11 +575,7 @@ static void dump_begin(DumpState *s, Error **errp) + */ + + /* write elf header to vmcore */ +- if (dump_is_64bit(s)) { +- write_elf64_header(s, errp); +- } else { +- write_elf32_header(s, errp); +- } ++ write_elf_header(s, errp); + if (*errp) { + return; + } +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Use-ERRP_GUARD.patch b/SOURCES/kvm-dump-Use-ERRP_GUARD.patch new file mode 100644 index 0000000..1ef42ee --- /dev/null +++ b/SOURCES/kvm-dump-Use-ERRP_GUARD.patch @@ -0,0 +1,420 @@ +From 4ca61efe246d62d420eb332655c0c8ead4cc762b Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Wed, 30 Mar 2022 12:35:55 +0000 +Subject: [PATCH 13/42] dump: Use ERRP_GUARD() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [13/41] f735cd1dab0230000cfadd878765fdf4647b239c + +Let's move to the new way of handling errors before changing the dump +code. This patch has mostly been generated by the coccinelle script +scripts/coccinelle/errp-guard.cocci. + +Signed-off-by: Janosch Frank +Reviewed-by: Richard Henderson +Reviewed-by: Marc-André Lureau +Message-Id: <20220330123603.107120-2-frankja@linux.ibm.com> +(cherry picked from commit 86a518bba4f4d7c9016fc5b104fe1e58b00ad756) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 144 ++++++++++++++++++++++------------------------------ + 1 file changed, 61 insertions(+), 83 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 662d0a62cd..9876123f2e 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -390,23 +390,21 @@ static void write_data(DumpState *s, void *buf, int length, Error **errp) + static void write_memory(DumpState *s, GuestPhysBlock *block, ram_addr_t start, + int64_t size, Error **errp) + { ++ ERRP_GUARD(); + int64_t i; +- Error *local_err = NULL; + + for (i = 0; i < size / s->dump_info.page_size; i++) { + write_data(s, block->host_addr + start + i * s->dump_info.page_size, +- s->dump_info.page_size, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ s->dump_info.page_size, errp); ++ if (*errp) { + return; + } + } + + if ((size % s->dump_info.page_size) != 0) { + write_data(s, block->host_addr + start + i * s->dump_info.page_size, +- size % s->dump_info.page_size, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ size % s->dump_info.page_size, errp); ++ if (*errp) { + return; + } + } +@@ -476,11 +474,11 @@ static void get_offset_range(hwaddr phys_addr, + + static void write_elf_loads(DumpState *s, Error **errp) + { ++ ERRP_GUARD(); + hwaddr offset, filesz; + MemoryMapping *memory_mapping; + uint32_t phdr_index = 1; + uint32_t max_index; +- Error *local_err = NULL; + + if (s->have_section) { + max_index = s->sh_info; +@@ -494,14 +492,13 @@ static void write_elf_loads(DumpState *s, Error **errp) + s, &offset, &filesz); + if (s->dump_info.d_class == ELFCLASS64) { + write_elf64_load(s, memory_mapping, phdr_index++, offset, +- filesz, &local_err); ++ filesz, errp); + } else { + write_elf32_load(s, memory_mapping, phdr_index++, offset, +- filesz, &local_err); ++ filesz, errp); + } + +- if (local_err) { +- error_propagate(errp, local_err); ++ if (*errp) { + return; + } + +@@ -514,7 +511,7 @@ static void write_elf_loads(DumpState *s, Error **errp) + /* write elf header, PT_NOTE and elf note to vmcore. */ + static void dump_begin(DumpState *s, Error **errp) + { +- Error *local_err = NULL; ++ ERRP_GUARD(); + + /* + * the vmcore's format is: +@@ -542,73 +539,64 @@ static void dump_begin(DumpState *s, Error **errp) + + /* write elf header to vmcore */ + if (s->dump_info.d_class == ELFCLASS64) { +- write_elf64_header(s, &local_err); ++ write_elf64_header(s, errp); + } else { +- write_elf32_header(s, &local_err); ++ write_elf32_header(s, errp); + } +- if (local_err) { +- error_propagate(errp, local_err); ++ if (*errp) { + return; + } + + if (s->dump_info.d_class == ELFCLASS64) { + /* write PT_NOTE to vmcore */ +- write_elf64_note(s, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_elf64_note(s, errp); ++ if (*errp) { + return; + } + + /* write all PT_LOAD to vmcore */ +- write_elf_loads(s, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_elf_loads(s, errp); ++ if (*errp) { + return; + } + + /* write section to vmcore */ + if (s->have_section) { +- write_elf_section(s, 1, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_elf_section(s, 1, errp); ++ if (*errp) { + return; + } + } + + /* write notes to vmcore */ +- write_elf64_notes(fd_write_vmcore, s, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_elf64_notes(fd_write_vmcore, s, errp); ++ if (*errp) { + return; + } + } else { + /* write PT_NOTE to vmcore */ +- write_elf32_note(s, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_elf32_note(s, errp); ++ if (*errp) { + return; + } + + /* write all PT_LOAD to vmcore */ +- write_elf_loads(s, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_elf_loads(s, errp); ++ if (*errp) { + return; + } + + /* write section to vmcore */ + if (s->have_section) { +- write_elf_section(s, 0, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_elf_section(s, 0, errp); ++ if (*errp) { + return; + } + } + + /* write notes to vmcore */ +- write_elf32_notes(fd_write_vmcore, s, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_elf32_notes(fd_write_vmcore, s, errp); ++ if (*errp) { + return; + } + } +@@ -644,9 +632,9 @@ static int get_next_block(DumpState *s, GuestPhysBlock *block) + /* write all memory to vmcore */ + static void dump_iterate(DumpState *s, Error **errp) + { ++ ERRP_GUARD(); + GuestPhysBlock *block; + int64_t size; +- Error *local_err = NULL; + + do { + block = s->next_block; +@@ -658,9 +646,8 @@ static void dump_iterate(DumpState *s, Error **errp) + size -= block->target_end - (s->begin + s->length); + } + } +- write_memory(s, block, s->start, size, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_memory(s, block, s->start, size, errp); ++ if (*errp) { + return; + } + +@@ -669,11 +656,10 @@ static void dump_iterate(DumpState *s, Error **errp) + + static void create_vmcore(DumpState *s, Error **errp) + { +- Error *local_err = NULL; ++ ERRP_GUARD(); + +- dump_begin(s, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ dump_begin(s, errp); ++ if (*errp) { + return; + } + +@@ -810,6 +796,7 @@ static bool note_name_equal(DumpState *s, + /* write common header, sub header and elf note to vmcore */ + static void create_header32(DumpState *s, Error **errp) + { ++ ERRP_GUARD(); + DiskDumpHeader32 *dh = NULL; + KdumpSubHeader32 *kh = NULL; + size_t size; +@@ -818,7 +805,6 @@ static void create_header32(DumpState *s, Error **errp) + uint32_t bitmap_blocks; + uint32_t status = 0; + uint64_t offset_note; +- Error *local_err = NULL; + + /* write common header, the version of kdump-compressed format is 6th */ + size = sizeof(DiskDumpHeader32); +@@ -894,9 +880,8 @@ static void create_header32(DumpState *s, Error **errp) + s->note_buf_offset = 0; + + /* use s->note_buf to store notes temporarily */ +- write_elf32_notes(buf_write_note, s, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_elf32_notes(buf_write_note, s, errp); ++ if (*errp) { + goto out; + } + if (write_buffer(s->fd, offset_note, s->note_buf, +@@ -922,6 +907,7 @@ out: + /* write common header, sub header and elf note to vmcore */ + static void create_header64(DumpState *s, Error **errp) + { ++ ERRP_GUARD(); + DiskDumpHeader64 *dh = NULL; + KdumpSubHeader64 *kh = NULL; + size_t size; +@@ -930,7 +916,6 @@ static void create_header64(DumpState *s, Error **errp) + uint32_t bitmap_blocks; + uint32_t status = 0; + uint64_t offset_note; +- Error *local_err = NULL; + + /* write common header, the version of kdump-compressed format is 6th */ + size = sizeof(DiskDumpHeader64); +@@ -1006,9 +991,8 @@ static void create_header64(DumpState *s, Error **errp) + s->note_buf_offset = 0; + + /* use s->note_buf to store notes temporarily */ +- write_elf64_notes(buf_write_note, s, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_elf64_notes(buf_write_note, s, errp); ++ if (*errp) { + goto out; + } + +@@ -1472,8 +1456,8 @@ out: + + static void create_kdump_vmcore(DumpState *s, Error **errp) + { ++ ERRP_GUARD(); + int ret; +- Error *local_err = NULL; + + /* + * the kdump-compressed format is: +@@ -1503,21 +1487,18 @@ static void create_kdump_vmcore(DumpState *s, Error **errp) + return; + } + +- write_dump_header(s, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_dump_header(s, errp); ++ if (*errp) { + return; + } + +- write_dump_bitmap(s, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_dump_bitmap(s, errp); ++ if (*errp) { + return; + } + +- write_dump_pages(s, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_dump_pages(s, errp); ++ if (*errp) { + return; + } + +@@ -1647,10 +1628,10 @@ static void dump_init(DumpState *s, int fd, bool has_format, + DumpGuestMemoryFormat format, bool paging, bool has_filter, + int64_t begin, int64_t length, Error **errp) + { ++ ERRP_GUARD(); + VMCoreInfoState *vmci = vmcoreinfo_find(); + CPUState *cpu; + int nr_cpus; +- Error *err = NULL; + int ret; + + s->has_format = has_format; +@@ -1769,9 +1750,8 @@ static void dump_init(DumpState *s, int fd, bool has_format, + + /* get memory mapping */ + if (paging) { +- qemu_get_guest_memory_mapping(&s->list, &s->guest_phys_blocks, &err); +- if (err != NULL) { +- error_propagate(errp, err); ++ qemu_get_guest_memory_mapping(&s->list, &s->guest_phys_blocks, errp); ++ if (*errp) { + goto cleanup; + } + } else { +@@ -1870,33 +1850,32 @@ cleanup: + /* this operation might be time consuming. */ + static void dump_process(DumpState *s, Error **errp) + { +- Error *local_err = NULL; ++ ERRP_GUARD(); + DumpQueryResult *result = NULL; + + if (s->has_format && s->format == DUMP_GUEST_MEMORY_FORMAT_WIN_DMP) { + #ifdef TARGET_X86_64 +- create_win_dump(s, &local_err); ++ create_win_dump(s, errp); + #endif + } else if (s->has_format && s->format != DUMP_GUEST_MEMORY_FORMAT_ELF) { +- create_kdump_vmcore(s, &local_err); ++ create_kdump_vmcore(s, errp); + } else { +- create_vmcore(s, &local_err); ++ create_vmcore(s, errp); + } + + /* make sure status is written after written_size updates */ + smp_wmb(); + qatomic_set(&s->status, +- (local_err ? DUMP_STATUS_FAILED : DUMP_STATUS_COMPLETED)); ++ (*errp ? DUMP_STATUS_FAILED : DUMP_STATUS_COMPLETED)); + + /* send DUMP_COMPLETED message (unconditionally) */ + result = qmp_query_dump(NULL); + /* should never fail */ + assert(result); +- qapi_event_send_dump_completed(result, !!local_err, (local_err ? +- error_get_pretty(local_err) : NULL)); ++ qapi_event_send_dump_completed(result, !!*errp, (*errp ? ++ error_get_pretty(*errp) : NULL)); + qapi_free_DumpQueryResult(result); + +- error_propagate(errp, local_err); + dump_cleanup(s); + } + +@@ -1925,10 +1904,10 @@ void qmp_dump_guest_memory(bool paging, const char *file, + int64_t length, bool has_format, + DumpGuestMemoryFormat format, Error **errp) + { ++ ERRP_GUARD(); + const char *p; + int fd = -1; + DumpState *s; +- Error *local_err = NULL; + bool detach_p = false; + + if (runstate_check(RUN_STATE_INMIGRATE)) { +@@ -2028,9 +2007,8 @@ void qmp_dump_guest_memory(bool paging, const char *file, + dump_state_prepare(s); + + dump_init(s, fd, has_format, format, paging, has_begin, +- begin, length, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ begin, length, errp); ++ if (*errp) { + qatomic_set(&s->status, DUMP_STATUS_FAILED); + return; + } +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Use-a-buffer-for-ELF-section-data-and-headers.patch b/SOURCES/kvm-dump-Use-a-buffer-for-ELF-section-data-and-headers.patch new file mode 100644 index 0000000..8ea0a7e --- /dev/null +++ b/SOURCES/kvm-dump-Use-a-buffer-for-ELF-section-data-and-headers.patch @@ -0,0 +1,150 @@ +From a918c7305ec7c68e8bc37b449f71e75d84124cd0 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Mon, 17 Oct 2022 08:38:13 +0000 +Subject: [PATCH 32/42] dump: Use a buffer for ELF section data and headers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [32/41] e1a03e202e67764581e486f37e13e479200e5846 + +Currently we're writing the NULL section header if we overflow the +physical header number in the ELF header. But in the future we'll add +custom section headers AND section data. + +To facilitate this we need to rearange section handling a bit. As with +the other ELF headers we split the code into a prepare and a write +step. + +Signed-off-by: Janosch Frank +Reviewed-by: Marc-André Lureau +Message-Id: <20221017083822.43118-2-frankja@linux.ibm.com> +(cherry picked from commit e41ed29bcee5cb16715317bcf290f6b5c196eb0a) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 75 +++++++++++++++++++++++++++++-------------- + include/sysemu/dump.h | 2 ++ + 2 files changed, 53 insertions(+), 24 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 88177fa886..4142b4cc0c 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -381,31 +381,60 @@ static void write_elf_phdr_note(DumpState *s, Error **errp) + } + } + +-static void write_elf_section(DumpState *s, int type, Error **errp) ++static void prepare_elf_section_hdr_zero(DumpState *s) + { +- Elf32_Shdr shdr32; +- Elf64_Shdr shdr64; +- int shdr_size; +- void *shdr; +- int ret; ++ if (dump_is_64bit(s)) { ++ Elf64_Shdr *shdr64 = s->elf_section_hdrs; + +- if (type == 0) { +- shdr_size = sizeof(Elf32_Shdr); +- memset(&shdr32, 0, shdr_size); +- shdr32.sh_info = cpu_to_dump32(s, s->phdr_num); +- shdr = &shdr32; ++ shdr64->sh_info = cpu_to_dump32(s, s->phdr_num); + } else { +- shdr_size = sizeof(Elf64_Shdr); +- memset(&shdr64, 0, shdr_size); +- shdr64.sh_info = cpu_to_dump32(s, s->phdr_num); +- shdr = &shdr64; ++ Elf32_Shdr *shdr32 = s->elf_section_hdrs; ++ ++ shdr32->sh_info = cpu_to_dump32(s, s->phdr_num); ++ } ++} ++ ++static void prepare_elf_section_hdrs(DumpState *s) ++{ ++ size_t len, sizeof_shdr; ++ ++ /* ++ * Section ordering: ++ * - HDR zero ++ */ ++ sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr); ++ len = sizeof_shdr * s->shdr_num; ++ s->elf_section_hdrs = g_malloc0(len); ++ ++ /* ++ * The first section header is ALWAYS a special initial section ++ * header. ++ * ++ * The header should be 0 with one exception being that if ++ * phdr_num is PN_XNUM then the sh_info field contains the real ++ * number of segment entries. ++ * ++ * As we zero allocate the buffer we will only need to modify ++ * sh_info for the PN_XNUM case. ++ */ ++ if (s->phdr_num >= PN_XNUM) { ++ prepare_elf_section_hdr_zero(s); + } ++} + +- ret = fd_write_vmcore(shdr, shdr_size, s); ++static void write_elf_section_headers(DumpState *s, Error **errp) ++{ ++ size_t sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr); ++ int ret; ++ ++ prepare_elf_section_hdrs(s); ++ ++ ret = fd_write_vmcore(s->elf_section_hdrs, s->shdr_num * sizeof_shdr, s); + if (ret < 0) { +- error_setg_errno(errp, -ret, +- "dump: failed to write section header table"); ++ error_setg_errno(errp, -ret, "dump: failed to write section headers"); + } ++ ++ g_free(s->elf_section_hdrs); + } + + static void write_data(DumpState *s, void *buf, int length, Error **errp) +@@ -592,12 +621,10 @@ static void dump_begin(DumpState *s, Error **errp) + return; + } + +- /* write section to vmcore */ +- if (s->shdr_num) { +- write_elf_section(s, 1, errp); +- if (*errp) { +- return; +- } ++ /* write section headers to vmcore */ ++ write_elf_section_headers(s, errp); ++ if (*errp) { ++ return; + } + + /* write notes to vmcore */ +diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h +index b62513d87d..9995f65dc8 100644 +--- a/include/sysemu/dump.h ++++ b/include/sysemu/dump.h +@@ -177,6 +177,8 @@ typedef struct DumpState { + int64_t filter_area_begin; /* Start address of partial guest memory area */ + int64_t filter_area_length; /* Length of partial guest memory area */ + ++ void *elf_section_hdrs; /* Pointer to section header buffer */ ++ + uint8_t *note_buf; /* buffer for notes */ + size_t note_buf_offset; /* the writing place in note_buf */ + uint32_t nr_cpus; /* number of guest's cpu */ +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-Write-ELF-section-headers-right-after-ELF-heade.patch b/SOURCES/kvm-dump-Write-ELF-section-headers-right-after-ELF-heade.patch new file mode 100644 index 0000000..2efd686 --- /dev/null +++ b/SOURCES/kvm-dump-Write-ELF-section-headers-right-after-ELF-heade.patch @@ -0,0 +1,104 @@ +From 987ede93fa4e3d058acddc19874e467faa116ede Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Mon, 17 Oct 2022 08:38:14 +0000 +Subject: [PATCH 33/42] dump: Write ELF section headers right after ELF header +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [33/41] e956040753533ac376e9763145192de1e216027d + +Let's start bundling the writes of the headers and of the data so we +have a clear ordering between them. Since the ELF header uses offsets +to the headers we can freely order them. + +Signed-off-by: Janosch Frank +Reviewed-by: Marc-André Lureau +Message-Id: <20221017083822.43118-3-frankja@linux.ibm.com> +(cherry picked from commit cb415fd61e48d52f81dcf38956e3f913651cff1c) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 31 ++++++++++++++----------------- + 1 file changed, 14 insertions(+), 17 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 4142b4cc0c..d17537d4e9 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -584,6 +584,8 @@ static void dump_begin(DumpState *s, Error **errp) + * -------------- + * | elf header | + * -------------- ++ * | sctn_hdr | ++ * -------------- + * | PT_NOTE | + * -------------- + * | PT_LOAD | +@@ -592,8 +594,6 @@ static void dump_begin(DumpState *s, Error **errp) + * -------------- + * | PT_LOAD | + * -------------- +- * | sec_hdr | +- * -------------- + * | elf note | + * -------------- + * | memory | +@@ -609,20 +609,20 @@ static void dump_begin(DumpState *s, Error **errp) + return; + } + +- /* write PT_NOTE to vmcore */ +- write_elf_phdr_note(s, errp); ++ /* write section headers to vmcore */ ++ write_elf_section_headers(s, errp); + if (*errp) { + return; + } + +- /* write all PT_LOADs to vmcore */ +- write_elf_phdr_loads(s, errp); ++ /* write PT_NOTE to vmcore */ ++ write_elf_phdr_note(s, errp); + if (*errp) { + return; + } + +- /* write section headers to vmcore */ +- write_elf_section_headers(s, errp); ++ /* write all PT_LOADs to vmcore */ ++ write_elf_phdr_loads(s, errp); + if (*errp) { + return; + } +@@ -1877,16 +1877,13 @@ static void dump_init(DumpState *s, int fd, bool has_format, + } + + if (dump_is_64bit(s)) { +- s->phdr_offset = sizeof(Elf64_Ehdr); +- s->shdr_offset = s->phdr_offset + sizeof(Elf64_Phdr) * s->phdr_num; +- s->note_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num; +- s->memory_offset = s->note_offset + s->note_size; ++ s->shdr_offset = sizeof(Elf64_Ehdr); ++ s->phdr_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num; ++ s->note_offset = s->phdr_offset + sizeof(Elf64_Phdr) * s->phdr_num; + } else { +- +- s->phdr_offset = sizeof(Elf32_Ehdr); +- s->shdr_offset = s->phdr_offset + sizeof(Elf32_Phdr) * s->phdr_num; +- s->note_offset = s->shdr_offset + sizeof(Elf32_Shdr) * s->shdr_num; +- s->memory_offset = s->note_offset + s->note_size; ++ s->shdr_offset = sizeof(Elf32_Ehdr); ++ s->phdr_offset = s->shdr_offset + sizeof(Elf32_Shdr) * s->shdr_num; ++ s->note_offset = s->phdr_offset + sizeof(Elf32_Phdr) * s->phdr_num; + } + + return; +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-fix-kdump-to-work-over-non-aligned-blocks.patch b/SOURCES/kvm-dump-fix-kdump-to-work-over-non-aligned-blocks.patch new file mode 100644 index 0000000..16e6e87 --- /dev/null +++ b/SOURCES/kvm-dump-fix-kdump-to-work-over-non-aligned-blocks.patch @@ -0,0 +1,173 @@ +From deaf4e0f5e90d227b7b9f3e5d1dff7fd0bc0206a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Mon, 5 Sep 2022 16:06:21 +0400 +Subject: [PATCH 31/42] dump: fix kdump to work over non-aligned blocks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [31/41] b307bdce4a4791fc30160fa2a1678bd238f2432e + +Rewrite get_next_page() to work over non-aligned blocks. When it +encounters non aligned addresses, it will try to fill a page provided by +the caller. + +This solves a kdump crash with "tpm-crb-cmd" RAM memory region, +qemu-kvm: ../dump/dump.c:1162: _Bool get_next_page(GuestPhysBlock **, +uint64_t *, uint8_t **, DumpState *): Assertion `(block->target_start & +~target_page_mask) == 0' failed. + +because: +guest_phys_block_add_section: target_start=00000000fed40080 target_end=00000000fed41000: added (count: 4) + +Fixes: +https://bugzilla.redhat.com/show_bug.cgi?id=2120480 + +Signed-off-by: Marc-André Lureau +Acked-by: David Hildenbrand +(cherry picked from commit 94d788408d2d5a6474c99b2c9cf06913b9db7c58) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 79 +++++++++++++++++++++++++++++++++++++---------------- + 1 file changed, 56 insertions(+), 23 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 1c49232390..88177fa886 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -1117,50 +1117,81 @@ static uint64_t dump_pfn_to_paddr(DumpState *s, uint64_t pfn) + } + + /* +- * exam every page and return the page frame number and the address of the page. +- * bufptr can be NULL. note: the blocks here is supposed to reflect guest-phys +- * blocks, so block->target_start and block->target_end should be interal +- * multiples of the target page size. ++ * Return the page frame number and the page content in *bufptr. bufptr can be ++ * NULL. If not NULL, *bufptr must contains a target page size of pre-allocated ++ * memory. This is not necessarily the memory returned. + */ + static bool get_next_page(GuestPhysBlock **blockptr, uint64_t *pfnptr, + uint8_t **bufptr, DumpState *s) + { + GuestPhysBlock *block = *blockptr; +- hwaddr addr, target_page_mask = ~((hwaddr)s->dump_info.page_size - 1); +- uint8_t *buf; ++ uint32_t page_size = s->dump_info.page_size; ++ uint8_t *buf = NULL, *hbuf; ++ hwaddr addr; + + /* block == NULL means the start of the iteration */ + if (!block) { + block = QTAILQ_FIRST(&s->guest_phys_blocks.head); + *blockptr = block; + addr = block->target_start; ++ *pfnptr = dump_paddr_to_pfn(s, addr); + } else { +- addr = dump_pfn_to_paddr(s, *pfnptr + 1); ++ *pfnptr += 1; ++ addr = dump_pfn_to_paddr(s, *pfnptr); + } + assert(block != NULL); + +- if ((addr >= block->target_start) && +- (addr + s->dump_info.page_size <= block->target_end)) { +- buf = block->host_addr + (addr - block->target_start); +- } else { +- /* the next page is in the next block */ +- block = QTAILQ_NEXT(block, next); +- *blockptr = block; +- if (!block) { +- return false; ++ while (1) { ++ if (addr >= block->target_start && addr < block->target_end) { ++ size_t n = MIN(block->target_end - addr, page_size - addr % page_size); ++ hbuf = block->host_addr + (addr - block->target_start); ++ if (!buf) { ++ if (n == page_size) { ++ /* this is a whole target page, go for it */ ++ assert(addr % page_size == 0); ++ buf = hbuf; ++ break; ++ } else if (bufptr) { ++ assert(*bufptr); ++ buf = *bufptr; ++ memset(buf, 0, page_size); ++ } else { ++ return true; ++ } ++ } ++ ++ memcpy(buf + addr % page_size, hbuf, n); ++ addr += n; ++ if (addr % page_size == 0) { ++ /* we filled up the page */ ++ break; ++ } ++ } else { ++ /* the next page is in the next block */ ++ *blockptr = block = QTAILQ_NEXT(block, next); ++ if (!block) { ++ break; ++ } ++ ++ addr = block->target_start; ++ /* are we still in the same page? */ ++ if (dump_paddr_to_pfn(s, addr) != *pfnptr) { ++ if (buf) { ++ /* no, but we already filled something earlier, return it */ ++ break; ++ } else { ++ /* else continue from there */ ++ *pfnptr = dump_paddr_to_pfn(s, addr); ++ } ++ } + } +- addr = block->target_start; +- buf = block->host_addr; + } + +- assert((block->target_start & ~target_page_mask) == 0); +- assert((block->target_end & ~target_page_mask) == 0); +- *pfnptr = dump_paddr_to_pfn(s, addr); + if (bufptr) { + *bufptr = buf; + } + +- return true; ++ return buf != NULL; + } + + static void write_dump_bitmap(DumpState *s, Error **errp) +@@ -1306,6 +1337,7 @@ static void write_dump_pages(DumpState *s, Error **errp) + uint8_t *buf; + GuestPhysBlock *block_iter = NULL; + uint64_t pfn_iter; ++ g_autofree uint8_t *page = NULL; + + /* get offset of page_desc and page_data in dump file */ + offset_desc = s->offset_page; +@@ -1341,12 +1373,13 @@ static void write_dump_pages(DumpState *s, Error **errp) + } + + offset_data += s->dump_info.page_size; ++ page = g_malloc(s->dump_info.page_size); + + /* + * dump memory to vmcore page by page. zero page will all be resided in the + * first page of page section + */ +- while (get_next_page(&block_iter, &pfn_iter, &buf, s)) { ++ for (buf = page; get_next_page(&block_iter, &pfn_iter, &buf, s); buf = page) { + /* check zero page */ + if (is_zero_page(buf, s->dump_info.page_size)) { + ret = write_cache(&page_desc, &pd_zero, sizeof(PageDescriptor), +-- +2.37.3 + diff --git a/SOURCES/kvm-dump-simplify-a-bit-kdump-get_next_page.patch b/SOURCES/kvm-dump-simplify-a-bit-kdump-get_next_page.patch new file mode 100644 index 0000000..9780d90 --- /dev/null +++ b/SOURCES/kvm-dump-simplify-a-bit-kdump-get_next_page.patch @@ -0,0 +1,75 @@ +From bb55fde4d8ca587e2ef52ce58a0c22e4d66a08dc Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Thu, 25 Aug 2022 12:40:12 +0400 +Subject: [PATCH 30/42] dump: simplify a bit kdump get_next_page() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [30/41] 417ac19fa96036e0242f40121ac6e87a9f3f70ba + +This should be functionally equivalent, but slightly easier to read, +with simplified paths and checks at the end of the function. + +The following patch is a major rewrite to get rid of the assert(). + +Signed-off-by: Marc-André Lureau +Reviewed-by: David Hildenbrand +(cherry picked from commit 08df343874fcddd260021a04ce3c5a34f2c48164) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 21 ++++++++------------- + 1 file changed, 8 insertions(+), 13 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index c2c1341ad7..1c49232390 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -1133,17 +1133,11 @@ static bool get_next_page(GuestPhysBlock **blockptr, uint64_t *pfnptr, + if (!block) { + block = QTAILQ_FIRST(&s->guest_phys_blocks.head); + *blockptr = block; +- assert((block->target_start & ~target_page_mask) == 0); +- assert((block->target_end & ~target_page_mask) == 0); +- *pfnptr = dump_paddr_to_pfn(s, block->target_start); +- if (bufptr) { +- *bufptr = block->host_addr; +- } +- return true; ++ addr = block->target_start; ++ } else { ++ addr = dump_pfn_to_paddr(s, *pfnptr + 1); + } +- +- *pfnptr = *pfnptr + 1; +- addr = dump_pfn_to_paddr(s, *pfnptr); ++ assert(block != NULL); + + if ((addr >= block->target_start) && + (addr + s->dump_info.page_size <= block->target_end)) { +@@ -1155,12 +1149,13 @@ static bool get_next_page(GuestPhysBlock **blockptr, uint64_t *pfnptr, + if (!block) { + return false; + } +- assert((block->target_start & ~target_page_mask) == 0); +- assert((block->target_end & ~target_page_mask) == 0); +- *pfnptr = dump_paddr_to_pfn(s, block->target_start); ++ addr = block->target_start; + buf = block->host_addr; + } + ++ assert((block->target_start & ~target_page_mask) == 0); ++ assert((block->target_end & ~target_page_mask) == 0); ++ *pfnptr = dump_paddr_to_pfn(s, addr); + if (bufptr) { + *bufptr = buf; + } +-- +2.37.3 + diff --git a/SOURCES/kvm-edu-add-smp_mb__after_rmw.patch b/SOURCES/kvm-edu-add-smp_mb__after_rmw.patch new file mode 100644 index 0000000..eea0eea --- /dev/null +++ b/SOURCES/kvm-edu-add-smp_mb__after_rmw.patch @@ -0,0 +1,61 @@ +From 7693449b235bbab6d32a1b87fa1d0e101c786f3b Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:11:14 -0500 +Subject: [PATCH 05/13] edu: add smp_mb__after_rmw() + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2168472 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Eric Auger +RH-Acked-by: Paolo Bonzini +RH-Acked-by: David Hildenbrand +RH-Commit: [5/10] 300901290e08b253b1278eedc39cd07c1e202b96 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472 + +commit 2482aeea4195ad84cf3d4e5b15b28ec5b420ed5a +Author: Paolo Bonzini +Date: Thu Mar 2 11:16:13 2023 +0100 + + edu: add smp_mb__after_rmw() + + Ensure ordering between clearing the COMPUTING flag and checking + IRQFACT, and between setting the IRQFACT flag and checking + COMPUTING. This ensures that no wakeups are lost. + + Reviewed-by: Richard Henderson + Reviewed-by: David Hildenbrand + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + hw/misc/edu.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/hw/misc/edu.c b/hw/misc/edu.c +index e935c418d4..a1f8bc77e7 100644 +--- a/hw/misc/edu.c ++++ b/hw/misc/edu.c +@@ -267,6 +267,8 @@ static void edu_mmio_write(void *opaque, hwaddr addr, uint64_t val, + case 0x20: + if (val & EDU_STATUS_IRQFACT) { + qatomic_or(&edu->status, EDU_STATUS_IRQFACT); ++ /* Order check of the COMPUTING flag after setting IRQFACT. */ ++ smp_mb__after_rmw(); + } else { + qatomic_and(&edu->status, ~EDU_STATUS_IRQFACT); + } +@@ -349,6 +351,9 @@ static void *edu_fact_thread(void *opaque) + qemu_mutex_unlock(&edu->thr_mutex); + qatomic_and(&edu->status, ~EDU_STATUS_COMPUTING); + ++ /* Clear COMPUTING flag before checking IRQFACT. */ ++ smp_mb__after_rmw(); ++ + if (qatomic_read(&edu->status) & EDU_STATUS_IRQFACT) { + qemu_mutex_lock_iothread(); + edu_raise_irq(edu, FACT_IRQ); +-- +2.37.3 + diff --git a/SOURCES/kvm-hw-acpi-Add-ospm_status-hook-implementation-for-acpi.patch b/SOURCES/kvm-hw-acpi-Add-ospm_status-hook-implementation-for-acpi.patch new file mode 100644 index 0000000..5070722 --- /dev/null +++ b/SOURCES/kvm-hw-acpi-Add-ospm_status-hook-implementation-for-acpi.patch @@ -0,0 +1,81 @@ +From edead46187b1e55ad5e238332780aef19f1bc214 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 9 Nov 2022 18:41:18 -0500 +Subject: [PATCH 1/2] hw/acpi: Add ospm_status hook implementation for acpi-ged + +RH-Author: Jon Maloy +RH-MergeRequest: 228: qemu-kvm: backport some aarch64 fixes +RH-Bugzilla: 2132609 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Igor Mammedov +RH-Acked-by: Eric Auger +RH-Acked-by: Gavin Shan +RH-Commit: [1/2] 99730b1a27666ca745dc28d90751c938d43f1682 (jmaloy/qemu-kvm) + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2132609 +Upstream: Merged + +commit d4424bebceaa8ffbc23060ce45e52a9bb817e3c9 +Author: Keqian Zhu +Date: Tue Aug 16 17:49:57 2022 +0800 + + hw/acpi: Add ospm_status hook implementation for acpi-ged + + Setup an ARM virtual machine of machine virt and execute qmp "query-acpi-ospm-status" + causes segmentation fault with following dumpstack: + #1 0x0000aaaaab64235c in qmp_query_acpi_ospm_status (errp=errp@entry=0xfffffffff030) at ../monitor/qmp-cmds.c:312 + #2 0x0000aaaaabfc4e20 in qmp_marshal_query_acpi_ospm_status (args=, ret=0xffffea4ffe90, errp=0xffffea4ffe88) at qapi/qapi-commands-acpi.c:63 + #3 0x0000aaaaabff8ba0 in do_qmp_dispatch_bh (opaque=0xffffea4ffe98) at ../qapi/qmp-dispatch.c:128 + #4 0x0000aaaaac02e594 in aio_bh_call (bh=0xffffe0004d80) at ../util/async.c:150 + #5 aio_bh_poll (ctx=ctx@entry=0xaaaaad0f6040) at ../util/async.c:178 + #6 0x0000aaaaac00bd40 in aio_dispatch (ctx=ctx@entry=0xaaaaad0f6040) at ../util/aio-posix.c:421 + #7 0x0000aaaaac02e010 in aio_ctx_dispatch (source=0xaaaaad0f6040, callback=, user_data=) at ../util/async.c:320 + #8 0x0000fffff76f6884 in g_main_context_dispatch () at /usr/lib64/libglib-2.0.so.0 + #9 0x0000aaaaac0452d4 in glib_pollfds_poll () at ../util/main-loop.c:297 + #10 os_host_main_loop_wait (timeout=0) at ../util/main-loop.c:320 + #11 main_loop_wait (nonblocking=nonblocking@entry=0) at ../util/main-loop.c:596 + #12 0x0000aaaaab5c9e50 in qemu_main_loop () at ../softmmu/runstate.c:734 + #13 0x0000aaaaab185370 in qemu_main (argc=argc@entry=47, argv=argv@entry=0xfffffffff518, envp=envp@entry=0x0) at ../softmmu/main.c:38 + #14 0x0000aaaaab16f99c in main (argc=47, argv=0xfffffffff518) at ../softmmu/main.c:47 + + Fixes: ebb62075021a ("hw/acpi: Add ACPI Generic Event Device Support") + Signed-off-by: Keqian Zhu + Reviewed-by: Igor Mammedov + Message-id: 20220816094957.31700-1-zhukeqian1@huawei.com + Signed-off-by: Peter Maydell + +(cherry picked from commit d4424bebceaa8ffbc23060ce45e52a9bb817e3c9) +Signed-off-by: Jon Maloy +--- + hw/acpi/generic_event_device.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c +index e28457a7d1..a3d31631fe 100644 +--- a/hw/acpi/generic_event_device.c ++++ b/hw/acpi/generic_event_device.c +@@ -267,6 +267,13 @@ static void acpi_ged_unplug_cb(HotplugHandler *hotplug_dev, + } + } + ++static void acpi_ged_ospm_status(AcpiDeviceIf *adev, ACPIOSTInfoList ***list) ++{ ++ AcpiGedState *s = ACPI_GED(adev); ++ ++ acpi_memory_ospm_status(&s->memhp_state, list); ++} ++ + static void acpi_ged_send_event(AcpiDeviceIf *adev, AcpiEventStatusBits ev) + { + AcpiGedState *s = ACPI_GED(adev); +@@ -409,6 +416,7 @@ static void acpi_ged_class_init(ObjectClass *class, void *data) + hc->unplug_request = acpi_ged_unplug_request_cb; + hc->unplug = acpi_ged_unplug_cb; + ++ adevc->ospm_status = acpi_ged_ospm_status; + adevc->send_event = acpi_ged_send_event; + } + +-- +2.37.3 + diff --git a/SOURCES/kvm-hw-arm-virt-Add-8.6-machine-type.patch b/SOURCES/kvm-hw-arm-virt-Add-8.6-machine-type.patch new file mode 100644 index 0000000..f3c5492 --- /dev/null +++ b/SOURCES/kvm-hw-arm-virt-Add-8.6-machine-type.patch @@ -0,0 +1,57 @@ +From a154eb35d738aecf552d57d99499facce1c834ba Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Mon, 20 Dec 2021 15:24:24 +0100 +Subject: [PATCH 4/6] hw/arm/virt: Add 8.6 machine type + +RH-Author: Eric Auger +RH-MergeRequest: 95: hw/arm/virt: Add virt-rhel8.6.0 machine type +RH-Commit: [4/5] d0df3e796d3e9a6ca2af1e3b33fc6021bcac5d09 +RH-Bugzilla: 2031039 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Andrew Jones +RH-Acked-by: Gavin Shan + +branch: rhel-8.6.0 +Brew: 42212069 +Upstream: no + +Add 8.6 machine type. + +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 6a4173b6c3..c9c17b9d45 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3228,17 +3228,23 @@ static void rhel_machine_init(void) + } + type_init(rhel_machine_init); + ++static void rhel860_virt_options(MachineClass *mc) ++{ ++ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++} ++DEFINE_RHEL_MACHINE_AS_LATEST(8, 6, 0) ++ + static void rhel850_virt_options(MachineClass *mc) + { + VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); + +- compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++ rhel860_virt_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); + mc->smp_props.prefer_sockets = true; + vmc->no_cpu_topology = true; + vmc->no_tcg_its = true; + } +-DEFINE_RHEL_MACHINE_AS_LATEST(8, 5, 0) ++DEFINE_RHEL_MACHINE(8, 5, 0) + + static void rhel840_virt_options(MachineClass *mc) + { +-- +2.27.0 + diff --git a/SOURCES/kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch b/SOURCES/kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch new file mode 100644 index 0000000..679f436 --- /dev/null +++ b/SOURCES/kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch @@ -0,0 +1,86 @@ +From 1b4a8daf695a81f18ba70bea91b199da215da4e1 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Wed, 5 Jan 2022 16:17:10 +0100 +Subject: [PATCH 5/6] hw/arm/virt: Check no_tcg_its and minor style changes + +RH-Author: Eric Auger +RH-MergeRequest: 95: hw/arm/virt: Add virt-rhel8.6.0 machine type +RH-Commit: [5/5] 57e77446ff5a1a7efe152b2c907c0a0ca5487ab7 +RH-Bugzilla: 2031039 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Andrew Jones +RH-Acked-by: Gavin Shan + +branch: rhel-8.6.0 +Brew: 42212069 +Upstream: no + +Truly allow TCG ITS instantiation according to the no_tcg_its +class flag. Otherwise it is always set to false. + +We also take benefit of this patch to do some minor non +functional style changes to be closer to the upstream code. + +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index c9c17b9d45..dbf0a6d62f 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3157,6 +3157,7 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "in ACPI table header." + "The string may be up to 6 bytes in size"); + ++ + object_class_property_add_str(oc, "x-oem-table-id", + virt_get_oem_table_id, + virt_set_oem_table_id); +@@ -3164,6 +3165,7 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Override the default value of field OEM Table ID " + "in ACPI table header." + "The string may be up to 8 bytes in size"); ++ + } + + static void rhel_virt_instance_init(Object *obj) +@@ -3188,24 +3190,32 @@ static void rhel_virt_instance_init(Object *obj) + } else { + /* Default allows ITS instantiation */ + vms->its = true; ++ ++ if (vmc->no_tcg_its) { ++ vms->tcg_its = false; ++ } else { ++ vms->tcg_its = true; ++ } + } + + /* Default disallows iommu instantiation */ + vms->iommu = VIRT_IOMMU_NONE; + ++ /* The default root bus is attached to iommu by default */ ++ vms->default_bus_bypass_iommu = false; ++ + /* Default disallows RAS instantiation and is non-configurable for RHEL */ + vms->ras = false; + + /* MTE is disabled by default and non-configurable for RHEL */ + vms->mte = false; + +- vms->default_bus_bypass_iommu = false; + vms->irqmap = a15irqmap; + + virt_flash_create(vms); ++ + vms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); + vms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); +- + } + + static const TypeInfo rhel_machine_info = { +-- +2.27.0 + diff --git a/SOURCES/kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch b/SOURCES/kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch new file mode 100644 index 0000000..734756d --- /dev/null +++ b/SOURCES/kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch @@ -0,0 +1,78 @@ +From 8d5b57798d079307a98f6be5e1f6d28d1937a2fe Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Mon, 20 Dec 2021 15:50:44 +0100 +Subject: [PATCH 1/6] hw/arm/virt: Register "iommu" as a class property + +RH-Author: Eric Auger +RH-MergeRequest: 95: hw/arm/virt: Add virt-rhel8.6.0 machine type +RH-Commit: [1/5] 74b01bb90213493db700d5bdf81dd99892571972 +RH-Bugzilla: 2031039 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Andrew Jones +RH-Acked-by: Gavin Shan + +branch: rhel-8.6.0 +Brew: 42212069 +Upstream: no + +Register the "iommu" option as a class property. This mirrors what +was done in upstream commit b91def7b ("arm/virt: Register +most properties as class properties"). + +While we are at it we also move the "x-oem-id" and "x-oem-table-id" +registrations at the very end of the rhel_machine_class_init() +function. This makes our life easier when comparing with upstream. + +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 20 ++++++++++++-------- + 1 file changed, 12 insertions(+), 8 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index e8941afd01..684ffce52e 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3131,6 +3131,18 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Set GIC version. " + "Valid values are 2, 3, host and max"); + ++ object_class_property_add_str(oc, "iommu", virt_get_iommu, virt_set_iommu); ++ object_class_property_set_description(oc, "iommu", ++ "Set the IOMMU type. " ++ "Valid values are none and smmuv3"); ++ ++ object_class_property_add_bool(oc, "default_bus_bypass_iommu", ++ virt_get_default_bus_bypass_iommu, ++ virt_set_default_bus_bypass_iommu); ++ object_class_property_set_description(oc, "default_bus_bypass_iommu", ++ "Set on/off to enable/disable " ++ "bypass_iommu for default root bus"); ++ + object_class_property_add_str(oc, "x-oem-id", + virt_get_oem_id, + virt_set_oem_id); +@@ -3146,10 +3158,6 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Override the default value of field OEM Table ID " + "in ACPI table header." + "The string may be up to 8 bytes in size"); +- object_class_property_add_bool(oc, "default_bus_bypass_iommu", +- virt_get_default_bus_bypass_iommu, +- virt_set_default_bus_bypass_iommu); +- + } + + static void rhel_virt_instance_init(Object *obj) +@@ -3183,10 +3191,6 @@ static void rhel_virt_instance_init(Object *obj) + + /* Default disallows iommu instantiation */ + vms->iommu = VIRT_IOMMU_NONE; +- object_property_add_str(obj, "iommu", virt_get_iommu, virt_set_iommu); +- object_property_set_description(obj, "iommu", +- "Set the IOMMU type. " +- "Valid values are none and smmuv3"); + + /* Default disallows RAS instantiation and is non-configurable for RHEL */ + vms->ras = false; +-- +2.27.0 + diff --git a/SOURCES/kvm-hw-arm-virt-Register-its-as-a-class-property.patch b/SOURCES/kvm-hw-arm-virt-Register-its-as-a-class-property.patch new file mode 100644 index 0000000..91b353a --- /dev/null +++ b/SOURCES/kvm-hw-arm-virt-Register-its-as-a-class-property.patch @@ -0,0 +1,57 @@ +From 07e2094cd86c1be349c0bdda69acd1857afacb66 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Mon, 20 Dec 2021 16:04:59 +0100 +Subject: [PATCH 2/6] hw/arm/virt: Register "its" as a class property + +RH-Author: Eric Auger +RH-MergeRequest: 95: hw/arm/virt: Add virt-rhel8.6.0 machine type +RH-Commit: [2/5] 4ddfa57495578127770f93689c4d9f111a12b91c +RH-Bugzilla: 2031039 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Andrew Jones +RH-Acked-by: Gavin Shan + +branch: rhel-8.6.0 +Brew: 42212069 +Upstream: no + +Register "its" as a class property. This mirrors what was done +in commit 27edeeaafe43 ("virt: Register "its" as class property"). + +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 684ffce52e..d679391eb0 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3143,6 +3143,12 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Set on/off to enable/disable " + "bypass_iommu for default root bus"); + ++ object_class_property_add_bool(oc, "its", virt_get_its, ++ virt_set_its); ++ object_class_property_set_description(oc, "its", ++ "Set on/off to enable/disable " ++ "ITS instantiation"); ++ + object_class_property_add_str(oc, "x-oem-id", + virt_get_oem_id, + virt_set_oem_id); +@@ -3182,11 +3188,6 @@ static void rhel_virt_instance_init(Object *obj) + } else { + /* Default allows ITS instantiation */ + vms->its = true; +- object_property_add_bool(obj, "its", virt_get_its, +- virt_set_its); +- object_property_set_description(obj, "its", +- "Set on/off to enable/disable " +- "ITS instantiation"); + } + + /* Default disallows iommu instantiation */ +-- +2.27.0 + diff --git a/SOURCES/kvm-hw-arm-virt-Rename-default_bus_bypass_iommu.patch b/SOURCES/kvm-hw-arm-virt-Rename-default_bus_bypass_iommu.patch new file mode 100644 index 0000000..25e20ea --- /dev/null +++ b/SOURCES/kvm-hw-arm-virt-Rename-default_bus_bypass_iommu.patch @@ -0,0 +1,46 @@ +From e896ba2bfbb613576ec3fbe5b948a326ac06193d Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Mon, 20 Dec 2021 15:58:38 +0100 +Subject: [PATCH 3/6] hw/arm/virt: Rename default_bus_bypass_iommu + +RH-Author: Eric Auger +RH-MergeRequest: 95: hw/arm/virt: Add virt-rhel8.6.0 machine type +RH-Commit: [3/5] 3ed0425391dab7cf14c6e66fc1b2430be1152d6c +RH-Bugzilla: 2031039 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Andrew Jones +RH-Acked-by: Gavin Shan + +branch: rhel-8.6.0 +Brew: 42212069 +Upstream: no + +Rename "default_bus_bypass_iommu" into "default-bus-bypass-iommu". +This mirrors what was done in upstream commit: +9dad363a223 ("hw/arm/virt: Rename default_bus_bypass_iommu") + +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index d679391eb0..6a4173b6c3 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3136,10 +3136,10 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Set the IOMMU type. " + "Valid values are none and smmuv3"); + +- object_class_property_add_bool(oc, "default_bus_bypass_iommu", ++ object_class_property_add_bool(oc, "default-bus-bypass-iommu", + virt_get_default_bus_bypass_iommu, + virt_set_default_bus_bypass_iommu); +- object_class_property_set_description(oc, "default_bus_bypass_iommu", ++ object_class_property_set_description(oc, "default-bus-bypass-iommu", + "Set on/off to enable/disable " + "bypass_iommu for default root bus"); + +-- +2.27.0 + diff --git a/SOURCES/kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch b/SOURCES/kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch new file mode 100644 index 0000000..01e4097 --- /dev/null +++ b/SOURCES/kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch @@ -0,0 +1,97 @@ +From fe4abbda80eea7f65b6b5cc544a806fb6e064917 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Thu, 18 Nov 2021 12:57:32 +0100 +Subject: [PATCH 2/3] hw/block/fdc: Prevent end-of-track overrun + (CVE-2021-3507) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 194: hw/block/fdc: Prevent end-of-track overrun (CVE-2021-3507) +RH-Commit: [1/2] 31fa0351382b4ca5bd989b09e4d811ae73040673 (jmaloy/qemu-kvm) +RH-Bugzilla: 1951521 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Thomas Huth +RH-Acked-by: Hanna Reitz + +Per the 82078 datasheet, if the end-of-track (EOT byte in +the FIFO) is more than the number of sectors per side, the +command is terminated unsuccessfully: + +* 5.2.5 DATA TRANSFER TERMINATION + + The 82078 supports terminal count explicitly through + the TC pin and implicitly through the underrun/over- + run and end-of-track (EOT) functions. For full sector + transfers, the EOT parameter can define the last + sector to be transferred in a single or multisector + transfer. If the last sector to be transferred is a par- + tial sector, the host can stop transferring the data in + mid-sector, and the 82078 will continue to complete + the sector as if a hardware TC was received. The + only difference between these implicit functions and + TC is that they return "abnormal termination" result + status. Such status indications can be ignored if they + were expected. + +* 6.1.3 READ TRACK + + This command terminates when the EOT specified + number of sectors have been read. If the 82078 + does not find an I D Address Mark on the diskette + after the second· occurrence of a pulse on the + INDX# pin, then it sets the IC code in Status Regis- + ter 0 to "01" (Abnormal termination), sets the MA bit + in Status Register 1 to "1", and terminates the com- + mand. + +* 6.1.6 VERIFY + + Refer to Table 6-6 and Table 6-7 for information + concerning the values of MT and EC versus SC and + EOT value. + +* Table 6·6. Result Phase Table + +* Table 6-7. Verify Command Result Phase Table + +Fix by aborting the transfer when EOT > # Sectors Per Side. + +Cc: qemu-stable@nongnu.org +Cc: Hervé Poussineau +Fixes: baca51faff0 ("floppy driver: disk geometry auto detect") +Reported-by: Alexander Bulekov +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/339 +Signed-off-by: Philippe Mathieu-Daudé +Message-Id: <20211118115733.4038610-2-philmd@redhat.com> +Reviewed-by: Hanna Reitz +Signed-off-by: Kevin Wolf +(cherry picked from commit defac5e2fbddf8423a354ff0454283a2115e1367) +Signed-off-by: Jon Maloy +--- + hw/block/fdc.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/hw/block/fdc.c b/hw/block/fdc.c +index 97fa6de423..755a26c114 100644 +--- a/hw/block/fdc.c ++++ b/hw/block/fdc.c +@@ -1531,6 +1531,14 @@ static void fdctrl_start_transfer(FDCtrl *fdctrl, int direction) + int tmp; + fdctrl->data_len = 128 << (fdctrl->fifo[5] > 7 ? 7 : fdctrl->fifo[5]); + tmp = (fdctrl->fifo[6] - ks + 1); ++ if (tmp < 0) { ++ FLOPPY_DPRINTF("invalid EOT: %d\n", tmp); ++ fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM, FD_SR1_MA, 0x00); ++ fdctrl->fifo[3] = kt; ++ fdctrl->fifo[4] = kh; ++ fdctrl->fifo[5] = ks; ++ return; ++ } + if (fdctrl->fifo[0] & 0x80) + tmp += fdctrl->fifo[6]; + fdctrl->data_len *= tmp; +-- +2.35.3 + diff --git a/SOURCES/kvm-hw-display-qxl-Assert-memory-slot-fits-in-preallocat.patch b/SOURCES/kvm-hw-display-qxl-Assert-memory-slot-fits-in-preallocat.patch new file mode 100644 index 0000000..d4ca84f --- /dev/null +++ b/SOURCES/kvm-hw-display-qxl-Assert-memory-slot-fits-in-preallocat.patch @@ -0,0 +1,52 @@ +From 100f33ff8a1d55986e43b99ba8726abc29ee8d26 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Mon, 5 Dec 2022 15:32:55 -0500 +Subject: [PATCH 5/5] hw/display/qxl: Assert memory slot fits in preallocated + MemoryRegion +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 240: hw/display/qxl: Have qxl_log_command Return early if no log_cmd handler +RH-Bugzilla: 2148545 +RH-Acked-by: Gerd Hoffmann +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Marc-André Lureau +RH-Commit: [5/5] f809ce48e7989dd6547b7c8bf1a5efc3fdcacbac (jmaloy/jons-qemu-kvm) + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2148545 +CVE: CVE-2022-4144 +Upstream: Merged + +commit 86fdb0582c653a9824183679403a85f588260d62 +Author: Philippe Mathieu-Daudé +Date: Mon Nov 28 21:27:41 2022 +0100 + + hw/display/qxl: Assert memory slot fits in preallocated MemoryRegion + + Signed-off-by: Philippe Mathieu-Daudé + Signed-off-by: Stefan Hajnoczi + Message-Id: <20221128202741.4945-6-philmd@linaro.org> + +(cherry picked from commit 86fdb0582c653a9824183679403a85f588260d62) +Signed-off-by: Jon Maloy +--- + hw/display/qxl.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/display/qxl.c b/hw/display/qxl.c +index 2a4b2d4158..bcd9e8716a 100644 +--- a/hw/display/qxl.c ++++ b/hw/display/qxl.c +@@ -1372,6 +1372,7 @@ static int qxl_add_memslot(PCIQXLDevice *d, uint32_t slot_id, uint64_t delta, + qxl_set_guest_bug(d, "%s: pci_region = %d", __func__, pci_region); + return 1; + } ++ assert(guest_end - pci_start <= memory_region_size(mr)); + + virt_start = (intptr_t)memory_region_get_ram_ptr(mr); + memslot.slot_id = slot_id; +-- +2.37.3 + diff --git a/SOURCES/kvm-hw-display-qxl-Avoid-buffer-overrun-in-qxl_phys2virt.patch b/SOURCES/kvm-hw-display-qxl-Avoid-buffer-overrun-in-qxl_phys2virt.patch new file mode 100644 index 0000000..9163570 --- /dev/null +++ b/SOURCES/kvm-hw-display-qxl-Avoid-buffer-overrun-in-qxl_phys2virt.patch @@ -0,0 +1,130 @@ +From 4e1bfbe3a0a113fe3cf39336a9d7da4e8c2a21ea Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Mon, 5 Dec 2022 15:32:55 -0500 +Subject: [PATCH 4/5] hw/display/qxl: Avoid buffer overrun in qxl_phys2virt + (CVE-2022-4144) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 240: hw/display/qxl: Have qxl_log_command Return early if no log_cmd handler +RH-Bugzilla: 2148545 +RH-Acked-by: Gerd Hoffmann +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Marc-André Lureau +RH-Commit: [4/5] afe53f8d9b31c6fd8211fe172173151f3255e67c (jmaloy/jons-qemu-kvm) + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2148545 +CVE: CVE-2022-4144 +Upstream: Merged + +commit 6dbbf055148c6f1b7d8a3251a65bd6f3d1e1f622 +Author: Philippe Mathieu-Daudé +Date: Mon Nov 28 21:27:40 2022 +0100 + + hw/display/qxl: Avoid buffer overrun in qxl_phys2virt (CVE-2022-4144) + + Have qxl_get_check_slot_offset() return false if the requested + buffer size does not fit within the slot memory region. + + Similarly qxl_phys2virt() now returns NULL in such case, and + qxl_dirty_one_surface() aborts. + + This avoids buffer overrun in the host pointer returned by + memory_region_get_ram_ptr(). + + Fixes: CVE-2022-4144 (out-of-bounds read) + Reported-by: Wenxu Yin (@awxylitol) + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1336 + Signed-off-by: Philippe Mathieu-Daudé + Signed-off-by: Stefan Hajnoczi + Message-Id: <20221128202741.4945-5-philmd@linaro.org> + +(cherry picked from commit 6dbbf055148c6f1b7d8a3251a65bd6f3d1e1f622) +Signed-off-by: Jon Maloy +--- + hw/display/qxl.c | 27 +++++++++++++++++++++++---- + hw/display/qxl.h | 2 +- + 2 files changed, 24 insertions(+), 5 deletions(-) + +diff --git a/hw/display/qxl.c b/hw/display/qxl.c +index aa9065183e..2a4b2d4158 100644 +--- a/hw/display/qxl.c ++++ b/hw/display/qxl.c +@@ -1412,11 +1412,13 @@ static void qxl_reset_surfaces(PCIQXLDevice *d) + + /* can be also called from spice server thread context */ + static bool qxl_get_check_slot_offset(PCIQXLDevice *qxl, QXLPHYSICAL pqxl, +- uint32_t *s, uint64_t *o) ++ uint32_t *s, uint64_t *o, ++ size_t size_requested) + { + uint64_t phys = le64_to_cpu(pqxl); + uint32_t slot = (phys >> (64 - 8)) & 0xff; + uint64_t offset = phys & 0xffffffffffff; ++ uint64_t size_available; + + if (slot >= NUM_MEMSLOTS) { + qxl_set_guest_bug(qxl, "slot too large %d >= %d", slot, +@@ -1440,6 +1442,23 @@ static bool qxl_get_check_slot_offset(PCIQXLDevice *qxl, QXLPHYSICAL pqxl, + slot, offset, qxl->guest_slots[slot].size); + return false; + } ++ size_available = memory_region_size(qxl->guest_slots[slot].mr); ++ if (qxl->guest_slots[slot].offset + offset >= size_available) { ++ qxl_set_guest_bug(qxl, ++ "slot %d offset %"PRIu64" > region size %"PRIu64"\n", ++ slot, qxl->guest_slots[slot].offset + offset, ++ size_available); ++ return false; ++ } ++ size_available -= qxl->guest_slots[slot].offset + offset; ++ if (size_requested > size_available) { ++ qxl_set_guest_bug(qxl, ++ "slot %d offset %"PRIu64" size %zu: " ++ "overrun by %"PRIu64" bytes\n", ++ slot, offset, size_requested, ++ size_requested - size_available); ++ return false; ++ } + + *s = slot; + *o = offset; +@@ -1459,7 +1478,7 @@ void *qxl_phys2virt(PCIQXLDevice *qxl, QXLPHYSICAL pqxl, int group_id, + offset = le64_to_cpu(pqxl) & 0xffffffffffff; + return (void *)(intptr_t)offset; + case MEMSLOT_GROUP_GUEST: +- if (!qxl_get_check_slot_offset(qxl, pqxl, &slot, &offset)) { ++ if (!qxl_get_check_slot_offset(qxl, pqxl, &slot, &offset, size)) { + return NULL; + } + ptr = memory_region_get_ram_ptr(qxl->guest_slots[slot].mr); +@@ -1925,9 +1944,9 @@ static void qxl_dirty_one_surface(PCIQXLDevice *qxl, QXLPHYSICAL pqxl, + uint32_t slot; + bool rc; + +- rc = qxl_get_check_slot_offset(qxl, pqxl, &slot, &offset); +- assert(rc == true); + size = (uint64_t)height * abs(stride); ++ rc = qxl_get_check_slot_offset(qxl, pqxl, &slot, &offset, size); ++ assert(rc == true); + trace_qxl_surfaces_dirty(qxl->id, offset, size); + qxl_set_dirty(qxl->guest_slots[slot].mr, + qxl->guest_slots[slot].offset + offset, +diff --git a/hw/display/qxl.h b/hw/display/qxl.h +index c784315daa..89ca832cf9 100644 +--- a/hw/display/qxl.h ++++ b/hw/display/qxl.h +@@ -157,7 +157,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(PCIQXLDevice, PCI_QXL) + * + * Returns a host pointer to a buffer placed at offset @phys within the + * active slot @group_id of the PCI VGA RAM memory region associated with +- * the @qxl device. If the slot is inactive, or the offset is out ++ * the @qxl device. If the slot is inactive, or the offset + size are out + * of the memory region, returns NULL. + * + * Use with care; by the time this function returns, the returned pointer is +-- +2.37.3 + diff --git a/SOURCES/kvm-hw-display-qxl-Document-qxl_phys2virt.patch b/SOURCES/kvm-hw-display-qxl-Document-qxl_phys2virt.patch new file mode 100644 index 0000000..9bf2fe2 --- /dev/null +++ b/SOURCES/kvm-hw-display-qxl-Document-qxl_phys2virt.patch @@ -0,0 +1,70 @@ +From 068c531fb968ec04509b85f524d0745e6acf5449 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Mon, 5 Dec 2022 15:32:55 -0500 +Subject: [PATCH 2/5] hw/display/qxl: Document qxl_phys2virt() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 240: hw/display/qxl: Have qxl_log_command Return early if no log_cmd handler +RH-Bugzilla: 2148545 +RH-Acked-by: Gerd Hoffmann +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Marc-André Lureau +RH-Commit: [2/5] f84c0b379022c527fc2508a242443d86454944c0 (jmaloy/jons-qemu-kvm) + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2148545 +CVE: CVE-2022-4144 +Upstream: Merged + +commit b1901de83a9456cde26fc755f71ca2b7b3ef50fc +Author: Philippe Mathieu-Daudé +Date: Mon Nov 28 21:27:38 2022 +0100 + + hw/display/qxl: Document qxl_phys2virt() + + Reviewed-by: Marc-André Lureau + Signed-off-by: Philippe Mathieu-Daudé + Signed-off-by: Stefan Hajnoczi + Message-Id: <20221128202741.4945-3-philmd@linaro.org> + +(cherry picked from commit b1901de83a9456cde26fc755f71ca2b7b3ef50fc) +Signed-off-by: Jon Maloy +--- + hw/display/qxl.h | 19 +++++++++++++++++++ + 1 file changed, 19 insertions(+) + +diff --git a/hw/display/qxl.h b/hw/display/qxl.h +index 30d21f4d0b..c938f88a2f 100644 +--- a/hw/display/qxl.h ++++ b/hw/display/qxl.h +@@ -147,6 +147,25 @@ OBJECT_DECLARE_SIMPLE_TYPE(PCIQXLDevice, PCI_QXL) + #define QXL_DEFAULT_REVISION (QXL_REVISION_STABLE_V12 + 1) + + /* qxl.c */ ++/** ++ * qxl_phys2virt: Get a pointer within a PCI VRAM memory region. ++ * ++ * @qxl: QXL device ++ * @phys: physical offset of buffer within the VRAM ++ * @group_id: memory slot group ++ * ++ * Returns a host pointer to a buffer placed at offset @phys within the ++ * active slot @group_id of the PCI VGA RAM memory region associated with ++ * the @qxl device. If the slot is inactive, or the offset is out ++ * of the memory region, returns NULL. ++ * ++ * Use with care; by the time this function returns, the returned pointer is ++ * not protected by RCU anymore. If the caller is not within an RCU critical ++ * section and does not hold the iothread lock, it must have other means of ++ * protecting the pointer, such as a reference to the region that includes ++ * the incoming ram_addr_t. ++ * ++ */ + void *qxl_phys2virt(PCIQXLDevice *qxl, QXLPHYSICAL phys, int group_id); + void qxl_set_guest_bug(PCIQXLDevice *qxl, const char *msg, ...) + GCC_FMT_ATTR(2, 3); +-- +2.37.3 + diff --git a/SOURCES/kvm-hw-display-qxl-Have-qxl_log_command-Return-early-if-.patch b/SOURCES/kvm-hw-display-qxl-Have-qxl_log_command-Return-early-if-.patch new file mode 100644 index 0000000..c644ab2 --- /dev/null +++ b/SOURCES/kvm-hw-display-qxl-Have-qxl_log_command-Return-early-if-.patch @@ -0,0 +1,74 @@ +From 5ec8d909d40fa04ef2c3572e01509a1866786070 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Mon, 5 Dec 2022 15:32:55 -0500 +Subject: [PATCH 1/5] hw/display/qxl: Have qxl_log_command Return early if no + log_cmd handler +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 240: hw/display/qxl: Have qxl_log_command Return early if no log_cmd handler +RH-Bugzilla: 2148545 +RH-Acked-by: Gerd Hoffmann +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Marc-André Lureau +RH-Commit: [1/5] 33d94f40c46cccbc32d108d1035365917bf90356 (jmaloy/jons-qemu-kvm) + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2148545 +CVE: CVE-2022-4144 +Upstream: Merged + +commit 61c34fc194b776ecadc39fb26b061331107e5599 +Author: Philippe Mathieu-Daudé +Date: Mon Nov 28 21:27:37 2022 +0100 + + hw/display/qxl: Have qxl_log_command Return early if no log_cmd handler + + Only 3 command types are logged: no need to call qxl_phys2virt() + for the other types. Using different cases will help to pass + different structure sizes to qxl_phys2virt() in a pair of commits. + + Reviewed-by: Marc-André Lureau + Signed-off-by: Philippe Mathieu-Daudé + Signed-off-by: Stefan Hajnoczi + Message-Id: <20221128202741.4945-2-philmd@linaro.org> + +(cherry picked from commit 61c34fc194b776ecadc39fb26b061331107e5599) +Signed-off-by: Jon Maloy +--- + hw/display/qxl-logger.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/hw/display/qxl-logger.c b/hw/display/qxl-logger.c +index 68bfa47568..1bcf803db6 100644 +--- a/hw/display/qxl-logger.c ++++ b/hw/display/qxl-logger.c +@@ -247,6 +247,16 @@ int qxl_log_command(PCIQXLDevice *qxl, const char *ring, QXLCommandExt *ext) + qxl_name(qxl_type, ext->cmd.type), + compat ? "(compat)" : ""); + ++ switch (ext->cmd.type) { ++ case QXL_CMD_DRAW: ++ break; ++ case QXL_CMD_SURFACE: ++ break; ++ case QXL_CMD_CURSOR: ++ break; ++ default: ++ goto out; ++ } + data = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id); + if (!data) { + return 1; +@@ -269,6 +279,7 @@ int qxl_log_command(PCIQXLDevice *qxl, const char *ring, QXLCommandExt *ext) + qxl_log_cmd_cursor(qxl, data, ext->group_id); + break; + } ++out: + fprintf(stderr, "\n"); + return 0; + } +-- +2.37.3 + diff --git a/SOURCES/kvm-hw-display-qxl-Pass-requested-buffer-size-to-qxl_phy.patch b/SOURCES/kvm-hw-display-qxl-Pass-requested-buffer-size-to-qxl_phy.patch new file mode 100644 index 0000000..dd902f7 --- /dev/null +++ b/SOURCES/kvm-hw-display-qxl-Pass-requested-buffer-size-to-qxl_phy.patch @@ -0,0 +1,234 @@ +From 0e6bd3911c4971f575aac7e9cd726467b52fe544 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Mon, 5 Dec 2022 15:32:55 -0500 +Subject: [PATCH 3/5] hw/display/qxl: Pass requested buffer size to + qxl_phys2virt() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 240: hw/display/qxl: Have qxl_log_command Return early if no log_cmd handler +RH-Bugzilla: 2148545 +RH-Acked-by: Gerd Hoffmann +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Marc-André Lureau +RH-Commit: [3/5] 8e362d67fe7fef9eb457cfb15d75b298fed725c3 (jmaloy/jons-qemu-kvm) + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2148545 +CVE: CVE-2022-4144 +Upstream: Merged + +commit 8efec0ef8bbc1e75a7ebf6e325a35806ece9b39f +Author: Philippe Mathieu-Daudé +Date: Mon Nov 28 21:27:39 2022 +0100 + + hw/display/qxl: Pass requested buffer size to qxl_phys2virt() + + Currently qxl_phys2virt() doesn't check for buffer overrun. + In order to do so in the next commit, pass the buffer size + as argument. + + For QXLCursor in qxl_render_cursor() -> qxl_cursor() we + verify the size of the chunked data ahead, checking we can + access 'sizeof(QXLCursor) + chunk->data_size' bytes. + Since in the SPICE_CURSOR_TYPE_MONO case the cursor is + assumed to fit in one chunk, no change are required. + In SPICE_CURSOR_TYPE_ALPHA the ahead read is handled in + qxl_unpack_chunks(). + + Signed-off-by: Philippe Mathieu-Daudé + Acked-by: Gerd Hoffmann + Signed-off-by: Stefan Hajnoczi + Message-Id: <20221128202741.4945-4-philmd@linaro.org> + +(cherry picked from commit 8efec0ef8bbc1e75a7ebf6e325a35806ece9b39f) +Signed-off-by: Jon Maloy +--- + hw/display/qxl-logger.c | 11 ++++++++--- + hw/display/qxl-render.c | 20 ++++++++++++++++---- + hw/display/qxl.c | 14 +++++++++----- + hw/display/qxl.h | 4 +++- + 4 files changed, 36 insertions(+), 13 deletions(-) + +diff --git a/hw/display/qxl-logger.c b/hw/display/qxl-logger.c +index 1bcf803db6..35c38f6252 100644 +--- a/hw/display/qxl-logger.c ++++ b/hw/display/qxl-logger.c +@@ -106,7 +106,7 @@ static int qxl_log_image(PCIQXLDevice *qxl, QXLPHYSICAL addr, int group_id) + QXLImage *image; + QXLImageDescriptor *desc; + +- image = qxl_phys2virt(qxl, addr, group_id); ++ image = qxl_phys2virt(qxl, addr, group_id, sizeof(QXLImage)); + if (!image) { + return 1; + } +@@ -214,7 +214,8 @@ int qxl_log_cmd_cursor(PCIQXLDevice *qxl, QXLCursorCmd *cmd, int group_id) + cmd->u.set.position.y, + cmd->u.set.visible ? "yes" : "no", + cmd->u.set.shape); +- cursor = qxl_phys2virt(qxl, cmd->u.set.shape, group_id); ++ cursor = qxl_phys2virt(qxl, cmd->u.set.shape, group_id, ++ sizeof(QXLCursor)); + if (!cursor) { + return 1; + } +@@ -236,6 +237,7 @@ int qxl_log_command(PCIQXLDevice *qxl, const char *ring, QXLCommandExt *ext) + { + bool compat = ext->flags & QXL_COMMAND_FLAG_COMPAT; + void *data; ++ size_t datasz; + int ret; + + if (!qxl->cmdlog) { +@@ -249,15 +251,18 @@ int qxl_log_command(PCIQXLDevice *qxl, const char *ring, QXLCommandExt *ext) + + switch (ext->cmd.type) { + case QXL_CMD_DRAW: ++ datasz = compat ? sizeof(QXLCompatDrawable) : sizeof(QXLDrawable); + break; + case QXL_CMD_SURFACE: ++ datasz = sizeof(QXLSurfaceCmd); + break; + case QXL_CMD_CURSOR: ++ datasz = sizeof(QXLCursorCmd); + break; + default: + goto out; + } +- data = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id); ++ data = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id, datasz); + if (!data) { + return 1; + } +diff --git a/hw/display/qxl-render.c b/hw/display/qxl-render.c +index ca217004bf..fcfd40c3ac 100644 +--- a/hw/display/qxl-render.c ++++ b/hw/display/qxl-render.c +@@ -107,7 +107,9 @@ static void qxl_render_update_area_unlocked(PCIQXLDevice *qxl) + qxl->guest_primary.resized = 0; + qxl->guest_primary.data = qxl_phys2virt(qxl, + qxl->guest_primary.surface.mem, +- MEMSLOT_GROUP_GUEST); ++ MEMSLOT_GROUP_GUEST, ++ qxl->guest_primary.abs_stride ++ * height); + if (!qxl->guest_primary.data) { + goto end; + } +@@ -228,7 +230,8 @@ static void qxl_unpack_chunks(void *dest, size_t size, PCIQXLDevice *qxl, + if (offset == size) { + return; + } +- chunk = qxl_phys2virt(qxl, chunk->next_chunk, group_id); ++ chunk = qxl_phys2virt(qxl, chunk->next_chunk, group_id, ++ sizeof(QXLDataChunk) + chunk->data_size); + if (!chunk) { + return; + } +@@ -295,7 +298,8 @@ fail: + /* called from spice server thread context only */ + int qxl_render_cursor(PCIQXLDevice *qxl, QXLCommandExt *ext) + { +- QXLCursorCmd *cmd = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id); ++ QXLCursorCmd *cmd = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id, ++ sizeof(QXLCursorCmd)); + QXLCursor *cursor; + QEMUCursor *c; + +@@ -314,7 +318,15 @@ int qxl_render_cursor(PCIQXLDevice *qxl, QXLCommandExt *ext) + } + switch (cmd->type) { + case QXL_CURSOR_SET: +- cursor = qxl_phys2virt(qxl, cmd->u.set.shape, ext->group_id); ++ /* First read the QXLCursor to get QXLDataChunk::data_size ... */ ++ cursor = qxl_phys2virt(qxl, cmd->u.set.shape, ext->group_id, ++ sizeof(QXLCursor)); ++ if (!cursor) { ++ return 1; ++ } ++ /* Then read including the chunked data following QXLCursor. */ ++ cursor = qxl_phys2virt(qxl, cmd->u.set.shape, ext->group_id, ++ sizeof(QXLCursor) + cursor->chunk.data_size); + if (!cursor) { + return 1; + } +diff --git a/hw/display/qxl.c b/hw/display/qxl.c +index 29c80b4289..aa9065183e 100644 +--- a/hw/display/qxl.c ++++ b/hw/display/qxl.c +@@ -274,7 +274,8 @@ static void qxl_spice_monitors_config_async(PCIQXLDevice *qxl, int replay) + QXL_IO_MONITORS_CONFIG_ASYNC)); + } + +- cfg = qxl_phys2virt(qxl, qxl->guest_monitors_config, MEMSLOT_GROUP_GUEST); ++ cfg = qxl_phys2virt(qxl, qxl->guest_monitors_config, MEMSLOT_GROUP_GUEST, ++ sizeof(QXLMonitorsConfig)); + if (cfg != NULL && cfg->count == 1) { + qxl->guest_primary.resized = 1; + qxl->guest_head0_width = cfg->heads[0].width; +@@ -459,7 +460,8 @@ static int qxl_track_command(PCIQXLDevice *qxl, struct QXLCommandExt *ext) + switch (le32_to_cpu(ext->cmd.type)) { + case QXL_CMD_SURFACE: + { +- QXLSurfaceCmd *cmd = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id); ++ QXLSurfaceCmd *cmd = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id, ++ sizeof(QXLSurfaceCmd)); + + if (!cmd) { + return 1; +@@ -494,7 +496,8 @@ static int qxl_track_command(PCIQXLDevice *qxl, struct QXLCommandExt *ext) + } + case QXL_CMD_CURSOR: + { +- QXLCursorCmd *cmd = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id); ++ QXLCursorCmd *cmd = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id, ++ sizeof(QXLCursorCmd)); + + if (!cmd) { + return 1; +@@ -1444,7 +1447,8 @@ static bool qxl_get_check_slot_offset(PCIQXLDevice *qxl, QXLPHYSICAL pqxl, + } + + /* can be also called from spice server thread context */ +-void *qxl_phys2virt(PCIQXLDevice *qxl, QXLPHYSICAL pqxl, int group_id) ++void *qxl_phys2virt(PCIQXLDevice *qxl, QXLPHYSICAL pqxl, int group_id, ++ size_t size) + { + uint64_t offset; + uint32_t slot; +@@ -1952,7 +1956,7 @@ static void qxl_dirty_surfaces(PCIQXLDevice *qxl) + } + + cmd = qxl_phys2virt(qxl, qxl->guest_surfaces.cmds[i], +- MEMSLOT_GROUP_GUEST); ++ MEMSLOT_GROUP_GUEST, sizeof(QXLSurfaceCmd)); + assert(cmd); + assert(cmd->type == QXL_SURFACE_CMD_CREATE); + qxl_dirty_one_surface(qxl, cmd->u.surface_create.data, +diff --git a/hw/display/qxl.h b/hw/display/qxl.h +index c938f88a2f..c784315daa 100644 +--- a/hw/display/qxl.h ++++ b/hw/display/qxl.h +@@ -153,6 +153,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(PCIQXLDevice, PCI_QXL) + * @qxl: QXL device + * @phys: physical offset of buffer within the VRAM + * @group_id: memory slot group ++ * @size: size of the buffer + * + * Returns a host pointer to a buffer placed at offset @phys within the + * active slot @group_id of the PCI VGA RAM memory region associated with +@@ -166,7 +167,8 @@ OBJECT_DECLARE_SIMPLE_TYPE(PCIQXLDevice, PCI_QXL) + * the incoming ram_addr_t. + * + */ +-void *qxl_phys2virt(PCIQXLDevice *qxl, QXLPHYSICAL phys, int group_id); ++void *qxl_phys2virt(PCIQXLDevice *qxl, QXLPHYSICAL phys, int group_id, ++ size_t size); + void qxl_set_guest_bug(PCIQXLDevice *qxl, const char *msg, ...) + GCC_FMT_ATTR(2, 3); + +-- +2.37.3 + diff --git a/SOURCES/kvm-hw-intc-arm_gicv3-Check-for-MEMTX_OK-instead-of-MEMT.patch b/SOURCES/kvm-hw-intc-arm_gicv3-Check-for-MEMTX_OK-instead-of-MEMT.patch new file mode 100644 index 0000000..eea6fa2 --- /dev/null +++ b/SOURCES/kvm-hw-intc-arm_gicv3-Check-for-MEMTX_OK-instead-of-MEMT.patch @@ -0,0 +1,75 @@ +From 2db3d0de1be018f14cb91fdd4a368996b09d8bec Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 13 Apr 2022 14:51:06 -0400 +Subject: [PATCH 1/3] hw/intc/arm_gicv3: Check for !MEMTX_OK instead of + MEMTX_ERROR +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 151: hw/intc/arm_gicv3: Check for !MEMTX_OK instead of MEMTX_ERROR +RH-Commit: [1/3] 561c9c2b1249f07d33013040b1c495ed1fbf825b (jmaloy/qemu-kvm) +RH-Bugzilla: 1999236 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Peter Xu + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1999236 +Upstream: Merged +CVE: CVE-2021-3750 + +commit b9d383ab797f54ae5fa8746117770709921dc529 +Author: Philippe Mathieu-Daudé +Date: Wed Dec 15 19:24:19 2021 +0100 + + hw/intc/arm_gicv3: Check for !MEMTX_OK instead of MEMTX_ERROR + + Quoting Peter Maydell: + + "These MEMTX_* aren't from the memory transaction + API functions; they're just being used by gicd_readl() and + friends as a way to indicate a success/failure so that the + actual MemoryRegionOps read/write fns like gicv3_dist_read() + can log a guest error." + + We are going to introduce more MemTxResult bits, so it is + safer to check for !MEMTX_OK rather than MEMTX_ERROR. + + Reviewed-by: Peter Xu + Reviewed-by: David Hildenbrand + Reviewed-by: Peter Maydell + Reviewed-by: Stefan Hajnoczi + Signed-off-by: Philippe Mathieu-Daudé + Signed-off-by: Peter Maydell + +(cherry picked from commit b9d383ab797f54ae5fa8746117770709921dc529) +Signed-off-by: Jon Maloy +--- + hw/intc/arm_gicv3_redist.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/intc/arm_gicv3_redist.c b/hw/intc/arm_gicv3_redist.c +index c8ff3eca08..99b11ca5ee 100644 +--- a/hw/intc/arm_gicv3_redist.c ++++ b/hw/intc/arm_gicv3_redist.c +@@ -462,7 +462,7 @@ MemTxResult gicv3_redist_read(void *opaque, hwaddr offset, uint64_t *data, + break; + } + +- if (r == MEMTX_ERROR) { ++ if (r != MEMTX_OK) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: invalid guest read at offset " TARGET_FMT_plx + " size %u\n", __func__, offset, size); +@@ -521,7 +521,7 @@ MemTxResult gicv3_redist_write(void *opaque, hwaddr offset, uint64_t data, + break; + } + +- if (r == MEMTX_ERROR) { ++ if (r != MEMTX_OK) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: invalid guest write at offset " TARGET_FMT_plx + " size %u\n", __func__, offset, size); +-- +2.27.0 + diff --git a/SOURCES/kvm-hw-virtio-vdpa-Fix-leak-of-host-notifier-memory-regi.patch b/SOURCES/kvm-hw-virtio-vdpa-Fix-leak-of-host-notifier-memory-regi.patch new file mode 100644 index 0000000..bb42634 --- /dev/null +++ b/SOURCES/kvm-hw-virtio-vdpa-Fix-leak-of-host-notifier-memory-regi.patch @@ -0,0 +1,66 @@ +From f0115d856f46e65e3b62896f84fe1902a958bf79 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 22 Mar 2022 19:23:36 -0400 +Subject: [PATCH 04/18] hw/virtio: vdpa: Fix leak of host-notifier + memory-region + +RH-Author: Jon Maloy +RH-MergeRequest: 132: hw/virtio: vdpa: Fix leak of host-notifier memory-region +RH-Commit: [1/1] b3cec35d185e3b9844a458f5c51c5d5ef7e3d8f1 (jmaloy/qemu-kvm) +RH-Bugzilla: 2060843 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Laurent Vivier +RH-Acked-by: Igor Mammedov + +BZ: https://bugzilla.redhat.com/2060843 +UPSTREAM: no +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=44038138 + +commit 98f7607ecda00dea3cbb2ed7b4427c96846efb83 +Author: Laurent Vivier +Date: Fri Feb 11 18:02:59 2022 +0100 + + hw/virtio: vdpa: Fix leak of host-notifier memory-region + + If call virtio_queue_set_host_notifier_mr fails, should free + host-notifier memory-region. + + This problem can trigger a coredump with some vDPA drivers (mlx5, + but not with the vdpasim), if we unplug the virtio-net card from + the guest after a stop/start. + + The same fix has been done for vhost-user: + 1f89d3b91e3e ("hw/virtio: Fix leak of host-notifier memory-region") + + Fixes: d0416d487bd5 ("vhost-vdpa: map virtqueue notification area if possible") + Cc: jasowang@redhat.com + Resolves: https://bugzilla.redhat.com/2027208 + Signed-off-by: Laurent Vivier + Message-Id: <20220211170259.1388734-1-lvivier@redhat.com> + Cc: qemu-stable@nongnu.org + Acked-by: Jason Wang + Reviewed-by: Stefano Garzarella + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. Tsirkin + +(cherry picked from commit 98f7607ecda00dea3cbb2ed7b4427c96846efb83) +Signed-off-by: Jon Maloy +--- + hw/virtio/vhost-vdpa.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index bcaf00e09f..78da48a333 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -415,6 +415,7 @@ static int vhost_vdpa_host_notifier_init(struct vhost_dev *dev, int queue_index) + g_free(name); + + if (virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, true)) { ++ object_unparent(OBJECT(&n->mr)); + munmap(addr, page_size); + goto err; + } +-- +2.27.0 + diff --git a/SOURCES/kvm-i386-Add-Icelake-Server-v6-CPU-model-with-5-level-EP.patch b/SOURCES/kvm-i386-Add-Icelake-Server-v6-CPU-model-with-5-level-EP.patch new file mode 100644 index 0000000..540f721 --- /dev/null +++ b/SOURCES/kvm-i386-Add-Icelake-Server-v6-CPU-model-with-5-level-EP.patch @@ -0,0 +1,59 @@ +From ccaa1135bd1aa90c94f0e8b5417bd2a420134e6c Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 30 Mar 2022 14:52:34 -0400 +Subject: [PATCH 08/18] i386: Add Icelake-Server-v6 CPU model with 5-level EPT + support + +RH-Author: Jon Maloy +RH-MergeRequest: 139: vmxcap: Add 5-level EPT bit +RH-Commit: [2/2] e913746b2df9cbd0308014ab5cc72577458857fa (jmaloy/qemu-kvm) +RH-Bugzilla: 2065207 +RH-Acked-by: Paolo Bonzini + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2065207 +UPSTREAM: Merged + +commit: 12cab535db6440af41ed8dfefe908a594321b6ce +Author: Vitaly Kuznetsov +Date: Mon Feb 21 15:53:15 2022 +0100 + + i386: Add Icelake-Server-v6 CPU model with 5-level EPT support + + Windows 11 with WSL2 enabled (Hyper-V) fails to boot with Icelake-Server + {-v5} CPU model but boots well with '-cpu host'. Apparently, it expects + 5-level paging and 5-level EPT support to come in pair but QEMU's + Icelake-Server CPU model lacks the later. Introduce 'Icelake-Server-v6' + CPU model with 'vmx-page-walk-5' enabled by default. + + Signed-off-by: Vitaly Kuznetsov + Message-Id: <20220221145316.576138-1-vkuznets@redhat.com> + Signed-off-by: Paolo Bonzini + +(cherry picked from commit 12cab535db6440af41ed8dfefe908a594321b6ce) +Signed-off-by: Jon Maloy +--- + target/i386/cpu.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index aa9e636800..6e25d13339 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -3505,6 +3505,14 @@ static const X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ } + }, + }, ++ { ++ .version = 6, ++ .note = "5-level EPT", ++ .props = (PropValue[]) { ++ { "vmx-page-walk-5", "on" }, ++ { /* end of list */ } ++ }, ++ }, + { /* end of list */ } + } + }, +-- +2.27.0 + diff --git a/SOURCES/kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch b/SOURCES/kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch new file mode 100644 index 0000000..514dd55 --- /dev/null +++ b/SOURCES/kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch @@ -0,0 +1,67 @@ +From f96220d64a31a4a52b2d132a503048579946f982 Mon Sep 17 00:00:00 2001 +From: Vitaly Kuznetsov +Date: Thu, 18 Aug 2022 17:01:13 +0200 +Subject: [PATCH 3/3] i386: do kvm_put_msr_feature_control() first thing when + vCPU is reset + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 219: Synchronize qemu-6.2.0-20.el8.1 build from RHEL 8.7 to RHEL 8.8 +RH-Bugzilla: 2125271 +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Jon Maloy +RH-Commit: [2/2] 08e1e67db96801e4a35aa6b60a93b2c2f1641220 + +kvm_put_sregs2() fails to reset 'locked' CR4/CR0 bits upon vCPU reset when +it is in VMX root operation. Do kvm_put_msr_feature_control() before +kvm_put_sregs2() to (possibly) kick vCPU out of VMX root operation. It also +seems logical to do kvm_put_msr_feature_control() before +kvm_put_nested_state() and not after it, especially when 'real' nested +state is set. + +Signed-off-by: Vitaly Kuznetsov +Message-Id: <20220818150113.479917-3-vkuznets@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 45ed68a1a3a19754ade954d75a3c9d13ff560e5c) +Signed-off-by: Vitaly Kuznetsov +--- + target/i386/kvm/kvm.c | 17 ++++++++++++----- + 1 file changed, 12 insertions(+), 5 deletions(-) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 81d729dc40..a06221d3e5 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -4255,6 +4255,18 @@ int kvm_arch_put_registers(CPUState *cpu, int level) + + assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu)); + ++ /* ++ * Put MSR_IA32_FEATURE_CONTROL first, this ensures the VM gets out of VMX ++ * root operation upon vCPU reset. kvm_put_msr_feature_control() should also ++ * preceed kvm_put_nested_state() when 'real' nested state is set. ++ */ ++ if (level >= KVM_PUT_RESET_STATE) { ++ ret = kvm_put_msr_feature_control(x86_cpu); ++ if (ret < 0) { ++ return ret; ++ } ++ } ++ + /* must be before kvm_put_nested_state so that EFER.SVME is set */ + ret = kvm_put_sregs(x86_cpu); + if (ret < 0) { +@@ -4266,11 +4278,6 @@ int kvm_arch_put_registers(CPUState *cpu, int level) + if (ret < 0) { + return ret; + } +- +- ret = kvm_put_msr_feature_control(x86_cpu); +- if (ret < 0) { +- return ret; +- } + } + + if (level == KVM_PUT_FULL_STATE) { +-- +2.35.3 + diff --git a/SOURCES/kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch b/SOURCES/kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch new file mode 100644 index 0000000..411bed4 --- /dev/null +++ b/SOURCES/kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch @@ -0,0 +1,94 @@ +From 46e54544c3480658111d6f111d6c265dcea2e19b Mon Sep 17 00:00:00 2001 +From: Vitaly Kuznetsov +Date: Thu, 18 Aug 2022 17:01:12 +0200 +Subject: [PATCH 2/3] i386: reset KVM nested state upon CPU reset + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 219: Synchronize qemu-6.2.0-20.el8.1 build from RHEL 8.7 to RHEL 8.8 +RH-Bugzilla: 2125271 +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Jon Maloy +RH-Commit: [1/2] de4db7bceb6baaf69aec8b0ae9aa8887aa869e15 + +Make sure env->nested_state is cleaned up when a vCPU is reset, it may +be stale after an incoming migration, kvm_arch_put_registers() may +end up failing or putting vCPU in a weird state. + +Reviewed-by: Maxim Levitsky +Signed-off-by: Vitaly Kuznetsov +Message-Id: <20220818150113.479917-2-vkuznets@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 3cafdb67504a34a0305260f0c86a73d5a3fb000b) +Signed-off-by: Vitaly Kuznetsov +--- + target/i386/kvm/kvm.c | 37 +++++++++++++++++++++++++++---------- + 1 file changed, 27 insertions(+), 10 deletions(-) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index bd439e56ad..81d729dc40 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -1615,6 +1615,30 @@ static void kvm_init_xsave(CPUX86State *env) + env->xsave_buf_len); + } + ++static void kvm_init_nested_state(CPUX86State *env) ++{ ++ struct kvm_vmx_nested_state_hdr *vmx_hdr; ++ uint32_t size; ++ ++ if (!env->nested_state) { ++ return; ++ } ++ ++ size = env->nested_state->size; ++ ++ memset(env->nested_state, 0, size); ++ env->nested_state->size = size; ++ ++ if (cpu_has_vmx(env)) { ++ env->nested_state->format = KVM_STATE_NESTED_FORMAT_VMX; ++ vmx_hdr = &env->nested_state->hdr.vmx; ++ vmx_hdr->vmxon_pa = -1ull; ++ vmx_hdr->vmcs12_pa = -1ull; ++ } else if (cpu_has_svm(env)) { ++ env->nested_state->format = KVM_STATE_NESTED_FORMAT_SVM; ++ } ++} ++ + int kvm_arch_init_vcpu(CPUState *cs) + { + struct { +@@ -2042,19 +2066,10 @@ int kvm_arch_init_vcpu(CPUState *cs) + assert(max_nested_state_len >= offsetof(struct kvm_nested_state, data)); + + if (cpu_has_vmx(env) || cpu_has_svm(env)) { +- struct kvm_vmx_nested_state_hdr *vmx_hdr; +- + env->nested_state = g_malloc0(max_nested_state_len); + env->nested_state->size = max_nested_state_len; + +- if (cpu_has_vmx(env)) { +- env->nested_state->format = KVM_STATE_NESTED_FORMAT_VMX; +- vmx_hdr = &env->nested_state->hdr.vmx; +- vmx_hdr->vmxon_pa = -1ull; +- vmx_hdr->vmcs12_pa = -1ull; +- } else { +- env->nested_state->format = KVM_STATE_NESTED_FORMAT_SVM; +- } ++ kvm_init_nested_state(env); + } + } + +@@ -2117,6 +2132,8 @@ void kvm_arch_reset_vcpu(X86CPU *cpu) + /* enabled by default */ + env->poll_control_msr = 1; + ++ kvm_init_nested_state(env); ++ + sev_es_set_reset_vector(CPU(cpu)); + } + +-- +2.35.3 + diff --git a/SOURCES/kvm-ide-Increment-BB-in-flight-counter-for-TRIM-BH.patch b/SOURCES/kvm-ide-Increment-BB-in-flight-counter-for-TRIM-BH.patch new file mode 100644 index 0000000..6af2a9f --- /dev/null +++ b/SOURCES/kvm-ide-Increment-BB-in-flight-counter-for-TRIM-BH.patch @@ -0,0 +1,92 @@ +From eaade87072e903cf550dfdb8ed1480dddc6bb0e3 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Thu, 20 Jan 2022 15:22:59 +0100 +Subject: [PATCH 21/24] ide: Increment BB in-flight counter for TRIM BH +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Hanna Reitz +RH-MergeRequest: 188: ide: Increment BB in-flight counter for TRIM BH +RH-Commit: [1/1] 1e702e735ff63f2b8b69c20cac1b309dd085cd62 +RH-Bugzilla: 2029980 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf +RH-Acked-by: Paolo Bonzini + +When we still have an AIOCB registered for DMA operations, we try to +settle the respective operation by draining the BlockBackend associated +with the IDE device. + +However, this assumes that every DMA operation is associated with an +increment of the BlockBackend’s in-flight counter (e.g. through some +ongoing I/O operation), so that draining the BB until its in-flight +counter reaches 0 will settle all DMA operations. That is not the case: +For TRIM, the guest can issue a zero-length operation that will not +result in any I/O operation forwarded to the BlockBackend, and also not +increment the in-flight counter in any other way. In such a case, +blk_drain() will be a no-op if no other operations are in flight. + +It is clear that if blk_drain() is a no-op, the value of +s->bus->dma->aiocb will not change between checking it in the `if` +condition and asserting that it is NULL after blk_drain(). + +The particular problem is that ide_issue_trim() creates a BH +(ide_trim_bh_cb()) to settle the TRIM request: iocb->common.cb() is +ide_dma_cb(), which will either create a new request, or find the +transfer to be done and call ide_set_inactive(), which clears +s->bus->dma->aiocb. Therefore, the blk_drain() must wait for +ide_trim_bh_cb() to run, which currently it will not always do. + +To fix this issue, we increment the BlockBackend's in-flight counter +when the TRIM operation begins (in ide_issue_trim(), when the +ide_trim_bh_cb() BH is created) and decrement it when ide_trim_bh_cb() +is done. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2029980 +Suggested-by: Paolo Bonzini +Signed-off-by: Hanna Reitz +Message-Id: <20220120142259.120189-1-hreitz@redhat.com> +Reviewed-by: Paolo Bonzini +Reviewed-by: John Snow +Tested-by: John Snow +(cherry picked from commit 7e5cdb345f77d76cb4877fe6230c4e17a7d0d0ca) +Signed-off-by: Hanna Reitz +--- + hw/ide/core.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/hw/ide/core.c b/hw/ide/core.c +index e28f8aad61..15138225be 100644 +--- a/hw/ide/core.c ++++ b/hw/ide/core.c +@@ -433,12 +433,16 @@ static const AIOCBInfo trim_aiocb_info = { + static void ide_trim_bh_cb(void *opaque) + { + TrimAIOCB *iocb = opaque; ++ BlockBackend *blk = iocb->s->blk; + + iocb->common.cb(iocb->common.opaque, iocb->ret); + + qemu_bh_delete(iocb->bh); + iocb->bh = NULL; + qemu_aio_unref(iocb); ++ ++ /* Paired with an increment in ide_issue_trim() */ ++ blk_dec_in_flight(blk); + } + + static void ide_issue_trim_cb(void *opaque, int ret) +@@ -508,6 +512,9 @@ BlockAIOCB *ide_issue_trim( + IDEState *s = opaque; + TrimAIOCB *iocb; + ++ /* Paired with a decrement in ide_trim_bh_cb() */ ++ blk_inc_in_flight(s->blk); ++ + iocb = blk_aio_get(&trim_aiocb_info, s->blk, cb, cb_opaque); + iocb->s = s; + iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb); +-- +2.35.3 + diff --git a/SOURCES/kvm-include-elf.h-add-s390x-note-types.patch b/SOURCES/kvm-include-elf.h-add-s390x-note-types.patch new file mode 100644 index 0000000..9e17d2c --- /dev/null +++ b/SOURCES/kvm-include-elf.h-add-s390x-note-types.patch @@ -0,0 +1,43 @@ +From 3fceb3b60a60c5008eecf99e45e269b757042b5a Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Mon, 17 Oct 2022 08:38:20 +0000 +Subject: [PATCH 39/42] include/elf.h: add s390x note types +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [39/41] ebf0873744905abbe9cfc423a56c6d1b4f2ae936 + +Adding two s390x note types + +Signed-off-by: Janosch Frank +Reviewed-by: Thomas Huth +Message-Id: <20221017083822.43118-9-frankja@linux.ibm.com> +(cherry picked from commit 5433669c7a1884cc0394c360148965edf7519884) +Signed-off-by: Cédric Le Goater +--- + include/elf.h | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/include/elf.h b/include/elf.h +index 811bf4a1cb..4edab8e5a2 100644 +--- a/include/elf.h ++++ b/include/elf.h +@@ -1647,6 +1647,8 @@ typedef struct elf64_shdr { + #define NT_TASKSTRUCT 4 + #define NT_AUXV 6 + #define NT_PRXFPREG 0x46e62b7f /* copied from gdb5.1/include/elf/common.h */ ++#define NT_S390_PV_CPU_DATA 0x30e /* s390 protvirt cpu dump data */ ++#define NT_S390_RI_CB 0x30d /* s390 runtime instrumentation */ + #define NT_S390_GS_CB 0x30b /* s390 guarded storage registers */ + #define NT_S390_VXRS_HIGH 0x30a /* s390 vector registers 16-31 */ + #define NT_S390_VXRS_LOW 0x309 /* s390 vector registers 0-15 (lower half) */ +-- +2.37.3 + diff --git a/SOURCES/kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch b/SOURCES/kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch new file mode 100644 index 0000000..6f2cc3f --- /dev/null +++ b/SOURCES/kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch @@ -0,0 +1,367 @@ +From 88b5e059462a72ca758d84c0d4d0895a03baac50 Mon Sep 17 00:00:00 2001 +From: "manish.mishra" +Date: Tue, 20 Dec 2022 18:44:17 +0000 +Subject: [PATCH 1/3] io: Add support for MSG_PEEK for socket channel +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 258: migration: Fix multifd crash due to channel disorder +RH-Bugzilla: 2137740 +RH-Acked-by: quintela1 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Dr. David Alan Gilbert +RH-Commit: [1/2] 04fc6fae358599b8509f5355469d2e8720f01903 + +Conflicts: + io/channel-null.c + migration/channel-block.c + + Because these two files do not exist in rhel8.8 tree, dropping the + changes. + +MSG_PEEK peeks at the channel, The data is treated as unread and +the next read shall still return this data. This support is +currently added only for socket class. Extra parameter 'flags' +is added to io_readv calls to pass extra read flags like MSG_PEEK. + +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. Berrange +Reviewed-by: Juan Quintela +Suggested-by: Daniel P. Berrange +Signed-off-by: manish.mishra +Signed-off-by: Juan Quintela +(cherry picked from commit 84615a19ddf2bfb38d7b3a0d487d2397ee55e4f3) +Signed-off-by: Peter Xu +--- + chardev/char-socket.c | 4 ++-- + include/io/channel.h | 6 ++++++ + io/channel-buffer.c | 1 + + io/channel-command.c | 1 + + io/channel-file.c | 1 + + io/channel-socket.c | 19 ++++++++++++++++++- + io/channel-tls.c | 1 + + io/channel-websock.c | 1 + + io/channel.c | 16 ++++++++++++---- + migration/rdma.c | 1 + + scsi/qemu-pr-helper.c | 2 +- + tests/qtest/tpm-emu.c | 2 +- + tests/unit/test-io-channel-socket.c | 1 + + util/vhost-user-server.c | 2 +- + 14 files changed, 48 insertions(+), 10 deletions(-) + +diff --git a/chardev/char-socket.c b/chardev/char-socket.c +index 836cfa0bc2..4cdf79e0c2 100644 +--- a/chardev/char-socket.c ++++ b/chardev/char-socket.c +@@ -339,11 +339,11 @@ static ssize_t tcp_chr_recv(Chardev *chr, char *buf, size_t len) + if (qio_channel_has_feature(s->ioc, QIO_CHANNEL_FEATURE_FD_PASS)) { + ret = qio_channel_readv_full(s->ioc, &iov, 1, + &msgfds, &msgfds_num, +- NULL); ++ 0, NULL); + } else { + ret = qio_channel_readv_full(s->ioc, &iov, 1, + NULL, NULL, +- NULL); ++ 0, NULL); + } + + if (ret == QIO_CHANNEL_ERR_BLOCK) { +diff --git a/include/io/channel.h b/include/io/channel.h +index c680ee7480..716235d496 100644 +--- a/include/io/channel.h ++++ b/include/io/channel.h +@@ -34,6 +34,8 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass, + + #define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1 + ++#define QIO_CHANNEL_READ_FLAG_MSG_PEEK 0x1 ++ + typedef enum QIOChannelFeature QIOChannelFeature; + + enum QIOChannelFeature { +@@ -41,6 +43,7 @@ enum QIOChannelFeature { + QIO_CHANNEL_FEATURE_SHUTDOWN, + QIO_CHANNEL_FEATURE_LISTEN, + QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY, ++ QIO_CHANNEL_FEATURE_READ_MSG_PEEK, + }; + + +@@ -114,6 +117,7 @@ struct QIOChannelClass { + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp); + int (*io_close)(QIOChannel *ioc, + Error **errp); +@@ -188,6 +192,7 @@ void qio_channel_set_name(QIOChannel *ioc, + * @niov: the length of the @iov array + * @fds: pointer to an array that will received file handles + * @nfds: pointer filled with number of elements in @fds on return ++ * @flags: read flags (QIO_CHANNEL_READ_FLAG_*) + * @errp: pointer to a NULL-initialized error object + * + * Read data from the IO channel, storing it in the +@@ -224,6 +229,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp); + + +diff --git a/io/channel-buffer.c b/io/channel-buffer.c +index bf52011be2..8096180f85 100644 +--- a/io/channel-buffer.c ++++ b/io/channel-buffer.c +@@ -54,6 +54,7 @@ static ssize_t qio_channel_buffer_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc); +diff --git a/io/channel-command.c b/io/channel-command.c +index 5ff1691bad..2834413b3a 100644 +--- a/io/channel-command.c ++++ b/io/channel-command.c +@@ -230,6 +230,7 @@ static ssize_t qio_channel_command_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc); +diff --git a/io/channel-file.c b/io/channel-file.c +index 348a48545e..490f0e5d84 100644 +--- a/io/channel-file.c ++++ b/io/channel-file.c +@@ -86,6 +86,7 @@ static ssize_t qio_channel_file_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc); +diff --git a/io/channel-socket.c b/io/channel-socket.c +index 6010ad7017..ca8b180b69 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -174,6 +174,9 @@ int qio_channel_socket_connect_sync(QIOChannelSocket *ioc, + } + #endif + ++ qio_channel_set_feature(QIO_CHANNEL(ioc), ++ QIO_CHANNEL_FEATURE_READ_MSG_PEEK); ++ + return 0; + } + +@@ -407,6 +410,9 @@ qio_channel_socket_accept(QIOChannelSocket *ioc, + } + #endif /* WIN32 */ + ++ qio_channel_set_feature(QIO_CHANNEL(cioc), ++ QIO_CHANNEL_FEATURE_READ_MSG_PEEK); ++ + trace_qio_channel_socket_accept_complete(ioc, cioc, cioc->fd); + return cioc; + +@@ -497,6 +503,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); +@@ -518,6 +525,10 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, + + } + ++ if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) { ++ sflags |= MSG_PEEK; ++ } ++ + retry: + ret = recvmsg(sioc->fd, &msg, sflags); + if (ret < 0) { +@@ -625,11 +636,17 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); + ssize_t done = 0; + ssize_t i; ++ int sflags = 0; ++ ++ if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) { ++ sflags |= MSG_PEEK; ++ } + + for (i = 0; i < niov; i++) { + ssize_t ret; +@@ -637,7 +654,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, + ret = recv(sioc->fd, + iov[i].iov_base, + iov[i].iov_len, +- 0); ++ sflags); + if (ret < 0) { + if (errno == EAGAIN) { + if (done) { +diff --git a/io/channel-tls.c b/io/channel-tls.c +index 4ce890a538..c730cb8ec5 100644 +--- a/io/channel-tls.c ++++ b/io/channel-tls.c +@@ -260,6 +260,7 @@ static ssize_t qio_channel_tls_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc); +diff --git a/io/channel-websock.c b/io/channel-websock.c +index 035dd6075b..13c94f2afe 100644 +--- a/io/channel-websock.c ++++ b/io/channel-websock.c +@@ -1081,6 +1081,7 @@ static ssize_t qio_channel_websock_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc); +diff --git a/io/channel.c b/io/channel.c +index 0640941ac5..a8c7f11649 100644 +--- a/io/channel.c ++++ b/io/channel.c +@@ -52,6 +52,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); +@@ -63,7 +64,14 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, + return -1; + } + +- return klass->io_readv(ioc, iov, niov, fds, nfds, errp); ++ if ((flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) && ++ !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { ++ error_setg_errno(errp, EINVAL, ++ "Channel does not support peek read"); ++ return -1; ++ } ++ ++ return klass->io_readv(ioc, iov, niov, fds, nfds, flags, errp); + } + + +@@ -146,7 +154,7 @@ int qio_channel_readv_full_all_eof(QIOChannel *ioc, + while ((nlocal_iov > 0) || local_fds) { + ssize_t len; + len = qio_channel_readv_full(ioc, local_iov, nlocal_iov, local_fds, +- local_nfds, errp); ++ local_nfds, 0, errp); + if (len == QIO_CHANNEL_ERR_BLOCK) { + if (qemu_in_coroutine()) { + qio_channel_yield(ioc, G_IO_IN); +@@ -284,7 +292,7 @@ ssize_t qio_channel_readv(QIOChannel *ioc, + size_t niov, + Error **errp) + { +- return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, errp); ++ return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, 0, errp); + } + + +@@ -303,7 +311,7 @@ ssize_t qio_channel_read(QIOChannel *ioc, + Error **errp) + { + struct iovec iov = { .iov_base = buf, .iov_len = buflen }; +- return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, errp); ++ return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, 0, errp); + } + + +diff --git a/migration/rdma.c b/migration/rdma.c +index 54acd2000e..dcf98bd7f8 100644 +--- a/migration/rdma.c ++++ b/migration/rdma.c +@@ -2917,6 +2917,7 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc); +diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c +index f281daeced..12ec8e9368 100644 +--- a/scsi/qemu-pr-helper.c ++++ b/scsi/qemu-pr-helper.c +@@ -612,7 +612,7 @@ static int coroutine_fn prh_read(PRHelperClient *client, void *buf, int sz, + iov.iov_base = buf; + iov.iov_len = sz; + n_read = qio_channel_readv_full(QIO_CHANNEL(client->ioc), &iov, 1, +- &fds, &nfds, errp); ++ &fds, &nfds, 0, errp); + + if (n_read == QIO_CHANNEL_ERR_BLOCK) { + qio_channel_yield(QIO_CHANNEL(client->ioc), G_IO_IN); +diff --git a/tests/qtest/tpm-emu.c b/tests/qtest/tpm-emu.c +index 2994d1cf42..3cf1acaf7d 100644 +--- a/tests/qtest/tpm-emu.c ++++ b/tests/qtest/tpm-emu.c +@@ -106,7 +106,7 @@ void *tpm_emu_ctrl_thread(void *data) + int *pfd = NULL; + size_t nfd = 0; + +- qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, &error_abort); ++ qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, 0, &error_abort); + cmd = be32_to_cpu(cmd); + g_assert_cmpint(cmd, ==, CMD_SET_DATAFD); + g_assert_cmpint(nfd, ==, 1); +diff --git a/tests/unit/test-io-channel-socket.c b/tests/unit/test-io-channel-socket.c +index 6713886d02..de2930f203 100644 +--- a/tests/unit/test-io-channel-socket.c ++++ b/tests/unit/test-io-channel-socket.c +@@ -452,6 +452,7 @@ static void test_io_channel_unix_fd_pass(void) + G_N_ELEMENTS(iorecv), + &fdrecv, + &nfdrecv, ++ 0, + &error_abort); + + g_assert(nfdrecv == G_N_ELEMENTS(fdsend)); +diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c +index 783d847a6d..e6a9ef72b7 100644 +--- a/util/vhost-user-server.c ++++ b/util/vhost-user-server.c +@@ -102,7 +102,7 @@ vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg) + * qio_channel_readv_full may have short reads, keeping calling it + * until getting VHOST_USER_HDR_SIZE or 0 bytes in total + */ +- rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, &local_err); ++ rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, 0, &local_err); + if (rc < 0) { + if (rc == QIO_CHANNEL_ERR_BLOCK) { + assert(local_err == NULL); +-- +2.37.3 + diff --git a/SOURCES/kvm-iotests-108-Fix-when-missing-user_allow_other.patch b/SOURCES/kvm-iotests-108-Fix-when-missing-user_allow_other.patch new file mode 100644 index 0000000..0d652dd --- /dev/null +++ b/SOURCES/kvm-iotests-108-Fix-when-missing-user_allow_other.patch @@ -0,0 +1,52 @@ +From 676e19198916d7631ba1367646dd08dc72079f88 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Thu, 21 Apr 2022 16:24:35 +0200 +Subject: [PATCH 6/6] iotests/108: Fix when missing user_allow_other + +RH-Author: Hanna Reitz +RH-MergeRequest: 171: qcow2: Improve refcount structure rebuilding +RH-Commit: [4/4] 36b70b5378ae7c8084b9e847706f00003abe9c11 +RH-Bugzilla: 1519071 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Eric Blake + +FUSE exports' allow-other option defaults to "auto", which means that it +will try passing allow_other as a mount option, and fall back to not +using it when an error occurs. We make no effort to hide fusermount's +error message (because it would be difficult, and because users might +want to know about the fallback occurring), and so when allow_other does +not work (primarily when /etc/fuse.conf does not contain +user_allow_other), this error message will appear and break the +reference output. + +We do not need allow_other here, though, so we can just pass +allow-other=off to fix that. + +Reported-by: Markus Armbruster +Signed-off-by: Hanna Reitz +Message-Id: <20220421142435.569600-1-hreitz@redhat.com> +Tested-by: Markus Armbruster +Tested-by: Eric Blake +(cherry picked from commit 348a0740afc5b313599533eb69bbb2b95d2f1bba) +Signed-off-by: Hanna Reitz +--- + tests/qemu-iotests/108 | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tests/qemu-iotests/108 b/tests/qemu-iotests/108 +index 23abbeaff0..775ff08eca 100755 +--- a/tests/qemu-iotests/108 ++++ b/tests/qemu-iotests/108 +@@ -326,7 +326,7 @@ else + + $QSD \ + --blockdev file,node-name=export-node,filename="$TEST_IMG" \ +- --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off \ ++ --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off,allow-other=off \ + --pidfile "$TEST_DIR/qsd.pid" \ + & + +-- +2.27.0 + diff --git a/SOURCES/kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch b/SOURCES/kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch new file mode 100644 index 0000000..cc67d7c --- /dev/null +++ b/SOURCES/kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch @@ -0,0 +1,445 @@ +From d638552d76db0db9e2b6ae90a35f0b451b0cbaf8 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Tue, 5 Apr 2022 15:46:51 +0200 +Subject: [PATCH 4/6] iotests/108: Test new refcount rebuild algorithm + +RH-Author: Hanna Reitz +RH-MergeRequest: 171: qcow2: Improve refcount structure rebuilding +RH-Commit: [2/4] 2aa8c383f0c88c414f10ade8bd2e8af07c35f35b +RH-Bugzilla: 1519071 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Eric Blake + +One clear problem with how qcow2's refcount structure rebuild algorithm +used to be before "qcow2: Improve refcount structure rebuilding" was +that it is prone to failure for qcow2 images on block devices: There is +generally unused space after the actual image, and if that exceeds what +one refblock covers, the old algorithm would invariably write the +reftable past the block device's end, which cannot work. The new +algorithm does not have this problem. + +Test it with three tests: +(1) Create an image with more empty space at the end than what one + refblock covers, see whether rebuilding the refcount structures + results in a change in the image file length. (It should not.) + +(2) Leave precisely enough space somewhere at the beginning of the image + for the new reftable (and the refblock for that place), see whether + the new algorithm puts the reftable there. (It should.) + +(3) Test the original problem: Create (something like) a block device + with a fixed size, then create a qcow2 image in there, write some + data, and then have qemu-img check rebuild the refcount structures. + Before HEAD^, the reftable would have been written past the image + file end, i.e. outside of what the block device provides, which + cannot work. HEAD^ should have fixed that. + ("Something like a block device" means a loop device if we can use + one ("sudo -n losetup" works), or a FUSE block export with + growable=false otherwise.) + +Reviewed-by: Eric Blake +Signed-off-by: Hanna Reitz +Message-Id: <20220405134652.19278-3-hreitz@redhat.com> +(cherry picked from commit 9ffd6d646d1d5ee9087a8cbf0b7d2f96c5656162) + +Conflicts: +- 108: The downstream qemu-storage-daemon does not support --daemonize, + so this switch has been replaced by a loop waiting for the PID file to + appear + +Signed-off-by: Hanna Reitz +--- + tests/qemu-iotests/108 | 263 ++++++++++++++++++++++++++++++++++++- + tests/qemu-iotests/108.out | 81 ++++++++++++ + 2 files changed, 343 insertions(+), 1 deletion(-) + +diff --git a/tests/qemu-iotests/108 b/tests/qemu-iotests/108 +index 8eaef0b8bf..23abbeaff0 100755 +--- a/tests/qemu-iotests/108 ++++ b/tests/qemu-iotests/108 +@@ -30,13 +30,20 @@ status=1 # failure is the default! + + _cleanup() + { +- _cleanup_test_img ++ _cleanup_test_img ++ if [ -f "$TEST_DIR/qsd.pid" ]; then ++ qsd_pid=$(cat "$TEST_DIR/qsd.pid") ++ kill -KILL "$qsd_pid" ++ fusermount -u "$TEST_DIR/fuse-export" &>/dev/null ++ fi ++ rm -f "$TEST_DIR/fuse-export" + } + trap "_cleanup; exit \$status" 0 1 2 3 15 + + # get standard environment, filters and checks + . ./common.rc + . ./common.filter ++. ./common.qemu + + # This tests qcow2-specific low-level functionality + _supported_fmt qcow2 +@@ -47,6 +54,22 @@ _supported_os Linux + # files + _unsupported_imgopts 'refcount_bits=\([^1]\|.\([^6]\|$\)\)' data_file + ++# This test either needs sudo -n losetup or FUSE exports to work ++if sudo -n losetup &>/dev/null; then ++ loopdev=true ++else ++ loopdev=false ++ ++ # QSD --export fuse will either yield "Parameter 'id' is missing" ++ # or "Invalid parameter 'fuse'", depending on whether there is ++ # FUSE support or not. ++ error=$($QSD --export fuse 2>&1) ++ if [[ $error = *"'fuse'"* ]]; then ++ _notrun 'Passwordless sudo for losetup or FUSE support required, but' \ ++ 'neither is available' ++ fi ++fi ++ + echo + echo '=== Repairing an image without any refcount table ===' + echo +@@ -138,6 +161,244 @@ _make_test_img 64M + poke_file "$TEST_IMG" $((0x10008)) "\xff\xff\xff\xff\xff\xff\x00\x00" + _check_test_img -r all + ++echo ++echo '=== Check rebuilt reftable location ===' ++ ++# In an earlier version of the refcount rebuild algorithm, the ++# reftable was generally placed at the image end (unless something was ++# allocated in the area covered by the refblock right before the image ++# file end, then we would try to place the reftable in that refblock). ++# This was later changed so the reftable would be placed in the ++# earliest possible location. Test this. ++ ++echo ++echo '--- Does the image size increase? ---' ++echo ++ ++# First test: Just create some image, write some data to it, and ++# resize it so there is free space at the end of the image (enough ++# that it spans at least one full refblock, which for cluster_size=512 ++# images, spans 128k). With the old algorithm, the reftable would ++# have then been placed at the end of the image file, but with the new ++# one, it will be put in that free space. ++# We want to check whether the size of the image file increases due to ++# rebuilding the refcount structures (it should not). ++ ++_make_test_img -o 'cluster_size=512' 1M ++# Write something ++$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io ++ ++# Add free space ++file_len=$(stat -c '%s' "$TEST_IMG") ++truncate -s $((file_len + 256 * 1024)) "$TEST_IMG" ++ ++# Corrupt the image by saying the image header was not allocated ++rt_offset=$(peek_file_be "$TEST_IMG" 48 8) ++rb_offset=$(peek_file_be "$TEST_IMG" $rt_offset 8) ++poke_file "$TEST_IMG" $rb_offset "\x00\x00" ++ ++# Check whether rebuilding the refcount structures increases the image ++# file size ++file_len=$(stat -c '%s' "$TEST_IMG") ++echo ++# The only leaks there can be are the old refcount structures that are ++# leaked during rebuilding, no need to clutter the output with them ++_check_test_img -r all | grep -v '^Repairing cluster.*refcount=1 reference=0' ++echo ++post_repair_file_len=$(stat -c '%s' "$TEST_IMG") ++ ++if [[ $file_len -eq $post_repair_file_len ]]; then ++ echo 'OK: Image size did not change' ++else ++ echo 'ERROR: Image size differs' \ ++ "($file_len before, $post_repair_file_len after)" ++fi ++ ++echo ++echo '--- Will the reftable occupy a hole specifically left for it? ---' ++echo ++ ++# Note: With cluster_size=512, every refblock covers 128k. ++# The reftable covers 8M per reftable cluster. ++ ++# Create an image that requires two reftable clusters (just because ++# this is more interesting than a single-clustered reftable). ++_make_test_img -o 'cluster_size=512' 9M ++$QEMU_IO -c 'write 0 8M' "$TEST_IMG" | _filter_qemu_io ++ ++# Writing 8M will have resized the reftable. Unfortunately, doing so ++# will leave holes in the file, so we need to fill them up so we can ++# be sure the whole file is allocated. Do that by writing ++# consecutively smaller chunks starting from 8 MB, until the file ++# length increases even with a chunk size of 512. Then we must have ++# filled all holes. ++ofs=$((8 * 1024 * 1024)) ++block_len=$((16 * 1024)) ++while [[ $block_len -ge 512 ]]; do ++ file_len=$(stat -c '%s' "$TEST_IMG") ++ while [[ $(stat -c '%s' "$TEST_IMG") -eq $file_len ]]; do ++ # Do not include this in the reference output, it does not ++ # really matter which qemu-io calls we do here exactly ++ $QEMU_IO -c "write $ofs $block_len" "$TEST_IMG" >/dev/null ++ ofs=$((ofs + block_len)) ++ done ++ block_len=$((block_len / 2)) ++done ++ ++# Fill up to 9M (do not include this in the reference output either, ++# $ofs is random for all we know) ++$QEMU_IO -c "write $ofs $((9 * 1024 * 1024 - ofs))" "$TEST_IMG" >/dev/null ++ ++# Make space as follows: ++# - For the first refblock: Right at the beginning of the image (this ++# refblock is placed in the first place possible), ++# - For the reftable somewhere soon afterwards, still near the ++# beginning of the image (i.e. covered by the first refblock); the ++# reftable too is placed in the first place possible, but only after ++# all refblocks have been placed) ++# No space is needed for the other refblocks, because no refblock is ++# put before the space it covers. In this test case, we do not mind ++# if they are placed at the image file's end. ++ ++# Before we make that space, we have to find out the host offset of ++# the area that belonged to the two data clusters at guest offset 4k, ++# because we expect the reftable to be placed there, and we will have ++# to verify that it is. ++ ++l1_offset=$(peek_file_be "$TEST_IMG" 40 8) ++l2_offset=$(peek_file_be "$TEST_IMG" $l1_offset 8) ++l2_offset=$((l2_offset & 0x00fffffffffffe00)) ++data_4k_offset=$(peek_file_be "$TEST_IMG" \ ++ $((l2_offset + 4096 / 512 * 8)) 8) ++data_4k_offset=$((data_4k_offset & 0x00fffffffffffe00)) ++ ++$QEMU_IO -c "discard 0 512" -c "discard 4k 1k" "$TEST_IMG" | _filter_qemu_io ++ ++# Corrupt the image by saying the image header was not allocated ++rt_offset=$(peek_file_be "$TEST_IMG" 48 8) ++rb_offset=$(peek_file_be "$TEST_IMG" $rt_offset 8) ++poke_file "$TEST_IMG" $rb_offset "\x00\x00" ++ ++echo ++# The only leaks there can be are the old refcount structures that are ++# leaked during rebuilding, no need to clutter the output with them ++_check_test_img -r all | grep -v '^Repairing cluster.*refcount=1 reference=0' ++echo ++ ++# Check whether the reftable was put where we expected ++rt_offset=$(peek_file_be "$TEST_IMG" 48 8) ++if [[ $rt_offset -eq $data_4k_offset ]]; then ++ echo 'OK: Reftable is where we expect it' ++else ++ echo "ERROR: Reftable is at $rt_offset, but was expected at $data_4k_offset" ++fi ++ ++echo ++echo '--- Rebuilding refcount structures on block devices ---' ++echo ++ ++# A block device cannot really grow, at least not during qemu-img ++# check. As mentioned in the above cases, rebuilding the refcount ++# structure may lead to new refcount structures being written after ++# the end of the image, and in the past that happened even if there ++# was more than sufficient space in the image. Such post-EOF writes ++# will not work on block devices, so test that the new algorithm ++# avoids it. ++ ++# If we have passwordless sudo and losetup, we can use those to create ++# a block device. Otherwise, we can resort to qemu's FUSE export to ++# create a file that isn't growable, which effectively tests the same ++# thing. ++ ++_cleanup_test_img ++truncate -s $((64 * 1024 * 1024)) "$TEST_IMG" ++ ++if $loopdev; then ++ export_mp=$(sudo -n losetup --show -f "$TEST_IMG") ++ export_mp_driver=host_device ++ sudo -n chmod go+rw "$export_mp" ++else ++ # Create non-growable FUSE export that is a bit like an empty ++ # block device ++ export_mp="$TEST_DIR/fuse-export" ++ export_mp_driver=file ++ touch "$export_mp" ++ ++ $QSD \ ++ --blockdev file,node-name=export-node,filename="$TEST_IMG" \ ++ --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off \ ++ --pidfile "$TEST_DIR/qsd.pid" \ ++ & ++ ++ while [ ! -f "$TEST_DIR/qsd.pid" ]; do ++ sleep 0.1 ++ done ++fi ++ ++# Now create a qcow2 image on the device -- unfortunately, qemu-img ++# create force-creates the file, so we have to resort to the ++# blockdev-create job. ++_launch_qemu \ ++ --blockdev $export_mp_driver,node-name=file,filename="$export_mp" ++ ++_send_qemu_cmd \ ++ $QEMU_HANDLE \ ++ '{ "execute": "qmp_capabilities" }' \ ++ 'return' ++ ++# Small cluster size again, so the image needs multiple refblocks ++_send_qemu_cmd \ ++ $QEMU_HANDLE \ ++ '{ "execute": "blockdev-create", ++ "arguments": { ++ "job-id": "create", ++ "options": { ++ "driver": "qcow2", ++ "file": "file", ++ "size": '$((64 * 1024 * 1024))', ++ "cluster-size": 512 ++ } } }' \ ++ '"concluded"' ++ ++_send_qemu_cmd \ ++ $QEMU_HANDLE \ ++ '{ "execute": "job-dismiss", "arguments": { "id": "create" } }' \ ++ 'return' ++ ++_send_qemu_cmd \ ++ $QEMU_HANDLE \ ++ '{ "execute": "quit" }' \ ++ 'return' ++ ++wait=y _cleanup_qemu ++echo ++ ++# Write some data ++$QEMU_IO -c 'write 0 64k' "$export_mp" | _filter_qemu_io ++ ++# Corrupt the image by saying the image header was not allocated ++rt_offset=$(peek_file_be "$export_mp" 48 8) ++rb_offset=$(peek_file_be "$export_mp" $rt_offset 8) ++poke_file "$export_mp" $rb_offset "\x00\x00" ++ ++# Repairing such a simple case should just work ++# (We used to put the reftable at the end of the image file, which can ++# never work for non-growable devices.) ++echo ++TEST_IMG="$export_mp" _check_test_img -r all \ ++ | grep -v '^Repairing cluster.*refcount=1 reference=0' ++ ++if $loopdev; then ++ sudo -n losetup -d "$export_mp" ++else ++ qsd_pid=$(cat "$TEST_DIR/qsd.pid") ++ kill -TERM "$qsd_pid" ++ # Wait for process to exit (cannot `wait` because the QSD is daemonized) ++ while [ -f "$TEST_DIR/qsd.pid" ]; do ++ true ++ done ++fi ++ + # success, all done + echo '*** done' + rm -f $seq.full +diff --git a/tests/qemu-iotests/108.out b/tests/qemu-iotests/108.out +index 75bab8dc84..b5401d788d 100644 +--- a/tests/qemu-iotests/108.out ++++ b/tests/qemu-iotests/108.out +@@ -105,6 +105,87 @@ The following inconsistencies were found and repaired: + 0 leaked clusters + 1 corruptions + ++Double checking the fixed image now... ++No errors were found on the image. ++ ++=== Check rebuilt reftable location === ++ ++--- Does the image size increase? --- ++ ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 ++wrote 65536/65536 bytes at offset 0 ++64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++ERROR cluster 0 refcount=0 reference=1 ++Rebuilding refcount structure ++The following inconsistencies were found and repaired: ++ ++ 0 leaked clusters ++ 1 corruptions ++ ++Double checking the fixed image now... ++No errors were found on the image. ++ ++OK: Image size did not change ++ ++--- Will the reftable occupy a hole specifically left for it? --- ++ ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=9437184 ++wrote 8388608/8388608 bytes at offset 0 ++8 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++discard 512/512 bytes at offset 0 ++512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++discard 1024/1024 bytes at offset 4096 ++1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++ERROR cluster 0 refcount=0 reference=1 ++Rebuilding refcount structure ++The following inconsistencies were found and repaired: ++ ++ 0 leaked clusters ++ 1 corruptions ++ ++Double checking the fixed image now... ++No errors were found on the image. ++ ++OK: Reftable is where we expect it ++ ++--- Rebuilding refcount structures on block devices --- ++ ++{ "execute": "qmp_capabilities" } ++{"return": {}} ++{ "execute": "blockdev-create", ++ "arguments": { ++ "job-id": "create", ++ "options": { ++ "driver": "IMGFMT", ++ "file": "file", ++ "size": 67108864, ++ "cluster-size": 512 ++ } } } ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "create"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "create"}} ++{"return": {}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "create"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "create"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "create"}} ++{ "execute": "job-dismiss", "arguments": { "id": "create" } } ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "create"}} ++{"return": {}} ++{ "execute": "quit" } ++{"return": {}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++ ++wrote 65536/65536 bytes at offset 0 ++64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++ERROR cluster 0 refcount=0 reference=1 ++Rebuilding refcount structure ++The following inconsistencies were found and repaired: ++ ++ 0 leaked clusters ++ 1 corruptions ++ + Double checking the fixed image now... + No errors were found on the image. + *** done +-- +2.27.0 + diff --git a/SOURCES/kvm-iotests-281-Let-NBD-connection-yield-in-iothread.patch b/SOURCES/kvm-iotests-281-Let-NBD-connection-yield-in-iothread.patch new file mode 100644 index 0000000..b703c23 --- /dev/null +++ b/SOURCES/kvm-iotests-281-Let-NBD-connection-yield-in-iothread.patch @@ -0,0 +1,108 @@ +From 2ed48247fd39ade97164dee3c65162b96a116f14 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Fri, 4 Feb 2022 12:10:12 +0100 +Subject: [PATCH 6/6] iotests/281: Let NBD connection yield in iothread + +RH-Author: Hanna Reitz +RH-MergeRequest: 117: block/nbd: Handle AioContext changes +RH-Commit: [6/6] a23706f34022d301eb7ffc84fc0d0a77d72b9844 +RH-Bugzilla: 2035185 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +Put an NBD block device into an I/O thread, and then read data from it, +hoping that the NBD connection will yield during that read. When it +does, the coroutine must be reentered in the block device's I/O thread, +which will only happen if the NBD block driver attaches the connection's +QIOChannel to the new AioContext. It did not do that after 4ddb5d2fde +("block/nbd: drop connection_co") and prior to "block/nbd: Move s->ioc +on AioContext change", which would cause an assertion failure. + +To improve our chances of yielding, the NBD server is throttled to +reading 64 kB/s, and the NBD client reads 128 kB, so it should yield at +some point. + +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Reitz +Signed-off-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 8cfbe929e8c26050f0a4580a1606a370a947d4ce) +Signed-off-by: Hanna Reitz +--- + tests/qemu-iotests/281 | 28 +++++++++++++++++++++++++--- + tests/qemu-iotests/281.out | 4 ++-- + 2 files changed, 27 insertions(+), 5 deletions(-) + +diff --git a/tests/qemu-iotests/281 b/tests/qemu-iotests/281 +index 13c588be75..b2ead7f388 100755 +--- a/tests/qemu-iotests/281 ++++ b/tests/qemu-iotests/281 +@@ -253,8 +253,9 @@ class TestYieldingAndTimers(iotests.QMPTestCase): + self.create_nbd_export() + + # Simple VM with an NBD block device connected to the NBD export +- # provided by the QSD ++ # provided by the QSD, and an (initially unused) iothread + self.vm = iotests.VM() ++ self.vm.add_object('iothread,id=iothr') + self.vm.add_blockdev('nbd,node-name=nbd,server.type=unix,' + + f'server.path={self.sock},export=exp,' + + 'reconnect-delay=1') +@@ -293,19 +294,40 @@ class TestYieldingAndTimers(iotests.QMPTestCase): + # thus not see the error, and so the test will pass.) + time.sleep(2) + ++ def test_yield_in_iothread(self): ++ # Move the NBD node to the I/O thread; the NBD block driver should ++ # attach the connection's QIOChannel to that thread's AioContext, too ++ result = self.vm.qmp('x-blockdev-set-iothread', ++ node_name='nbd', iothread='iothr') ++ self.assert_qmp(result, 'return', {}) ++ ++ # Do some I/O that will be throttled by the QSD, so that the network ++ # connection hopefully will yield here. When it is resumed, it must ++ # then be resumed in the I/O thread's AioContext. ++ result = self.vm.qmp('human-monitor-command', ++ command_line='qemu-io nbd "read 0 128K"') ++ self.assert_qmp(result, 'return', '') ++ + def create_nbd_export(self): + assert self.qsd is None + +- # Simple NBD export of a null-co BDS ++ # Export a throttled null-co BDS: Reads are throttled (max 64 kB/s), ++ # writes are not. + self.qsd = QemuStorageDaemon( ++ '--object', ++ 'throttle-group,id=thrgr,x-bps-read=65536,x-bps-read-max=65536', ++ + '--blockdev', + 'null-co,node-name=null,read-zeroes=true', + ++ '--blockdev', ++ 'throttle,node-name=thr,file=null,throttle-group=thrgr', ++ + '--nbd-server', + f'addr.type=unix,addr.path={self.sock}', + + '--export', +- 'nbd,id=exp,node-name=null,name=exp,writable=true' ++ 'nbd,id=exp,node-name=thr,name=exp,writable=true' + ) + + def stop_nbd_export(self): +diff --git a/tests/qemu-iotests/281.out b/tests/qemu-iotests/281.out +index 914e3737bd..3f8a935a08 100644 +--- a/tests/qemu-iotests/281.out ++++ b/tests/qemu-iotests/281.out +@@ -1,5 +1,5 @@ +-..... ++...... + ---------------------------------------------------------------------- +-Ran 5 tests ++Ran 6 tests + + OK +-- +2.27.0 + diff --git a/SOURCES/kvm-iotests-281-Test-lingering-timers.patch b/SOURCES/kvm-iotests-281-Test-lingering-timers.patch new file mode 100644 index 0000000..c31b413 --- /dev/null +++ b/SOURCES/kvm-iotests-281-Test-lingering-timers.patch @@ -0,0 +1,174 @@ +From b56684f6c1bef4fb5bf87ac5a1106d3830c05ad0 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Fri, 4 Feb 2022 12:10:10 +0100 +Subject: [PATCH 4/6] iotests/281: Test lingering timers + +RH-Author: Hanna Reitz +RH-MergeRequest: 117: block/nbd: Handle AioContext changes +RH-Commit: [4/6] aaad466941637a34224dc037bbea37d128b5676b +RH-Bugzilla: 2035185 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +Prior to "block/nbd: Delete reconnect delay timer when done" and +"block/nbd: Delete open timer when done", both of those timers would +remain scheduled even after successfully (re-)connecting to the server, +and they would not even be deleted when the BDS is deleted. + +This test constructs exactly this situation: +(1) Configure an @open-timeout, so the open timer is armed, and +(2) Configure a @reconnect-delay and trigger a reconnect situation + (which succeeds immediately), so the reconnect delay timer is armed. +Then we immediately delete the BDS, and sleep for longer than the +@open-timeout and @reconnect-delay. Prior to said patches, this caused +one (or both) of the timer CBs to access already-freed data. + +Accessing freed data may or may not crash, so this test can produce +false successes, but I do not know how to show the problem in a better +or more reliable way. If you run this test on "block/nbd: Assert there +are no timers when closed" and without the fix patches mentioned above, +you should reliably see an assertion failure. +(But all other tests that use the reconnect delay timer (264 and 277) +will fail in that configuration, too; as will nbd-reconnect-on-open, +which uses the open timer.) + +Remove this test from the quick group because of the two second sleep +this patch introduces. + +(I decided to put this test case into 281, because the main bug this +series addresses is in the interaction of the NBD block driver and I/O +threads, which is precisely the scope of 281. The test case for that +other bug will also be put into the test class added here. + +Also, excuse the test class's name, I couldn't come up with anything +better. The "yield" part will make sense two patches from now.) + +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Reitz +Signed-off-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit eaf1e85d4ddefdbd197f393fa9c5acc7ba8133b0) + +Conflict: +- @open-timeout was introduced after the 6.2 release, and has not been + backported. Consequently, there is no open_timer, and we can (and + must) drop the respective parts of the test here. + +Signed-off-by: Hanna Reitz +--- + tests/qemu-iotests/281 | 73 ++++++++++++++++++++++++++++++++++++-- + tests/qemu-iotests/281.out | 4 +-- + 2 files changed, 73 insertions(+), 4 deletions(-) + +diff --git a/tests/qemu-iotests/281 b/tests/qemu-iotests/281 +index 956698083f..13c588be75 100755 +--- a/tests/qemu-iotests/281 ++++ b/tests/qemu-iotests/281 +@@ -1,5 +1,5 @@ + #!/usr/bin/env python3 +-# group: rw quick ++# group: rw + # + # Test cases for blockdev + IOThread interactions + # +@@ -20,8 +20,9 @@ + # + + import os ++import time + import iotests +-from iotests import qemu_img ++from iotests import qemu_img, QemuStorageDaemon + + image_len = 64 * 1024 * 1024 + +@@ -243,6 +244,74 @@ class TestBlockdevBackupAbort(iotests.QMPTestCase): + # Hangs on failure, we expect this error. + self.assert_qmp(result, 'error/class', 'GenericError') + ++# Test for RHBZ#2033626 ++class TestYieldingAndTimers(iotests.QMPTestCase): ++ sock = os.path.join(iotests.sock_dir, 'nbd.sock') ++ qsd = None ++ ++ def setUp(self): ++ self.create_nbd_export() ++ ++ # Simple VM with an NBD block device connected to the NBD export ++ # provided by the QSD ++ self.vm = iotests.VM() ++ self.vm.add_blockdev('nbd,node-name=nbd,server.type=unix,' + ++ f'server.path={self.sock},export=exp,' + ++ 'reconnect-delay=1') ++ ++ self.vm.launch() ++ ++ def tearDown(self): ++ self.stop_nbd_export() ++ self.vm.shutdown() ++ ++ def test_timers_with_blockdev_del(self): ++ # Stop and restart the NBD server, and do some I/O on the client to ++ # trigger a reconnect and start the reconnect delay timer ++ self.stop_nbd_export() ++ self.create_nbd_export() ++ ++ result = self.vm.qmp('human-monitor-command', ++ command_line='qemu-io nbd "write 0 512"') ++ self.assert_qmp(result, 'return', '') ++ ++ # Reconnect is done, so the reconnect delay timer should be gone. ++ # (But there used to be a bug where it remained active, for which this ++ # is a regression test.) ++ ++ # Delete the BDS to see whether the timer is gone. If it is not, ++ # it will remain active, fire later, and then access freed data. ++ # (Or, with "block/nbd: Assert there are no timers when closed" ++ # applied, the assertion added in that patch will fail.) ++ result = self.vm.qmp('blockdev-del', node_name='nbd') ++ self.assert_qmp(result, 'return', {}) ++ ++ # Give the timer some time to fire (it has a timeout of 1 s). ++ # (Sleeping in an iotest may ring some alarm bells, but note that if ++ # the timing is off here, the test will just always pass. If we kill ++ # the VM too early, then we just kill the timer before it can fire, ++ # thus not see the error, and so the test will pass.) ++ time.sleep(2) ++ ++ def create_nbd_export(self): ++ assert self.qsd is None ++ ++ # Simple NBD export of a null-co BDS ++ self.qsd = QemuStorageDaemon( ++ '--blockdev', ++ 'null-co,node-name=null,read-zeroes=true', ++ ++ '--nbd-server', ++ f'addr.type=unix,addr.path={self.sock}', ++ ++ '--export', ++ 'nbd,id=exp,node-name=null,name=exp,writable=true' ++ ) ++ ++ def stop_nbd_export(self): ++ self.qsd.stop() ++ self.qsd = None ++ + if __name__ == '__main__': + iotests.main(supported_fmts=['qcow2'], + supported_protocols=['file']) +diff --git a/tests/qemu-iotests/281.out b/tests/qemu-iotests/281.out +index 89968f35d7..914e3737bd 100644 +--- a/tests/qemu-iotests/281.out ++++ b/tests/qemu-iotests/281.out +@@ -1,5 +1,5 @@ +-.... ++..... + ---------------------------------------------------------------------- +-Ran 4 tests ++Ran 5 tests + + OK +-- +2.27.0 + diff --git a/SOURCES/kvm-iotests-Allow-using-QMP-with-the-QSD.patch b/SOURCES/kvm-iotests-Allow-using-QMP-with-the-QSD.patch new file mode 100644 index 0000000..5d45438 --- /dev/null +++ b/SOURCES/kvm-iotests-Allow-using-QMP-with-the-QSD.patch @@ -0,0 +1,99 @@ +From 12f596b66d577eb92f154fadf734d058dd0756d6 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Wed, 16 Feb 2022 11:53:54 +0100 +Subject: [PATCH 23/24] iotests: Allow using QMP with the QSD + +RH-Author: Hanna Reitz +RH-MergeRequest: 189: block: Make bdrv_refresh_limits() non-recursive +RH-Commit: [2/3] 55bee4690a2e02d3be9f2bd68f2d244d0a36743b +RH-Bugzilla: 2072932 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf + +Add a parameter to optionally open a QMP connection when creating a +QemuStorageDaemon instance. + +Signed-off-by: Hanna Reitz +Message-Id: <20220216105355.30729-3-hreitz@redhat.com> +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Kevin Wolf +(cherry picked from commit ec88eed8d14088b36a3495710368b8d1a3c33420) +Signed-off-by: Hanna Reitz +--- + tests/qemu-iotests/iotests.py | 32 +++++++++++++++++++++++++++++++- + 1 file changed, 31 insertions(+), 1 deletion(-) + +diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py +index a51b5ce8cd..2ef493755c 100644 +--- a/tests/qemu-iotests/iotests.py ++++ b/tests/qemu-iotests/iotests.py +@@ -38,6 +38,7 @@ + + from qemu.machine import qtest + from qemu.qmp import QMPMessage ++from qemu.aqmp.legacy import QEMUMonitorProtocol + + # Use this logger for logging messages directly from the iotests module + logger = logging.getLogger('qemu.iotests') +@@ -315,14 +316,30 @@ def cmd(self, cmd): + + + class QemuStorageDaemon: +- def __init__(self, *args: str, instance_id: str = 'a'): ++ _qmp: Optional[QEMUMonitorProtocol] = None ++ _qmpsock: Optional[str] = None ++ # Python < 3.8 would complain if this type were not a string literal ++ # (importing `annotations` from `__future__` would work; but not on <= 3.6) ++ _p: 'Optional[subprocess.Popen[bytes]]' = None ++ ++ def __init__(self, *args: str, instance_id: str = 'a', qmp: bool = False): + assert '--pidfile' not in args + self.pidfile = os.path.join(test_dir, f'qsd-{instance_id}-pid') + all_args = [qsd_prog] + list(args) + ['--pidfile', self.pidfile] + ++ if qmp: ++ self._qmpsock = os.path.join(sock_dir, f'qsd-{instance_id}.sock') ++ all_args += ['--chardev', ++ f'socket,id=qmp-sock,path={self._qmpsock}', ++ '--monitor', 'qmp-sock'] ++ ++ self._qmp = QEMUMonitorProtocol(self._qmpsock, server=True) ++ + # Cannot use with here, we want the subprocess to stay around + # pylint: disable=consider-using-with + self._p = subprocess.Popen(all_args) ++ if self._qmp is not None: ++ self._qmp.accept() + while not os.path.exists(self.pidfile): + if self._p.poll() is not None: + cmd = ' '.join(all_args) +@@ -337,11 +354,24 @@ def __init__(self, *args: str, instance_id: str = 'a'): + + assert self._pid == self._p.pid + ++ def qmp(self, cmd: str, args: Optional[Dict[str, object]] = None) \ ++ -> QMPMessage: ++ assert self._qmp is not None ++ return self._qmp.cmd(cmd, args) ++ + def stop(self, kill_signal=15): + self._p.send_signal(kill_signal) + self._p.wait() + self._p = None + ++ if self._qmp: ++ self._qmp.close() ++ ++ if self._qmpsock is not None: ++ try: ++ os.remove(self._qmpsock) ++ except OSError: ++ pass + try: + os.remove(self.pidfile) + except OSError: +-- +2.35.3 + diff --git a/SOURCES/kvm-iotests-Test-blockdev-reopen-with-iothreads-and-thro.patch b/SOURCES/kvm-iotests-Test-blockdev-reopen-with-iothreads-and-thro.patch new file mode 100644 index 0000000..1caf73c --- /dev/null +++ b/SOURCES/kvm-iotests-Test-blockdev-reopen-with-iothreads-and-thro.patch @@ -0,0 +1,106 @@ +From ea4d8424fb2053b1cbb9538190b2b06351054125 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 3 Feb 2022 15:05:34 +0100 +Subject: [PATCH 3/5] iotests: Test blockdev-reopen with iothreads and + throttling + +RH-Author: Kevin Wolf +RH-MergeRequest: 142: block: Lock AioContext for drain_end in blockdev-reopen +RH-Commit: [2/2] 91d365864c391ca7db7db13260913fb61987b833 +RH-Bugzilla: 2067118 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Hanna Reitz + +The 'throttle' block driver implements .bdrv_co_drain_end, so +blockdev-reopen will have to wait for it to complete in the polling +loop at the end of qmp_blockdev_reopen(). This makes AIO_WAIT_WHILE() +release the AioContext lock, which causes a crash if the lock hasn't +correctly been taken. + +Signed-off-by: Kevin Wolf +Message-Id: <20220203140534.36522-3-kwolf@redhat.com> +Reviewed-by: Hanna Reitz +Signed-off-by: Kevin Wolf +(cherry picked from commit ee810602376125ca0e0afd6b7c715e13740978ea) +Signed-off-by: Kevin Wolf +--- + tests/qemu-iotests/245 | 36 +++++++++++++++++++++++++++++++++--- + tests/qemu-iotests/245.out | 4 ++-- + 2 files changed, 35 insertions(+), 5 deletions(-) + +diff --git a/tests/qemu-iotests/245 b/tests/qemu-iotests/245 +index 24ac43f70e..8cbed7821b 100755 +--- a/tests/qemu-iotests/245 ++++ b/tests/qemu-iotests/245 +@@ -1138,12 +1138,13 @@ class TestBlockdevReopen(iotests.QMPTestCase): + self.assertEqual(self.get_node('hd1'), None) + self.assert_qmp(self.get_node('hd2'), 'ro', True) + +- def run_test_iothreads(self, iothread_a, iothread_b, errmsg = None): +- opts = hd_opts(0) ++ def run_test_iothreads(self, iothread_a, iothread_b, errmsg = None, ++ opts_a = None, opts_b = None): ++ opts = opts_a or hd_opts(0) + result = self.vm.qmp('blockdev-add', conv_keys = False, **opts) + self.assert_qmp(result, 'return', {}) + +- opts2 = hd_opts(2) ++ opts2 = opts_b or hd_opts(2) + result = self.vm.qmp('blockdev-add', conv_keys = False, **opts2) + self.assert_qmp(result, 'return', {}) + +@@ -1194,6 +1195,35 @@ class TestBlockdevReopen(iotests.QMPTestCase): + def test_iothreads_switch_overlay(self): + self.run_test_iothreads('', 'iothread0') + ++ def test_iothreads_with_throttling(self): ++ # Create a throttle-group object ++ opts = { 'qom-type': 'throttle-group', 'id': 'group0', ++ 'limits': { 'iops-total': 1000 } } ++ result = self.vm.qmp('object-add', conv_keys = False, **opts) ++ self.assert_qmp(result, 'return', {}) ++ ++ # Options with a throttle filter between format and protocol ++ opts = [ ++ { ++ 'driver': iotests.imgfmt, ++ 'node-name': f'hd{idx}', ++ 'file' : { ++ 'node-name': f'hd{idx}-throttle', ++ 'driver': 'throttle', ++ 'throttle-group': 'group0', ++ 'file': { ++ 'driver': 'file', ++ 'node-name': f'hd{idx}-file', ++ 'filename': hd_path[idx], ++ }, ++ }, ++ } ++ for idx in (0, 2) ++ ] ++ ++ self.run_test_iothreads('iothread0', 'iothread0', None, ++ opts[0], opts[1]) ++ + if __name__ == '__main__': + iotests.activate_logging() + iotests.main(supported_fmts=["qcow2"], +diff --git a/tests/qemu-iotests/245.out b/tests/qemu-iotests/245.out +index 4eced19294..a4e04a3266 100644 +--- a/tests/qemu-iotests/245.out ++++ b/tests/qemu-iotests/245.out +@@ -17,8 +17,8 @@ read 1/1 bytes at offset 262152 + read 1/1 bytes at offset 262160 + 1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +-............... ++................ + ---------------------------------------------------------------------- +-Ran 25 tests ++Ran 26 tests + + OK +-- +2.27.0 + diff --git a/SOURCES/kvm-iotests-block-status-cache-New-test.patch b/SOURCES/kvm-iotests-block-status-cache-New-test.patch new file mode 100644 index 0000000..25f057c --- /dev/null +++ b/SOURCES/kvm-iotests-block-status-cache-New-test.patch @@ -0,0 +1,197 @@ +From 0ba4c0836f702bb3abbd173c7ee486a8247331ae Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Tue, 18 Jan 2022 18:00:00 +0100 +Subject: [PATCH 7/7] iotests/block-status-cache: New test + +RH-Author: Hanna Reitz +RH-MergeRequest: 112: block/io: Update BSC only if want_zero is true +RH-Commit: [2/2] ba86b4db32c33e17a85f476d445ef0523cf8f60e +RH-Bugzilla: 2041480 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf + +Add a new test to verify that want_zero=false block-status calls do not +pollute the block-status cache for want_zero=true calls. + +We check want_zero=true calls and their results using `qemu-img map` +(over NBD), and want_zero=false calls also using `qemu-img map` over +NBD, but using the qemu:allocation-depth context. + +(This test case cannot be integrated into nbd-qemu-allocation, because +that is a qcow2 test, and this is a raw test.) + +Signed-off-by: Hanna Reitz +Message-Id: <20220118170000.49423-3-hreitz@redhat.com> +Reviewed-by: Nir Soffer +Reviewed-by: Eric Blake +Tested-by: Eric Blake +Signed-off-by: Eric Blake +(cherry picked from commit 6384dd534d742123d26c008d9794b20bc41359d5) +Signed-off-by: Hanna Reitz +--- + tests/qemu-iotests/tests/block-status-cache | 139 ++++++++++++++++++ + .../qemu-iotests/tests/block-status-cache.out | 5 + + 2 files changed, 144 insertions(+) + create mode 100755 tests/qemu-iotests/tests/block-status-cache + create mode 100644 tests/qemu-iotests/tests/block-status-cache.out + +diff --git a/tests/qemu-iotests/tests/block-status-cache b/tests/qemu-iotests/tests/block-status-cache +new file mode 100755 +index 0000000000..6fa10bb8f8 +--- /dev/null ++++ b/tests/qemu-iotests/tests/block-status-cache +@@ -0,0 +1,139 @@ ++#!/usr/bin/env python3 ++# group: rw quick ++# ++# Test cases for the block-status cache. ++# ++# Copyright (C) 2022 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++# ++ ++import os ++import signal ++import iotests ++from iotests import qemu_img_create, qemu_img_pipe, qemu_nbd ++ ++ ++image_size = 1 * 1024 * 1024 ++test_img = os.path.join(iotests.test_dir, 'test.img') ++ ++nbd_pidfile = os.path.join(iotests.test_dir, 'nbd.pid') ++nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock') ++ ++ ++class TestBscWithNbd(iotests.QMPTestCase): ++ def setUp(self) -> None: ++ """Just create an empty image with a read-only NBD server on it""" ++ assert qemu_img_create('-f', iotests.imgfmt, test_img, ++ str(image_size)) == 0 ++ ++ # Pass --allocation-depth to enable the qemu:allocation-depth context, ++ # which we are going to query to provoke a block-status inquiry with ++ # want_zero=false. ++ assert qemu_nbd(f'--socket={nbd_sock}', ++ f'--format={iotests.imgfmt}', ++ '--persistent', ++ '--allocation-depth', ++ '--read-only', ++ f'--pid-file={nbd_pidfile}', ++ test_img) \ ++ == 0 ++ ++ def tearDown(self) -> None: ++ with open(nbd_pidfile, encoding='utf-8') as f: ++ pid = int(f.read()) ++ os.kill(pid, signal.SIGTERM) ++ os.remove(nbd_pidfile) ++ os.remove(test_img) ++ ++ def test_with_zero_bug(self) -> None: ++ """ ++ Verify that the block-status cache is not corrupted by a ++ want_zero=false call. ++ We can provoke a want_zero=false call with `qemu-img map` over NBD with ++ x-dirty-bitmap=qemu:allocation-depth, so we first run a normal `map` ++ (which results in want_zero=true), then using said ++ qemu:allocation-depth context, and finally another normal `map` to ++ verify that the cache has not been corrupted. ++ """ ++ ++ nbd_img_opts = f'driver=nbd,server.type=unix,server.path={nbd_sock}' ++ nbd_img_opts_alloc_depth = nbd_img_opts + \ ++ ',x-dirty-bitmap=qemu:allocation-depth' ++ ++ # Normal map, results in want_zero=true. ++ # This will probably detect an allocated data sector first (qemu likes ++ # to allocate the first sector to facilitate alignment probing), and ++ # then the rest to be zero. The BSC will thus contain (if anything) ++ # one range covering the first sector. ++ map_pre = qemu_img_pipe('map', '--output=json', '--image-opts', ++ nbd_img_opts) ++ ++ # qemu:allocation-depth maps for want_zero=false. ++ # want_zero=false should (with the file driver, which the server is ++ # using) report everything as data. While this is sufficient for ++ # want_zero=false, this is nothing that should end up in the ++ # block-status cache. ++ # Due to a bug, this information did end up in the cache, though, and ++ # this would lead to wrong information being returned on subsequent ++ # want_zero=true calls. ++ # ++ # We need to run this map twice: On the first call, we probably still ++ # have the first sector in the cache, and so this will be served from ++ # the cache; and only the subsequent range will be queried from the ++ # block driver. This subsequent range will then be entered into the ++ # cache. ++ # If we did a want_zero=true call at this point, we would thus get ++ # correct information: The first sector is not covered by the cache, so ++ # we would get fresh block-status information from the driver, which ++ # would return a data range, and this would then go into the cache, ++ # evicting the wrong range from the want_zero=false call before. ++ # ++ # Therefore, we need a second want_zero=false map to reproduce: ++ # Since the first sector is not in the cache, the query for its status ++ # will go to the driver, which will return a result that reports the ++ # whole image to be a single data area. This result will then go into ++ # the cache, and so the cache will then report the whole image to ++ # contain data. ++ # ++ # Note that once the cache reports the whole image to contain data, any ++ # subsequent map operation will be served from the cache, and so we can ++ # never loop too many times here. ++ for _ in range(2): ++ # (Ignore the result, this is just to contaminate the cache) ++ qemu_img_pipe('map', '--output=json', '--image-opts', ++ nbd_img_opts_alloc_depth) ++ ++ # Now let's see whether the cache reports everything as data, or ++ # whether we get correct information (i.e. the same as we got on our ++ # first attempt). ++ map_post = qemu_img_pipe('map', '--output=json', '--image-opts', ++ nbd_img_opts) ++ ++ if map_pre != map_post: ++ print('ERROR: Map information differs before and after querying ' + ++ 'qemu:allocation-depth') ++ print('Before:') ++ print(map_pre) ++ print('After:') ++ print(map_post) ++ ++ self.fail("Map information differs") ++ ++ ++if __name__ == '__main__': ++ # The block-status cache only works on the protocol layer, so to test it, ++ # we can only use the raw format ++ iotests.main(supported_fmts=['raw'], ++ supported_protocols=['file']) +diff --git a/tests/qemu-iotests/tests/block-status-cache.out b/tests/qemu-iotests/tests/block-status-cache.out +new file mode 100644 +index 0000000000..ae1213e6f8 +--- /dev/null ++++ b/tests/qemu-iotests/tests/block-status-cache.out +@@ -0,0 +1,5 @@ ++. ++---------------------------------------------------------------------- ++Ran 1 tests ++ ++OK +-- +2.27.0 + diff --git a/SOURCES/kvm-iotests-graph-changes-while-io-New-test.patch b/SOURCES/kvm-iotests-graph-changes-while-io-New-test.patch new file mode 100644 index 0000000..d40e25e --- /dev/null +++ b/SOURCES/kvm-iotests-graph-changes-while-io-New-test.patch @@ -0,0 +1,153 @@ +From 27042ff7aca4366c50e8ed66b47487d46774d16a Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Wed, 16 Feb 2022 11:53:55 +0100 +Subject: [PATCH 24/24] iotests/graph-changes-while-io: New test + +RH-Author: Hanna Reitz +RH-MergeRequest: 189: block: Make bdrv_refresh_limits() non-recursive +RH-Commit: [3/3] b9dffe09bef6cf9b2f0aad69b327ea1df92e847a +RH-Bugzilla: 2072932 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf + +Test the following scenario: +1. Some block node (null-co) attached to a user (here: NBD server) that + performs I/O and keeps the node in an I/O thread +2. Repeatedly run blockdev-add/blockdev-del to add/remove an overlay + to/from that node + +Each blockdev-add triggers bdrv_refresh_limits(), and because +blockdev-add runs in the main thread, it does not stop the I/O requests. +I/O can thus happen while the limits are refreshed, and when such a +request sees a temporarily invalid block limit (e.g. alignment is 0), +this may easily crash qemu (or the storage daemon in this case). + +The block layer needs to ensure that I/O requests to a node are paused +while that node's BlockLimits are refreshed. + +Signed-off-by: Hanna Reitz +Reviewed-by: Eric Blake +Message-Id: <20220216105355.30729-4-hreitz@redhat.com> +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Kevin Wolf +(cherry picked from commit 971bea8089531af56b1bbd9ce62e756bdf006711) +Signed-off-by: Hanna Reitz +--- + .../qemu-iotests/tests/graph-changes-while-io | 91 +++++++++++++++++++ + .../tests/graph-changes-while-io.out | 5 + + 2 files changed, 96 insertions(+) + create mode 100755 tests/qemu-iotests/tests/graph-changes-while-io + create mode 100644 tests/qemu-iotests/tests/graph-changes-while-io.out + +diff --git a/tests/qemu-iotests/tests/graph-changes-while-io b/tests/qemu-iotests/tests/graph-changes-while-io +new file mode 100755 +index 0000000000..567e8cf21e +--- /dev/null ++++ b/tests/qemu-iotests/tests/graph-changes-while-io +@@ -0,0 +1,91 @@ ++#!/usr/bin/env python3 ++# group: rw ++# ++# Test graph changes while I/O is happening ++# ++# Copyright (C) 2022 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++# ++ ++import os ++from threading import Thread ++import iotests ++from iotests import imgfmt, qemu_img, qemu_img_create, QMPTestCase, \ ++ QemuStorageDaemon ++ ++ ++top = os.path.join(iotests.test_dir, 'top.img') ++nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock') ++ ++ ++def do_qemu_img_bench() -> None: ++ """ ++ Do some I/O requests on `nbd_sock`. ++ """ ++ assert qemu_img('bench', '-f', 'raw', '-c', '2000000', ++ f'nbd+unix:///node0?socket={nbd_sock}') == 0 ++ ++ ++class TestGraphChangesWhileIO(QMPTestCase): ++ def setUp(self) -> None: ++ # Create an overlay that can be added at runtime on top of the ++ # null-co block node that will receive I/O ++ assert qemu_img_create('-f', imgfmt, '-F', 'raw', '-b', 'null-co://', ++ top) == 0 ++ ++ # QSD instance with a null-co block node in an I/O thread, ++ # exported over NBD (on `nbd_sock`, export name "node0") ++ self.qsd = QemuStorageDaemon( ++ '--object', 'iothread,id=iothread0', ++ '--blockdev', 'null-co,node-name=node0,read-zeroes=true', ++ '--nbd-server', f'addr.type=unix,addr.path={nbd_sock}', ++ '--export', 'nbd,id=exp0,node-name=node0,iothread=iothread0,' + ++ 'fixed-iothread=true,writable=true', ++ qmp=True ++ ) ++ ++ def tearDown(self) -> None: ++ self.qsd.stop() ++ ++ def test_blockdev_add_while_io(self) -> None: ++ # Run qemu-img bench in the background ++ bench_thr = Thread(target=do_qemu_img_bench) ++ bench_thr.start() ++ ++ # While qemu-img bench is running, repeatedly add and remove an ++ # overlay to/from node0 ++ while bench_thr.is_alive(): ++ result = self.qsd.qmp('blockdev-add', { ++ 'driver': imgfmt, ++ 'node-name': 'overlay', ++ 'backing': 'node0', ++ 'file': { ++ 'driver': 'file', ++ 'filename': top ++ } ++ }) ++ self.assert_qmp(result, 'return', {}) ++ ++ result = self.qsd.qmp('blockdev-del', { ++ 'node-name': 'overlay' ++ }) ++ self.assert_qmp(result, 'return', {}) ++ ++ bench_thr.join() ++ ++if __name__ == '__main__': ++ # Format must support raw backing files ++ iotests.main(supported_fmts=['qcow', 'qcow2', 'qed'], ++ supported_protocols=['file']) +diff --git a/tests/qemu-iotests/tests/graph-changes-while-io.out b/tests/qemu-iotests/tests/graph-changes-while-io.out +new file mode 100644 +index 0000000000..ae1213e6f8 +--- /dev/null ++++ b/tests/qemu-iotests/tests/graph-changes-while-io.out +@@ -0,0 +1,5 @@ ++. ++---------------------------------------------------------------------- ++Ran 1 tests ++ ++OK +-- +2.35.3 + diff --git a/SOURCES/kvm-iotests-stream-error-on-reset-New-test.patch b/SOURCES/kvm-iotests-stream-error-on-reset-New-test.patch new file mode 100644 index 0000000..0214854 --- /dev/null +++ b/SOURCES/kvm-iotests-stream-error-on-reset-New-test.patch @@ -0,0 +1,198 @@ +From ffdec41922a34b6fe4e7e11f259553d65b41563e Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 11 Jan 2022 15:36:13 +0000 +Subject: [PATCH 4/6] iotests/stream-error-on-reset: New test + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 109: block-backend: prevent dangling BDS pointers across aio_poll() +RH-Commit: [2/2] 0ecb7010d9c121398e7ee22ee47dd85d89bcd941 +RH-Bugzilla: 2021778 2036178 +RH-Acked-by: Hanna Reitz +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Kevin Wolf + +Author: Hanna Reitz + +Test the following scenario: +- Simple stream block in two-layer backing chain (base and top) +- The job is drained via blk_drain(), then an error occurs while the job + settles the ongoing request +- And so the job completes while in blk_drain() + +This was reported as a segfault, but is fixed by "block-backend: prevent +dangling BDS pointers across aio_poll()". + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2036178 +Signed-off-by: Hanna Reitz +Signed-off-by: Stefan Hajnoczi +Message-Id: <20220111153613.25453-3-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 2ca1d5d6b91f8a52a5c651f660b2f58c94bf97ba) +Signed-off-by: Stefan Hajnoczi +--- + .../qemu-iotests/tests/stream-error-on-reset | 140 ++++++++++++++++++ + .../tests/stream-error-on-reset.out | 5 + + 2 files changed, 145 insertions(+) + create mode 100755 tests/qemu-iotests/tests/stream-error-on-reset + create mode 100644 tests/qemu-iotests/tests/stream-error-on-reset.out + +diff --git a/tests/qemu-iotests/tests/stream-error-on-reset b/tests/qemu-iotests/tests/stream-error-on-reset +new file mode 100755 +index 0000000000..7eaedb24d7 +--- /dev/null ++++ b/tests/qemu-iotests/tests/stream-error-on-reset +@@ -0,0 +1,140 @@ ++#!/usr/bin/env python3 ++# group: rw quick ++# ++# Test what happens when a stream job completes in a blk_drain(). ++# ++# Copyright (C) 2022 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++# ++ ++import os ++import iotests ++from iotests import imgfmt, qemu_img_create, qemu_io_silent, QMPTestCase ++ ++ ++image_size = 1 * 1024 * 1024 ++data_size = 64 * 1024 ++base = os.path.join(iotests.test_dir, 'base.img') ++top = os.path.join(iotests.test_dir, 'top.img') ++ ++ ++# We want to test completing a stream job in a blk_drain(). ++# ++# The blk_drain() we are going to use is a virtio-scsi device resetting, ++# which we can trigger by resetting the system. ++# ++# In order to have the block job complete on drain, we (1) throttle its ++# base image so we can start the drain after it has begun, but before it ++# completes, and (2) make it encounter an I/O error on the ensuing write. ++# (If it completes regularly, the completion happens after the drain for ++# some reason.) ++ ++class TestStreamErrorOnReset(QMPTestCase): ++ def setUp(self) -> None: ++ """ ++ Create two images: ++ - base image {base} with {data_size} bytes allocated ++ - top image {top} without any data allocated ++ ++ And the following VM configuration: ++ - base image throttled to {data_size} ++ - top image with a blkdebug configuration so the first write access ++ to it will result in an error ++ - top image is attached to a virtio-scsi device ++ """ ++ assert qemu_img_create('-f', imgfmt, base, str(image_size)) == 0 ++ assert qemu_io_silent('-c', f'write 0 {data_size}', base) == 0 ++ assert qemu_img_create('-f', imgfmt, top, str(image_size)) == 0 ++ ++ self.vm = iotests.VM() ++ self.vm.add_args('-accel', 'tcg') # Make throttling work properly ++ self.vm.add_object(self.vm.qmp_to_opts({ ++ 'qom-type': 'throttle-group', ++ 'id': 'thrgr', ++ 'x-bps-total': str(data_size) ++ })) ++ self.vm.add_blockdev(self.vm.qmp_to_opts({ ++ 'driver': imgfmt, ++ 'node-name': 'base', ++ 'file': { ++ 'driver': 'throttle', ++ 'throttle-group': 'thrgr', ++ 'file': { ++ 'driver': 'file', ++ 'filename': base ++ } ++ } ++ })) ++ self.vm.add_blockdev(self.vm.qmp_to_opts({ ++ 'driver': imgfmt, ++ 'node-name': 'top', ++ 'file': { ++ 'driver': 'blkdebug', ++ 'node-name': 'top-blkdebug', ++ 'inject-error': [{ ++ 'event': 'pwritev', ++ 'immediately': 'true', ++ 'once': 'true' ++ }], ++ 'image': { ++ 'driver': 'file', ++ 'filename': top ++ } ++ }, ++ 'backing': 'base' ++ })) ++ self.vm.add_device(self.vm.qmp_to_opts({ ++ 'driver': 'virtio-scsi', ++ 'id': 'vscsi' ++ })) ++ self.vm.add_device(self.vm.qmp_to_opts({ ++ 'driver': 'scsi-hd', ++ 'bus': 'vscsi.0', ++ 'drive': 'top' ++ })) ++ self.vm.launch() ++ ++ def tearDown(self) -> None: ++ self.vm.shutdown() ++ os.remove(top) ++ os.remove(base) ++ ++ def test_stream_error_on_reset(self) -> None: ++ # Launch a stream job, which will take at least a second to ++ # complete, because the base image is throttled (so we can ++ # get in between it having started and it having completed) ++ res = self.vm.qmp('block-stream', job_id='stream', device='top') ++ self.assert_qmp(res, 'return', {}) ++ ++ while True: ++ ev = self.vm.event_wait('JOB_STATUS_CHANGE') ++ if ev['data']['status'] == 'running': ++ # Once the stream job is running, reset the system, which ++ # forces the virtio-scsi device to be reset, thus draining ++ # the stream job, and making it complete. Completing ++ # inside of that drain should not result in a segfault. ++ res = self.vm.qmp('system_reset') ++ self.assert_qmp(res, 'return', {}) ++ elif ev['data']['status'] == 'null': ++ # The test is done once the job is gone ++ break ++ ++ ++if __name__ == '__main__': ++ # Passes with any format with backing file support, but qed and ++ # qcow1 do not seem to exercise the used-to-be problematic code ++ # path, so there is no point in having them in this list ++ iotests.main(supported_fmts=['qcow2', 'vmdk'], ++ supported_protocols=['file']) +diff --git a/tests/qemu-iotests/tests/stream-error-on-reset.out b/tests/qemu-iotests/tests/stream-error-on-reset.out +new file mode 100644 +index 0000000000..ae1213e6f8 +--- /dev/null ++++ b/tests/qemu-iotests/tests/stream-error-on-reset.out +@@ -0,0 +1,5 @@ ++. ++---------------------------------------------------------------------- ++Ran 1 tests ++ ++OK +-- +2.27.0 + diff --git a/SOURCES/kvm-iotests.py-Add-QemuStorageDaemon-class.patch b/SOURCES/kvm-iotests.py-Add-QemuStorageDaemon-class.patch new file mode 100644 index 0000000..539897f --- /dev/null +++ b/SOURCES/kvm-iotests.py-Add-QemuStorageDaemon-class.patch @@ -0,0 +1,92 @@ +From 34ffcd1a463bd3c1d36ed2f33dd6335b35b38460 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Fri, 4 Feb 2022 12:10:09 +0100 +Subject: [PATCH 3/6] iotests.py: Add QemuStorageDaemon class + +RH-Author: Hanna Reitz +RH-MergeRequest: 117: block/nbd: Handle AioContext changes +RH-Commit: [3/6] 754fe76bc5e8be57f4b78f176531014c4a12b044 +RH-Bugzilla: 2035185 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +This is a rather simple class that allows creating a QSD instance +running in the background and stopping it when no longer needed. + +The __del__ handler is a safety net for when something goes so wrong in +a test that e.g. the tearDown() method is not called (e.g. setUp() +launches the QSD, but then launching a VM fails). We do not want the +QSD to continue running after the test has failed, so __del__() will +take care to kill it. + +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Reitz +Signed-off-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 091dc7b2b5553a529bff9a7bf9ad3bc85bc5bdcd) +Signed-off-by: Hanna Reitz +--- + tests/qemu-iotests/iotests.py | 40 +++++++++++++++++++++++++++++++++++ + 1 file changed, 40 insertions(+) + +diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py +index 83bfedb902..a51b5ce8cd 100644 +--- a/tests/qemu-iotests/iotests.py ++++ b/tests/qemu-iotests/iotests.py +@@ -72,6 +72,8 @@ + qemu_prog = os.environ.get('QEMU_PROG', 'qemu') + qemu_opts = os.environ.get('QEMU_OPTIONS', '').strip().split(' ') + ++qsd_prog = os.environ.get('QSD_PROG', 'qemu-storage-daemon') ++ + gdb_qemu_env = os.environ.get('GDB_OPTIONS') + qemu_gdb = [] + if gdb_qemu_env: +@@ -312,6 +314,44 @@ def cmd(self, cmd): + return self._read_output() + + ++class QemuStorageDaemon: ++ def __init__(self, *args: str, instance_id: str = 'a'): ++ assert '--pidfile' not in args ++ self.pidfile = os.path.join(test_dir, f'qsd-{instance_id}-pid') ++ all_args = [qsd_prog] + list(args) + ['--pidfile', self.pidfile] ++ ++ # Cannot use with here, we want the subprocess to stay around ++ # pylint: disable=consider-using-with ++ self._p = subprocess.Popen(all_args) ++ while not os.path.exists(self.pidfile): ++ if self._p.poll() is not None: ++ cmd = ' '.join(all_args) ++ raise RuntimeError( ++ 'qemu-storage-daemon terminated with exit code ' + ++ f'{self._p.returncode}: {cmd}') ++ ++ time.sleep(0.01) ++ ++ with open(self.pidfile, encoding='utf-8') as f: ++ self._pid = int(f.read().strip()) ++ ++ assert self._pid == self._p.pid ++ ++ def stop(self, kill_signal=15): ++ self._p.send_signal(kill_signal) ++ self._p.wait() ++ self._p = None ++ ++ try: ++ os.remove(self.pidfile) ++ except OSError: ++ pass ++ ++ def __del__(self): ++ if self._p is not None: ++ self.stop(kill_signal=9) ++ ++ + def qemu_nbd(*args): + '''Run qemu-nbd in daemon mode and return the parent's exit code''' + return subprocess.call(qemu_nbd_args + ['--fork'] + list(args)) +-- +2.27.0 + diff --git a/SOURCES/kvm-kvm-Atomic-memslot-updates.patch b/SOURCES/kvm-kvm-Atomic-memslot-updates.patch new file mode 100644 index 0000000..d97a2c9 --- /dev/null +++ b/SOURCES/kvm-kvm-Atomic-memslot-updates.patch @@ -0,0 +1,290 @@ +From 93ec857c46911b95ed8e3abc6a9d432ae847c084 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Mon, 16 Jan 2023 07:51:56 -0500 +Subject: [PATCH 06/11] kvm: Atomic memslot updates + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 247: accel: introduce accelerator blocker API +RH-Bugzilla: 2161188 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/3] 520e41c0f58066a7381a5f6b32b81bc01cce51c0 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2161188 + +commit f39b7d2b96e3e73c01bb678cd096f7baf0b9ab39 +Author: David Hildenbrand +Date: Fri Nov 11 10:47:58 2022 -0500 + + kvm: Atomic memslot updates + + If we update an existing memslot (e.g., resize, split), we temporarily + remove the memslot to re-add it immediately afterwards. These updates + are not atomic, especially not for KVM VCPU threads, such that we can + get spurious faults. + + Let's inhibit most KVM ioctls while performing relevant updates, such + that we can perform the update just as if it would happen atomically + without additional kernel support. + + We capture the add/del changes and apply them in the notifier commit + stage instead. There, we can check for overlaps and perform the ioctl + inhibiting only if really required (-> overlap). + + To keep things simple we don't perform additional checks that wouldn't + actually result in an overlap -- such as !RAM memory regions in some + cases (see kvm_set_phys_mem()). + + To minimize cache-line bouncing, use a separate indicator + (in_ioctl_lock) per CPU. Also, make sure to hold the kvm_slots_lock + while performing both actions (removing+re-adding). + + We have to wait until all IOCTLs were exited and block new ones from + getting executed. + + This approach cannot result in a deadlock as long as the inhibitor does + not hold any locks that might hinder an IOCTL from getting finished and + exited - something fairly unusual. The inhibitor will always hold the BQL. + + AFAIKs, one possible candidate would be userfaultfd. If a page cannot be + placed (e.g., during postcopy), because we're waiting for a lock, or if the + userfaultfd thread cannot process a fault, because it is waiting for a + lock, there could be a deadlock. However, the BQL is not applicable here, + because any other guest memory access while holding the BQL would already + result in a deadlock. + + Nothing else in the kernel should block forever and wait for userspace + intervention. + + Note: pause_all_vcpus()/resume_all_vcpus() or + start_exclusive()/end_exclusive() cannot be used, as they either drop + the BQL or require to be called without the BQL - something inhibitors + cannot handle. We need a low-level locking mechanism that is + deadlock-free even when not releasing the BQL. + + Signed-off-by: David Hildenbrand + Signed-off-by: Emanuele Giuseppe Esposito + Tested-by: Emanuele Giuseppe Esposito + Message-Id: <20221111154758.1372674-4-eesposit@redhat.com> + Signed-off-by: Paolo Bonzini + +Conflicts: + accel/kvm/kvm-all.c: include "sysemu/dirtylimit.h" is missing in + rhel 8.8.0 + +Signed-off-by: Emanuele Giuseppe Esposito +--- + accel/kvm/kvm-all.c | 101 ++++++++++++++++++++++++++++++++++----- + include/sysemu/kvm_int.h | 8 ++++ + 2 files changed, 98 insertions(+), 11 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 221aadfda7..3b7bc39823 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -31,6 +31,7 @@ + #include "sysemu/kvm_int.h" + #include "sysemu/runstate.h" + #include "sysemu/cpus.h" ++#include "sysemu/accel-blocker.h" + #include "qemu/bswap.h" + #include "exec/memory.h" + #include "exec/ram_addr.h" +@@ -45,6 +46,7 @@ + #include "qemu/guest-random.h" + #include "sysemu/hw_accel.h" + #include "kvm-cpus.h" ++#include "qemu/range.h" + + #include "hw/boards.h" + +@@ -1334,6 +1336,7 @@ void kvm_set_max_memslot_size(hwaddr max_slot_size) + kvm_max_slot_size = max_slot_size; + } + ++/* Called with KVMMemoryListener.slots_lock held */ + static void kvm_set_phys_mem(KVMMemoryListener *kml, + MemoryRegionSection *section, bool add) + { +@@ -1368,14 +1371,12 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + ram = memory_region_get_ram_ptr(mr) + mr_offset; + ram_start_offset = memory_region_get_ram_addr(mr) + mr_offset; + +- kvm_slots_lock(); +- + if (!add) { + do { + slot_size = MIN(kvm_max_slot_size, size); + mem = kvm_lookup_matching_slot(kml, start_addr, slot_size); + if (!mem) { +- goto out; ++ return; + } + if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { + /* +@@ -1413,7 +1414,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + start_addr += slot_size; + size -= slot_size; + } while (size); +- goto out; ++ return; + } + + /* register the new slot */ +@@ -1438,9 +1439,6 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + ram += slot_size; + size -= slot_size; + } while (size); +- +-out: +- kvm_slots_unlock(); + } + + static void *kvm_dirty_ring_reaper_thread(void *data) +@@ -1492,18 +1490,95 @@ static void kvm_region_add(MemoryListener *listener, + MemoryRegionSection *section) + { + KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener); ++ KVMMemoryUpdate *update; ++ ++ update = g_new0(KVMMemoryUpdate, 1); ++ update->section = *section; + +- memory_region_ref(section->mr); +- kvm_set_phys_mem(kml, section, true); ++ QSIMPLEQ_INSERT_TAIL(&kml->transaction_add, update, next); + } + + static void kvm_region_del(MemoryListener *listener, + MemoryRegionSection *section) + { + KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener); ++ KVMMemoryUpdate *update; ++ ++ update = g_new0(KVMMemoryUpdate, 1); ++ update->section = *section; ++ ++ QSIMPLEQ_INSERT_TAIL(&kml->transaction_del, update, next); ++} ++ ++static void kvm_region_commit(MemoryListener *listener) ++{ ++ KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, ++ listener); ++ KVMMemoryUpdate *u1, *u2; ++ bool need_inhibit = false; ++ ++ if (QSIMPLEQ_EMPTY(&kml->transaction_add) && ++ QSIMPLEQ_EMPTY(&kml->transaction_del)) { ++ return; ++ } ++ ++ /* ++ * We have to be careful when regions to add overlap with ranges to remove. ++ * We have to simulate atomic KVM memslot updates by making sure no ioctl() ++ * is currently active. ++ * ++ * The lists are order by addresses, so it's easy to find overlaps. ++ */ ++ u1 = QSIMPLEQ_FIRST(&kml->transaction_del); ++ u2 = QSIMPLEQ_FIRST(&kml->transaction_add); ++ while (u1 && u2) { ++ Range r1, r2; ++ ++ range_init_nofail(&r1, u1->section.offset_within_address_space, ++ int128_get64(u1->section.size)); ++ range_init_nofail(&r2, u2->section.offset_within_address_space, ++ int128_get64(u2->section.size)); ++ ++ if (range_overlaps_range(&r1, &r2)) { ++ need_inhibit = true; ++ break; ++ } ++ if (range_lob(&r1) < range_lob(&r2)) { ++ u1 = QSIMPLEQ_NEXT(u1, next); ++ } else { ++ u2 = QSIMPLEQ_NEXT(u2, next); ++ } ++ } ++ ++ kvm_slots_lock(); ++ if (need_inhibit) { ++ accel_ioctl_inhibit_begin(); ++ } ++ ++ /* Remove all memslots before adding the new ones. */ ++ while (!QSIMPLEQ_EMPTY(&kml->transaction_del)) { ++ u1 = QSIMPLEQ_FIRST(&kml->transaction_del); ++ QSIMPLEQ_REMOVE_HEAD(&kml->transaction_del, next); + +- kvm_set_phys_mem(kml, section, false); +- memory_region_unref(section->mr); ++ kvm_set_phys_mem(kml, &u1->section, false); ++ memory_region_unref(u1->section.mr); ++ ++ g_free(u1); ++ } ++ while (!QSIMPLEQ_EMPTY(&kml->transaction_add)) { ++ u1 = QSIMPLEQ_FIRST(&kml->transaction_add); ++ QSIMPLEQ_REMOVE_HEAD(&kml->transaction_add, next); ++ ++ memory_region_ref(u1->section.mr); ++ kvm_set_phys_mem(kml, &u1->section, true); ++ ++ g_free(u1); ++ } ++ ++ if (need_inhibit) { ++ accel_ioctl_inhibit_end(); ++ } ++ kvm_slots_unlock(); + } + + static void kvm_log_sync(MemoryListener *listener, +@@ -1647,8 +1722,12 @@ void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, + kml->slots[i].slot = i; + } + ++ QSIMPLEQ_INIT(&kml->transaction_add); ++ QSIMPLEQ_INIT(&kml->transaction_del); ++ + kml->listener.region_add = kvm_region_add; + kml->listener.region_del = kvm_region_del; ++ kml->listener.commit = kvm_region_commit; + kml->listener.log_start = kvm_log_start; + kml->listener.log_stop = kvm_log_stop; + kml->listener.priority = 10; +diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h +index 1f5487d9b7..7e18c0a3c0 100644 +--- a/include/sysemu/kvm_int.h ++++ b/include/sysemu/kvm_int.h +@@ -11,6 +11,7 @@ + + #include "exec/memory.h" + #include "qemu/accel.h" ++#include "qemu/queue.h" + #include "sysemu/kvm.h" + + typedef struct KVMSlot +@@ -30,10 +31,17 @@ typedef struct KVMSlot + ram_addr_t ram_start_offset; + } KVMSlot; + ++typedef struct KVMMemoryUpdate { ++ QSIMPLEQ_ENTRY(KVMMemoryUpdate) next; ++ MemoryRegionSection section; ++} KVMMemoryUpdate; ++ + typedef struct KVMMemoryListener { + MemoryListener listener; + KVMSlot *slots; + int as_id; ++ QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_add; ++ QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_del; + } KVMMemoryListener; + + void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, +-- +2.37.3 + diff --git a/SOURCES/kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch b/SOURCES/kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch new file mode 100644 index 0000000..7c1fcc4 --- /dev/null +++ b/SOURCES/kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch @@ -0,0 +1,49 @@ +From 99d33621440fd30e0da2974dafb0cd372334305a Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 9 Jun 2022 17:47:12 +0100 +Subject: [PATCH 2/2] linux-aio: explain why max batch is checked in + laio_io_unplug() + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 199: linux-aio: fix unbalanced plugged counter in laio_io_unplug() +RH-Commit: [2/2] 8617870ed70e3a57269f06eeb242d0fab79a66fb +RH-Bugzilla: 2105410 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Hanna Reitz +RH-Acked-by: Stefano Garzarella + +It may not be obvious why laio_io_unplug() checks max batch. I discussed +this with Stefano and have added a comment summarizing the reason. + +Cc: Stefano Garzarella +Cc: Kevin Wolf +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Stefano Garzarella +Message-id: 20220609164712.1539045-3-stefanha@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 99b969fbe105117f5af6060d3afef40ca39cc9c1) +Signed-off-by: Stefan Hajnoczi +--- + block/linux-aio.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/block/linux-aio.c b/block/linux-aio.c +index 77f17ad596..85650c4222 100644 +--- a/block/linux-aio.c ++++ b/block/linux-aio.c +@@ -362,6 +362,12 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s, + assert(s->io_q.plugged); + s->io_q.plugged--; + ++ /* ++ * Why max batch checking is performed here: ++ * Another BDS may have queued requests with a higher dev_max_batch and ++ * therefore in_queue could now exceed our dev_max_batch. Re-check the max ++ * batch so we can honor our device's dev_max_batch. ++ */ + if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) || + (!s->io_q.plugged && + !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) { +-- +2.35.3 + diff --git a/SOURCES/kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch b/SOURCES/kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch new file mode 100644 index 0000000..c89fc72 --- /dev/null +++ b/SOURCES/kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch @@ -0,0 +1,56 @@ +From 0fbb0c87628bef2cb4d1b7748d67020dde50cdef Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 9 Jun 2022 17:47:11 +0100 +Subject: [PATCH 1/2] linux-aio: fix unbalanced plugged counter in + laio_io_unplug() + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 199: linux-aio: fix unbalanced plugged counter in laio_io_unplug() +RH-Commit: [1/2] f518df755090289905898a36922992288688e338 +RH-Bugzilla: 2105410 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Hanna Reitz +RH-Acked-by: Stefano Garzarella + +Every laio_io_plug() call has a matching laio_io_unplug() call. There is +a plugged counter that tracks the number of levels of plugging and +allows for nesting. + +The plugged counter must reflect the balance between laio_io_plug() and +laio_io_unplug() calls accurately. Otherwise I/O stalls occur since +io_submit(2) calls are skipped while plugged. + +Reported-by: Nikolay Tenev +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Stefano Garzarella +Message-id: 20220609164712.1539045-2-stefanha@redhat.com +Cc: Stefano Garzarella +Fixes: 68d7946648 ("linux-aio: add `dev_max_batch` parameter to laio_io_unplug()") +[Stefano Garzarella suggested adding a Fixes tag. +--Stefan] +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit f387cac5af030a58ac5a0dacf64cab5e5a4fe5c7) +Signed-off-by: Stefan Hajnoczi +--- + block/linux-aio.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/block/linux-aio.c b/block/linux-aio.c +index f53ae72e21..77f17ad596 100644 +--- a/block/linux-aio.c ++++ b/block/linux-aio.c +@@ -360,8 +360,10 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s, + uint64_t dev_max_batch) + { + assert(s->io_q.plugged); ++ s->io_q.plugged--; ++ + if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) || +- (--s->io_q.plugged == 0 && ++ (!s->io_q.plugged && + !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) { + ioq_submit(s); + } +-- +2.35.3 + diff --git a/SOURCES/kvm-linux-headers-Update-headers-to-v5.17-rc1.patch b/SOURCES/kvm-linux-headers-Update-headers-to-v5.17-rc1.patch new file mode 100644 index 0000000..90adb5c --- /dev/null +++ b/SOURCES/kvm-linux-headers-Update-headers-to-v5.17-rc1.patch @@ -0,0 +1,1227 @@ +From e9ecd7543fa8d3e9fe80f4144e4c0461f783fc37 Mon Sep 17 00:00:00 2001 +From: Vivek Goyal +Date: Tue, 8 Feb 2022 15:48:05 -0500 +Subject: [PATCH 03/24] linux-headers: Update headers to v5.17-rc1 + +RH-Author: Paul Lai +RH-MergeRequest: 176: Enable KVM AMX support +RH-Commit: [3/13] 63593c2431eabf02222f37467736b580022b94c8 +RH-Bugzilla: 1916415 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Igor Mammedov +RH-Acked-by: Paolo Bonzini + +Update headers to 5.17-rc1. I need latest fuse changes. + +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Vivek Goyal +Message-Id: <20220208204813.682906-3-vgoyal@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit ef17dd6a8e6b6e3aeb29233996d44dfcb736d515) +Signed-off-by: Paul Lai +--- + include/standard-headers/asm-x86/kvm_para.h | 1 + + include/standard-headers/drm/drm_fourcc.h | 11 ++ + include/standard-headers/linux/ethtool.h | 1 + + include/standard-headers/linux/fuse.h | 60 +++++++- + include/standard-headers/linux/pci_regs.h | 142 +++++++++--------- + include/standard-headers/linux/virtio_gpio.h | 72 +++++++++ + include/standard-headers/linux/virtio_i2c.h | 47 ++++++ + include/standard-headers/linux/virtio_iommu.h | 8 +- + .../standard-headers/linux/virtio_pcidev.h | 65 ++++++++ + include/standard-headers/linux/virtio_scmi.h | 24 +++ + linux-headers/asm-generic/unistd.h | 5 +- + linux-headers/asm-mips/unistd_n32.h | 2 + + linux-headers/asm-mips/unistd_n64.h | 2 + + linux-headers/asm-mips/unistd_o32.h | 2 + + linux-headers/asm-powerpc/unistd_32.h | 2 + + linux-headers/asm-powerpc/unistd_64.h | 2 + + linux-headers/asm-riscv/bitsperlong.h | 14 ++ + linux-headers/asm-riscv/mman.h | 1 + + linux-headers/asm-riscv/unistd.h | 44 ++++++ + linux-headers/asm-s390/unistd_32.h | 2 + + linux-headers/asm-s390/unistd_64.h | 2 + + linux-headers/asm-x86/kvm.h | 16 +- + linux-headers/asm-x86/unistd_32.h | 1 + + linux-headers/asm-x86/unistd_64.h | 1 + + linux-headers/asm-x86/unistd_x32.h | 1 + + linux-headers/linux/kvm.h | 17 +++ + 26 files changed, 469 insertions(+), 76 deletions(-) + create mode 100644 include/standard-headers/linux/virtio_gpio.h + create mode 100644 include/standard-headers/linux/virtio_i2c.h + create mode 100644 include/standard-headers/linux/virtio_pcidev.h + create mode 100644 include/standard-headers/linux/virtio_scmi.h + create mode 100644 linux-headers/asm-riscv/bitsperlong.h + create mode 100644 linux-headers/asm-riscv/mman.h + create mode 100644 linux-headers/asm-riscv/unistd.h + +diff --git a/include/standard-headers/asm-x86/kvm_para.h b/include/standard-headers/asm-x86/kvm_para.h +index 204cfb8640..f0235e58a1 100644 +--- a/include/standard-headers/asm-x86/kvm_para.h ++++ b/include/standard-headers/asm-x86/kvm_para.h +@@ -8,6 +8,7 @@ + * should be used to determine that a VM is running under KVM. + */ + #define KVM_CPUID_SIGNATURE 0x40000000 ++#define KVM_SIGNATURE "KVMKVMKVM\0\0\0" + + /* This CPUID returns two feature bitmaps in eax, edx. Before enabling + * a particular paravirtualization, the appropriate feature bit should +diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h +index 2c025cb4fe..4888f85f69 100644 +--- a/include/standard-headers/drm/drm_fourcc.h ++++ b/include/standard-headers/drm/drm_fourcc.h +@@ -313,6 +313,13 @@ extern "C" { + */ + #define DRM_FORMAT_P016 fourcc_code('P', '0', '1', '6') /* 2x2 subsampled Cr:Cb plane 16 bits per channel */ + ++/* 2 plane YCbCr420. ++ * 3 10 bit components and 2 padding bits packed into 4 bytes. ++ * index 0 = Y plane, [31:0] x:Y2:Y1:Y0 2:10:10:10 little endian ++ * index 1 = Cr:Cb plane, [63:0] x:Cr2:Cb2:Cr1:x:Cb1:Cr0:Cb0 [2:10:10:10:2:10:10:10] little endian ++ */ ++#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') /* 2x2 subsampled Cr:Cb plane 10 bits per channel packed */ ++ + /* 3 plane non-subsampled (444) YCbCr + * 16 bits per component, but only 10 bits are used and 6 bits are padded + * index 0: Y plane, [15:0] Y:x [10:6] little endian +@@ -853,6 +860,10 @@ drm_fourcc_canonicalize_nvidia_format_mod(uint64_t modifier) + * and UV. Some SAND-using hardware stores UV in a separate tiled + * image from Y to reduce the column height, which is not supported + * with these modifiers. ++ * ++ * The DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT modifier is also ++ * supported for DRM_FORMAT_P030 where the columns remain as 128 bytes ++ * wide, but as this is a 10 bpp format that translates to 96 pixels. + */ + + #define DRM_FORMAT_MOD_BROADCOM_SAND32_COL_HEIGHT(v) \ +diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h +index 688eb8dc39..38d5a4cd6e 100644 +--- a/include/standard-headers/linux/ethtool.h ++++ b/include/standard-headers/linux/ethtool.h +@@ -231,6 +231,7 @@ enum tunable_id { + ETHTOOL_RX_COPYBREAK, + ETHTOOL_TX_COPYBREAK, + ETHTOOL_PFC_PREVENTION_TOUT, /* timeout in msecs */ ++ ETHTOOL_TX_COPYBREAK_BUF_SIZE, + /* + * Add your fresh new tunable attribute above and remember to update + * tunable_strings[] in net/ethtool/common.c +diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h +index 23ea31708b..bda06258be 100644 +--- a/include/standard-headers/linux/fuse.h ++++ b/include/standard-headers/linux/fuse.h +@@ -184,6 +184,16 @@ + * + * 7.34 + * - add FUSE_SYNCFS ++ * ++ * 7.35 ++ * - add FOPEN_NOFLUSH ++ * ++ * 7.36 ++ * - extend fuse_init_in with reserved fields, add FUSE_INIT_EXT init flag ++ * - add flags2 to fuse_init_in and fuse_init_out ++ * - add FUSE_SECURITY_CTX init flag ++ * - add security context to create, mkdir, symlink, and mknod requests ++ * - add FUSE_HAS_INODE_DAX, FUSE_ATTR_DAX + */ + + #ifndef _LINUX_FUSE_H +@@ -215,7 +225,7 @@ + #define FUSE_KERNEL_VERSION 7 + + /** Minor version number of this interface */ +-#define FUSE_KERNEL_MINOR_VERSION 34 ++#define FUSE_KERNEL_MINOR_VERSION 36 + + /** The node ID of the root inode */ + #define FUSE_ROOT_ID 1 +@@ -286,12 +296,14 @@ struct fuse_file_lock { + * FOPEN_NONSEEKABLE: the file is not seekable + * FOPEN_CACHE_DIR: allow caching this directory + * FOPEN_STREAM: the file is stream-like (no file position at all) ++ * FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE) + */ + #define FOPEN_DIRECT_IO (1 << 0) + #define FOPEN_KEEP_CACHE (1 << 1) + #define FOPEN_NONSEEKABLE (1 << 2) + #define FOPEN_CACHE_DIR (1 << 3) + #define FOPEN_STREAM (1 << 4) ++#define FOPEN_NOFLUSH (1 << 5) + + /** + * INIT request/reply flags +@@ -332,6 +344,11 @@ struct fuse_file_lock { + * write/truncate sgid is killed only if file has group + * execute permission. (Same as Linux VFS behavior). + * FUSE_SETXATTR_EXT: Server supports extended struct fuse_setxattr_in ++ * FUSE_INIT_EXT: extended fuse_init_in request ++ * FUSE_INIT_RESERVED: reserved, do not use ++ * FUSE_SECURITY_CTX: add security context to create, mkdir, symlink, and ++ * mknod ++ * FUSE_HAS_INODE_DAX: use per inode DAX + */ + #define FUSE_ASYNC_READ (1 << 0) + #define FUSE_POSIX_LOCKS (1 << 1) +@@ -363,6 +380,11 @@ struct fuse_file_lock { + #define FUSE_SUBMOUNTS (1 << 27) + #define FUSE_HANDLE_KILLPRIV_V2 (1 << 28) + #define FUSE_SETXATTR_EXT (1 << 29) ++#define FUSE_INIT_EXT (1 << 30) ++#define FUSE_INIT_RESERVED (1 << 31) ++/* bits 32..63 get shifted down 32 bits into the flags2 field */ ++#define FUSE_SECURITY_CTX (1ULL << 32) ++#define FUSE_HAS_INODE_DAX (1ULL << 33) + + /** + * CUSE INIT request/reply flags +@@ -445,8 +467,10 @@ struct fuse_file_lock { + * fuse_attr flags + * + * FUSE_ATTR_SUBMOUNT: Object is a submount root ++ * FUSE_ATTR_DAX: Enable DAX for this file in per inode DAX mode + */ + #define FUSE_ATTR_SUBMOUNT (1 << 0) ++#define FUSE_ATTR_DAX (1 << 1) + + /** + * Open flags +@@ -732,6 +756,8 @@ struct fuse_init_in { + uint32_t minor; + uint32_t max_readahead; + uint32_t flags; ++ uint32_t flags2; ++ uint32_t unused[11]; + }; + + #define FUSE_COMPAT_INIT_OUT_SIZE 8 +@@ -748,7 +774,8 @@ struct fuse_init_out { + uint32_t time_gran; + uint16_t max_pages; + uint16_t map_alignment; +- uint32_t unused[8]; ++ uint32_t flags2; ++ uint32_t unused[7]; + }; + + #define CUSE_INIT_INFO_MAX 4096 +@@ -856,9 +883,12 @@ struct fuse_dirent { + char name[]; + }; + +-#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name) +-#define FUSE_DIRENT_ALIGN(x) \ ++/* Align variable length records to 64bit boundary */ ++#define FUSE_REC_ALIGN(x) \ + (((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1)) ++ ++#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name) ++#define FUSE_DIRENT_ALIGN(x) FUSE_REC_ALIGN(x) + #define FUSE_DIRENT_SIZE(d) \ + FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen) + +@@ -975,4 +1005,26 @@ struct fuse_syncfs_in { + uint64_t padding; + }; + ++/* ++ * For each security context, send fuse_secctx with size of security context ++ * fuse_secctx will be followed by security context name and this in turn ++ * will be followed by actual context label. ++ * fuse_secctx, name, context ++ */ ++struct fuse_secctx { ++ uint32_t size; ++ uint32_t padding; ++}; ++ ++/* ++ * Contains the information about how many fuse_secctx structures are being ++ * sent and what's the total size of all security contexts (including ++ * size of fuse_secctx_header). ++ * ++ */ ++struct fuse_secctx_header { ++ uint32_t size; ++ uint32_t nr_secctx; ++}; ++ + #endif /* _LINUX_FUSE_H */ +diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h +index ff6ccbc6ef..bee1a9ed6e 100644 +--- a/include/standard-headers/linux/pci_regs.h ++++ b/include/standard-headers/linux/pci_regs.h +@@ -301,23 +301,23 @@ + #define PCI_SID_ESR_FIC 0x20 /* First In Chassis Flag */ + #define PCI_SID_CHASSIS_NR 3 /* Chassis Number */ + +-/* Message Signalled Interrupt registers */ ++/* Message Signaled Interrupt registers */ + +-#define PCI_MSI_FLAGS 2 /* Message Control */ ++#define PCI_MSI_FLAGS 0x02 /* Message Control */ + #define PCI_MSI_FLAGS_ENABLE 0x0001 /* MSI feature enabled */ + #define PCI_MSI_FLAGS_QMASK 0x000e /* Maximum queue size available */ + #define PCI_MSI_FLAGS_QSIZE 0x0070 /* Message queue size configured */ + #define PCI_MSI_FLAGS_64BIT 0x0080 /* 64-bit addresses allowed */ + #define PCI_MSI_FLAGS_MASKBIT 0x0100 /* Per-vector masking capable */ + #define PCI_MSI_RFU 3 /* Rest of capability flags */ +-#define PCI_MSI_ADDRESS_LO 4 /* Lower 32 bits */ +-#define PCI_MSI_ADDRESS_HI 8 /* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */ +-#define PCI_MSI_DATA_32 8 /* 16 bits of data for 32-bit devices */ +-#define PCI_MSI_MASK_32 12 /* Mask bits register for 32-bit devices */ +-#define PCI_MSI_PENDING_32 16 /* Pending intrs for 32-bit devices */ +-#define PCI_MSI_DATA_64 12 /* 16 bits of data for 64-bit devices */ +-#define PCI_MSI_MASK_64 16 /* Mask bits register for 64-bit devices */ +-#define PCI_MSI_PENDING_64 20 /* Pending intrs for 64-bit devices */ ++#define PCI_MSI_ADDRESS_LO 0x04 /* Lower 32 bits */ ++#define PCI_MSI_ADDRESS_HI 0x08 /* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */ ++#define PCI_MSI_DATA_32 0x08 /* 16 bits of data for 32-bit devices */ ++#define PCI_MSI_MASK_32 0x0c /* Mask bits register for 32-bit devices */ ++#define PCI_MSI_PENDING_32 0x10 /* Pending intrs for 32-bit devices */ ++#define PCI_MSI_DATA_64 0x0c /* 16 bits of data for 64-bit devices */ ++#define PCI_MSI_MASK_64 0x10 /* Mask bits register for 64-bit devices */ ++#define PCI_MSI_PENDING_64 0x14 /* Pending intrs for 64-bit devices */ + + /* MSI-X registers (in MSI-X capability) */ + #define PCI_MSIX_FLAGS 2 /* Message Control */ +@@ -335,10 +335,10 @@ + + /* MSI-X Table entry format (in memory mapped by a BAR) */ + #define PCI_MSIX_ENTRY_SIZE 16 +-#define PCI_MSIX_ENTRY_LOWER_ADDR 0 /* Message Address */ +-#define PCI_MSIX_ENTRY_UPPER_ADDR 4 /* Message Upper Address */ +-#define PCI_MSIX_ENTRY_DATA 8 /* Message Data */ +-#define PCI_MSIX_ENTRY_VECTOR_CTRL 12 /* Vector Control */ ++#define PCI_MSIX_ENTRY_LOWER_ADDR 0x0 /* Message Address */ ++#define PCI_MSIX_ENTRY_UPPER_ADDR 0x4 /* Message Upper Address */ ++#define PCI_MSIX_ENTRY_DATA 0x8 /* Message Data */ ++#define PCI_MSIX_ENTRY_VECTOR_CTRL 0xc /* Vector Control */ + #define PCI_MSIX_ENTRY_CTRL_MASKBIT 0x00000001 + + /* CompactPCI Hotswap Register */ +@@ -470,7 +470,7 @@ + + /* PCI Express capability registers */ + +-#define PCI_EXP_FLAGS 2 /* Capabilities register */ ++#define PCI_EXP_FLAGS 0x02 /* Capabilities register */ + #define PCI_EXP_FLAGS_VERS 0x000f /* Capability version */ + #define PCI_EXP_FLAGS_TYPE 0x00f0 /* Device/Port type */ + #define PCI_EXP_TYPE_ENDPOINT 0x0 /* Express Endpoint */ +@@ -484,7 +484,7 @@ + #define PCI_EXP_TYPE_RC_EC 0xa /* Root Complex Event Collector */ + #define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */ + #define PCI_EXP_FLAGS_IRQ 0x3e00 /* Interrupt message number */ +-#define PCI_EXP_DEVCAP 4 /* Device capabilities */ ++#define PCI_EXP_DEVCAP 0x04 /* Device capabilities */ + #define PCI_EXP_DEVCAP_PAYLOAD 0x00000007 /* Max_Payload_Size */ + #define PCI_EXP_DEVCAP_PHANTOM 0x00000018 /* Phantom functions */ + #define PCI_EXP_DEVCAP_EXT_TAG 0x00000020 /* Extended tags */ +@@ -497,7 +497,7 @@ + #define PCI_EXP_DEVCAP_PWR_VAL 0x03fc0000 /* Slot Power Limit Value */ + #define PCI_EXP_DEVCAP_PWR_SCL 0x0c000000 /* Slot Power Limit Scale */ + #define PCI_EXP_DEVCAP_FLR 0x10000000 /* Function Level Reset */ +-#define PCI_EXP_DEVCTL 8 /* Device Control */ ++#define PCI_EXP_DEVCTL 0x08 /* Device Control */ + #define PCI_EXP_DEVCTL_CERE 0x0001 /* Correctable Error Reporting En. */ + #define PCI_EXP_DEVCTL_NFERE 0x0002 /* Non-Fatal Error Reporting Enable */ + #define PCI_EXP_DEVCTL_FERE 0x0004 /* Fatal Error Reporting Enable */ +@@ -522,7 +522,7 @@ + #define PCI_EXP_DEVCTL_READRQ_2048B 0x4000 /* 2048 Bytes */ + #define PCI_EXP_DEVCTL_READRQ_4096B 0x5000 /* 4096 Bytes */ + #define PCI_EXP_DEVCTL_BCR_FLR 0x8000 /* Bridge Configuration Retry / FLR */ +-#define PCI_EXP_DEVSTA 10 /* Device Status */ ++#define PCI_EXP_DEVSTA 0x0a /* Device Status */ + #define PCI_EXP_DEVSTA_CED 0x0001 /* Correctable Error Detected */ + #define PCI_EXP_DEVSTA_NFED 0x0002 /* Non-Fatal Error Detected */ + #define PCI_EXP_DEVSTA_FED 0x0004 /* Fatal Error Detected */ +@@ -530,7 +530,7 @@ + #define PCI_EXP_DEVSTA_AUXPD 0x0010 /* AUX Power Detected */ + #define PCI_EXP_DEVSTA_TRPND 0x0020 /* Transactions Pending */ + #define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V1 12 /* v1 endpoints without link end here */ +-#define PCI_EXP_LNKCAP 12 /* Link Capabilities */ ++#define PCI_EXP_LNKCAP 0x0c /* Link Capabilities */ + #define PCI_EXP_LNKCAP_SLS 0x0000000f /* Supported Link Speeds */ + #define PCI_EXP_LNKCAP_SLS_2_5GB 0x00000001 /* LNKCAP2 SLS Vector bit 0 */ + #define PCI_EXP_LNKCAP_SLS_5_0GB 0x00000002 /* LNKCAP2 SLS Vector bit 1 */ +@@ -549,7 +549,7 @@ + #define PCI_EXP_LNKCAP_DLLLARC 0x00100000 /* Data Link Layer Link Active Reporting Capable */ + #define PCI_EXP_LNKCAP_LBNC 0x00200000 /* Link Bandwidth Notification Capability */ + #define PCI_EXP_LNKCAP_PN 0xff000000 /* Port Number */ +-#define PCI_EXP_LNKCTL 16 /* Link Control */ ++#define PCI_EXP_LNKCTL 0x10 /* Link Control */ + #define PCI_EXP_LNKCTL_ASPMC 0x0003 /* ASPM Control */ + #define PCI_EXP_LNKCTL_ASPM_L0S 0x0001 /* L0s Enable */ + #define PCI_EXP_LNKCTL_ASPM_L1 0x0002 /* L1 Enable */ +@@ -562,7 +562,7 @@ + #define PCI_EXP_LNKCTL_HAWD 0x0200 /* Hardware Autonomous Width Disable */ + #define PCI_EXP_LNKCTL_LBMIE 0x0400 /* Link Bandwidth Management Interrupt Enable */ + #define PCI_EXP_LNKCTL_LABIE 0x0800 /* Link Autonomous Bandwidth Interrupt Enable */ +-#define PCI_EXP_LNKSTA 18 /* Link Status */ ++#define PCI_EXP_LNKSTA 0x12 /* Link Status */ + #define PCI_EXP_LNKSTA_CLS 0x000f /* Current Link Speed */ + #define PCI_EXP_LNKSTA_CLS_2_5GB 0x0001 /* Current Link Speed 2.5GT/s */ + #define PCI_EXP_LNKSTA_CLS_5_0GB 0x0002 /* Current Link Speed 5.0GT/s */ +@@ -582,7 +582,7 @@ + #define PCI_EXP_LNKSTA_LBMS 0x4000 /* Link Bandwidth Management Status */ + #define PCI_EXP_LNKSTA_LABS 0x8000 /* Link Autonomous Bandwidth Status */ + #define PCI_CAP_EXP_ENDPOINT_SIZEOF_V1 20 /* v1 endpoints with link end here */ +-#define PCI_EXP_SLTCAP 20 /* Slot Capabilities */ ++#define PCI_EXP_SLTCAP 0x14 /* Slot Capabilities */ + #define PCI_EXP_SLTCAP_ABP 0x00000001 /* Attention Button Present */ + #define PCI_EXP_SLTCAP_PCP 0x00000002 /* Power Controller Present */ + #define PCI_EXP_SLTCAP_MRLSP 0x00000004 /* MRL Sensor Present */ +@@ -595,7 +595,7 @@ + #define PCI_EXP_SLTCAP_EIP 0x00020000 /* Electromechanical Interlock Present */ + #define PCI_EXP_SLTCAP_NCCS 0x00040000 /* No Command Completed Support */ + #define PCI_EXP_SLTCAP_PSN 0xfff80000 /* Physical Slot Number */ +-#define PCI_EXP_SLTCTL 24 /* Slot Control */ ++#define PCI_EXP_SLTCTL 0x18 /* Slot Control */ + #define PCI_EXP_SLTCTL_ABPE 0x0001 /* Attention Button Pressed Enable */ + #define PCI_EXP_SLTCTL_PFDE 0x0002 /* Power Fault Detected Enable */ + #define PCI_EXP_SLTCTL_MRLSCE 0x0004 /* MRL Sensor Changed Enable */ +@@ -617,7 +617,7 @@ + #define PCI_EXP_SLTCTL_EIC 0x0800 /* Electromechanical Interlock Control */ + #define PCI_EXP_SLTCTL_DLLSCE 0x1000 /* Data Link Layer State Changed Enable */ + #define PCI_EXP_SLTCTL_IBPD_DISABLE 0x4000 /* In-band PD disable */ +-#define PCI_EXP_SLTSTA 26 /* Slot Status */ ++#define PCI_EXP_SLTSTA 0x1a /* Slot Status */ + #define PCI_EXP_SLTSTA_ABP 0x0001 /* Attention Button Pressed */ + #define PCI_EXP_SLTSTA_PFD 0x0002 /* Power Fault Detected */ + #define PCI_EXP_SLTSTA_MRLSC 0x0004 /* MRL Sensor Changed */ +@@ -627,15 +627,15 @@ + #define PCI_EXP_SLTSTA_PDS 0x0040 /* Presence Detect State */ + #define PCI_EXP_SLTSTA_EIS 0x0080 /* Electromechanical Interlock Status */ + #define PCI_EXP_SLTSTA_DLLSC 0x0100 /* Data Link Layer State Changed */ +-#define PCI_EXP_RTCTL 28 /* Root Control */ ++#define PCI_EXP_RTCTL 0x1c /* Root Control */ + #define PCI_EXP_RTCTL_SECEE 0x0001 /* System Error on Correctable Error */ + #define PCI_EXP_RTCTL_SENFEE 0x0002 /* System Error on Non-Fatal Error */ + #define PCI_EXP_RTCTL_SEFEE 0x0004 /* System Error on Fatal Error */ + #define PCI_EXP_RTCTL_PMEIE 0x0008 /* PME Interrupt Enable */ + #define PCI_EXP_RTCTL_CRSSVE 0x0010 /* CRS Software Visibility Enable */ +-#define PCI_EXP_RTCAP 30 /* Root Capabilities */ ++#define PCI_EXP_RTCAP 0x1e /* Root Capabilities */ + #define PCI_EXP_RTCAP_CRSVIS 0x0001 /* CRS Software Visibility capability */ +-#define PCI_EXP_RTSTA 32 /* Root Status */ ++#define PCI_EXP_RTSTA 0x20 /* Root Status */ + #define PCI_EXP_RTSTA_PME 0x00010000 /* PME status */ + #define PCI_EXP_RTSTA_PENDING 0x00020000 /* PME pending */ + /* +@@ -646,7 +646,7 @@ + * Use pcie_capability_read_word() and similar interfaces to use them + * safely. + */ +-#define PCI_EXP_DEVCAP2 36 /* Device Capabilities 2 */ ++#define PCI_EXP_DEVCAP2 0x24 /* Device Capabilities 2 */ + #define PCI_EXP_DEVCAP2_COMP_TMOUT_DIS 0x00000010 /* Completion Timeout Disable supported */ + #define PCI_EXP_DEVCAP2_ARI 0x00000020 /* Alternative Routing-ID */ + #define PCI_EXP_DEVCAP2_ATOMIC_ROUTE 0x00000040 /* Atomic Op routing */ +@@ -658,7 +658,7 @@ + #define PCI_EXP_DEVCAP2_OBFF_MSG 0x00040000 /* New message signaling */ + #define PCI_EXP_DEVCAP2_OBFF_WAKE 0x00080000 /* Re-use WAKE# for OBFF */ + #define PCI_EXP_DEVCAP2_EE_PREFIX 0x00200000 /* End-End TLP Prefix */ +-#define PCI_EXP_DEVCTL2 40 /* Device Control 2 */ ++#define PCI_EXP_DEVCTL2 0x28 /* Device Control 2 */ + #define PCI_EXP_DEVCTL2_COMP_TIMEOUT 0x000f /* Completion Timeout Value */ + #define PCI_EXP_DEVCTL2_COMP_TMOUT_DIS 0x0010 /* Completion Timeout Disable */ + #define PCI_EXP_DEVCTL2_ARI 0x0020 /* Alternative Routing-ID */ +@@ -670,9 +670,9 @@ + #define PCI_EXP_DEVCTL2_OBFF_MSGA_EN 0x2000 /* Enable OBFF Message type A */ + #define PCI_EXP_DEVCTL2_OBFF_MSGB_EN 0x4000 /* Enable OBFF Message type B */ + #define PCI_EXP_DEVCTL2_OBFF_WAKE_EN 0x6000 /* OBFF using WAKE# signaling */ +-#define PCI_EXP_DEVSTA2 42 /* Device Status 2 */ +-#define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V2 44 /* v2 endpoints without link end here */ +-#define PCI_EXP_LNKCAP2 44 /* Link Capabilities 2 */ ++#define PCI_EXP_DEVSTA2 0x2a /* Device Status 2 */ ++#define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V2 0x2c /* end of v2 EPs w/o link */ ++#define PCI_EXP_LNKCAP2 0x2c /* Link Capabilities 2 */ + #define PCI_EXP_LNKCAP2_SLS_2_5GB 0x00000002 /* Supported Speed 2.5GT/s */ + #define PCI_EXP_LNKCAP2_SLS_5_0GB 0x00000004 /* Supported Speed 5GT/s */ + #define PCI_EXP_LNKCAP2_SLS_8_0GB 0x00000008 /* Supported Speed 8GT/s */ +@@ -680,7 +680,7 @@ + #define PCI_EXP_LNKCAP2_SLS_32_0GB 0x00000020 /* Supported Speed 32GT/s */ + #define PCI_EXP_LNKCAP2_SLS_64_0GB 0x00000040 /* Supported Speed 64GT/s */ + #define PCI_EXP_LNKCAP2_CROSSLINK 0x00000100 /* Crosslink supported */ +-#define PCI_EXP_LNKCTL2 48 /* Link Control 2 */ ++#define PCI_EXP_LNKCTL2 0x30 /* Link Control 2 */ + #define PCI_EXP_LNKCTL2_TLS 0x000f + #define PCI_EXP_LNKCTL2_TLS_2_5GT 0x0001 /* Supported Speed 2.5GT/s */ + #define PCI_EXP_LNKCTL2_TLS_5_0GT 0x0002 /* Supported Speed 5GT/s */ +@@ -691,12 +691,12 @@ + #define PCI_EXP_LNKCTL2_ENTER_COMP 0x0010 /* Enter Compliance */ + #define PCI_EXP_LNKCTL2_TX_MARGIN 0x0380 /* Transmit Margin */ + #define PCI_EXP_LNKCTL2_HASD 0x0020 /* HW Autonomous Speed Disable */ +-#define PCI_EXP_LNKSTA2 50 /* Link Status 2 */ +-#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 52 /* v2 endpoints with link end here */ +-#define PCI_EXP_SLTCAP2 52 /* Slot Capabilities 2 */ ++#define PCI_EXP_LNKSTA2 0x32 /* Link Status 2 */ ++#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 0x32 /* end of v2 EPs w/ link */ ++#define PCI_EXP_SLTCAP2 0x34 /* Slot Capabilities 2 */ + #define PCI_EXP_SLTCAP2_IBPD 0x00000001 /* In-band PD Disable Supported */ +-#define PCI_EXP_SLTCTL2 56 /* Slot Control 2 */ +-#define PCI_EXP_SLTSTA2 58 /* Slot Status 2 */ ++#define PCI_EXP_SLTCTL2 0x38 /* Slot Control 2 */ ++#define PCI_EXP_SLTSTA2 0x3a /* Slot Status 2 */ + + /* Extended Capabilities (PCI-X 2.0 and Express) */ + #define PCI_EXT_CAP_ID(header) (header & 0x0000ffff) +@@ -742,7 +742,7 @@ + #define PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF 40 + + /* Advanced Error Reporting */ +-#define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */ ++#define PCI_ERR_UNCOR_STATUS 0x04 /* Uncorrectable Error Status */ + #define PCI_ERR_UNC_UND 0x00000001 /* Undefined */ + #define PCI_ERR_UNC_DLP 0x00000010 /* Data Link Protocol */ + #define PCI_ERR_UNC_SURPDN 0x00000020 /* Surprise Down */ +@@ -760,11 +760,11 @@ + #define PCI_ERR_UNC_MCBTLP 0x00800000 /* MC blocked TLP */ + #define PCI_ERR_UNC_ATOMEG 0x01000000 /* Atomic egress blocked */ + #define PCI_ERR_UNC_TLPPRE 0x02000000 /* TLP prefix blocked */ +-#define PCI_ERR_UNCOR_MASK 8 /* Uncorrectable Error Mask */ ++#define PCI_ERR_UNCOR_MASK 0x08 /* Uncorrectable Error Mask */ + /* Same bits as above */ +-#define PCI_ERR_UNCOR_SEVER 12 /* Uncorrectable Error Severity */ ++#define PCI_ERR_UNCOR_SEVER 0x0c /* Uncorrectable Error Severity */ + /* Same bits as above */ +-#define PCI_ERR_COR_STATUS 16 /* Correctable Error Status */ ++#define PCI_ERR_COR_STATUS 0x10 /* Correctable Error Status */ + #define PCI_ERR_COR_RCVR 0x00000001 /* Receiver Error Status */ + #define PCI_ERR_COR_BAD_TLP 0x00000040 /* Bad TLP Status */ + #define PCI_ERR_COR_BAD_DLLP 0x00000080 /* Bad DLLP Status */ +@@ -773,20 +773,20 @@ + #define PCI_ERR_COR_ADV_NFAT 0x00002000 /* Advisory Non-Fatal */ + #define PCI_ERR_COR_INTERNAL 0x00004000 /* Corrected Internal */ + #define PCI_ERR_COR_LOG_OVER 0x00008000 /* Header Log Overflow */ +-#define PCI_ERR_COR_MASK 20 /* Correctable Error Mask */ ++#define PCI_ERR_COR_MASK 0x14 /* Correctable Error Mask */ + /* Same bits as above */ +-#define PCI_ERR_CAP 24 /* Advanced Error Capabilities */ +-#define PCI_ERR_CAP_FEP(x) ((x) & 31) /* First Error Pointer */ ++#define PCI_ERR_CAP 0x18 /* Advanced Error Capabilities & Ctrl*/ ++#define PCI_ERR_CAP_FEP(x) ((x) & 0x1f) /* First Error Pointer */ + #define PCI_ERR_CAP_ECRC_GENC 0x00000020 /* ECRC Generation Capable */ + #define PCI_ERR_CAP_ECRC_GENE 0x00000040 /* ECRC Generation Enable */ + #define PCI_ERR_CAP_ECRC_CHKC 0x00000080 /* ECRC Check Capable */ + #define PCI_ERR_CAP_ECRC_CHKE 0x00000100 /* ECRC Check Enable */ +-#define PCI_ERR_HEADER_LOG 28 /* Header Log Register (16 bytes) */ +-#define PCI_ERR_ROOT_COMMAND 44 /* Root Error Command */ ++#define PCI_ERR_HEADER_LOG 0x1c /* Header Log Register (16 bytes) */ ++#define PCI_ERR_ROOT_COMMAND 0x2c /* Root Error Command */ + #define PCI_ERR_ROOT_CMD_COR_EN 0x00000001 /* Correctable Err Reporting Enable */ + #define PCI_ERR_ROOT_CMD_NONFATAL_EN 0x00000002 /* Non-Fatal Err Reporting Enable */ + #define PCI_ERR_ROOT_CMD_FATAL_EN 0x00000004 /* Fatal Err Reporting Enable */ +-#define PCI_ERR_ROOT_STATUS 48 ++#define PCI_ERR_ROOT_STATUS 0x30 + #define PCI_ERR_ROOT_COR_RCV 0x00000001 /* ERR_COR Received */ + #define PCI_ERR_ROOT_MULTI_COR_RCV 0x00000002 /* Multiple ERR_COR */ + #define PCI_ERR_ROOT_UNCOR_RCV 0x00000004 /* ERR_FATAL/NONFATAL */ +@@ -795,52 +795,52 @@ + #define PCI_ERR_ROOT_NONFATAL_RCV 0x00000020 /* Non-Fatal Received */ + #define PCI_ERR_ROOT_FATAL_RCV 0x00000040 /* Fatal Received */ + #define PCI_ERR_ROOT_AER_IRQ 0xf8000000 /* Advanced Error Interrupt Message Number */ +-#define PCI_ERR_ROOT_ERR_SRC 52 /* Error Source Identification */ ++#define PCI_ERR_ROOT_ERR_SRC 0x34 /* Error Source Identification */ + + /* Virtual Channel */ +-#define PCI_VC_PORT_CAP1 4 ++#define PCI_VC_PORT_CAP1 0x04 + #define PCI_VC_CAP1_EVCC 0x00000007 /* extended VC count */ + #define PCI_VC_CAP1_LPEVCC 0x00000070 /* low prio extended VC count */ + #define PCI_VC_CAP1_ARB_SIZE 0x00000c00 +-#define PCI_VC_PORT_CAP2 8 ++#define PCI_VC_PORT_CAP2 0x08 + #define PCI_VC_CAP2_32_PHASE 0x00000002 + #define PCI_VC_CAP2_64_PHASE 0x00000004 + #define PCI_VC_CAP2_128_PHASE 0x00000008 + #define PCI_VC_CAP2_ARB_OFF 0xff000000 +-#define PCI_VC_PORT_CTRL 12 ++#define PCI_VC_PORT_CTRL 0x0c + #define PCI_VC_PORT_CTRL_LOAD_TABLE 0x00000001 +-#define PCI_VC_PORT_STATUS 14 ++#define PCI_VC_PORT_STATUS 0x0e + #define PCI_VC_PORT_STATUS_TABLE 0x00000001 +-#define PCI_VC_RES_CAP 16 ++#define PCI_VC_RES_CAP 0x10 + #define PCI_VC_RES_CAP_32_PHASE 0x00000002 + #define PCI_VC_RES_CAP_64_PHASE 0x00000004 + #define PCI_VC_RES_CAP_128_PHASE 0x00000008 + #define PCI_VC_RES_CAP_128_PHASE_TB 0x00000010 + #define PCI_VC_RES_CAP_256_PHASE 0x00000020 + #define PCI_VC_RES_CAP_ARB_OFF 0xff000000 +-#define PCI_VC_RES_CTRL 20 ++#define PCI_VC_RES_CTRL 0x14 + #define PCI_VC_RES_CTRL_LOAD_TABLE 0x00010000 + #define PCI_VC_RES_CTRL_ARB_SELECT 0x000e0000 + #define PCI_VC_RES_CTRL_ID 0x07000000 + #define PCI_VC_RES_CTRL_ENABLE 0x80000000 +-#define PCI_VC_RES_STATUS 26 ++#define PCI_VC_RES_STATUS 0x1a + #define PCI_VC_RES_STATUS_TABLE 0x00000001 + #define PCI_VC_RES_STATUS_NEGO 0x00000002 + #define PCI_CAP_VC_BASE_SIZEOF 0x10 +-#define PCI_CAP_VC_PER_VC_SIZEOF 0x0C ++#define PCI_CAP_VC_PER_VC_SIZEOF 0x0c + + /* Power Budgeting */ +-#define PCI_PWR_DSR 4 /* Data Select Register */ +-#define PCI_PWR_DATA 8 /* Data Register */ ++#define PCI_PWR_DSR 0x04 /* Data Select Register */ ++#define PCI_PWR_DATA 0x08 /* Data Register */ + #define PCI_PWR_DATA_BASE(x) ((x) & 0xff) /* Base Power */ + #define PCI_PWR_DATA_SCALE(x) (((x) >> 8) & 3) /* Data Scale */ + #define PCI_PWR_DATA_PM_SUB(x) (((x) >> 10) & 7) /* PM Sub State */ + #define PCI_PWR_DATA_PM_STATE(x) (((x) >> 13) & 3) /* PM State */ + #define PCI_PWR_DATA_TYPE(x) (((x) >> 15) & 7) /* Type */ + #define PCI_PWR_DATA_RAIL(x) (((x) >> 18) & 7) /* Power Rail */ +-#define PCI_PWR_CAP 12 /* Capability */ ++#define PCI_PWR_CAP 0x0c /* Capability */ + #define PCI_PWR_CAP_BUDGET(x) ((x) & 1) /* Included in system budget */ +-#define PCI_EXT_CAP_PWR_SIZEOF 16 ++#define PCI_EXT_CAP_PWR_SIZEOF 0x10 + + /* Root Complex Event Collector Endpoint Association */ + #define PCI_RCEC_RCIEP_BITMAP 4 /* Associated Bitmap for RCiEPs */ +@@ -964,7 +964,7 @@ + #define PCI_SRIOV_VFM_MI 0x1 /* Dormant.MigrateIn */ + #define PCI_SRIOV_VFM_MO 0x2 /* Active.MigrateOut */ + #define PCI_SRIOV_VFM_AV 0x3 /* Active.Available */ +-#define PCI_EXT_CAP_SRIOV_SIZEOF 64 ++#define PCI_EXT_CAP_SRIOV_SIZEOF 0x40 + + #define PCI_LTR_MAX_SNOOP_LAT 0x4 + #define PCI_LTR_MAX_NOSNOOP_LAT 0x6 +@@ -1017,12 +1017,12 @@ + #define PCI_TPH_LOC_NONE 0x000 /* no location */ + #define PCI_TPH_LOC_CAP 0x200 /* in capability */ + #define PCI_TPH_LOC_MSIX 0x400 /* in MSI-X */ +-#define PCI_TPH_CAP_ST_MASK 0x07FF0000 /* st table mask */ +-#define PCI_TPH_CAP_ST_SHIFT 16 /* st table shift */ +-#define PCI_TPH_BASE_SIZEOF 12 /* size with no st table */ ++#define PCI_TPH_CAP_ST_MASK 0x07FF0000 /* ST table mask */ ++#define PCI_TPH_CAP_ST_SHIFT 16 /* ST table shift */ ++#define PCI_TPH_BASE_SIZEOF 0xc /* size with no ST table */ + + /* Downstream Port Containment */ +-#define PCI_EXP_DPC_CAP 4 /* DPC Capability */ ++#define PCI_EXP_DPC_CAP 0x04 /* DPC Capability */ + #define PCI_EXP_DPC_IRQ 0x001F /* Interrupt Message Number */ + #define PCI_EXP_DPC_CAP_RP_EXT 0x0020 /* Root Port Extensions */ + #define PCI_EXP_DPC_CAP_POISONED_TLP 0x0040 /* Poisoned TLP Egress Blocking Supported */ +@@ -1030,19 +1030,19 @@ + #define PCI_EXP_DPC_RP_PIO_LOG_SIZE 0x0F00 /* RP PIO Log Size */ + #define PCI_EXP_DPC_CAP_DL_ACTIVE 0x1000 /* ERR_COR signal on DL_Active supported */ + +-#define PCI_EXP_DPC_CTL 6 /* DPC control */ ++#define PCI_EXP_DPC_CTL 0x06 /* DPC control */ + #define PCI_EXP_DPC_CTL_EN_FATAL 0x0001 /* Enable trigger on ERR_FATAL message */ + #define PCI_EXP_DPC_CTL_EN_NONFATAL 0x0002 /* Enable trigger on ERR_NONFATAL message */ + #define PCI_EXP_DPC_CTL_INT_EN 0x0008 /* DPC Interrupt Enable */ + +-#define PCI_EXP_DPC_STATUS 8 /* DPC Status */ ++#define PCI_EXP_DPC_STATUS 0x08 /* DPC Status */ + #define PCI_EXP_DPC_STATUS_TRIGGER 0x0001 /* Trigger Status */ + #define PCI_EXP_DPC_STATUS_TRIGGER_RSN 0x0006 /* Trigger Reason */ + #define PCI_EXP_DPC_STATUS_INTERRUPT 0x0008 /* Interrupt Status */ + #define PCI_EXP_DPC_RP_BUSY 0x0010 /* Root Port Busy */ + #define PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT 0x0060 /* Trig Reason Extension */ + +-#define PCI_EXP_DPC_SOURCE_ID 10 /* DPC Source Identifier */ ++#define PCI_EXP_DPC_SOURCE_ID 0x0A /* DPC Source Identifier */ + + #define PCI_EXP_DPC_RP_PIO_STATUS 0x0C /* RP PIO Status */ + #define PCI_EXP_DPC_RP_PIO_MASK 0x10 /* RP PIO Mask */ +@@ -1086,7 +1086,11 @@ + + /* Designated Vendor-Specific (DVSEC, PCI_EXT_CAP_ID_DVSEC) */ + #define PCI_DVSEC_HEADER1 0x4 /* Designated Vendor-Specific Header1 */ ++#define PCI_DVSEC_HEADER1_VID(x) ((x) & 0xffff) ++#define PCI_DVSEC_HEADER1_REV(x) (((x) >> 16) & 0xf) ++#define PCI_DVSEC_HEADER1_LEN(x) (((x) >> 20) & 0xfff) + #define PCI_DVSEC_HEADER2 0x8 /* Designated Vendor-Specific Header2 */ ++#define PCI_DVSEC_HEADER2_ID(x) ((x) & 0xffff) + + /* Data Link Feature */ + #define PCI_DLF_CAP 0x04 /* Capabilities Register */ +diff --git a/include/standard-headers/linux/virtio_gpio.h b/include/standard-headers/linux/virtio_gpio.h +new file mode 100644 +index 0000000000..2b5cf06349 +--- /dev/null ++++ b/include/standard-headers/linux/virtio_gpio.h +@@ -0,0 +1,72 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++ ++#ifndef _LINUX_VIRTIO_GPIO_H ++#define _LINUX_VIRTIO_GPIO_H ++ ++#include "standard-headers/linux/types.h" ++ ++/* Virtio GPIO Feature bits */ ++#define VIRTIO_GPIO_F_IRQ 0 ++ ++/* Virtio GPIO request types */ ++#define VIRTIO_GPIO_MSG_GET_NAMES 0x0001 ++#define VIRTIO_GPIO_MSG_GET_DIRECTION 0x0002 ++#define VIRTIO_GPIO_MSG_SET_DIRECTION 0x0003 ++#define VIRTIO_GPIO_MSG_GET_VALUE 0x0004 ++#define VIRTIO_GPIO_MSG_SET_VALUE 0x0005 ++#define VIRTIO_GPIO_MSG_IRQ_TYPE 0x0006 ++ ++/* Possible values of the status field */ ++#define VIRTIO_GPIO_STATUS_OK 0x0 ++#define VIRTIO_GPIO_STATUS_ERR 0x1 ++ ++/* Direction types */ ++#define VIRTIO_GPIO_DIRECTION_NONE 0x00 ++#define VIRTIO_GPIO_DIRECTION_OUT 0x01 ++#define VIRTIO_GPIO_DIRECTION_IN 0x02 ++ ++/* Virtio GPIO IRQ types */ ++#define VIRTIO_GPIO_IRQ_TYPE_NONE 0x00 ++#define VIRTIO_GPIO_IRQ_TYPE_EDGE_RISING 0x01 ++#define VIRTIO_GPIO_IRQ_TYPE_EDGE_FALLING 0x02 ++#define VIRTIO_GPIO_IRQ_TYPE_EDGE_BOTH 0x03 ++#define VIRTIO_GPIO_IRQ_TYPE_LEVEL_HIGH 0x04 ++#define VIRTIO_GPIO_IRQ_TYPE_LEVEL_LOW 0x08 ++ ++struct virtio_gpio_config { ++ uint16_t ngpio; ++ uint8_t padding[2]; ++ uint32_t gpio_names_size; ++}; ++ ++/* Virtio GPIO Request / Response */ ++struct virtio_gpio_request { ++ uint16_t type; ++ uint16_t gpio; ++ uint32_t value; ++}; ++ ++struct virtio_gpio_response { ++ uint8_t status; ++ uint8_t value; ++}; ++ ++struct virtio_gpio_response_get_names { ++ uint8_t status; ++ uint8_t value[]; ++}; ++ ++/* Virtio GPIO IRQ Request / Response */ ++struct virtio_gpio_irq_request { ++ uint16_t gpio; ++}; ++ ++struct virtio_gpio_irq_response { ++ uint8_t status; ++}; ++ ++/* Possible values of the interrupt status field */ ++#define VIRTIO_GPIO_IRQ_STATUS_INVALID 0x0 ++#define VIRTIO_GPIO_IRQ_STATUS_VALID 0x1 ++ ++#endif /* _LINUX_VIRTIO_GPIO_H */ +diff --git a/include/standard-headers/linux/virtio_i2c.h b/include/standard-headers/linux/virtio_i2c.h +new file mode 100644 +index 0000000000..09fa907793 +--- /dev/null ++++ b/include/standard-headers/linux/virtio_i2c.h +@@ -0,0 +1,47 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later WITH Linux-syscall-note */ ++/* ++ * Definitions for virtio I2C Adpter ++ * ++ * Copyright (c) 2021 Intel Corporation. All rights reserved. ++ */ ++ ++#ifndef _LINUX_VIRTIO_I2C_H ++#define _LINUX_VIRTIO_I2C_H ++ ++#include "standard-headers/linux/const.h" ++#include "standard-headers/linux/types.h" ++ ++/* Virtio I2C Feature bits */ ++#define VIRTIO_I2C_F_ZERO_LENGTH_REQUEST 0 ++ ++/* The bit 0 of the @virtio_i2c_out_hdr.@flags, used to group the requests */ ++#define VIRTIO_I2C_FLAGS_FAIL_NEXT _BITUL(0) ++ ++/* The bit 1 of the @virtio_i2c_out_hdr.@flags, used to mark a buffer as read */ ++#define VIRTIO_I2C_FLAGS_M_RD _BITUL(1) ++ ++/** ++ * struct virtio_i2c_out_hdr - the virtio I2C message OUT header ++ * @addr: the controlled device address ++ * @padding: used to pad to full dword ++ * @flags: used for feature extensibility ++ */ ++struct virtio_i2c_out_hdr { ++ uint16_t addr; ++ uint16_t padding; ++ uint32_t flags; ++}; ++ ++/** ++ * struct virtio_i2c_in_hdr - the virtio I2C message IN header ++ * @status: the processing result from the backend ++ */ ++struct virtio_i2c_in_hdr { ++ uint8_t status; ++}; ++ ++/* The final status written by the device */ ++#define VIRTIO_I2C_MSG_OK 0 ++#define VIRTIO_I2C_MSG_ERR 1 ++ ++#endif /* _LINUX_VIRTIO_I2C_H */ +diff --git a/include/standard-headers/linux/virtio_iommu.h b/include/standard-headers/linux/virtio_iommu.h +index b9443b83a1..366379c2f0 100644 +--- a/include/standard-headers/linux/virtio_iommu.h ++++ b/include/standard-headers/linux/virtio_iommu.h +@@ -16,6 +16,7 @@ + #define VIRTIO_IOMMU_F_BYPASS 3 + #define VIRTIO_IOMMU_F_PROBE 4 + #define VIRTIO_IOMMU_F_MMIO 5 ++#define VIRTIO_IOMMU_F_BYPASS_CONFIG 6 + + struct virtio_iommu_range_64 { + uint64_t start; +@@ -36,6 +37,8 @@ struct virtio_iommu_config { + struct virtio_iommu_range_32 domain_range; + /* Probe buffer size */ + uint32_t probe_size; ++ uint8_t bypass; ++ uint8_t reserved[3]; + }; + + /* Request types */ +@@ -66,11 +69,14 @@ struct virtio_iommu_req_tail { + uint8_t reserved[3]; + }; + ++#define VIRTIO_IOMMU_ATTACH_F_BYPASS (1 << 0) ++ + struct virtio_iommu_req_attach { + struct virtio_iommu_req_head head; + uint32_t domain; + uint32_t endpoint; +- uint8_t reserved[8]; ++ uint32_t flags; ++ uint8_t reserved[4]; + struct virtio_iommu_req_tail tail; + }; + +diff --git a/include/standard-headers/linux/virtio_pcidev.h b/include/standard-headers/linux/virtio_pcidev.h +new file mode 100644 +index 0000000000..bdf1d062da +--- /dev/null ++++ b/include/standard-headers/linux/virtio_pcidev.h +@@ -0,0 +1,65 @@ ++/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ ++/* ++ * Copyright (C) 2021 Intel Corporation ++ * Author: Johannes Berg ++ */ ++#ifndef _LINUX_VIRTIO_PCIDEV_H ++#define _LINUX_VIRTIO_PCIDEV_H ++#include "standard-headers/linux/types.h" ++ ++/** ++ * enum virtio_pcidev_ops - virtual PCI device operations ++ * @VIRTIO_PCIDEV_OP_RESERVED: reserved to catch errors ++ * @VIRTIO_PCIDEV_OP_CFG_READ: read config space, size is 1, 2, 4 or 8; ++ * the @data field should be filled in by the device (in little endian). ++ * @VIRTIO_PCIDEV_OP_CFG_WRITE: write config space, size is 1, 2, 4 or 8; ++ * the @data field contains the data to write (in little endian). ++ * @VIRTIO_PCIDEV_OP_MMIO_READ: read BAR mem/pio, size can be variable; ++ * the @data field should be filled in by the device (in little endian). ++ * @VIRTIO_PCIDEV_OP_MMIO_WRITE: write BAR mem/pio, size can be variable; ++ * the @data field contains the data to write (in little endian). ++ * @VIRTIO_PCIDEV_OP_MMIO_MEMSET: memset MMIO, size is variable but ++ * the @data field only has one byte (unlike @VIRTIO_PCIDEV_OP_MMIO_WRITE) ++ * @VIRTIO_PCIDEV_OP_INT: legacy INTx# pin interrupt, the addr field is 1-4 for ++ * the number ++ * @VIRTIO_PCIDEV_OP_MSI: MSI(-X) interrupt, this message basically transports ++ * the 16- or 32-bit write that would otherwise be done into memory, ++ * analogous to the write messages (@VIRTIO_PCIDEV_OP_MMIO_WRITE) above ++ * @VIRTIO_PCIDEV_OP_PME: Dummy message whose content is ignored (and should be ++ * all zeroes) to signal the PME# pin. ++ */ ++enum virtio_pcidev_ops { ++ VIRTIO_PCIDEV_OP_RESERVED = 0, ++ VIRTIO_PCIDEV_OP_CFG_READ, ++ VIRTIO_PCIDEV_OP_CFG_WRITE, ++ VIRTIO_PCIDEV_OP_MMIO_READ, ++ VIRTIO_PCIDEV_OP_MMIO_WRITE, ++ VIRTIO_PCIDEV_OP_MMIO_MEMSET, ++ VIRTIO_PCIDEV_OP_INT, ++ VIRTIO_PCIDEV_OP_MSI, ++ VIRTIO_PCIDEV_OP_PME, ++}; ++ ++/** ++ * struct virtio_pcidev_msg - virtio PCI device operation ++ * @op: the operation to do ++ * @bar: the bar (only with BAR read/write messages) ++ * @reserved: reserved ++ * @size: the size of the read/write (in bytes) ++ * @addr: the address to read/write ++ * @data: the data, normally @size long, but just one byte for ++ * %VIRTIO_PCIDEV_OP_MMIO_MEMSET ++ * ++ * Note: the fields are all in native (CPU) endian, however, the ++ * @data values will often be in little endian (see the ops above.) ++ */ ++struct virtio_pcidev_msg { ++ uint8_t op; ++ uint8_t bar; ++ uint16_t reserved; ++ uint32_t size; ++ uint64_t addr; ++ uint8_t data[]; ++}; ++ ++#endif /* _LINUX_VIRTIO_PCIDEV_H */ +diff --git a/include/standard-headers/linux/virtio_scmi.h b/include/standard-headers/linux/virtio_scmi.h +new file mode 100644 +index 0000000000..8f2c305aea +--- /dev/null ++++ b/include/standard-headers/linux/virtio_scmi.h +@@ -0,0 +1,24 @@ ++/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ ++/* ++ * Copyright (C) 2020-2021 OpenSynergy GmbH ++ * Copyright (C) 2021 ARM Ltd. ++ */ ++ ++#ifndef _LINUX_VIRTIO_SCMI_H ++#define _LINUX_VIRTIO_SCMI_H ++ ++#include "standard-headers/linux/virtio_types.h" ++ ++/* Device implements some SCMI notifications, or delayed responses. */ ++#define VIRTIO_SCMI_F_P2A_CHANNELS 0 ++ ++/* Device implements any SCMI statistics shared memory region */ ++#define VIRTIO_SCMI_F_SHARED_MEMORY 1 ++ ++/* Virtqueues */ ++ ++#define VIRTIO_SCMI_VQ_TX 0 /* cmdq */ ++#define VIRTIO_SCMI_VQ_RX 1 /* eventq */ ++#define VIRTIO_SCMI_VQ_MAX_CNT 2 ++ ++#endif /* _LINUX_VIRTIO_SCMI_H */ +diff --git a/linux-headers/asm-generic/unistd.h b/linux-headers/asm-generic/unistd.h +index 4557a8b608..1c48b0ae3b 100644 +--- a/linux-headers/asm-generic/unistd.h ++++ b/linux-headers/asm-generic/unistd.h +@@ -883,8 +883,11 @@ __SYSCALL(__NR_process_mrelease, sys_process_mrelease) + #define __NR_futex_waitv 449 + __SYSCALL(__NR_futex_waitv, sys_futex_waitv) + ++#define __NR_set_mempolicy_home_node 450 ++__SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) ++ + #undef __NR_syscalls +-#define __NR_syscalls 450 ++#define __NR_syscalls 451 + + /* + * 32 bit systems traditionally used different +diff --git a/linux-headers/asm-mips/unistd_n32.h b/linux-headers/asm-mips/unistd_n32.h +index 4b3e7ad1ec..1f14a6fad3 100644 +--- a/linux-headers/asm-mips/unistd_n32.h ++++ b/linux-headers/asm-mips/unistd_n32.h +@@ -377,5 +377,7 @@ + #define __NR_landlock_add_rule (__NR_Linux + 445) + #define __NR_landlock_restrict_self (__NR_Linux + 446) + #define __NR_process_mrelease (__NR_Linux + 448) ++#define __NR_futex_waitv (__NR_Linux + 449) ++#define __NR_set_mempolicy_home_node (__NR_Linux + 450) + + #endif /* _ASM_UNISTD_N32_H */ +diff --git a/linux-headers/asm-mips/unistd_n64.h b/linux-headers/asm-mips/unistd_n64.h +index 488d9298d9..e5a8ebec78 100644 +--- a/linux-headers/asm-mips/unistd_n64.h ++++ b/linux-headers/asm-mips/unistd_n64.h +@@ -353,5 +353,7 @@ + #define __NR_landlock_add_rule (__NR_Linux + 445) + #define __NR_landlock_restrict_self (__NR_Linux + 446) + #define __NR_process_mrelease (__NR_Linux + 448) ++#define __NR_futex_waitv (__NR_Linux + 449) ++#define __NR_set_mempolicy_home_node (__NR_Linux + 450) + + #endif /* _ASM_UNISTD_N64_H */ +diff --git a/linux-headers/asm-mips/unistd_o32.h b/linux-headers/asm-mips/unistd_o32.h +index f47399870a..871d57168f 100644 +--- a/linux-headers/asm-mips/unistd_o32.h ++++ b/linux-headers/asm-mips/unistd_o32.h +@@ -423,5 +423,7 @@ + #define __NR_landlock_add_rule (__NR_Linux + 445) + #define __NR_landlock_restrict_self (__NR_Linux + 446) + #define __NR_process_mrelease (__NR_Linux + 448) ++#define __NR_futex_waitv (__NR_Linux + 449) ++#define __NR_set_mempolicy_home_node (__NR_Linux + 450) + + #endif /* _ASM_UNISTD_O32_H */ +diff --git a/linux-headers/asm-powerpc/unistd_32.h b/linux-headers/asm-powerpc/unistd_32.h +index 11d54696dc..585c7fefbc 100644 +--- a/linux-headers/asm-powerpc/unistd_32.h ++++ b/linux-headers/asm-powerpc/unistd_32.h +@@ -430,6 +430,8 @@ + #define __NR_landlock_add_rule 445 + #define __NR_landlock_restrict_self 446 + #define __NR_process_mrelease 448 ++#define __NR_futex_waitv 449 ++#define __NR_set_mempolicy_home_node 450 + + + #endif /* _ASM_UNISTD_32_H */ +diff --git a/linux-headers/asm-powerpc/unistd_64.h b/linux-headers/asm-powerpc/unistd_64.h +index cf740bab13..350f7ec0ac 100644 +--- a/linux-headers/asm-powerpc/unistd_64.h ++++ b/linux-headers/asm-powerpc/unistd_64.h +@@ -402,6 +402,8 @@ + #define __NR_landlock_add_rule 445 + #define __NR_landlock_restrict_self 446 + #define __NR_process_mrelease 448 ++#define __NR_futex_waitv 449 ++#define __NR_set_mempolicy_home_node 450 + + + #endif /* _ASM_UNISTD_64_H */ +diff --git a/linux-headers/asm-riscv/bitsperlong.h b/linux-headers/asm-riscv/bitsperlong.h +new file mode 100644 +index 0000000000..cc5c45a9ce +--- /dev/null ++++ b/linux-headers/asm-riscv/bitsperlong.h +@@ -0,0 +1,14 @@ ++/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ ++/* ++ * Copyright (C) 2012 ARM Ltd. ++ * Copyright (C) 2015 Regents of the University of California ++ */ ++ ++#ifndef _ASM_RISCV_BITSPERLONG_H ++#define _ASM_RISCV_BITSPERLONG_H ++ ++#define __BITS_PER_LONG (__SIZEOF_POINTER__ * 8) ++ ++#include ++ ++#endif /* _ASM_RISCV_BITSPERLONG_H */ +diff --git a/linux-headers/asm-riscv/mman.h b/linux-headers/asm-riscv/mman.h +new file mode 100644 +index 0000000000..8eebf89f5a +--- /dev/null ++++ b/linux-headers/asm-riscv/mman.h +@@ -0,0 +1 @@ ++#include +diff --git a/linux-headers/asm-riscv/unistd.h b/linux-headers/asm-riscv/unistd.h +new file mode 100644 +index 0000000000..8062996c2d +--- /dev/null ++++ b/linux-headers/asm-riscv/unistd.h +@@ -0,0 +1,44 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * Copyright (C) 2018 David Abdurachmanov ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program. If not, see . ++ */ ++ ++#ifdef __LP64__ ++#define __ARCH_WANT_NEW_STAT ++#define __ARCH_WANT_SET_GET_RLIMIT ++#endif /* __LP64__ */ ++ ++#define __ARCH_WANT_SYS_CLONE3 ++ ++#include ++ ++/* ++ * Allows the instruction cache to be flushed from userspace. Despite RISC-V ++ * having a direct 'fence.i' instruction available to userspace (which we ++ * can't trap!), that's not actually viable when running on Linux because the ++ * kernel might schedule a process on another hart. There is no way for ++ * userspace to handle this without invoking the kernel (as it doesn't know the ++ * thread->hart mappings), so we've defined a RISC-V specific system call to ++ * flush the instruction cache. ++ * ++ * __NR_riscv_flush_icache is defined to flush the instruction cache over an ++ * address range, with the flush applying to either all threads or just the ++ * caller. We don't currently do anything with the address range, that's just ++ * in there for forwards compatibility. ++ */ ++#ifndef __NR_riscv_flush_icache ++#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15) ++#endif ++__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache) +diff --git a/linux-headers/asm-s390/unistd_32.h b/linux-headers/asm-s390/unistd_32.h +index 8f97d98128..8e644d65f5 100644 +--- a/linux-headers/asm-s390/unistd_32.h ++++ b/linux-headers/asm-s390/unistd_32.h +@@ -420,5 +420,7 @@ + #define __NR_landlock_add_rule 445 + #define __NR_landlock_restrict_self 446 + #define __NR_process_mrelease 448 ++#define __NR_futex_waitv 449 ++#define __NR_set_mempolicy_home_node 450 + + #endif /* _ASM_S390_UNISTD_32_H */ +diff --git a/linux-headers/asm-s390/unistd_64.h b/linux-headers/asm-s390/unistd_64.h +index 021ffc30e6..51da542fec 100644 +--- a/linux-headers/asm-s390/unistd_64.h ++++ b/linux-headers/asm-s390/unistd_64.h +@@ -368,5 +368,7 @@ + #define __NR_landlock_add_rule 445 + #define __NR_landlock_restrict_self 446 + #define __NR_process_mrelease 448 ++#define __NR_futex_waitv 449 ++#define __NR_set_mempolicy_home_node 450 + + #endif /* _ASM_S390_UNISTD_64_H */ +diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h +index 5a776a08f7..2da3316bb5 100644 +--- a/linux-headers/asm-x86/kvm.h ++++ b/linux-headers/asm-x86/kvm.h +@@ -373,9 +373,23 @@ struct kvm_debugregs { + __u64 reserved[9]; + }; + +-/* for KVM_CAP_XSAVE */ ++/* for KVM_CAP_XSAVE and KVM_CAP_XSAVE2 */ + struct kvm_xsave { ++ /* ++ * KVM_GET_XSAVE2 and KVM_SET_XSAVE write and read as many bytes ++ * as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) ++ * respectively, when invoked on the vm file descriptor. ++ * ++ * The size value returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) ++ * will always be at least 4096. Currently, it is only greater ++ * than 4096 if a dynamic feature has been enabled with ++ * ``arch_prctl()``, but this may change in the future. ++ * ++ * The offsets of the state save areas in struct kvm_xsave follow ++ * the contents of CPUID leaf 0xD on the host. ++ */ + __u32 region[1024]; ++ __u32 extra[0]; + }; + + #define KVM_MAX_XCRS 16 +diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h +index 9c9ffe312b..87e1e977af 100644 +--- a/linux-headers/asm-x86/unistd_32.h ++++ b/linux-headers/asm-x86/unistd_32.h +@@ -440,6 +440,7 @@ + #define __NR_memfd_secret 447 + #define __NR_process_mrelease 448 + #define __NR_futex_waitv 449 ++#define __NR_set_mempolicy_home_node 450 + + + #endif /* _ASM_UNISTD_32_H */ +diff --git a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h +index 084f1eef9c..147a78d623 100644 +--- a/linux-headers/asm-x86/unistd_64.h ++++ b/linux-headers/asm-x86/unistd_64.h +@@ -362,6 +362,7 @@ + #define __NR_memfd_secret 447 + #define __NR_process_mrelease 448 + #define __NR_futex_waitv 449 ++#define __NR_set_mempolicy_home_node 450 + + + #endif /* _ASM_UNISTD_64_H */ +diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h +index a2441affc2..27098db7fb 100644 +--- a/linux-headers/asm-x86/unistd_x32.h ++++ b/linux-headers/asm-x86/unistd_x32.h +@@ -315,6 +315,7 @@ + #define __NR_memfd_secret (__X32_SYSCALL_BIT + 447) + #define __NR_process_mrelease (__X32_SYSCALL_BIT + 448) + #define __NR_futex_waitv (__X32_SYSCALL_BIT + 449) ++#define __NR_set_mempolicy_home_node (__X32_SYSCALL_BIT + 450) + #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512) + #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513) + #define __NR_ioctl (__X32_SYSCALL_BIT + 514) +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 02c5e7b7bb..00af3bc333 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -1130,6 +1130,9 @@ struct kvm_ppc_resize_hpt { + #define KVM_CAP_BINARY_STATS_FD 203 + #define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204 + #define KVM_CAP_ARM_MTE 205 ++#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206 ++#define KVM_CAP_VM_GPA_BITS 207 ++#define KVM_CAP_XSAVE2 208 + + #ifdef KVM_CAP_IRQ_ROUTING + +@@ -1161,11 +1164,20 @@ struct kvm_irq_routing_hv_sint { + __u32 sint; + }; + ++struct kvm_irq_routing_xen_evtchn { ++ __u32 port; ++ __u32 vcpu; ++ __u32 priority; ++}; ++ ++#define KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL ((__u32)(-1)) ++ + /* gsi routing entry types */ + #define KVM_IRQ_ROUTING_IRQCHIP 1 + #define KVM_IRQ_ROUTING_MSI 2 + #define KVM_IRQ_ROUTING_S390_ADAPTER 3 + #define KVM_IRQ_ROUTING_HV_SINT 4 ++#define KVM_IRQ_ROUTING_XEN_EVTCHN 5 + + struct kvm_irq_routing_entry { + __u32 gsi; +@@ -1177,6 +1189,7 @@ struct kvm_irq_routing_entry { + struct kvm_irq_routing_msi msi; + struct kvm_irq_routing_s390_adapter adapter; + struct kvm_irq_routing_hv_sint hv_sint; ++ struct kvm_irq_routing_xen_evtchn xen_evtchn; + __u32 pad[8]; + } u; + }; +@@ -1207,6 +1220,7 @@ struct kvm_x86_mce { + #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) + #define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) + #define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) ++#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) + + struct kvm_xen_hvm_config { + __u32 flags; +@@ -1609,6 +1623,9 @@ struct kvm_enc_region { + #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) + #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) + ++/* Available with KVM_CAP_XSAVE2 */ ++#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) ++ + struct kvm_s390_pv_sec_parm { + __u64 origin; + __u64 length; +-- +2.35.3 + diff --git a/SOURCES/kvm-linux-headers-include-missing-changes-from-5.17.patch b/SOURCES/kvm-linux-headers-include-missing-changes-from-5.17.patch new file mode 100644 index 0000000..1319926 --- /dev/null +++ b/SOURCES/kvm-linux-headers-include-missing-changes-from-5.17.patch @@ -0,0 +1,58 @@ +From aa6181d87e2b4ef1a70be002881908d2df5548a9 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Tue, 22 Feb 2022 17:58:11 +0100 +Subject: [PATCH 04/24] linux-headers: include missing changes from 5.17 + +RH-Author: Paul Lai +RH-MergeRequest: 176: Enable KVM AMX support +RH-Commit: [4/13] 2ed7cbc07e63d85cda916ef44d1e82b1fba7fdf4 +RH-Bugzilla: 1916415 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Igor Mammedov +RH-Acked-by: Paolo Bonzini + +Signed-off-by: Paolo Bonzini +(cherry picked from commit 1ea5208febcc068449b63282d72bb719ab67a466) +Signed-off-by: Paul Lai +--- + linux-headers/asm-x86/kvm.h | 3 +++ + linux-headers/linux/kvm.h | 4 ++++ + 2 files changed, 7 insertions(+) + +diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h +index 2da3316bb5..bf6e96011d 100644 +--- a/linux-headers/asm-x86/kvm.h ++++ b/linux-headers/asm-x86/kvm.h +@@ -452,6 +452,9 @@ struct kvm_sync_regs { + + #define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001 + ++/* attributes for system fd (group 0) */ ++#define KVM_X86_XCOMP_GUEST_SUPP 0 ++ + struct kvm_vmx_nested_state_data { + __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; + __u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 00af3bc333..d232feaae9 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -1133,6 +1133,7 @@ struct kvm_ppc_resize_hpt { + #define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206 + #define KVM_CAP_VM_GPA_BITS 207 + #define KVM_CAP_XSAVE2 208 ++#define KVM_CAP_SYS_ATTRIBUTES 209 + + #ifdef KVM_CAP_IRQ_ROUTING + +@@ -2047,4 +2048,7 @@ struct kvm_stats_desc { + + #define KVM_GET_STATS_FD _IO(KVMIO, 0xce) + ++/* Available with KVM_CAP_XSAVE2 */ ++#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) ++ + #endif /* __LINUX_KVM_H */ +-- +2.35.3 + diff --git a/SOURCES/kvm-linux-headers-update-to-5.16-rc1.patch b/SOURCES/kvm-linux-headers-update-to-5.16-rc1.patch new file mode 100644 index 0000000..1ad047b --- /dev/null +++ b/SOURCES/kvm-linux-headers-update-to-5.16-rc1.patch @@ -0,0 +1,725 @@ +From 64808db4a14867ad774b5e7535972a886e20a156 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Thu, 11 Nov 2021 12:06:01 +0100 +Subject: [PATCH 02/24] linux-headers: update to 5.16-rc1 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paul Lai +RH-MergeRequest: 176: Enable KVM AMX support +RH-Commit: [2/13] 4af2f4942db029b81890e3862793fb54b62791cc +RH-Bugzilla: 1916415 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Igor Mammedov +RH-Acked-by: Paolo Bonzini + +Signed-off-by: Paolo Bonzini +Acked-by: Cornelia Huck +Reviewed-by: Alex Bennée +Message-Id: <20211111110604.207376-3-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 43709a0ca3b09e952bde3f38112f1d7fbf7c65b1) +Signed-off-by: Paul Lai +--- + include/standard-headers/drm/drm_fourcc.h | 121 +++++++++++++++++- + include/standard-headers/linux/ethtool.h | 31 +++++ + include/standard-headers/linux/fuse.h | 10 +- + include/standard-headers/linux/pci_regs.h | 6 + + include/standard-headers/linux/virtio_gpu.h | 18 ++- + include/standard-headers/linux/virtio_ids.h | 24 ++++ + include/standard-headers/linux/virtio_vsock.h | 3 +- + linux-headers/asm-arm64/unistd.h | 1 + + linux-headers/asm-generic/unistd.h | 22 +++- + linux-headers/asm-mips/unistd_n32.h | 1 + + linux-headers/asm-mips/unistd_n64.h | 1 + + linux-headers/asm-mips/unistd_o32.h | 1 + + linux-headers/asm-powerpc/unistd_32.h | 1 + + linux-headers/asm-powerpc/unistd_64.h | 1 + + linux-headers/asm-s390/unistd_32.h | 1 + + linux-headers/asm-s390/unistd_64.h | 1 + + linux-headers/asm-x86/kvm.h | 5 + + linux-headers/asm-x86/unistd_32.h | 3 + + linux-headers/asm-x86/unistd_64.h | 3 + + linux-headers/asm-x86/unistd_x32.h | 3 + + linux-headers/linux/kvm.h | 40 +++++- + 21 files changed, 276 insertions(+), 21 deletions(-) + +diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h +index 352b51fd0a..2c025cb4fe 100644 +--- a/include/standard-headers/drm/drm_fourcc.h ++++ b/include/standard-headers/drm/drm_fourcc.h +@@ -103,6 +103,12 @@ extern "C" { + /* 8 bpp Red */ + #define DRM_FORMAT_R8 fourcc_code('R', '8', ' ', ' ') /* [7:0] R */ + ++/* 10 bpp Red */ ++#define DRM_FORMAT_R10 fourcc_code('R', '1', '0', ' ') /* [15:0] x:R 6:10 little endian */ ++ ++/* 12 bpp Red */ ++#define DRM_FORMAT_R12 fourcc_code('R', '1', '2', ' ') /* [15:0] x:R 4:12 little endian */ ++ + /* 16 bpp Red */ + #define DRM_FORMAT_R16 fourcc_code('R', '1', '6', ' ') /* [15:0] R little endian */ + +@@ -372,6 +378,12 @@ extern "C" { + + #define DRM_FORMAT_RESERVED ((1ULL << 56) - 1) + ++#define fourcc_mod_get_vendor(modifier) \ ++ (((modifier) >> 56) & 0xff) ++ ++#define fourcc_mod_is_vendor(modifier, vendor) \ ++ (fourcc_mod_get_vendor(modifier) == DRM_FORMAT_MOD_VENDOR_## vendor) ++ + #define fourcc_mod_code(vendor, val) \ + ((((uint64_t)DRM_FORMAT_MOD_VENDOR_## vendor) << 56) | ((val) & 0x00ffffffffffffffULL)) + +@@ -899,9 +911,9 @@ drm_fourcc_canonicalize_nvidia_format_mod(uint64_t modifier) + + /* + * The top 4 bits (out of the 56 bits alloted for specifying vendor specific +- * modifiers) denote the category for modifiers. Currently we have only two +- * categories of modifiers ie AFBC and MISC. We can have a maximum of sixteen +- * different categories. ++ * modifiers) denote the category for modifiers. Currently we have three ++ * categories of modifiers ie AFBC, MISC and AFRC. We can have a maximum of ++ * sixteen different categories. + */ + #define DRM_FORMAT_MOD_ARM_CODE(__type, __val) \ + fourcc_mod_code(ARM, ((uint64_t)(__type) << 52) | ((__val) & 0x000fffffffffffffULL)) +@@ -1016,6 +1028,109 @@ drm_fourcc_canonicalize_nvidia_format_mod(uint64_t modifier) + */ + #define AFBC_FORMAT_MOD_USM (1ULL << 12) + ++/* ++ * Arm Fixed-Rate Compression (AFRC) modifiers ++ * ++ * AFRC is a proprietary fixed rate image compression protocol and format, ++ * designed to provide guaranteed bandwidth and memory footprint ++ * reductions in graphics and media use-cases. ++ * ++ * AFRC buffers consist of one or more planes, with the same components ++ * and meaning as an uncompressed buffer using the same pixel format. ++ * ++ * Within each plane, the pixel/luma/chroma values are grouped into ++ * "coding unit" blocks which are individually compressed to a ++ * fixed size (in bytes). All coding units within a given plane of a buffer ++ * store the same number of values, and have the same compressed size. ++ * ++ * The coding unit size is configurable, allowing different rates of compression. ++ * ++ * The start of each AFRC buffer plane must be aligned to an alignment granule which ++ * depends on the coding unit size. ++ * ++ * Coding Unit Size Plane Alignment ++ * ---------------- --------------- ++ * 16 bytes 1024 bytes ++ * 24 bytes 512 bytes ++ * 32 bytes 2048 bytes ++ * ++ * Coding units are grouped into paging tiles. AFRC buffer dimensions must be aligned ++ * to a multiple of the paging tile dimensions. ++ * The dimensions of each paging tile depend on whether the buffer is optimised for ++ * scanline (SCAN layout) or rotated (ROT layout) access. ++ * ++ * Layout Paging Tile Width Paging Tile Height ++ * ------ ----------------- ------------------ ++ * SCAN 16 coding units 4 coding units ++ * ROT 8 coding units 8 coding units ++ * ++ * The dimensions of each coding unit depend on the number of components ++ * in the compressed plane and whether the buffer is optimised for ++ * scanline (SCAN layout) or rotated (ROT layout) access. ++ * ++ * Number of Components in Plane Layout Coding Unit Width Coding Unit Height ++ * ----------------------------- --------- ----------------- ------------------ ++ * 1 SCAN 16 samples 4 samples ++ * Example: 16x4 luma samples in a 'Y' plane ++ * 16x4 chroma 'V' values, in the 'V' plane of a fully-planar YUV buffer ++ * ----------------------------- --------- ----------------- ------------------ ++ * 1 ROT 8 samples 8 samples ++ * Example: 8x8 luma samples in a 'Y' plane ++ * 8x8 chroma 'V' values, in the 'V' plane of a fully-planar YUV buffer ++ * ----------------------------- --------- ----------------- ------------------ ++ * 2 DONT CARE 8 samples 4 samples ++ * Example: 8x4 chroma pairs in the 'UV' plane of a semi-planar YUV buffer ++ * ----------------------------- --------- ----------------- ------------------ ++ * 3 DONT CARE 4 samples 4 samples ++ * Example: 4x4 pixels in an RGB buffer without alpha ++ * ----------------------------- --------- ----------------- ------------------ ++ * 4 DONT CARE 4 samples 4 samples ++ * Example: 4x4 pixels in an RGB buffer with alpha ++ */ ++ ++#define DRM_FORMAT_MOD_ARM_TYPE_AFRC 0x02 ++ ++#define DRM_FORMAT_MOD_ARM_AFRC(__afrc_mode) \ ++ DRM_FORMAT_MOD_ARM_CODE(DRM_FORMAT_MOD_ARM_TYPE_AFRC, __afrc_mode) ++ ++/* ++ * AFRC coding unit size modifier. ++ * ++ * Indicates the number of bytes used to store each compressed coding unit for ++ * one or more planes in an AFRC encoded buffer. The coding unit size for chrominance ++ * is the same for both Cb and Cr, which may be stored in separate planes. ++ * ++ * AFRC_FORMAT_MOD_CU_SIZE_P0 indicates the number of bytes used to store ++ * each compressed coding unit in the first plane of the buffer. For RGBA buffers ++ * this is the only plane, while for semi-planar and fully-planar YUV buffers, ++ * this corresponds to the luma plane. ++ * ++ * AFRC_FORMAT_MOD_CU_SIZE_P12 indicates the number of bytes used to store ++ * each compressed coding unit in the second and third planes in the buffer. ++ * For semi-planar and fully-planar YUV buffers, this corresponds to the chroma plane(s). ++ * ++ * For single-plane buffers, AFRC_FORMAT_MOD_CU_SIZE_P0 must be specified ++ * and AFRC_FORMAT_MOD_CU_SIZE_P12 must be zero. ++ * For semi-planar and fully-planar buffers, both AFRC_FORMAT_MOD_CU_SIZE_P0 and ++ * AFRC_FORMAT_MOD_CU_SIZE_P12 must be specified. ++ */ ++#define AFRC_FORMAT_MOD_CU_SIZE_MASK 0xf ++#define AFRC_FORMAT_MOD_CU_SIZE_16 (1ULL) ++#define AFRC_FORMAT_MOD_CU_SIZE_24 (2ULL) ++#define AFRC_FORMAT_MOD_CU_SIZE_32 (3ULL) ++ ++#define AFRC_FORMAT_MOD_CU_SIZE_P0(__afrc_cu_size) (__afrc_cu_size) ++#define AFRC_FORMAT_MOD_CU_SIZE_P12(__afrc_cu_size) ((__afrc_cu_size) << 4) ++ ++/* ++ * AFRC scanline memory layout. ++ * ++ * Indicates if the buffer uses the scanline-optimised layout ++ * for an AFRC encoded buffer, otherwise, it uses the rotation-optimised layout. ++ * The memory layout is the same for all planes. ++ */ ++#define AFRC_FORMAT_MOD_LAYOUT_SCAN (1ULL << 8) ++ + /* + * Arm 16x16 Block U-Interleaved modifier + * +diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h +index 053d3fafdf..688eb8dc39 100644 +--- a/include/standard-headers/linux/ethtool.h ++++ b/include/standard-headers/linux/ethtool.h +@@ -603,6 +603,7 @@ enum ethtool_link_ext_state { + ETHTOOL_LINK_EXT_STATE_CALIBRATION_FAILURE, + ETHTOOL_LINK_EXT_STATE_POWER_BUDGET_EXCEEDED, + ETHTOOL_LINK_EXT_STATE_OVERHEAT, ++ ETHTOOL_LINK_EXT_STATE_MODULE, + }; + + /* More information in addition to ETHTOOL_LINK_EXT_STATE_AUTONEG. */ +@@ -639,6 +640,8 @@ enum ethtool_link_ext_substate_link_logical_mismatch { + enum ethtool_link_ext_substate_bad_signal_integrity { + ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS = 1, + ETHTOOL_LINK_EXT_SUBSTATE_BSI_UNSUPPORTED_RATE, ++ ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_REFERENCE_CLOCK_LOST, ++ ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_ALOS, + }; + + /* More information in addition to ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE. */ +@@ -647,6 +650,11 @@ enum ethtool_link_ext_substate_cable_issue { + ETHTOOL_LINK_EXT_SUBSTATE_CI_CABLE_TEST_FAILURE, + }; + ++/* More information in addition to ETHTOOL_LINK_EXT_STATE_MODULE. */ ++enum ethtool_link_ext_substate_module { ++ ETHTOOL_LINK_EXT_SUBSTATE_MODULE_CMIS_NOT_READY = 1, ++}; ++ + #define ETH_GSTRING_LEN 32 + + /** +@@ -704,6 +712,29 @@ enum ethtool_stringset { + ETH_SS_COUNT + }; + ++/** ++ * enum ethtool_module_power_mode_policy - plug-in module power mode policy ++ * @ETHTOOL_MODULE_POWER_MODE_POLICY_HIGH: Module is always in high power mode. ++ * @ETHTOOL_MODULE_POWER_MODE_POLICY_AUTO: Module is transitioned by the host ++ * to high power mode when the first port using it is put administratively ++ * up and to low power mode when the last port using it is put ++ * administratively down. ++ */ ++enum ethtool_module_power_mode_policy { ++ ETHTOOL_MODULE_POWER_MODE_POLICY_HIGH = 1, ++ ETHTOOL_MODULE_POWER_MODE_POLICY_AUTO, ++}; ++ ++/** ++ * enum ethtool_module_power_mode - plug-in module power mode ++ * @ETHTOOL_MODULE_POWER_MODE_LOW: Module is in low power mode. ++ * @ETHTOOL_MODULE_POWER_MODE_HIGH: Module is in high power mode. ++ */ ++enum ethtool_module_power_mode { ++ ETHTOOL_MODULE_POWER_MODE_LOW = 1, ++ ETHTOOL_MODULE_POWER_MODE_HIGH, ++}; ++ + /** + * struct ethtool_gstrings - string set for data tagging + * @cmd: Command number = %ETHTOOL_GSTRINGS +diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h +index cce105bfba..23ea31708b 100644 +--- a/include/standard-headers/linux/fuse.h ++++ b/include/standard-headers/linux/fuse.h +@@ -181,6 +181,9 @@ + * - add FUSE_OPEN_KILL_SUIDGID + * - extend fuse_setxattr_in, add FUSE_SETXATTR_EXT + * - add FUSE_SETXATTR_ACL_KILL_SGID ++ * ++ * 7.34 ++ * - add FUSE_SYNCFS + */ + + #ifndef _LINUX_FUSE_H +@@ -212,7 +215,7 @@ + #define FUSE_KERNEL_VERSION 7 + + /** Minor version number of this interface */ +-#define FUSE_KERNEL_MINOR_VERSION 33 ++#define FUSE_KERNEL_MINOR_VERSION 34 + + /** The node ID of the root inode */ + #define FUSE_ROOT_ID 1 +@@ -505,6 +508,7 @@ enum fuse_opcode { + FUSE_COPY_FILE_RANGE = 47, + FUSE_SETUPMAPPING = 48, + FUSE_REMOVEMAPPING = 49, ++ FUSE_SYNCFS = 50, + + /* CUSE specific operations */ + CUSE_INIT = 4096, +@@ -967,4 +971,8 @@ struct fuse_removemapping_one { + #define FUSE_REMOVEMAPPING_MAX_ENTRY \ + (PAGE_SIZE / sizeof(struct fuse_removemapping_one)) + ++struct fuse_syncfs_in { ++ uint64_t padding; ++}; ++ + #endif /* _LINUX_FUSE_H */ +diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h +index e709ae8235..ff6ccbc6ef 100644 +--- a/include/standard-headers/linux/pci_regs.h ++++ b/include/standard-headers/linux/pci_regs.h +@@ -504,6 +504,12 @@ + #define PCI_EXP_DEVCTL_URRE 0x0008 /* Unsupported Request Reporting En. */ + #define PCI_EXP_DEVCTL_RELAX_EN 0x0010 /* Enable relaxed ordering */ + #define PCI_EXP_DEVCTL_PAYLOAD 0x00e0 /* Max_Payload_Size */ ++#define PCI_EXP_DEVCTL_PAYLOAD_128B 0x0000 /* 128 Bytes */ ++#define PCI_EXP_DEVCTL_PAYLOAD_256B 0x0020 /* 256 Bytes */ ++#define PCI_EXP_DEVCTL_PAYLOAD_512B 0x0040 /* 512 Bytes */ ++#define PCI_EXP_DEVCTL_PAYLOAD_1024B 0x0060 /* 1024 Bytes */ ++#define PCI_EXP_DEVCTL_PAYLOAD_2048B 0x0080 /* 2048 Bytes */ ++#define PCI_EXP_DEVCTL_PAYLOAD_4096B 0x00a0 /* 4096 Bytes */ + #define PCI_EXP_DEVCTL_EXT_TAG 0x0100 /* Extended Tag Field Enable */ + #define PCI_EXP_DEVCTL_PHANTOM 0x0200 /* Phantom Functions Enable */ + #define PCI_EXP_DEVCTL_AUX_PME 0x0400 /* Auxiliary Power PM Enable */ +diff --git a/include/standard-headers/linux/virtio_gpu.h b/include/standard-headers/linux/virtio_gpu.h +index 1357e4774e..2da48d3d4c 100644 +--- a/include/standard-headers/linux/virtio_gpu.h ++++ b/include/standard-headers/linux/virtio_gpu.h +@@ -59,6 +59,11 @@ + * VIRTIO_GPU_CMD_RESOURCE_CREATE_BLOB + */ + #define VIRTIO_GPU_F_RESOURCE_BLOB 3 ++/* ++ * VIRTIO_GPU_CMD_CREATE_CONTEXT with ++ * context_init and multiple timelines ++ */ ++#define VIRTIO_GPU_F_CONTEXT_INIT 4 + + enum virtio_gpu_ctrl_type { + VIRTIO_GPU_UNDEFINED = 0, +@@ -122,14 +127,20 @@ enum virtio_gpu_shm_id { + VIRTIO_GPU_SHM_ID_HOST_VISIBLE = 1 + }; + +-#define VIRTIO_GPU_FLAG_FENCE (1 << 0) ++#define VIRTIO_GPU_FLAG_FENCE (1 << 0) ++/* ++ * If the following flag is set, then ring_idx contains the index ++ * of the command ring that needs to used when creating the fence ++ */ ++#define VIRTIO_GPU_FLAG_INFO_RING_IDX (1 << 1) + + struct virtio_gpu_ctrl_hdr { + uint32_t type; + uint32_t flags; + uint64_t fence_id; + uint32_t ctx_id; +- uint32_t padding; ++ uint8_t ring_idx; ++ uint8_t padding[3]; + }; + + /* data passed in the cursor vq */ +@@ -269,10 +280,11 @@ struct virtio_gpu_resource_create_3d { + }; + + /* VIRTIO_GPU_CMD_CTX_CREATE */ ++#define VIRTIO_GPU_CONTEXT_INIT_CAPSET_ID_MASK 0x000000ff + struct virtio_gpu_ctx_create { + struct virtio_gpu_ctrl_hdr hdr; + uint32_t nlen; +- uint32_t padding; ++ uint32_t context_init; + char debug_name[64]; + }; + +diff --git a/include/standard-headers/linux/virtio_ids.h b/include/standard-headers/linux/virtio_ids.h +index 4fe842c3a3..80d76b75bc 100644 +--- a/include/standard-headers/linux/virtio_ids.h ++++ b/include/standard-headers/linux/virtio_ids.h +@@ -54,7 +54,31 @@ + #define VIRTIO_ID_SOUND 25 /* virtio sound */ + #define VIRTIO_ID_FS 26 /* virtio filesystem */ + #define VIRTIO_ID_PMEM 27 /* virtio pmem */ ++#define VIRTIO_ID_RPMB 28 /* virtio rpmb */ + #define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ ++#define VIRTIO_ID_VIDEO_ENCODER 30 /* virtio video encoder */ ++#define VIRTIO_ID_VIDEO_DECODER 31 /* virtio video decoder */ ++#define VIRTIO_ID_SCMI 32 /* virtio SCMI */ ++#define VIRTIO_ID_NITRO_SEC_MOD 33 /* virtio nitro secure module*/ ++#define VIRTIO_ID_I2C_ADAPTER 34 /* virtio i2c adapter */ ++#define VIRTIO_ID_WATCHDOG 35 /* virtio watchdog */ ++#define VIRTIO_ID_CAN 36 /* virtio can */ ++#define VIRTIO_ID_DMABUF 37 /* virtio dmabuf */ ++#define VIRTIO_ID_PARAM_SERV 38 /* virtio parameter server */ ++#define VIRTIO_ID_AUDIO_POLICY 39 /* virtio audio policy */ + #define VIRTIO_ID_BT 40 /* virtio bluetooth */ ++#define VIRTIO_ID_GPIO 41 /* virtio gpio */ ++ ++/* ++ * Virtio Transitional IDs ++ */ ++ ++#define VIRTIO_TRANS_ID_NET 1000 /* transitional virtio net */ ++#define VIRTIO_TRANS_ID_BLOCK 1001 /* transitional virtio block */ ++#define VIRTIO_TRANS_ID_BALLOON 1002 /* transitional virtio balloon */ ++#define VIRTIO_TRANS_ID_CONSOLE 1003 /* transitional virtio console */ ++#define VIRTIO_TRANS_ID_SCSI 1004 /* transitional virtio SCSI */ ++#define VIRTIO_TRANS_ID_RNG 1005 /* transitional virtio rng */ ++#define VIRTIO_TRANS_ID_9P 1009 /* transitional virtio 9p console */ + + #endif /* _LINUX_VIRTIO_IDS_H */ +diff --git a/include/standard-headers/linux/virtio_vsock.h b/include/standard-headers/linux/virtio_vsock.h +index 3a23488e42..467e751b17 100644 +--- a/include/standard-headers/linux/virtio_vsock.h ++++ b/include/standard-headers/linux/virtio_vsock.h +@@ -97,7 +97,8 @@ enum virtio_vsock_shutdown { + + /* VIRTIO_VSOCK_OP_RW flags values */ + enum virtio_vsock_rw { +- VIRTIO_VSOCK_SEQ_EOR = 1, ++ VIRTIO_VSOCK_SEQ_EOM = 1, ++ VIRTIO_VSOCK_SEQ_EOR = 2, + }; + + #endif /* _LINUX_VIRTIO_VSOCK_H */ +diff --git a/linux-headers/asm-arm64/unistd.h b/linux-headers/asm-arm64/unistd.h +index f83a70e07d..ce2ee8f1e3 100644 +--- a/linux-headers/asm-arm64/unistd.h ++++ b/linux-headers/asm-arm64/unistd.h +@@ -20,5 +20,6 @@ + #define __ARCH_WANT_SET_GET_RLIMIT + #define __ARCH_WANT_TIME32_SYSCALLS + #define __ARCH_WANT_SYS_CLONE3 ++#define __ARCH_WANT_MEMFD_SECRET + + #include +diff --git a/linux-headers/asm-generic/unistd.h b/linux-headers/asm-generic/unistd.h +index f211961ce1..4557a8b608 100644 +--- a/linux-headers/asm-generic/unistd.h ++++ b/linux-headers/asm-generic/unistd.h +@@ -673,15 +673,15 @@ __SYSCALL(__NR_madvise, sys_madvise) + #define __NR_remap_file_pages 234 + __SYSCALL(__NR_remap_file_pages, sys_remap_file_pages) + #define __NR_mbind 235 +-__SC_COMP(__NR_mbind, sys_mbind, compat_sys_mbind) ++__SYSCALL(__NR_mbind, sys_mbind) + #define __NR_get_mempolicy 236 +-__SC_COMP(__NR_get_mempolicy, sys_get_mempolicy, compat_sys_get_mempolicy) ++__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy) + #define __NR_set_mempolicy 237 +-__SC_COMP(__NR_set_mempolicy, sys_set_mempolicy, compat_sys_set_mempolicy) ++__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy) + #define __NR_migrate_pages 238 +-__SC_COMP(__NR_migrate_pages, sys_migrate_pages, compat_sys_migrate_pages) ++__SYSCALL(__NR_migrate_pages, sys_migrate_pages) + #define __NR_move_pages 239 +-__SC_COMP(__NR_move_pages, sys_move_pages, compat_sys_move_pages) ++__SYSCALL(__NR_move_pages, sys_move_pages) + #endif + + #define __NR_rt_tgsigqueueinfo 240 +@@ -873,8 +873,18 @@ __SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule) + #define __NR_landlock_restrict_self 446 + __SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self) + ++#ifdef __ARCH_WANT_MEMFD_SECRET ++#define __NR_memfd_secret 447 ++__SYSCALL(__NR_memfd_secret, sys_memfd_secret) ++#endif ++#define __NR_process_mrelease 448 ++__SYSCALL(__NR_process_mrelease, sys_process_mrelease) ++ ++#define __NR_futex_waitv 449 ++__SYSCALL(__NR_futex_waitv, sys_futex_waitv) ++ + #undef __NR_syscalls +-#define __NR_syscalls 447 ++#define __NR_syscalls 450 + + /* + * 32 bit systems traditionally used different +diff --git a/linux-headers/asm-mips/unistd_n32.h b/linux-headers/asm-mips/unistd_n32.h +index 09cd297698..4b3e7ad1ec 100644 +--- a/linux-headers/asm-mips/unistd_n32.h ++++ b/linux-headers/asm-mips/unistd_n32.h +@@ -376,5 +376,6 @@ + #define __NR_landlock_create_ruleset (__NR_Linux + 444) + #define __NR_landlock_add_rule (__NR_Linux + 445) + #define __NR_landlock_restrict_self (__NR_Linux + 446) ++#define __NR_process_mrelease (__NR_Linux + 448) + + #endif /* _ASM_UNISTD_N32_H */ +diff --git a/linux-headers/asm-mips/unistd_n64.h b/linux-headers/asm-mips/unistd_n64.h +index 780e0cead6..488d9298d9 100644 +--- a/linux-headers/asm-mips/unistd_n64.h ++++ b/linux-headers/asm-mips/unistd_n64.h +@@ -352,5 +352,6 @@ + #define __NR_landlock_create_ruleset (__NR_Linux + 444) + #define __NR_landlock_add_rule (__NR_Linux + 445) + #define __NR_landlock_restrict_self (__NR_Linux + 446) ++#define __NR_process_mrelease (__NR_Linux + 448) + + #endif /* _ASM_UNISTD_N64_H */ +diff --git a/linux-headers/asm-mips/unistd_o32.h b/linux-headers/asm-mips/unistd_o32.h +index 06a2b3b55e..f47399870a 100644 +--- a/linux-headers/asm-mips/unistd_o32.h ++++ b/linux-headers/asm-mips/unistd_o32.h +@@ -422,5 +422,6 @@ + #define __NR_landlock_create_ruleset (__NR_Linux + 444) + #define __NR_landlock_add_rule (__NR_Linux + 445) + #define __NR_landlock_restrict_self (__NR_Linux + 446) ++#define __NR_process_mrelease (__NR_Linux + 448) + + #endif /* _ASM_UNISTD_O32_H */ +diff --git a/linux-headers/asm-powerpc/unistd_32.h b/linux-headers/asm-powerpc/unistd_32.h +index cd5a8a41b2..11d54696dc 100644 +--- a/linux-headers/asm-powerpc/unistd_32.h ++++ b/linux-headers/asm-powerpc/unistd_32.h +@@ -429,6 +429,7 @@ + #define __NR_landlock_create_ruleset 444 + #define __NR_landlock_add_rule 445 + #define __NR_landlock_restrict_self 446 ++#define __NR_process_mrelease 448 + + + #endif /* _ASM_UNISTD_32_H */ +diff --git a/linux-headers/asm-powerpc/unistd_64.h b/linux-headers/asm-powerpc/unistd_64.h +index 8458effa8d..cf740bab13 100644 +--- a/linux-headers/asm-powerpc/unistd_64.h ++++ b/linux-headers/asm-powerpc/unistd_64.h +@@ -401,6 +401,7 @@ + #define __NR_landlock_create_ruleset 444 + #define __NR_landlock_add_rule 445 + #define __NR_landlock_restrict_self 446 ++#define __NR_process_mrelease 448 + + + #endif /* _ASM_UNISTD_64_H */ +diff --git a/linux-headers/asm-s390/unistd_32.h b/linux-headers/asm-s390/unistd_32.h +index 0c3cd299e4..8f97d98128 100644 +--- a/linux-headers/asm-s390/unistd_32.h ++++ b/linux-headers/asm-s390/unistd_32.h +@@ -419,5 +419,6 @@ + #define __NR_landlock_create_ruleset 444 + #define __NR_landlock_add_rule 445 + #define __NR_landlock_restrict_self 446 ++#define __NR_process_mrelease 448 + + #endif /* _ASM_S390_UNISTD_32_H */ +diff --git a/linux-headers/asm-s390/unistd_64.h b/linux-headers/asm-s390/unistd_64.h +index 8dfc08b5e6..021ffc30e6 100644 +--- a/linux-headers/asm-s390/unistd_64.h ++++ b/linux-headers/asm-s390/unistd_64.h +@@ -367,5 +367,6 @@ + #define __NR_landlock_create_ruleset 444 + #define __NR_landlock_add_rule 445 + #define __NR_landlock_restrict_self 446 ++#define __NR_process_mrelease 448 + + #endif /* _ASM_S390_UNISTD_64_H */ +diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h +index a6c327f8ad..5a776a08f7 100644 +--- a/linux-headers/asm-x86/kvm.h ++++ b/linux-headers/asm-x86/kvm.h +@@ -295,6 +295,7 @@ struct kvm_debug_exit_arch { + #define KVM_GUESTDBG_USE_HW_BP 0x00020000 + #define KVM_GUESTDBG_INJECT_DB 0x00040000 + #define KVM_GUESTDBG_INJECT_BP 0x00080000 ++#define KVM_GUESTDBG_BLOCKIRQ 0x00100000 + + /* for KVM_SET_GUEST_DEBUG */ + struct kvm_guest_debug_arch { +@@ -503,4 +504,8 @@ struct kvm_pmu_event_filter { + #define KVM_PMU_EVENT_ALLOW 0 + #define KVM_PMU_EVENT_DENY 1 + ++/* for KVM_{GET,SET,HAS}_DEVICE_ATTR */ ++#define KVM_VCPU_TSC_CTRL 0 /* control group for the timestamp counter (TSC) */ ++#define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */ ++ + #endif /* _ASM_X86_KVM_H */ +diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h +index 66e96c0c68..9c9ffe312b 100644 +--- a/linux-headers/asm-x86/unistd_32.h ++++ b/linux-headers/asm-x86/unistd_32.h +@@ -437,6 +437,9 @@ + #define __NR_landlock_create_ruleset 444 + #define __NR_landlock_add_rule 445 + #define __NR_landlock_restrict_self 446 ++#define __NR_memfd_secret 447 ++#define __NR_process_mrelease 448 ++#define __NR_futex_waitv 449 + + + #endif /* _ASM_UNISTD_32_H */ +diff --git a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h +index b8ff6f14ee..084f1eef9c 100644 +--- a/linux-headers/asm-x86/unistd_64.h ++++ b/linux-headers/asm-x86/unistd_64.h +@@ -359,6 +359,9 @@ + #define __NR_landlock_create_ruleset 444 + #define __NR_landlock_add_rule 445 + #define __NR_landlock_restrict_self 446 ++#define __NR_memfd_secret 447 ++#define __NR_process_mrelease 448 ++#define __NR_futex_waitv 449 + + + #endif /* _ASM_UNISTD_64_H */ +diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h +index 06a1097c15..a2441affc2 100644 +--- a/linux-headers/asm-x86/unistd_x32.h ++++ b/linux-headers/asm-x86/unistd_x32.h +@@ -312,6 +312,9 @@ + #define __NR_landlock_create_ruleset (__X32_SYSCALL_BIT + 444) + #define __NR_landlock_add_rule (__X32_SYSCALL_BIT + 445) + #define __NR_landlock_restrict_self (__X32_SYSCALL_BIT + 446) ++#define __NR_memfd_secret (__X32_SYSCALL_BIT + 447) ++#define __NR_process_mrelease (__X32_SYSCALL_BIT + 448) ++#define __NR_futex_waitv (__X32_SYSCALL_BIT + 449) + #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512) + #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513) + #define __NR_ioctl (__X32_SYSCALL_BIT + 514) +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index bcaf66cc4d..02c5e7b7bb 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -269,6 +269,7 @@ struct kvm_xen_exit { + #define KVM_EXIT_AP_RESET_HOLD 32 + #define KVM_EXIT_X86_BUS_LOCK 33 + #define KVM_EXIT_XEN 34 ++#define KVM_EXIT_RISCV_SBI 35 + + /* For KVM_EXIT_INTERNAL_ERROR */ + /* Emulate instruction failed. */ +@@ -397,13 +398,23 @@ struct kvm_run { + * "ndata" is correct, that new fields are enumerated in "flags", + * and that each flag enumerates fields that are 64-bit aligned + * and sized (so that ndata+internal.data[] is valid/accurate). ++ * ++ * Space beyond the defined fields may be used to store arbitrary ++ * debug information relating to the emulation failure. It is ++ * accounted for in "ndata" but the format is unspecified and is ++ * not represented in "flags". Any such information is *not* ABI! + */ + struct { + __u32 suberror; + __u32 ndata; + __u64 flags; +- __u8 insn_size; +- __u8 insn_bytes[15]; ++ union { ++ struct { ++ __u8 insn_size; ++ __u8 insn_bytes[15]; ++ }; ++ }; ++ /* Arbitrary debug data may follow. */ + } emulation_failure; + /* KVM_EXIT_OSI */ + struct { +@@ -469,6 +480,13 @@ struct kvm_run { + } msr; + /* KVM_EXIT_XEN */ + struct kvm_xen_exit xen; ++ /* KVM_EXIT_RISCV_SBI */ ++ struct { ++ unsigned long extension_id; ++ unsigned long function_id; ++ unsigned long args[6]; ++ unsigned long ret[2]; ++ } riscv_sbi; + /* Fix the size of the union. */ + char padding[256]; + }; +@@ -1223,11 +1241,16 @@ struct kvm_irqfd { + + /* Do not use 1, KVM_CHECK_EXTENSION returned it before we had flags. */ + #define KVM_CLOCK_TSC_STABLE 2 ++#define KVM_CLOCK_REALTIME (1 << 2) ++#define KVM_CLOCK_HOST_TSC (1 << 3) + + struct kvm_clock_data { + __u64 clock; + __u32 flags; +- __u32 pad[9]; ++ __u32 pad0; ++ __u64 realtime; ++ __u64 host_tsc; ++ __u32 pad[4]; + }; + + /* For KVM_CAP_SW_TLB */ +@@ -1965,7 +1988,9 @@ struct kvm_stats_header { + #define KVM_STATS_TYPE_CUMULATIVE (0x0 << KVM_STATS_TYPE_SHIFT) + #define KVM_STATS_TYPE_INSTANT (0x1 << KVM_STATS_TYPE_SHIFT) + #define KVM_STATS_TYPE_PEAK (0x2 << KVM_STATS_TYPE_SHIFT) +-#define KVM_STATS_TYPE_MAX KVM_STATS_TYPE_PEAK ++#define KVM_STATS_TYPE_LINEAR_HIST (0x3 << KVM_STATS_TYPE_SHIFT) ++#define KVM_STATS_TYPE_LOG_HIST (0x4 << KVM_STATS_TYPE_SHIFT) ++#define KVM_STATS_TYPE_MAX KVM_STATS_TYPE_LOG_HIST + + #define KVM_STATS_UNIT_SHIFT 4 + #define KVM_STATS_UNIT_MASK (0xF << KVM_STATS_UNIT_SHIFT) +@@ -1988,8 +2013,9 @@ struct kvm_stats_header { + * @size: The number of data items for this stats. + * Every data item is of type __u64. + * @offset: The offset of the stats to the start of stat structure in +- * struture kvm or kvm_vcpu. +- * @unused: Unused field for future usage. Always 0 for now. ++ * structure kvm or kvm_vcpu. ++ * @bucket_size: A parameter value used for histogram stats. It is only used ++ * for linear histogram stats, specifying the size of the bucket; + * @name: The name string for the stats. Its size is indicated by the + * &kvm_stats_header->name_size. + */ +@@ -1998,7 +2024,7 @@ struct kvm_stats_desc { + __s16 exponent; + __u16 size; + __u32 offset; +- __u32 unused; ++ __u32 bucket_size; + char name[]; + }; + +-- +2.35.3 + diff --git a/SOURCES/kvm-migration-Add-migrate_use_tls-helper.patch b/SOURCES/kvm-migration-Add-migrate_use_tls-helper.patch new file mode 100644 index 0000000..8fdfe68 --- /dev/null +++ b/SOURCES/kvm-migration-Add-migrate_use_tls-helper.patch @@ -0,0 +1,106 @@ +From a7c6bc008fe006f005d5c15d3f883572ad5defc5 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Wed, 18 May 2022 02:52:25 -0300 +Subject: [PATCH 20/37] migration: Add migrate_use_tls() helper +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [20/26] 02afc2e60f1abbf6db45d83e54a18b66dad52426 +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +A lot of places check parameters.tls_creds in order to evaluate if TLS is +in use, and sometimes call migrate_get_current() just for that test. + +Add new helper function migrate_use_tls() in order to simplify testing +for TLS usage. + +Signed-off-by: Leonardo Bras +Reviewed-by: Juan Quintela +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. Berrangé +Message-Id: <20220513062836.965425-6-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit d2fafb6a6814a8998607d0baf691265032996a0f) +Signed-off-by: Leonardo Bras +--- + migration/channel.c | 3 +-- + migration/migration.c | 9 +++++++++ + migration/migration.h | 1 + + migration/multifd.c | 5 +---- + 4 files changed, 12 insertions(+), 6 deletions(-) + +diff --git a/migration/channel.c b/migration/channel.c +index c4fc000a1a..086b5c0d8b 100644 +--- a/migration/channel.c ++++ b/migration/channel.c +@@ -38,8 +38,7 @@ void migration_channel_process_incoming(QIOChannel *ioc) + trace_migration_set_incoming_channel( + ioc, object_get_typename(OBJECT(ioc))); + +- if (s->parameters.tls_creds && +- *s->parameters.tls_creds && ++ if (migrate_use_tls() && + !object_dynamic_cast(OBJECT(ioc), + TYPE_QIO_CHANNEL_TLS)) { + migration_tls_channel_process_incoming(s, ioc, &local_err); +diff --git a/migration/migration.c b/migration/migration.c +index b0fc3f68bd..8e28f2ee41 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2568,6 +2568,15 @@ bool migrate_use_zero_copy_send(void) + } + #endif + ++int migrate_use_tls(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.tls_creds && *s->parameters.tls_creds; ++} ++ + int migrate_use_xbzrle(void) + { + MigrationState *s; +diff --git a/migration/migration.h b/migration/migration.h +index 908098939f..9396b7e90a 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -344,6 +344,7 @@ bool migrate_use_zero_copy_send(void); + #else + #define migrate_use_zero_copy_send() (false) + #endif ++int migrate_use_tls(void); + int migrate_use_xbzrle(void); + uint64_t migrate_xbzrle_cache_size(void); + bool migrate_colo_enabled(void); +diff --git a/migration/multifd.c b/migration/multifd.c +index 3725226400..e53811f04a 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -789,14 +789,11 @@ static bool multifd_channel_connect(MultiFDSendParams *p, + QIOChannel *ioc, + Error *error) + { +- MigrationState *s = migrate_get_current(); +- + trace_multifd_set_outgoing_channel( + ioc, object_get_typename(OBJECT(ioc)), p->tls_hostname, error); + + if (!error) { +- if (s->parameters.tls_creds && +- *s->parameters.tls_creds && ++ if (migrate_use_tls() && + !object_dynamic_cast(OBJECT(ioc), + TYPE_QIO_CHANNEL_TLS)) { + multifd_tls_channel_connect(p, ioc, &error); +-- +2.35.3 + diff --git a/SOURCES/kvm-migration-Add-migration_incoming_transport_cleanup.patch b/SOURCES/kvm-migration-Add-migration_incoming_transport_cleanup.patch new file mode 100644 index 0000000..985bbe2 --- /dev/null +++ b/SOURCES/kvm-migration-Add-migration_incoming_transport_cleanup.patch @@ -0,0 +1,102 @@ +From 02eab793d82cd3c82d31f1e1f34d16fcc30caf0e Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 1 Mar 2022 16:39:14 +0800 +Subject: [PATCH 27/37] migration: Add migration_incoming_transport_cleanup() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 195: migration: Allow migrate-recover to run multiple times +RH-Commit: [1/2] 57b2a9a165ee7cb2d01519bd54eb8dc4185815e0 +RH-Bugzilla: 2097652 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Hanna Reitz + +Add a helper to cleanup the transport listener. + +When do it, we should also null-ify the cleanup hook and the data, then it's +even safe to call it multiple times. + +Move the socket_address_list cleanup altogether, because that's a mirror of the +listener channels and only for the purpose of query-migrate. Hence when +someone wants to cleanup the listener transport, it should also want to cleanup +the socket list too, always. + +No functional change intended. + +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Peter Xu +Message-Id: <20220301083925.33483-15-peterx@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit e031149c78489413038e934eec9f54ac699cf322) +Signed-off-by: Peter Xu +--- + migration/migration.c | 22 ++++++++++++++-------- + migration/migration.h | 1 + + 2 files changed, 15 insertions(+), 8 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index c8aa55d2fe..b787a36789 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -263,6 +263,19 @@ MigrationIncomingState *migration_incoming_get_current(void) + return current_incoming; + } + ++void migration_incoming_transport_cleanup(MigrationIncomingState *mis) ++{ ++ if (mis->socket_address_list) { ++ qapi_free_SocketAddressList(mis->socket_address_list); ++ mis->socket_address_list = NULL; ++ } ++ ++ if (mis->transport_cleanup) { ++ mis->transport_cleanup(mis->transport_data); ++ mis->transport_data = mis->transport_cleanup = NULL; ++ } ++} ++ + void migration_incoming_state_destroy(void) + { + struct MigrationIncomingState *mis = migration_incoming_get_current(); +@@ -283,10 +296,8 @@ void migration_incoming_state_destroy(void) + g_array_free(mis->postcopy_remote_fds, TRUE); + mis->postcopy_remote_fds = NULL; + } +- if (mis->transport_cleanup) { +- mis->transport_cleanup(mis->transport_data); +- } + ++ migration_incoming_transport_cleanup(mis); + qemu_event_reset(&mis->main_thread_load_event); + + if (mis->page_requested) { +@@ -294,11 +305,6 @@ void migration_incoming_state_destroy(void) + mis->page_requested = NULL; + } + +- if (mis->socket_address_list) { +- qapi_free_SocketAddressList(mis->socket_address_list); +- mis->socket_address_list = NULL; +- } +- + yank_unregister_instance(MIGRATION_YANK_INSTANCE); + } + +diff --git a/migration/migration.h b/migration/migration.h +index 9396b7e90a..243898e3be 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -130,6 +130,7 @@ struct MigrationIncomingState { + + MigrationIncomingState *migration_incoming_get_current(void); + void migration_incoming_state_destroy(void); ++void migration_incoming_transport_cleanup(MigrationIncomingState *mis); + /* + * Functions to work with blocktime context + */ +-- +2.35.3 + diff --git a/SOURCES/kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch b/SOURCES/kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch new file mode 100644 index 0000000..63e67c6 --- /dev/null +++ b/SOURCES/kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch @@ -0,0 +1,250 @@ +From 2a84bf822cae38f67458043cd379a22e0fd22485 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Wed, 18 May 2022 02:52:25 -0300 +Subject: [PATCH 19/37] migration: Add zero-copy-send parameter for QMP/HMP for + Linux +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [19/26] 44ec703088cad75fd6e504958527e81d3261c9df +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +Add property that allows zero-copy migration of memory pages +on the sending side, and also includes a helper function +migrate_use_zero_copy_send() to check if it's enabled. + +No code is introduced to actually do the migration, but it allow +future implementations to enable/disable this feature. + +On non-Linux builds this parameter is compiled-out. + +Signed-off-by: Leonardo Bras +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Juan Quintela +Acked-by: Markus Armbruster +Message-Id: <20220513062836.965425-5-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit abb6295b3ace5d17c3a65936913fc346616dbf14) +Signed-off-by: Leonardo Bras +--- + migration/migration.c | 32 ++++++++++++++++++++++++++++++++ + migration/migration.h | 5 +++++ + migration/socket.c | 11 +++++++++-- + monitor/hmp-cmds.c | 6 ++++++ + qapi/migration.json | 24 ++++++++++++++++++++++++ + 5 files changed, 76 insertions(+), 2 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 8a13294da6..b0fc3f68bd 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -888,6 +888,10 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) + params->multifd_zlib_level = s->parameters.multifd_zlib_level; + params->has_multifd_zstd_level = true; + params->multifd_zstd_level = s->parameters.multifd_zstd_level; ++#ifdef CONFIG_LINUX ++ params->has_zero_copy_send = true; ++ params->zero_copy_send = s->parameters.zero_copy_send; ++#endif + params->has_xbzrle_cache_size = true; + params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; + params->has_max_postcopy_bandwidth = true; +@@ -1541,6 +1545,11 @@ static void migrate_params_test_apply(MigrateSetParameters *params, + if (params->has_multifd_compression) { + dest->multifd_compression = params->multifd_compression; + } ++#ifdef CONFIG_LINUX ++ if (params->has_zero_copy_send) { ++ dest->zero_copy_send = params->zero_copy_send; ++ } ++#endif + if (params->has_xbzrle_cache_size) { + dest->xbzrle_cache_size = params->xbzrle_cache_size; + } +@@ -1653,6 +1662,11 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) + if (params->has_multifd_compression) { + s->parameters.multifd_compression = params->multifd_compression; + } ++#ifdef CONFIG_LINUX ++ if (params->has_zero_copy_send) { ++ s->parameters.zero_copy_send = params->zero_copy_send; ++ } ++#endif + if (params->has_xbzrle_cache_size) { + s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; + xbzrle_cache_resize(params->xbzrle_cache_size, errp); +@@ -2543,6 +2557,17 @@ int migrate_multifd_zstd_level(void) + return s->parameters.multifd_zstd_level; + } + ++#ifdef CONFIG_LINUX ++bool migrate_use_zero_copy_send(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.zero_copy_send; ++} ++#endif ++ + int migrate_use_xbzrle(void) + { + MigrationState *s; +@@ -4193,6 +4218,10 @@ static Property migration_properties[] = { + DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, + parameters.multifd_zstd_level, + DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), ++#ifdef CONFIG_LINUX ++ DEFINE_PROP_BOOL("zero_copy_send", MigrationState, ++ parameters.zero_copy_send, false), ++#endif + DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, + parameters.xbzrle_cache_size, + DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), +@@ -4290,6 +4319,9 @@ static void migration_instance_init(Object *obj) + params->has_multifd_compression = true; + params->has_multifd_zlib_level = true; + params->has_multifd_zstd_level = true; ++#ifdef CONFIG_LINUX ++ params->has_zero_copy_send = true; ++#endif + params->has_xbzrle_cache_size = true; + params->has_max_postcopy_bandwidth = true; + params->has_max_cpu_throttle = true; +diff --git a/migration/migration.h b/migration/migration.h +index d016cedd9d..908098939f 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -339,6 +339,11 @@ MultiFDCompression migrate_multifd_compression(void); + int migrate_multifd_zlib_level(void); + int migrate_multifd_zstd_level(void); + ++#ifdef CONFIG_LINUX ++bool migrate_use_zero_copy_send(void); ++#else ++#define migrate_use_zero_copy_send() (false) ++#endif + int migrate_use_xbzrle(void); + uint64_t migrate_xbzrle_cache_size(void); + bool migrate_colo_enabled(void); +diff --git a/migration/socket.c b/migration/socket.c +index 05705a32d8..3754d8f72c 100644 +--- a/migration/socket.c ++++ b/migration/socket.c +@@ -74,9 +74,16 @@ static void socket_outgoing_migration(QIOTask *task, + + if (qio_task_propagate_error(task, &err)) { + trace_migration_socket_outgoing_error(error_get_pretty(err)); +- } else { +- trace_migration_socket_outgoing_connected(data->hostname); ++ goto out; + } ++ ++ trace_migration_socket_outgoing_connected(data->hostname); ++ ++ if (migrate_use_zero_copy_send()) { ++ error_setg(&err, "Zero copy send not available in migration"); ++ } ++ ++out: + migration_channel_connect(data->s, sioc, data->hostname, err); + object_unref(OBJECT(sioc)); + } +diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c +index 2669156b28..e02da5008b 100644 +--- a/monitor/hmp-cmds.c ++++ b/monitor/hmp-cmds.c +@@ -1297,6 +1297,12 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) + p->has_multifd_zstd_level = true; + visit_type_uint8(v, param, &p->multifd_zstd_level, &err); + break; ++#ifdef CONFIG_LINUX ++ case MIGRATION_PARAMETER_ZERO_COPY_SEND: ++ p->has_zero_copy_send = true; ++ visit_type_bool(v, param, &p->zero_copy_send, &err); ++ break; ++#endif + case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE: + p->has_xbzrle_cache_size = true; + if (!visit_type_size(v, param, &cache_size, &err)) { +diff --git a/qapi/migration.json b/qapi/migration.json +index bbfd48cf0b..59b5c5780b 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -730,6 +730,13 @@ + # will consume more CPU. + # Defaults to 1. (Since 5.0) + # ++# @zero-copy-send: Controls behavior on sending memory pages on migration. ++# When true, enables a zero-copy mechanism for sending ++# memory pages, if host supports it. ++# Requires that QEMU be permitted to use locked memory ++# for guest RAM pages. ++# Defaults to false. (Since 7.1) ++# + # @block-bitmap-mapping: Maps block nodes and bitmaps on them to + # aliases for the purpose of dirty bitmap migration. Such + # aliases may for example be the corresponding names on the +@@ -769,6 +776,7 @@ + 'xbzrle-cache-size', 'max-postcopy-bandwidth', + 'max-cpu-throttle', 'multifd-compression', + 'multifd-zlib-level' ,'multifd-zstd-level', ++ { 'name': 'zero-copy-send', 'if' : 'CONFIG_LINUX'}, + 'block-bitmap-mapping' ] } + + ## +@@ -895,6 +903,13 @@ + # will consume more CPU. + # Defaults to 1. (Since 5.0) + # ++# @zero-copy-send: Controls behavior on sending memory pages on migration. ++# When true, enables a zero-copy mechanism for sending ++# memory pages, if host supports it. ++# Requires that QEMU be permitted to use locked memory ++# for guest RAM pages. ++# Defaults to false. (Since 7.1) ++# + # @block-bitmap-mapping: Maps block nodes and bitmaps on them to + # aliases for the purpose of dirty bitmap migration. Such + # aliases may for example be the corresponding names on the +@@ -949,6 +964,7 @@ + '*multifd-compression': 'MultiFDCompression', + '*multifd-zlib-level': 'uint8', + '*multifd-zstd-level': 'uint8', ++ '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' }, + '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } + + ## +@@ -1095,6 +1111,13 @@ + # will consume more CPU. + # Defaults to 1. (Since 5.0) + # ++# @zero-copy-send: Controls behavior on sending memory pages on migration. ++# When true, enables a zero-copy mechanism for sending ++# memory pages, if host supports it. ++# Requires that QEMU be permitted to use locked memory ++# for guest RAM pages. ++# Defaults to false. (Since 7.1) ++# + # @block-bitmap-mapping: Maps block nodes and bitmaps on them to + # aliases for the purpose of dirty bitmap migration. Such + # aliases may for example be the corresponding names on the +@@ -1147,6 +1170,7 @@ + '*multifd-compression': 'MultiFDCompression', + '*multifd-zlib-level': 'uint8', + '*multifd-zstd-level': 'uint8', ++ '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' }, + '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } + + ## +-- +2.35.3 + diff --git a/SOURCES/kvm-migration-All-this-fields-are-unsigned.patch b/SOURCES/kvm-migration-All-this-fields-are-unsigned.patch new file mode 100644 index 0000000..245e2b4 --- /dev/null +++ b/SOURCES/kvm-migration-All-this-fields-are-unsigned.patch @@ -0,0 +1,329 @@ +From b21f18afceba8231c78d29e66f58516e12c28d22 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:23 -0300 +Subject: [PATCH 10/37] migration: All this fields are unsigned +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [10/26] 2c3ee27aae334db3b283ab7ef580f58e396e569d +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +So printing it as %d is wrong. Notice that for the channel id, that +is an uint8_t, but I changed it anyways for consistency. + +Signed-off-by: Juan Quintela +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Peter Xu +(cherry picked from commit 04e114049406dbb69fc9043c795ddd28fdba31a6) +Signed-off-by: Leonardo Bras +--- + migration/multifd-zlib.c | 20 ++++++++++---------- + migration/multifd-zstd.c | 24 ++++++++++++------------ + migration/multifd.c | 16 ++++++++-------- + migration/trace-events | 26 +++++++++++++------------- + 4 files changed, 43 insertions(+), 43 deletions(-) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index a1950a4588..a987e4a26c 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -52,7 +52,7 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp) + zs->opaque = Z_NULL; + if (deflateInit(zs, migrate_multifd_zlib_level()) != Z_OK) { + g_free(z); +- error_setg(errp, "multifd %d: deflate init failed", p->id); ++ error_setg(errp, "multifd %u: deflate init failed", p->id); + return -1; + } + /* We will never have more than page_count pages */ +@@ -62,7 +62,7 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp) + if (!z->zbuff) { + deflateEnd(&z->zs); + g_free(z); +- error_setg(errp, "multifd %d: out of memory for zbuff", p->id); ++ error_setg(errp, "multifd %u: out of memory for zbuff", p->id); + return -1; + } + p->data = z; +@@ -134,12 +134,12 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + ret = deflate(zs, flush); + } while (ret == Z_OK && zs->avail_in && zs->avail_out); + if (ret == Z_OK && zs->avail_in) { +- error_setg(errp, "multifd %d: deflate failed to compress all input", ++ error_setg(errp, "multifd %u: deflate failed to compress all input", + p->id); + return -1; + } + if (ret != Z_OK) { +- error_setg(errp, "multifd %d: deflate returned %d instead of Z_OK", ++ error_setg(errp, "multifd %u: deflate returned %d instead of Z_OK", + p->id, ret); + return -1; + } +@@ -193,7 +193,7 @@ static int zlib_recv_setup(MultiFDRecvParams *p, Error **errp) + zs->avail_in = 0; + zs->next_in = Z_NULL; + if (inflateInit(zs) != Z_OK) { +- error_setg(errp, "multifd %d: inflate init failed", p->id); ++ error_setg(errp, "multifd %u: inflate init failed", p->id); + return -1; + } + /* We will never have more than page_count pages */ +@@ -203,7 +203,7 @@ static int zlib_recv_setup(MultiFDRecvParams *p, Error **errp) + z->zbuff = g_try_malloc(z->zbuff_len); + if (!z->zbuff) { + inflateEnd(zs); +- error_setg(errp, "multifd %d: out of memory for zbuff", p->id); ++ error_setg(errp, "multifd %u: out of memory for zbuff", p->id); + return -1; + } + return 0; +@@ -252,7 +252,7 @@ static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp) + int i; + + if (flags != MULTIFD_FLAG_ZLIB) { +- error_setg(errp, "multifd %d: flags received %x flags expected %x", ++ error_setg(errp, "multifd %u: flags received %x flags expected %x", + p->id, flags, MULTIFD_FLAG_ZLIB); + return -1; + } +@@ -289,19 +289,19 @@ static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp) + } while (ret == Z_OK && zs->avail_in + && (zs->total_out - start) < page_size); + if (ret == Z_OK && (zs->total_out - start) < page_size) { +- error_setg(errp, "multifd %d: inflate generated too few output", ++ error_setg(errp, "multifd %u: inflate generated too few output", + p->id); + return -1; + } + if (ret != Z_OK) { +- error_setg(errp, "multifd %d: inflate returned %d instead of Z_OK", ++ error_setg(errp, "multifd %u: inflate returned %d instead of Z_OK", + p->id, ret); + return -1; + } + } + out_size = zs->total_out - out_size; + if (out_size != expected_size) { +- error_setg(errp, "multifd %d: packet size received %d size expected %d", ++ error_setg(errp, "multifd %u: packet size received %u size expected %u", + p->id, out_size, expected_size); + return -1; + } +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index d9ed42622b..2185a83eac 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -56,7 +56,7 @@ static int zstd_send_setup(MultiFDSendParams *p, Error **errp) + z->zcs = ZSTD_createCStream(); + if (!z->zcs) { + g_free(z); +- error_setg(errp, "multifd %d: zstd createCStream failed", p->id); ++ error_setg(errp, "multifd %u: zstd createCStream failed", p->id); + return -1; + } + +@@ -64,7 +64,7 @@ static int zstd_send_setup(MultiFDSendParams *p, Error **errp) + if (ZSTD_isError(res)) { + ZSTD_freeCStream(z->zcs); + g_free(z); +- error_setg(errp, "multifd %d: initCStream failed with error %s", ++ error_setg(errp, "multifd %u: initCStream failed with error %s", + p->id, ZSTD_getErrorName(res)); + return -1; + } +@@ -75,7 +75,7 @@ static int zstd_send_setup(MultiFDSendParams *p, Error **errp) + if (!z->zbuff) { + ZSTD_freeCStream(z->zcs); + g_free(z); +- error_setg(errp, "multifd %d: out of memory for zbuff", p->id); ++ error_setg(errp, "multifd %u: out of memory for zbuff", p->id); + return -1; + } + return 0; +@@ -146,12 +146,12 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + } while (ret > 0 && (z->in.size - z->in.pos > 0) + && (z->out.size - z->out.pos > 0)); + if (ret > 0 && (z->in.size - z->in.pos > 0)) { +- error_setg(errp, "multifd %d: compressStream buffer too small", ++ error_setg(errp, "multifd %u: compressStream buffer too small", + p->id); + return -1; + } + if (ZSTD_isError(ret)) { +- error_setg(errp, "multifd %d: compressStream error %s", ++ error_setg(errp, "multifd %u: compressStream error %s", + p->id, ZSTD_getErrorName(ret)); + return -1; + } +@@ -201,7 +201,7 @@ static int zstd_recv_setup(MultiFDRecvParams *p, Error **errp) + z->zds = ZSTD_createDStream(); + if (!z->zds) { + g_free(z); +- error_setg(errp, "multifd %d: zstd createDStream failed", p->id); ++ error_setg(errp, "multifd %u: zstd createDStream failed", p->id); + return -1; + } + +@@ -209,7 +209,7 @@ static int zstd_recv_setup(MultiFDRecvParams *p, Error **errp) + if (ZSTD_isError(ret)) { + ZSTD_freeDStream(z->zds); + g_free(z); +- error_setg(errp, "multifd %d: initDStream failed with error %s", ++ error_setg(errp, "multifd %u: initDStream failed with error %s", + p->id, ZSTD_getErrorName(ret)); + return -1; + } +@@ -222,7 +222,7 @@ static int zstd_recv_setup(MultiFDRecvParams *p, Error **errp) + if (!z->zbuff) { + ZSTD_freeDStream(z->zds); + g_free(z); +- error_setg(errp, "multifd %d: out of memory for zbuff", p->id); ++ error_setg(errp, "multifd %u: out of memory for zbuff", p->id); + return -1; + } + return 0; +@@ -270,7 +270,7 @@ static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp) + int i; + + if (flags != MULTIFD_FLAG_ZSTD) { +- error_setg(errp, "multifd %d: flags received %x flags expected %x", ++ error_setg(errp, "multifd %u: flags received %x flags expected %x", + p->id, flags, MULTIFD_FLAG_ZSTD); + return -1; + } +@@ -302,19 +302,19 @@ static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp) + } while (ret > 0 && (z->in.size - z->in.pos > 0) + && (z->out.pos < page_size)); + if (ret > 0 && (z->out.pos < page_size)) { +- error_setg(errp, "multifd %d: decompressStream buffer too small", ++ error_setg(errp, "multifd %u: decompressStream buffer too small", + p->id); + return -1; + } + if (ZSTD_isError(ret)) { +- error_setg(errp, "multifd %d: decompressStream returned %s", ++ error_setg(errp, "multifd %u: decompressStream returned %s", + p->id, ZSTD_getErrorName(ret)); + return ret; + } + out_size += z->out.pos; + } + if (out_size != expected_size) { +- error_setg(errp, "multifd %d: packet size received %d size expected %d", ++ error_setg(errp, "multifd %u: packet size received %u size expected %u", + p->id, out_size, expected_size); + return -1; + } +diff --git a/migration/multifd.c b/migration/multifd.c +index 0533da154a..d0d19470f9 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -148,7 +148,7 @@ static int nocomp_recv_pages(MultiFDRecvParams *p, Error **errp) + uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; + + if (flags != MULTIFD_FLAG_NOCOMP) { +- error_setg(errp, "multifd %d: flags received %x flags expected %x", ++ error_setg(errp, "multifd %u: flags received %x flags expected %x", + p->id, flags, MULTIFD_FLAG_NOCOMP); + return -1; + } +@@ -212,8 +212,8 @@ static int multifd_recv_initial_packet(QIOChannel *c, Error **errp) + } + + if (msg.version != MULTIFD_VERSION) { +- error_setg(errp, "multifd: received packet version %d " +- "expected %d", msg.version, MULTIFD_VERSION); ++ error_setg(errp, "multifd: received packet version %u " ++ "expected %u", msg.version, MULTIFD_VERSION); + return -1; + } + +@@ -229,8 +229,8 @@ static int multifd_recv_initial_packet(QIOChannel *c, Error **errp) + } + + if (msg.id > migrate_multifd_channels()) { +- error_setg(errp, "multifd: received channel version %d " +- "expected %d", msg.version, MULTIFD_VERSION); ++ error_setg(errp, "multifd: received channel version %u " ++ "expected %u", msg.version, MULTIFD_VERSION); + return -1; + } + +@@ -303,7 +303,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + packet->version = be32_to_cpu(packet->version); + if (packet->version != MULTIFD_VERSION) { + error_setg(errp, "multifd: received packet " +- "version %d and expected version %d", ++ "version %u and expected version %u", + packet->version, MULTIFD_VERSION); + return -1; + } +@@ -317,7 +317,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + */ + if (packet->pages_alloc > pages_max * 100) { + error_setg(errp, "multifd: received packet " +- "with size %d and expected a maximum size of %d", ++ "with size %u and expected a maximum size of %u", + packet->pages_alloc, pages_max * 100) ; + return -1; + } +@@ -333,7 +333,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + p->pages->num = be32_to_cpu(packet->pages_used); + if (p->pages->num > packet->pages_alloc) { + error_setg(errp, "multifd: received packet " +- "with %d pages and expected maximum pages are %d", ++ "with %u pages and expected maximum pages are %u", + p->pages->num, packet->pages_alloc) ; + return -1; + } +diff --git a/migration/trace-events b/migration/trace-events +index b48d873b8a..5172cb3b3d 100644 +--- a/migration/trace-events ++++ b/migration/trace-events +@@ -115,23 +115,23 @@ ram_write_tracking_ramblock_start(const char *block_id, size_t page_size, void * + ram_write_tracking_ramblock_stop(const char *block_id, size_t page_size, void *addr, size_t length) "%s: page_size: %zu addr: %p length: %zu" + + # multifd.c +-multifd_new_send_channel_async(uint8_t id) "channel %d" +-multifd_recv(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %d packet_num %" PRIu64 " pages %d flags 0x%x next packet size %d" +-multifd_recv_new_channel(uint8_t id) "channel %d" ++multifd_new_send_channel_async(uint8_t id) "channel %u" ++multifd_recv(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " pages %u flags 0x%x next packet size %u" ++multifd_recv_new_channel(uint8_t id) "channel %u" + multifd_recv_sync_main(long packet_num) "packet num %ld" +-multifd_recv_sync_main_signal(uint8_t id) "channel %d" +-multifd_recv_sync_main_wait(uint8_t id) "channel %d" ++multifd_recv_sync_main_signal(uint8_t id) "channel %u" ++multifd_recv_sync_main_wait(uint8_t id) "channel %u" + multifd_recv_terminate_threads(bool error) "error %d" +-multifd_recv_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %d packets %" PRIu64 " pages %" PRIu64 +-multifd_recv_thread_start(uint8_t id) "%d" +-multifd_send(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %d packet_num %" PRIu64 " pages %d flags 0x%x next packet size %d" +-multifd_send_error(uint8_t id) "channel %d" ++multifd_recv_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %u packets %" PRIu64 " pages %" PRIu64 ++multifd_recv_thread_start(uint8_t id) "%u" ++multifd_send(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " pages %u flags 0x%x next packet size %u" ++multifd_send_error(uint8_t id) "channel %u" + multifd_send_sync_main(long packet_num) "packet num %ld" +-multifd_send_sync_main_signal(uint8_t id) "channel %d" +-multifd_send_sync_main_wait(uint8_t id) "channel %d" ++multifd_send_sync_main_signal(uint8_t id) "channel %u" ++multifd_send_sync_main_wait(uint8_t id) "channel %u" + multifd_send_terminate_threads(bool error) "error %d" +-multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %d packets %" PRIu64 " pages %" PRIu64 +-multifd_send_thread_start(uint8_t id) "%d" ++multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %u packets %" PRIu64 " pages %" PRIu64 ++multifd_send_thread_start(uint8_t id) "%u" + multifd_tls_outgoing_handshake_start(void *ioc, void *tioc, const char *hostname) "ioc=%p tioc=%p hostname=%s" + multifd_tls_outgoing_handshake_error(void *ioc, const char *err) "ioc=%p err=%s" + multifd_tls_outgoing_handshake_complete(void *ioc) "ioc=%p" +-- +2.35.3 + diff --git a/SOURCES/kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch b/SOURCES/kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch new file mode 100644 index 0000000..b4f1e68 --- /dev/null +++ b/SOURCES/kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch @@ -0,0 +1,98 @@ +From f5be3d8a5944679c1239b974e0f910f1afe4f532 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Thu, 31 Mar 2022 11:08:45 -0400 +Subject: [PATCH 28/37] migration: Allow migrate-recover to run multiple times +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 195: migration: Allow migrate-recover to run multiple times +RH-Commit: [2/2] a2e6b02007a06c9c7f5237289095811c7d7ca1f1 +RH-Bugzilla: 2097652 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Hanna Reitz + +Previously migration didn't have an easy way to cleanup the listening +transport, migrate recovery only allows to execute once. That's done with a +trick flag in postcopy_recover_triggered. + +Now the facility is already there. + +Drop postcopy_recover_triggered and instead allows a new migrate-recover to +release the previous listener transport. + +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Peter Xu +Message-Id: <20220331150857.74406-8-peterx@redhat.com> +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 08401c0426bc1a5ce4609afd1cda5dd39abbf9fa) +Signed-off-by: Peter Xu +--- + migration/migration.c | 13 ++----------- + migration/migration.h | 1 - + migration/savevm.c | 3 --- + 3 files changed, 2 insertions(+), 15 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index b787a36789..616c3ff32e 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2158,11 +2158,8 @@ void qmp_migrate_recover(const char *uri, Error **errp) + return; + } + +- if (qatomic_cmpxchg(&mis->postcopy_recover_triggered, +- false, true) == true) { +- error_setg(errp, "Migrate recovery is triggered already"); +- return; +- } ++ /* If there's an existing transport, release it */ ++ migration_incoming_transport_cleanup(mis); + + /* + * Note that this call will never start a real migration; it will +@@ -2170,12 +2167,6 @@ void qmp_migrate_recover(const char *uri, Error **errp) + * to continue using that newly established channel. + */ + qemu_start_incoming_migration(uri, errp); +- +- /* Safe to dereference with the assert above */ +- if (*errp) { +- /* Reset the flag so user could still retry */ +- qatomic_set(&mis->postcopy_recover_triggered, false); +- } + } + + void qmp_migrate_pause(Error **errp) +diff --git a/migration/migration.h b/migration/migration.h +index 243898e3be..0ae2133326 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -103,7 +103,6 @@ struct MigrationIncomingState { + struct PostcopyBlocktimeContext *blocktime_ctx; + + /* notify PAUSED postcopy incoming migrations to try to continue */ +- bool postcopy_recover_triggered; + QemuSemaphore postcopy_pause_sem_dst; + QemuSemaphore postcopy_pause_sem_fault; + +diff --git a/migration/savevm.c b/migration/savevm.c +index 0bef031acb..b8382aaa64 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -2568,9 +2568,6 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis) + + assert(migrate_postcopy_ram()); + +- /* Clear the triggered bit to allow one recovery */ +- mis->postcopy_recover_triggered = false; +- + /* + * Unregister yank with either from/to src would work, since ioc behind it + * is the same +-- +2.35.3 + diff --git a/SOURCES/kvm-migration-Avoid-false-positive-on-non-supported-scen.patch b/SOURCES/kvm-migration-Avoid-false-positive-on-non-supported-scen.patch new file mode 100644 index 0000000..f1a7d49 --- /dev/null +++ b/SOURCES/kvm-migration-Avoid-false-positive-on-non-supported-scen.patch @@ -0,0 +1,93 @@ +From 097f72427f4f5da4fdcdbeee52aea0c1f67d54dc Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Tue, 19 Jul 2022 09:23:45 -0300 +Subject: [PATCH 6/9] migration: Avoid false-positive on non-supported + scenarios for zero-copy-send +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 201: Zero-copy-send fixes + improvements +RH-Commit: [6/8] f23195f3ab4f6eba0463f38e5971ccaccdac2cfd +RH-Bugzilla: 2110203 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu +RH-Acked-by: Miroslav Rezanina + +Migration with zero-copy-send currently has it's limitations, as it can't +be used with TLS nor any kind of compression. In such scenarios, it should +output errors during parameter / capability setting. + +But currently there are some ways of setting this not-supported scenarios +without printing the error message: + +!) For 'compression' capability, it works by enabling it together with +zero-copy-send. This happens because the validity test for zero-copy uses +the helper unction migrate_use_compression(), which check for compression +presence in s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS]. + +The point here is: the validity test happens before the capability gets +enabled. If all of them get enabled together, this test will not return +error. + +In order to fix that, replace migrate_use_compression() by directly testing +the cap_list parameter migrate_caps_check(). + +2) For features enabled by parameters such as TLS & 'multifd_compression', +there was also a possibility of setting non-supported scenarios: setting +zero-copy-send first, then setting the unsupported parameter. + +In order to fix that, also add a check for parameters conflicting with +zero-copy-send on migrate_params_check(). + +3) XBZRLE is also a compression capability, so it makes sense to also add +it to the list of capabilities which are not supported with zero-copy-send. + +Fixes: 1abaec9a1b2c ("migration: Change zero_copy_send from migration parameter to migration capability") +Signed-off-by: Leonardo Bras +Message-Id: <20220719122345.253713-1-leobras@redhat.com> +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 90eb69e4f1a16b388d0483543bf6bfc69a9966e4) +Signed-off-by: Leonardo Bras +--- + migration/migration.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 952a26c5c2..35b3197eff 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1260,7 +1260,9 @@ static bool migrate_caps_check(bool *cap_list, + #ifdef CONFIG_LINUX + if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && + (!cap_list[MIGRATION_CAPABILITY_MULTIFD] || +- migrate_use_compression() || ++ cap_list[MIGRATION_CAPABILITY_COMPRESS] || ++ cap_list[MIGRATION_CAPABILITY_XBZRLE] || ++ migrate_multifd_compression() || + migrate_use_tls())) { + error_setg(errp, + "Zero copy only available for non-compressed non-TLS multifd migration"); +@@ -1497,6 +1499,17 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) + error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); + return false; + } ++ ++#ifdef CONFIG_LINUX ++ if (migrate_use_zero_copy_send() && ++ ((params->has_multifd_compression && params->multifd_compression) || ++ (params->has_tls_creds && params->tls_creds && *params->tls_creds))) { ++ error_setg(errp, ++ "Zero copy only available for non-compressed non-TLS multifd migration"); ++ return false; ++ } ++#endif ++ + return true; + } + +-- +2.31.1 + diff --git a/SOURCES/kvm-migration-Change-zero_copy_send-from-migration-param.patch b/SOURCES/kvm-migration-Change-zero_copy_send-from-migration-param.patch new file mode 100644 index 0000000..b1f576d --- /dev/null +++ b/SOURCES/kvm-migration-Change-zero_copy_send-from-migration-param.patch @@ -0,0 +1,289 @@ +From 70108ff9ffe77062116e47670c0e0c2396529f88 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Mon, 20 Jun 2022 02:39:45 -0300 +Subject: [PATCH 26/37] migration: Change zero_copy_send from migration + parameter to migration capability +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [26/26] ea61e6cbdbe47611bd22d18988e1c4c4e8357cc3 +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +When originally implemented, zero_copy_send was designed as a Migration +paramenter. + +But taking into account how is that supposed to work, and how +the difference between a capability and a parameter, it only makes sense +that zero-copy-send would work better as a capability. + +Taking into account how recently the change got merged, it was decided +that it's still time to make it right, and convert zero_copy_send into +a Migration capability. + +Signed-off-by: Leonardo Bras +Reviewed-by: Juan Quintela +Acked-by: Markus Armbruster +Acked-by: Peter Xu +Signed-off-by: Juan Quintela +Signed-off-by: Dr. David Alan Gilbert + dgilbert: always define the capability, even on non-Linux but error if +set; avoids build problems with the capability +(cherry picked from commit 1abaec9a1b2c23f7aa94709a422128d9e42c3e0b) +Signed-off-by: Leonardo Bras +--- + migration/migration.c | 58 +++++++++++++++++++------------------------ + monitor/hmp-cmds.c | 6 ----- + qapi/migration.json | 33 +++++++----------------- + 3 files changed, 34 insertions(+), 63 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 5357efd348..c8aa55d2fe 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -162,7 +162,8 @@ INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, + MIGRATION_CAPABILITY_COMPRESS, + MIGRATION_CAPABILITY_XBZRLE, + MIGRATION_CAPABILITY_X_COLO, +- MIGRATION_CAPABILITY_VALIDATE_UUID); ++ MIGRATION_CAPABILITY_VALIDATE_UUID, ++ MIGRATION_CAPABILITY_ZERO_COPY_SEND); + + bool migrate_pre_2_2; + +@@ -888,10 +889,6 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) + params->multifd_zlib_level = s->parameters.multifd_zlib_level; + params->has_multifd_zstd_level = true; + params->multifd_zstd_level = s->parameters.multifd_zstd_level; +-#ifdef CONFIG_LINUX +- params->has_zero_copy_send = true; +- params->zero_copy_send = s->parameters.zero_copy_send; +-#endif + params->has_xbzrle_cache_size = true; + params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; + params->has_max_postcopy_bandwidth = true; +@@ -1249,6 +1246,24 @@ static bool migrate_caps_check(bool *cap_list, + } + } + ++#ifdef CONFIG_LINUX ++ if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && ++ (!cap_list[MIGRATION_CAPABILITY_MULTIFD] || ++ migrate_use_compression() || ++ migrate_use_tls())) { ++ error_setg(errp, ++ "Zero copy only available for non-compressed non-TLS multifd migration"); ++ return false; ++ } ++#else ++ if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) { ++ error_setg(errp, ++ "Zero copy currently only available on Linux"); ++ return false; ++ } ++#endif ++ ++ + /* incoming side only */ + if (runstate_check(RUN_STATE_INMIGRATE) && + !migrate_multifd_is_allowed() && +@@ -1471,16 +1486,6 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) + error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); + return false; + } +-#ifdef CONFIG_LINUX +- if (params->zero_copy_send && +- (!migrate_use_multifd() || +- params->multifd_compression != MULTIFD_COMPRESSION_NONE || +- (params->tls_creds && *params->tls_creds))) { +- error_setg(errp, +- "Zero copy only available for non-compressed non-TLS multifd migration"); +- return false; +- } +-#endif + return true; + } + +@@ -1554,11 +1559,6 @@ static void migrate_params_test_apply(MigrateSetParameters *params, + if (params->has_multifd_compression) { + dest->multifd_compression = params->multifd_compression; + } +-#ifdef CONFIG_LINUX +- if (params->has_zero_copy_send) { +- dest->zero_copy_send = params->zero_copy_send; +- } +-#endif + if (params->has_xbzrle_cache_size) { + dest->xbzrle_cache_size = params->xbzrle_cache_size; + } +@@ -1671,11 +1671,6 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) + if (params->has_multifd_compression) { + s->parameters.multifd_compression = params->multifd_compression; + } +-#ifdef CONFIG_LINUX +- if (params->has_zero_copy_send) { +- s->parameters.zero_copy_send = params->zero_copy_send; +- } +-#endif + if (params->has_xbzrle_cache_size) { + s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; + xbzrle_cache_resize(params->xbzrle_cache_size, errp); +@@ -2573,7 +2568,7 @@ bool migrate_use_zero_copy_send(void) + + s = migrate_get_current(); + +- return s->parameters.zero_copy_send; ++ return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; + } + #endif + +@@ -4236,10 +4231,6 @@ static Property migration_properties[] = { + DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, + parameters.multifd_zstd_level, + DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), +-#ifdef CONFIG_LINUX +- DEFINE_PROP_BOOL("zero_copy_send", MigrationState, +- parameters.zero_copy_send, false), +-#endif + DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, + parameters.xbzrle_cache_size, + DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), +@@ -4277,6 +4268,10 @@ static Property migration_properties[] = { + DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD), + DEFINE_PROP_MIG_CAP("x-background-snapshot", + MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT), ++#ifdef CONFIG_LINUX ++ DEFINE_PROP_MIG_CAP("x-zero-copy-send", ++ MIGRATION_CAPABILITY_ZERO_COPY_SEND), ++#endif + + DEFINE_PROP_END_OF_LIST(), + }; +@@ -4337,9 +4332,6 @@ static void migration_instance_init(Object *obj) + params->has_multifd_compression = true; + params->has_multifd_zlib_level = true; + params->has_multifd_zstd_level = true; +-#ifdef CONFIG_LINUX +- params->has_zero_copy_send = true; +-#endif + params->has_xbzrle_cache_size = true; + params->has_max_postcopy_bandwidth = true; + params->has_max_cpu_throttle = true; +diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c +index e02da5008b..2669156b28 100644 +--- a/monitor/hmp-cmds.c ++++ b/monitor/hmp-cmds.c +@@ -1297,12 +1297,6 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) + p->has_multifd_zstd_level = true; + visit_type_uint8(v, param, &p->multifd_zstd_level, &err); + break; +-#ifdef CONFIG_LINUX +- case MIGRATION_PARAMETER_ZERO_COPY_SEND: +- p->has_zero_copy_send = true; +- visit_type_bool(v, param, &p->zero_copy_send, &err); +- break; +-#endif + case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE: + p->has_xbzrle_cache_size = true; + if (!visit_type_size(v, param, &cache_size, &err)) { +diff --git a/qapi/migration.json b/qapi/migration.json +index 59b5c5780b..fe70a0c4b2 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -452,6 +452,13 @@ + # procedure starts. The VM RAM is saved with running VM. + # (since 6.0) + # ++# @zero-copy-send: Controls behavior on sending memory pages on migration. ++# When true, enables a zero-copy mechanism for sending ++# memory pages, if host supports it. ++# Requires that QEMU be permitted to use locked memory ++# for guest RAM pages. ++# (since 7.1) ++# + # Features: + # @unstable: Members @x-colo and @x-ignore-shared are experimental. + # +@@ -465,7 +472,8 @@ + 'block', 'return-path', 'pause-before-switchover', 'multifd', + 'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate', + { 'name': 'x-ignore-shared', 'features': [ 'unstable' ] }, +- 'validate-uuid', 'background-snapshot'] } ++ 'validate-uuid', 'background-snapshot', ++ 'zero-copy-send'] } + + ## + # @MigrationCapabilityStatus: +@@ -730,12 +738,6 @@ + # will consume more CPU. + # Defaults to 1. (Since 5.0) + # +-# @zero-copy-send: Controls behavior on sending memory pages on migration. +-# When true, enables a zero-copy mechanism for sending +-# memory pages, if host supports it. +-# Requires that QEMU be permitted to use locked memory +-# for guest RAM pages. +-# Defaults to false. (Since 7.1) + # + # @block-bitmap-mapping: Maps block nodes and bitmaps on them to + # aliases for the purpose of dirty bitmap migration. Such +@@ -776,7 +778,6 @@ + 'xbzrle-cache-size', 'max-postcopy-bandwidth', + 'max-cpu-throttle', 'multifd-compression', + 'multifd-zlib-level' ,'multifd-zstd-level', +- { 'name': 'zero-copy-send', 'if' : 'CONFIG_LINUX'}, + 'block-bitmap-mapping' ] } + + ## +@@ -903,13 +904,6 @@ + # will consume more CPU. + # Defaults to 1. (Since 5.0) + # +-# @zero-copy-send: Controls behavior on sending memory pages on migration. +-# When true, enables a zero-copy mechanism for sending +-# memory pages, if host supports it. +-# Requires that QEMU be permitted to use locked memory +-# for guest RAM pages. +-# Defaults to false. (Since 7.1) +-# + # @block-bitmap-mapping: Maps block nodes and bitmaps on them to + # aliases for the purpose of dirty bitmap migration. Such + # aliases may for example be the corresponding names on the +@@ -964,7 +958,6 @@ + '*multifd-compression': 'MultiFDCompression', + '*multifd-zlib-level': 'uint8', + '*multifd-zstd-level': 'uint8', +- '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' }, + '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } + + ## +@@ -1111,13 +1104,6 @@ + # will consume more CPU. + # Defaults to 1. (Since 5.0) + # +-# @zero-copy-send: Controls behavior on sending memory pages on migration. +-# When true, enables a zero-copy mechanism for sending +-# memory pages, if host supports it. +-# Requires that QEMU be permitted to use locked memory +-# for guest RAM pages. +-# Defaults to false. (Since 7.1) +-# + # @block-bitmap-mapping: Maps block nodes and bitmaps on them to + # aliases for the purpose of dirty bitmap migration. Such + # aliases may for example be the corresponding names on the +@@ -1170,7 +1156,6 @@ + '*multifd-compression': 'MultiFDCompression', + '*multifd-zlib-level': 'uint8', + '*multifd-zstd-level': 'uint8', +- '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' }, + '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } + + ## +-- +2.35.3 + diff --git a/SOURCES/kvm-migration-Introduce-ram_transferred_add.patch b/SOURCES/kvm-migration-Introduce-ram_transferred_add.patch new file mode 100644 index 0000000..561e231 --- /dev/null +++ b/SOURCES/kvm-migration-Introduce-ram_transferred_add.patch @@ -0,0 +1,122 @@ +From 030b54f5a2b2c8976370c962e9847af4746ac2c2 Mon Sep 17 00:00:00 2001 +From: David Edmondson +Date: Tue, 21 Dec 2021 09:34:40 +0000 +Subject: [PATCH 1/9] migration: Introduce ram_transferred_add() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 201: Zero-copy-send fixes + improvements +RH-Commit: [1/8] a6545760b0de13d533f6164be0545a6720bb42c7 +RH-Bugzilla: 2110203 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu +RH-Acked-by: Miroslav Rezanina + +Replace direct manipulation of ram_counters.transferred with a +function. + +Signed-off-by: David Edmondson +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit 4c2d0f6dca24f3396ab0718ad3f9f53cc53004df) +Signed-off-by: Leonardo Bras +--- + migration/ram.c | 23 ++++++++++++++--------- + 1 file changed, 14 insertions(+), 9 deletions(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 3e208efca7..3e82c4ff46 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -391,6 +391,11 @@ uint64_t ram_bytes_remaining(void) + + MigrationStats ram_counters; + ++static void ram_transferred_add(uint64_t bytes) ++{ ++ ram_counters.transferred += bytes; ++} ++ + /* used by the search for pages to send */ + struct PageSearchStatus { + /* Current block being searched */ +@@ -772,7 +777,7 @@ static int save_xbzrle_page(RAMState *rs, uint8_t **current_data, + * RAM_SAVE_FLAG_CONTINUE. + */ + xbzrle_counters.bytes += bytes_xbzrle - 8; +- ram_counters.transferred += bytes_xbzrle; ++ ram_transferred_add(bytes_xbzrle); + + return 1; + } +@@ -1203,7 +1208,7 @@ static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset) + + if (len) { + ram_counters.duplicate++; +- ram_counters.transferred += len; ++ ram_transferred_add(len); + return 1; + } + return -1; +@@ -1239,7 +1244,7 @@ static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, + } + + if (bytes_xmit) { +- ram_counters.transferred += bytes_xmit; ++ ram_transferred_add(bytes_xmit); + *pages = 1; + } + +@@ -1270,8 +1275,8 @@ static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, + static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, + uint8_t *buf, bool async) + { +- ram_counters.transferred += save_page_header(rs, rs->f, block, +- offset | RAM_SAVE_FLAG_PAGE); ++ ram_transferred_add(save_page_header(rs, rs->f, block, ++ offset | RAM_SAVE_FLAG_PAGE)); + if (async) { + qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE, + migrate_release_ram() & +@@ -1279,7 +1284,7 @@ static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, + } else { + qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE); + } +- ram_counters.transferred += TARGET_PAGE_SIZE; ++ ram_transferred_add(TARGET_PAGE_SIZE); + ram_counters.normal++; + return 1; + } +@@ -1378,7 +1383,7 @@ exit: + static void + update_compress_thread_counts(const CompressParam *param, int bytes_xmit) + { +- ram_counters.transferred += bytes_xmit; ++ ram_transferred_add(bytes_xmit); + + if (param->zero_page) { + ram_counters.duplicate++; +@@ -2303,7 +2308,7 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero) + ram_counters.duplicate += pages; + } else { + ram_counters.normal += pages; +- ram_counters.transferred += size; ++ ram_transferred_add(size); + qemu_update_position(f, size); + } + } +@@ -3147,7 +3152,7 @@ out: + + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + qemu_fflush(f); +- ram_counters.transferred += 8; ++ ram_transferred_add(8); + + ret = qemu_file_get_error(f); + } +-- +2.31.1 + diff --git a/SOURCES/kvm-migration-Never-call-twice-qemu_target_page_size.patch b/SOURCES/kvm-migration-Never-call-twice-qemu_target_page_size.patch new file mode 100644 index 0000000..d956712 --- /dev/null +++ b/SOURCES/kvm-migration-Never-call-twice-qemu_target_page_size.patch @@ -0,0 +1,116 @@ +From 6a9a5a2809cbbe2982df156722b88efeec998e3d Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:22 -0300 +Subject: [PATCH 01/37] migration: Never call twice qemu_target_page_size() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [1/26] 809ca84dec80bafc1959df8c9e57f482ee752a97 +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit 144fa06b3431e806057ce1438338395b35a3e544) +Signed-off-by: Leonardo Bras +--- + migration/migration.c | 7 ++++--- + migration/multifd.c | 7 ++++--- + migration/savevm.c | 5 +++-- + 3 files changed, 11 insertions(+), 8 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index a87ff01b81..8a13294da6 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -992,6 +992,8 @@ static void populate_time_info(MigrationInfo *info, MigrationState *s) + + static void populate_ram_info(MigrationInfo *info, MigrationState *s) + { ++ size_t page_size = qemu_target_page_size(); ++ + info->has_ram = true; + info->ram = g_malloc0(sizeof(*info->ram)); + info->ram->transferred = ram_counters.transferred; +@@ -1000,12 +1002,11 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + /* legacy value. It is not used anymore */ + info->ram->skipped = 0; + info->ram->normal = ram_counters.normal; +- info->ram->normal_bytes = ram_counters.normal * +- qemu_target_page_size(); ++ info->ram->normal_bytes = ram_counters.normal * page_size; + info->ram->mbps = s->mbps; + info->ram->dirty_sync_count = ram_counters.dirty_sync_count; + info->ram->postcopy_requests = ram_counters.postcopy_requests; +- info->ram->page_size = qemu_target_page_size(); ++ info->ram->page_size = page_size; + info->ram->multifd_bytes = ram_counters.multifd_bytes; + info->ram->pages_per_second = s->pages_per_second; + +diff --git a/migration/multifd.c b/migration/multifd.c +index 7c9deb1921..8125d0015c 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -289,7 +289,8 @@ static void multifd_send_fill_packet(MultiFDSendParams *p) + static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + { + MultiFDPacket_t *packet = p->packet; +- uint32_t pages_max = MULTIFD_PACKET_SIZE / qemu_target_page_size(); ++ size_t page_size = qemu_target_page_size(); ++ uint32_t pages_max = MULTIFD_PACKET_SIZE / page_size; + RAMBlock *block; + int i; + +@@ -358,14 +359,14 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + for (i = 0; i < p->pages->used; i++) { + uint64_t offset = be64_to_cpu(packet->offset[i]); + +- if (offset > (block->used_length - qemu_target_page_size())) { ++ if (offset > (block->used_length - page_size)) { + error_setg(errp, "multifd: offset too long %" PRIu64 + " (max " RAM_ADDR_FMT ")", + offset, block->used_length); + return -1; + } + p->pages->iov[i].iov_base = block->host + offset; +- p->pages->iov[i].iov_len = qemu_target_page_size(); ++ p->pages->iov[i].iov_len = page_size; + } + + return 0; +diff --git a/migration/savevm.c b/migration/savevm.c +index d59e976d50..0bef031acb 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -1685,6 +1685,7 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis, + { + PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE); + uint64_t remote_pagesize_summary, local_pagesize_summary, remote_tps; ++ size_t page_size = qemu_target_page_size(); + Error *local_err = NULL; + + trace_loadvm_postcopy_handle_advise(); +@@ -1741,13 +1742,13 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis, + } + + remote_tps = qemu_get_be64(mis->from_src_file); +- if (remote_tps != qemu_target_page_size()) { ++ if (remote_tps != page_size) { + /* + * Again, some differences could be dealt with, but for now keep it + * simple. + */ + error_report("Postcopy needs matching target page sizes (s=%d d=%zd)", +- (int)remote_tps, qemu_target_page_size()); ++ (int)remote_tps, page_size); + return -1; + } + +-- +2.35.3 + diff --git a/SOURCES/kvm-migration-Read-state-once.patch b/SOURCES/kvm-migration-Read-state-once.patch new file mode 100644 index 0000000..63fd047 --- /dev/null +++ b/SOURCES/kvm-migration-Read-state-once.patch @@ -0,0 +1,76 @@ +From 34eae2d7ef928a7e0e10cc30fe76839c005998eb Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Wed, 13 Apr 2022 12:33:29 +0100 +Subject: [PATCH 07/11] migration: Read state once + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 249: migration: Read state once +RH-Bugzilla: 2074205 +RH-Acked-by: Peter Xu +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Jon Maloy +RH-Acked-by: quintela1 +RH-Commit: [1/1] 9aa47b492a646fce4e66ebd9b7d7a85286d16051 + +The 'status' field for the migration is updated normally using +an atomic operation from the migration thread. +Most readers of it aren't that careful, and in most cases it doesn't +matter. + +In query_migrate->fill_source_migration_info the 'state' +is read twice; the first time to decide which state fields to fill in, +and then secondly to copy the state to the status field; that can end up +with a status that's inconsistent; e.g. setting up the fields +for 'setup' and then having an 'active' status. In that case +libvirt gets upset by the lack of ram info. +The symptom is: + libvirt.libvirtError: internal error: migration was active, but no RAM info was set + +Read the state exactly once in fill_source_migration_info. + +This is a possible fix for: +https://bugzilla.redhat.com/show_bug.cgi?id=2074205 + +Signed-off-by: Dr. David Alan Gilbert +Message-Id: <20220413113329.103696-1-dgilbert@redhat.com> +Reviewed-by: Juan Quintela +Reviewed-by: Peter Xu +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 552de79bfdd5e9e53847eb3c6d6e4cd898a4370e) +--- + migration/migration.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 51e6726dac..d8b24a2c91 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1071,6 +1071,7 @@ static void populate_disk_info(MigrationInfo *info) + static void fill_source_migration_info(MigrationInfo *info) + { + MigrationState *s = migrate_get_current(); ++ int state = qatomic_read(&s->state); + GSList *cur_blocker = migration_blockers; + + info->blocked_reasons = NULL; +@@ -1090,7 +1091,7 @@ static void fill_source_migration_info(MigrationInfo *info) + } + info->has_blocked_reasons = info->blocked_reasons != NULL; + +- switch (s->state) { ++ switch (state) { + case MIGRATION_STATUS_NONE: + /* no migration has happened ever */ + /* do not overwrite destination migration status */ +@@ -1135,7 +1136,7 @@ static void fill_source_migration_info(MigrationInfo *info) + info->has_status = true; + break; + } +- info->status = s->state; ++ info->status = state; + } + + typedef enum WriteTrackingSupport { +-- +2.37.3 + diff --git a/SOURCES/kvm-migration-Tally-pre-copy-downtime-and-post-copy-byte.patch b/SOURCES/kvm-migration-Tally-pre-copy-downtime-and-post-copy-byte.patch new file mode 100644 index 0000000..1cf4724 --- /dev/null +++ b/SOURCES/kvm-migration-Tally-pre-copy-downtime-and-post-copy-byte.patch @@ -0,0 +1,122 @@ +From 82637509cc9197ad9d1e1b286a608bf0da04b7b3 Mon Sep 17 00:00:00 2001 +From: David Edmondson +Date: Tue, 21 Dec 2021 09:34:41 +0000 +Subject: [PATCH 2/9] migration: Tally pre-copy, downtime and post-copy bytes + independently +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 201: Zero-copy-send fixes + improvements +RH-Commit: [2/8] 7d1bf37a3d93da88da6525d70fc1fce1abb92b83 +RH-Bugzilla: 2110203 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu +RH-Acked-by: Miroslav Rezanina + +Provide information on the number of bytes copied in the pre-copy, +downtime and post-copy phases of migration. + +Signed-off-by: David Edmondson +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit ae6806688016711bb9ec7541266d76ab511c5e3b) +Signed-off-by: Leonardo Bras +--- + migration/migration.c | 3 +++ + migration/ram.c | 7 +++++++ + monitor/hmp-cmds.c | 12 ++++++++++++ + qapi/migration.json | 13 ++++++++++++- + 4 files changed, 34 insertions(+), 1 deletion(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 616c3ff32e..e100b30f00 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1016,6 +1016,9 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + info->ram->page_size = page_size; + info->ram->multifd_bytes = ram_counters.multifd_bytes; + info->ram->pages_per_second = s->pages_per_second; ++ info->ram->precopy_bytes = ram_counters.precopy_bytes; ++ info->ram->downtime_bytes = ram_counters.downtime_bytes; ++ info->ram->postcopy_bytes = ram_counters.postcopy_bytes; + + if (migrate_use_xbzrle()) { + info->has_xbzrle_cache = true; +diff --git a/migration/ram.c b/migration/ram.c +index 3e82c4ff46..e7173da217 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -393,6 +393,13 @@ MigrationStats ram_counters; + + static void ram_transferred_add(uint64_t bytes) + { ++ if (runstate_is_running()) { ++ ram_counters.precopy_bytes += bytes; ++ } else if (migration_in_postcopy()) { ++ ram_counters.postcopy_bytes += bytes; ++ } else { ++ ram_counters.downtime_bytes += bytes; ++ } + ram_counters.transferred += bytes; + } + +diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c +index 2669156b28..8c384dc1b2 100644 +--- a/monitor/hmp-cmds.c ++++ b/monitor/hmp-cmds.c +@@ -293,6 +293,18 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) + monitor_printf(mon, "postcopy request count: %" PRIu64 "\n", + info->ram->postcopy_requests); + } ++ if (info->ram->precopy_bytes) { ++ monitor_printf(mon, "precopy ram: %" PRIu64 " kbytes\n", ++ info->ram->precopy_bytes >> 10); ++ } ++ if (info->ram->downtime_bytes) { ++ monitor_printf(mon, "downtime ram: %" PRIu64 " kbytes\n", ++ info->ram->downtime_bytes >> 10); ++ } ++ if (info->ram->postcopy_bytes) { ++ monitor_printf(mon, "postcopy ram: %" PRIu64 " kbytes\n", ++ info->ram->postcopy_bytes >> 10); ++ } + } + + if (info->has_disk) { +diff --git a/qapi/migration.json b/qapi/migration.json +index fe70a0c4b2..c8ec260ab0 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -46,6 +46,15 @@ + # @pages-per-second: the number of memory pages transferred per second + # (Since 4.0) + # ++# @precopy-bytes: The number of bytes sent in the pre-copy phase ++# (since 7.0). ++# ++# @downtime-bytes: The number of bytes sent while the guest is paused ++# (since 7.0). ++# ++# @postcopy-bytes: The number of bytes sent during the post-copy phase ++# (since 7.0). ++# + # Since: 0.14 + ## + { 'struct': 'MigrationStats', +@@ -54,7 +63,9 @@ + 'normal-bytes': 'int', 'dirty-pages-rate' : 'int', + 'mbps' : 'number', 'dirty-sync-count' : 'int', + 'postcopy-requests' : 'int', 'page-size' : 'int', +- 'multifd-bytes' : 'uint64', 'pages-per-second' : 'uint64' } } ++ 'multifd-bytes' : 'uint64', 'pages-per-second' : 'uint64', ++ 'precopy-bytes' : 'uint64', 'downtime-bytes' : 'uint64', ++ 'postcopy-bytes' : 'uint64' } } + + ## + # @XBZRLECacheStats: +-- +2.31.1 + diff --git a/SOURCES/kvm-migration-add-remaining-params-has_-true-in-migratio.patch b/SOURCES/kvm-migration-add-remaining-params-has_-true-in-migratio.patch new file mode 100644 index 0000000..73011b3 --- /dev/null +++ b/SOURCES/kvm-migration-add-remaining-params-has_-true-in-migratio.patch @@ -0,0 +1,62 @@ +From 8aecb49fdd771c5819fccc9e750b2e9cd4e94b58 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Mon, 25 Jul 2022 22:02:35 -0300 +Subject: [PATCH 7/9] migration: add remaining params->has_* = true in + migration_instance_init() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 201: Zero-copy-send fixes + improvements +RH-Commit: [7/8] fb622e5b88e14eb859d4903d9c088ba6ca63fc81 +RH-Bugzilla: 2110203 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu +RH-Acked-by: Miroslav Rezanina + +Some of params->has_* = true are missing in migration_instance_init, this +causes migrate_params_check() to skip some tests, allowing some +unsupported scenarios. + +Fix this by adding all missing params->has_* = true in +migration_instance_init(). + +Fixes: 69ef1f36b0 ("migration: define 'tls-creds' and 'tls-hostname' migration parameters") +Fixes: 1d58872a91 ("migration: do not wait for free thread") +Fixes: d2f1d29b95 ("migration: add support for a "tls-authz" migration parameter") +Signed-off-by: Leonardo Bras +Message-Id: <20220726010235.342927-1-leobras@redhat.com> +Reviewed-by: Peter Xu +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit df67aa3e61e2c83459da7d815962d9706f1528fc) +Signed-off-by: Leonardo Bras +--- + migration/migration.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/migration/migration.c b/migration/migration.c +index 35b3197eff..51e6726dac 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -4334,6 +4334,7 @@ static void migration_instance_init(Object *obj) + /* Set has_* up only for parameter checks */ + params->has_compress_level = true; + params->has_compress_threads = true; ++ params->has_compress_wait_thread = true; + params->has_decompress_threads = true; + params->has_throttle_trigger_threshold = true; + params->has_cpu_throttle_initial = true; +@@ -4354,6 +4355,9 @@ static void migration_instance_init(Object *obj) + params->has_announce_max = true; + params->has_announce_rounds = true; + params->has_announce_step = true; ++ params->has_tls_creds = true; ++ params->has_tls_hostname = true; ++ params->has_tls_authz = true; + + qemu_sem_init(&ms->postcopy_pause_sem, 0); + qemu_sem_init(&ms->postcopy_pause_rp_sem, 0); +-- +2.31.1 + diff --git a/SOURCES/kvm-migration-check-magic-value-for-deciding-the-mapping.patch b/SOURCES/kvm-migration-check-magic-value-for-deciding-the-mapping.patch new file mode 100644 index 0000000..7838333 --- /dev/null +++ b/SOURCES/kvm-migration-check-magic-value-for-deciding-the-mapping.patch @@ -0,0 +1,296 @@ +From f21a343af4b4d0c6e5181ae0abd0f6280dc8296c Mon Sep 17 00:00:00 2001 +From: "manish.mishra" +Date: Tue, 20 Dec 2022 18:44:18 +0000 +Subject: [PATCH 2/3] migration: check magic value for deciding the mapping of + channels +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 258: migration: Fix multifd crash due to channel disorder +RH-Bugzilla: 2137740 +RH-Acked-by: quintela1 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Dr. David Alan Gilbert +RH-Commit: [2/2] f97bebef3d3e372cfd660e5ddb6cffba791840d2 + +Conflicts: + migration/migration.c + migration/multifd.c + migration/postcopy-ram.c + migration/postcopy-ram.h + + There're a bunch of conflicts due to missing upstream patches on + e.g. on qemufile reworks, postcopy preempt. We don't plan to have + preempt in rhel8 at all, probably the same as the rest. + +Current logic assumes that channel connections on the destination side are +always established in the same order as the source and the first one will +always be the main channel followed by the multifid or post-copy +preemption channel. This may not be always true, as even if a channel has a +connection established on the source side it can be in the pending state on +the destination side and a newer connection can be established first. +Basically causing out of order mapping of channels on the destination side. +Currently, all channels except post-copy preempt send a magic number, this +patch uses that magic number to decide the type of channel. This logic is +applicable only for precopy(multifd) live migration, as mentioned, the +post-copy preempt channel does not send any magic number. Also, tls live +migrations already does tls handshake before creating other channels, so +this issue is not possible with tls, hence this logic is avoided for tls +live migrations. This patch uses read peek to check the magic number of +channels so that current data/control stream management remains +un-effected. + +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. Berrange +Reviewed-by: Juan Quintela +Suggested-by: Daniel P. Berrange +Signed-off-by: manish.mishra +Signed-off-by: Juan Quintela +(cherry picked from commit 6720c2b32725e6ac404f22851a0ecd0a71d0cbe2) +Signed-off-by: Peter Xu +--- + migration/channel.c | 45 ++++++++++++++++++++++++++++++++++++++ + migration/channel.h | 5 +++++ + migration/migration.c | 51 +++++++++++++++++++++++++++++++------------ + migration/multifd.c | 19 ++++++++-------- + migration/multifd.h | 2 +- + 5 files changed, 98 insertions(+), 24 deletions(-) + +diff --git a/migration/channel.c b/migration/channel.c +index 086b5c0d8b..ee308fef23 100644 +--- a/migration/channel.c ++++ b/migration/channel.c +@@ -98,3 +98,48 @@ void migration_channel_connect(MigrationState *s, + g_free(s->hostname); + error_free(error); + } ++ ++ ++/** ++ * @migration_channel_read_peek - Peek at migration channel, without ++ * actually removing it from channel buffer. ++ * ++ * @ioc: the channel object ++ * @buf: the memory region to read data into ++ * @buflen: the number of bytes to read in @buf ++ * @errp: pointer to a NULL-initialized error object ++ * ++ * Returns 0 if successful, returns -1 and sets @errp if fails. ++ */ ++int migration_channel_read_peek(QIOChannel *ioc, ++ const char *buf, ++ const size_t buflen, ++ Error **errp) ++{ ++ ssize_t len = 0; ++ struct iovec iov = { .iov_base = (char *)buf, .iov_len = buflen }; ++ ++ while (true) { ++ len = qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, ++ QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp); ++ ++ if (len <= 0 && len != QIO_CHANNEL_ERR_BLOCK) { ++ error_setg(errp, ++ "Failed to peek at channel"); ++ return -1; ++ } ++ ++ if (len == buflen) { ++ break; ++ } ++ ++ /* 1ms sleep. */ ++ if (qemu_in_coroutine()) { ++ qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000); ++ } else { ++ g_usleep(1000); ++ } ++ } ++ ++ return 0; ++} +diff --git a/migration/channel.h b/migration/channel.h +index 67a461c28a..5bdb8208a7 100644 +--- a/migration/channel.h ++++ b/migration/channel.h +@@ -24,4 +24,9 @@ void migration_channel_connect(MigrationState *s, + QIOChannel *ioc, + const char *hostname, + Error *error_in); ++ ++int migration_channel_read_peek(QIOChannel *ioc, ++ const char *buf, ++ const size_t buflen, ++ Error **errp); + #endif +diff --git a/migration/migration.c b/migration/migration.c +index d8b24a2c91..0885549de0 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -32,6 +32,7 @@ + #include "savevm.h" + #include "qemu-file-channel.h" + #include "qemu-file.h" ++#include "channel.h" + #include "migration/vmstate.h" + #include "block/block.h" + #include "qapi/error.h" +@@ -637,10 +638,6 @@ static bool migration_incoming_setup(QEMUFile *f, Error **errp) + { + MigrationIncomingState *mis = migration_incoming_get_current(); + +- if (multifd_load_setup(errp) != 0) { +- return false; +- } +- + if (!mis->from_src_file) { + mis->from_src_file = f; + } +@@ -701,10 +698,42 @@ void migration_fd_process_incoming(QEMUFile *f, Error **errp) + void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) + { + MigrationIncomingState *mis = migration_incoming_get_current(); ++ bool default_channel = true; ++ uint32_t channel_magic = 0; + Error *local_err = NULL; +- bool start_migration; ++ int ret = 0; + +- if (!mis->from_src_file) { ++ if (migrate_use_multifd() && !migrate_postcopy_ram() && ++ qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { ++ /* ++ * With multiple channels, it is possible that we receive channels ++ * out of order on destination side, causing incorrect mapping of ++ * source channels on destination side. Check channel MAGIC to ++ * decide type of channel. Please note this is best effort, postcopy ++ * preempt channel does not send any magic number so avoid it for ++ * postcopy live migration. Also tls live migration already does ++ * tls handshake while initializing main channel so with tls this ++ * issue is not possible. ++ */ ++ ret = migration_channel_read_peek(ioc, (void *)&channel_magic, ++ sizeof(channel_magic), &local_err); ++ ++ if (ret != 0) { ++ error_propagate(errp, local_err); ++ return; ++ } ++ ++ default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC)); ++ } else { ++ default_channel = !mis->from_src_file; ++ } ++ ++ if (multifd_load_setup(errp) != 0) { ++ error_setg(errp, "Failed to setup multifd channels"); ++ return; ++ } ++ ++ if (default_channel) { + /* The first connection (multifd may have multiple) */ + QEMUFile *f = qemu_fopen_channel_input(ioc); + +@@ -716,23 +745,17 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) + if (!migration_incoming_setup(f, errp)) { + return; + } +- +- /* +- * Common migration only needs one channel, so we can start +- * right now. Multifd needs more than one channel, we wait. +- */ +- start_migration = !migrate_use_multifd(); + } else { + /* Multiple connections */ + assert(migrate_use_multifd()); +- start_migration = multifd_recv_new_channel(ioc, &local_err); ++ multifd_recv_new_channel(ioc, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + } + +- if (start_migration) { ++ if (migration_has_all_channels()) { + migration_incoming_process(); + } + } +diff --git a/migration/multifd.c b/migration/multifd.c +index 7c16523e6b..75ac052d2f 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -1183,9 +1183,14 @@ int multifd_load_setup(Error **errp) + uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); + uint8_t i; + +- if (!migrate_use_multifd()) { ++ /* ++ * Return successfully if multiFD recv state is already initialised ++ * or multiFD is not enabled. ++ */ ++ if (multifd_recv_state || !migrate_use_multifd()) { + return 0; + } ++ + if (!migrate_multifd_is_allowed()) { + error_setg(errp, "multifd is not supported by current protocol"); + return -1; +@@ -1244,11 +1249,9 @@ bool multifd_recv_all_channels_created(void) + + /* + * Try to receive all multifd channels to get ready for the migration. +- * - Return true and do not set @errp when correctly receiving all channels; +- * - Return false and do not set @errp when correctly receiving the current one; +- * - Return false and set @errp when failing to receive the current channel. ++ * Sets @errp when failing to receive the current channel. + */ +-bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) ++void multifd_recv_new_channel(QIOChannel *ioc, Error **errp) + { + MultiFDRecvParams *p; + Error *local_err = NULL; +@@ -1261,7 +1264,7 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) + "failed to receive packet" + " via multifd channel %d: ", + qatomic_read(&multifd_recv_state->count)); +- return false; ++ return; + } + trace_multifd_recv_new_channel(id); + +@@ -1271,7 +1274,7 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) + id); + multifd_recv_terminate_threads(local_err); + error_propagate(errp, local_err); +- return false; ++ return; + } + p->c = ioc; + object_ref(OBJECT(ioc)); +@@ -1282,6 +1285,4 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) + qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p, + QEMU_THREAD_JOINABLE); + qatomic_inc(&multifd_recv_state->count); +- return qatomic_read(&multifd_recv_state->count) == +- migrate_multifd_channels(); + } +diff --git a/migration/multifd.h b/migration/multifd.h +index 11d5e273e6..9c0a2a0701 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -20,7 +20,7 @@ void multifd_save_cleanup(void); + int multifd_load_setup(Error **errp); + int multifd_load_cleanup(Error **errp); + bool multifd_recv_all_channels_created(void); +-bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp); ++void multifd_recv_new_channel(QIOChannel *ioc, Error **errp); + void multifd_recv_sync_main(void); + int multifd_send_sync_main(QEMUFile *f); + int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset); +-- +2.37.3 + diff --git a/SOURCES/kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch b/SOURCES/kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch new file mode 100644 index 0000000..5008e15 --- /dev/null +++ b/SOURCES/kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch @@ -0,0 +1,83 @@ +From 2516a21205e67078cb735e9fd47ba50156c166b7 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Mon, 11 Jul 2022 18:11:13 -0300 +Subject: [PATCH 5/9] migration/multifd: Report to user when zerocopy not + working +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 201: Zero-copy-send fixes + improvements +RH-Commit: [5/8] 0b2e23b7f8ae72936e11369cd44ba474ef3b9e8c +RH-Bugzilla: 2110203 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu +RH-Acked-by: Miroslav Rezanina + +Some errors, like the lack of Scatter-Gather support by the network +interface(NETIF_F_SG) may cause sendmsg(...,MSG_ZEROCOPY) to fail on using +zero-copy, which causes it to fall back to the default copying mechanism. + +After each full dirty-bitmap scan there should be a zero-copy flush +happening, which checks for errors each of the previous calls to +sendmsg(...,MSG_ZEROCOPY). If all of them failed to use zero-copy, then +increment dirty_sync_missed_zero_copy migration stat to let the user know +about it. + +Signed-off-by: Leonardo Bras +Reviewed-by: Daniel P. Berrangé +Acked-by: Peter Xu +Message-Id: <20220711211112.18951-4-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit d59c40cc483729f2e67c80e58df769ad19976fe9) +Signed-off-by: Leonardo Bras +--- + migration/multifd.c | 2 ++ + migration/ram.c | 5 +++++ + migration/ram.h | 2 ++ + 3 files changed, 9 insertions(+) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 90ab4c4346..7c16523e6b 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -631,6 +631,8 @@ int multifd_send_sync_main(QEMUFile *f) + if (ret < 0) { + error_report_err(err); + return -1; ++ } else if (ret == 1) { ++ dirty_sync_missed_zero_copy(); + } + } + } +diff --git a/migration/ram.c b/migration/ram.c +index e7173da217..93cdb456ac 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -403,6 +403,11 @@ static void ram_transferred_add(uint64_t bytes) + ram_counters.transferred += bytes; + } + ++void dirty_sync_missed_zero_copy(void) ++{ ++ ram_counters.dirty_sync_missed_zero_copy++; ++} ++ + /* used by the search for pages to send */ + struct PageSearchStatus { + /* Current block being searched */ +diff --git a/migration/ram.h b/migration/ram.h +index c515396a9a..69c3ccb26a 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -88,4 +88,6 @@ void ram_write_tracking_prepare(void); + int ram_write_tracking_start(void); + void ram_write_tracking_stop(void); + ++void dirty_sync_missed_zero_copy(void); ++ + #endif +-- +2.31.1 + diff --git a/SOURCES/kvm-multifd-Add-missing-documentation.patch b/SOURCES/kvm-multifd-Add-missing-documentation.patch new file mode 100644 index 0000000..361f0c1 --- /dev/null +++ b/SOURCES/kvm-multifd-Add-missing-documentation.patch @@ -0,0 +1,82 @@ +From 3b567f762cbd8d4ffaf717b0baba9cf9fe9614c2 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:23 -0300 +Subject: [PATCH 03/37] multifd: Add missing documentation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [3/26] 924fca4305ebd8669955d456fc1c515f509e6026 +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit 18ede636bc29fd8bda628fe3e5c593f8c1b734f4) +(fixed typo in commit message) +Signed-off-by: Leonardo Bras +--- + migration/multifd-zlib.c | 2 ++ + migration/multifd-zstd.c | 2 ++ + migration/multifd.c | 1 + + 3 files changed, 5 insertions(+) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index ab4ba75d75..f403d2f031 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -74,6 +74,7 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp) + * Close the channel and return memory. + * + * @p: Params for the channel that we are using ++ * @errp: pointer to an error + */ + static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp) + { +@@ -96,6 +97,7 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp) + * + * @p: Params for the channel that we are using + * @used: number of pages used ++ * @errp: pointer to an error + */ + static int zlib_send_prepare(MultiFDSendParams *p, uint32_t used, Error **errp) + { +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index 693bddf8c9..8d657f8860 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -86,6 +86,7 @@ static int zstd_send_setup(MultiFDSendParams *p, Error **errp) + * Close the channel and return memory. + * + * @p: Params for the channel that we are using ++ * @errp: pointer to an error + */ + static void zstd_send_cleanup(MultiFDSendParams *p, Error **errp) + { +@@ -109,6 +110,7 @@ static void zstd_send_cleanup(MultiFDSendParams *p, Error **errp) + * + * @p: Params for the channel that we are using + * @used: number of pages used ++ * @errp: pointer to an error + */ + static int zstd_send_prepare(MultiFDSendParams *p, uint32_t used, Error **errp) + { +diff --git a/migration/multifd.c b/migration/multifd.c +index 8ea86d81dc..cdeffdc4c5 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -66,6 +66,7 @@ static int nocomp_send_setup(MultiFDSendParams *p, Error **errp) + * For no compression this function does nothing. + * + * @p: Params for the channel that we are using ++ * @errp: pointer to an error + */ + static void nocomp_send_cleanup(MultiFDSendParams *p, Error **errp) + { +-- +2.35.3 + diff --git a/SOURCES/kvm-multifd-Fill-offset-and-block-for-reception.patch b/SOURCES/kvm-multifd-Fill-offset-and-block-for-reception.patch new file mode 100644 index 0000000..7996f87 --- /dev/null +++ b/SOURCES/kvm-multifd-Fill-offset-and-block-for-reception.patch @@ -0,0 +1,50 @@ +From 8c1edb1889ff44506f35fa185d6569b0dd9d7260 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:23 -0300 +Subject: [PATCH 07/37] multifd: Fill offset and block for reception +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [7/26] 51a9e6b76af956d63fc735172211d9bf6f0f6f80 +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +We were using the iov directly, but we will need this info on the +following patch. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit 01102a2ef6c97acc5cc8a2c3bb62b7665a20f51f) +Signed-off-by: Leonardo Bras +--- + migration/multifd.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 55d99a8232..0533da154a 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -354,6 +354,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + return -1; + } + ++ p->pages->block = block; + for (i = 0; i < p->pages->num; i++) { + uint64_t offset = be64_to_cpu(packet->offset[i]); + +@@ -363,6 +364,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + offset, block->used_length); + return -1; + } ++ p->pages->offset[i] = offset; + p->pages->iov[i].iov_base = block->host + offset; + p->pages->iov[i].iov_len = page_size; + } +-- +2.35.3 + diff --git a/SOURCES/kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch b/SOURCES/kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch new file mode 100644 index 0000000..dccdf1f --- /dev/null +++ b/SOURCES/kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch @@ -0,0 +1,182 @@ +From 7a7e2191f1ac4114380248cbd3c6ab7425250747 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Wed, 18 May 2022 02:52:25 -0300 +Subject: [PATCH 23/37] multifd: Implement zero copy write in multifd migration + (multifd-zero-copy) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [23/26] 904ce3909cfef62dd84cc7d3c6a3482e7e6f28e9 +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +Implement zero copy send on nocomp_send_write(), by making use of QIOChannel +writev + flags & flush interface. + +Change multifd_send_sync_main() so flush_zero_copy() can be called +after each iteration in order to make sure all dirty pages are sent before +a new iteration is started. It will also flush at the beginning and at the +end of migration. + +Also make it return -1 if flush_zero_copy() fails, in order to cancel +the migration process, and avoid resuming the guest in the target host +without receiving all current RAM. + +This will work fine on RAM migration because the RAM pages are not usually freed, +and there is no problem on changing the pages content between writev_zero_copy() and +the actual sending of the buffer, because this change will dirty the page and +cause it to be re-sent on a next iteration anyway. + +A lot of locked memory may be needed in order to use multifd migration +with zero-copy enabled, so disabling the feature should be necessary for +low-privileged users trying to perform multifd migrations. + +Signed-off-by: Leonardo Bras +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. Berrangé +Message-Id: <20220513062836.965425-9-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 5b1d9bab2da4fca3a3caee97c430e5709cb32b7b) +Signed-off-by: Leonardo Bras +--- + migration/migration.c | 11 ++++++++++- + migration/multifd.c | 37 +++++++++++++++++++++++++++++++++++-- + migration/multifd.h | 2 ++ + migration/socket.c | 5 +++-- + 4 files changed, 50 insertions(+), 5 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 8e28f2ee41..5357efd348 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1471,7 +1471,16 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) + error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); + return false; + } +- ++#ifdef CONFIG_LINUX ++ if (params->zero_copy_send && ++ (!migrate_use_multifd() || ++ params->multifd_compression != MULTIFD_COMPRESSION_NONE || ++ (params->tls_creds && *params->tls_creds))) { ++ error_setg(errp, ++ "Zero copy only available for non-compressed non-TLS multifd migration"); ++ return false; ++ } ++#endif + return true; + } + +diff --git a/migration/multifd.c b/migration/multifd.c +index 193f70cdba..90ab4c4346 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -576,6 +576,7 @@ void multifd_save_cleanup(void) + int multifd_send_sync_main(QEMUFile *f) + { + int i; ++ bool flush_zero_copy; + + if (!migrate_use_multifd()) { + return 0; +@@ -586,6 +587,20 @@ int multifd_send_sync_main(QEMUFile *f) + return -1; + } + } ++ ++ /* ++ * When using zero-copy, it's necessary to flush the pages before any of ++ * the pages can be sent again, so we'll make sure the new version of the ++ * pages will always arrive _later_ than the old pages. ++ * ++ * Currently we achieve this by flushing the zero-page requested writes ++ * per ram iteration, but in the future we could potentially optimize it ++ * to be less frequent, e.g. only after we finished one whole scanning of ++ * all the dirty bitmaps. ++ */ ++ ++ flush_zero_copy = migrate_use_zero_copy_send(); ++ + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + +@@ -607,6 +622,17 @@ int multifd_send_sync_main(QEMUFile *f) + ram_counters.transferred += p->packet_len; + qemu_mutex_unlock(&p->mutex); + qemu_sem_post(&p->sem); ++ ++ if (flush_zero_copy && p->c) { ++ int ret; ++ Error *err = NULL; ++ ++ ret = qio_channel_flush(p->c, &err); ++ if (ret < 0) { ++ error_report_err(err); ++ return -1; ++ } ++ } + } + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; +@@ -691,8 +717,8 @@ static void *multifd_send_thread(void *opaque) + p->iov[0].iov_base = p->packet; + } + +- ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num, +- &local_err); ++ ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num, NULL, ++ 0, p->write_flags, &local_err); + if (ret != 0) { + break; + } +@@ -933,6 +959,13 @@ int multifd_save_setup(Error **errp) + /* We need one extra place for the packet header */ + p->iov = g_new0(struct iovec, page_count + 1); + p->normal = g_new0(ram_addr_t, page_count); ++ ++ if (migrate_use_zero_copy_send()) { ++ p->write_flags = QIO_CHANNEL_WRITE_FLAG_ZERO_COPY; ++ } else { ++ p->write_flags = 0; ++ } ++ + socket_send_channel_create(multifd_new_send_channel_async, p); + } + +diff --git a/migration/multifd.h b/migration/multifd.h +index 92de878155..11d5e273e6 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -95,6 +95,8 @@ typedef struct { + uint32_t packet_len; + /* pointer to the packet */ + MultiFDPacket_t *packet; ++ /* multifd flags for sending ram */ ++ int write_flags; + /* multifd flags for each packet */ + uint32_t flags; + /* size of the next packet that contains pages */ +diff --git a/migration/socket.c b/migration/socket.c +index 3754d8f72c..4fd5e85f50 100644 +--- a/migration/socket.c ++++ b/migration/socket.c +@@ -79,8 +79,9 @@ static void socket_outgoing_migration(QIOTask *task, + + trace_migration_socket_outgoing_connected(data->hostname); + +- if (migrate_use_zero_copy_send()) { +- error_setg(&err, "Zero copy send not available in migration"); ++ if (migrate_use_zero_copy_send() && ++ !qio_channel_has_feature(sioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) { ++ error_setg(&err, "Zero copy send feature not detected in host kernel"); + } + + out: +-- +2.35.3 + diff --git a/SOURCES/kvm-multifd-Make-zlib-compression-method-not-use-iovs.patch b/SOURCES/kvm-multifd-Make-zlib-compression-method-not-use-iovs.patch new file mode 100644 index 0000000..e23d35d --- /dev/null +++ b/SOURCES/kvm-multifd-Make-zlib-compression-method-not-use-iovs.patch @@ -0,0 +1,98 @@ +From 75cd92cb7cff055f46163e64d66ba3f685f9ac04 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:23 -0300 +Subject: [PATCH 09/37] multifd: Make zlib compression method not use iovs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [9/26] d33dd62b833d50fee989a195aebcc8d5e7d43181 +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit a5ed22948873b50fcf1415d1ce15c71d61a9388d) +Signed-off-by: Leonardo Bras +--- + migration/multifd-zlib.c | 17 +++++++++-------- + 1 file changed, 9 insertions(+), 8 deletions(-) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index 330fc021c5..a1950a4588 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -13,6 +13,7 @@ + #include "qemu/osdep.h" + #include + #include "qemu/rcu.h" ++#include "exec/ramblock.h" + #include "exec/target_page.h" + #include "qapi/error.h" + #include "migration.h" +@@ -100,8 +101,8 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp) + */ + static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + { +- struct iovec *iov = p->pages->iov; + struct zlib_data *z = p->data; ++ size_t page_size = qemu_target_page_size(); + z_stream *zs = &z->zs; + uint32_t out_size = 0; + int ret; +@@ -115,8 +116,8 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + flush = Z_SYNC_FLUSH; + } + +- zs->avail_in = iov[i].iov_len; +- zs->next_in = iov[i].iov_base; ++ zs->avail_in = page_size; ++ zs->next_in = p->pages->block->host + p->pages->offset[i]; + + zs->avail_out = available; + zs->next_out = z->zbuff + out_size; +@@ -240,6 +241,7 @@ static void zlib_recv_cleanup(MultiFDRecvParams *p) + static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp) + { + struct zlib_data *z = p->data; ++ size_t page_size = qemu_target_page_size(); + z_stream *zs = &z->zs; + uint32_t in_size = p->next_packet_size; + /* we measure the change of total_out */ +@@ -264,7 +266,6 @@ static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp) + zs->next_in = z->zbuff; + + for (i = 0; i < p->pages->num; i++) { +- struct iovec *iov = &p->pages->iov[i]; + int flush = Z_NO_FLUSH; + unsigned long start = zs->total_out; + +@@ -272,8 +273,8 @@ static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp) + flush = Z_SYNC_FLUSH; + } + +- zs->avail_out = iov->iov_len; +- zs->next_out = iov->iov_base; ++ zs->avail_out = page_size; ++ zs->next_out = p->pages->block->host + p->pages->offset[i]; + + /* + * Welcome to inflate semantics +@@ -286,8 +287,8 @@ static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp) + do { + ret = inflate(zs, flush); + } while (ret == Z_OK && zs->avail_in +- && (zs->total_out - start) < iov->iov_len); +- if (ret == Z_OK && (zs->total_out - start) < iov->iov_len) { ++ && (zs->total_out - start) < page_size); ++ if (ret == Z_OK && (zs->total_out - start) < page_size) { + error_setg(errp, "multifd %d: inflate generated too few output", + p->id); + return -1; +-- +2.35.3 + diff --git a/SOURCES/kvm-multifd-Make-zlib-use-iov-s.patch b/SOURCES/kvm-multifd-Make-zlib-use-iov-s.patch new file mode 100644 index 0000000..6310738 --- /dev/null +++ b/SOURCES/kvm-multifd-Make-zlib-use-iov-s.patch @@ -0,0 +1,53 @@ +From 1cdab9cadef1ed84ec34651a1edbffa36c1e67d0 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:24 -0300 +Subject: [PATCH 12/37] multifd: Make zlib use iov's +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [12/26] 58630452e14802e71a9eadb17cfe4964ebf8e091 +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit 48a4a44c1cde382c6b8e7792d01fe7d9b0a59c69) +Signed-off-by: Leonardo Bras +--- + migration/multifd-zlib.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index a987e4a26c..96475e096e 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -145,6 +145,9 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + } + out_size += available - zs->avail_out; + } ++ p->iov[p->iovs_num].iov_base = z->zbuff; ++ p->iov[p->iovs_num].iov_len = out_size; ++ p->iovs_num++; + p->next_packet_size = out_size; + p->flags |= MULTIFD_FLAG_ZLIB; + +@@ -164,10 +167,7 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + */ + static int zlib_send_write(MultiFDSendParams *p, uint32_t used, Error **errp) + { +- struct zlib_data *z = p->data; +- +- return qio_channel_write_all(p->c, (void *)z->zbuff, p->next_packet_size, +- errp); ++ return qio_channel_writev_all(p->c, p->iov, p->iovs_num, errp); + } + + /** +-- +2.35.3 + diff --git a/SOURCES/kvm-multifd-Make-zstd-compression-method-not-use-iovs.patch b/SOURCES/kvm-multifd-Make-zstd-compression-method-not-use-iovs.patch new file mode 100644 index 0000000..3a10280 --- /dev/null +++ b/SOURCES/kvm-multifd-Make-zstd-compression-method-not-use-iovs.patch @@ -0,0 +1,94 @@ +From ab6262bd4829e3bd6437fe32737209df2af2d141 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:23 -0300 +Subject: [PATCH 08/37] multifd: Make zstd compression method not use iovs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [8/26] 010579fa73b5a4c6fd631dc9fbaf6f974974bc99 +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit f5ff548774c22b34a0c0e2fef85f1be11160d774) +Signed-off-by: Leonardo Bras +--- + migration/multifd-zstd.c | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index f0d1105792..d9ed42622b 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -13,6 +13,7 @@ + #include "qemu/osdep.h" + #include + #include "qemu/rcu.h" ++#include "exec/ramblock.h" + #include "exec/target_page.h" + #include "qapi/error.h" + #include "migration.h" +@@ -113,8 +114,8 @@ static void zstd_send_cleanup(MultiFDSendParams *p, Error **errp) + */ + static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + { +- struct iovec *iov = p->pages->iov; + struct zstd_data *z = p->data; ++ size_t page_size = qemu_target_page_size(); + int ret; + uint32_t i; + +@@ -128,8 +129,8 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + if (i == p->pages->num - 1) { + flush = ZSTD_e_flush; + } +- z->in.src = iov[i].iov_base; +- z->in.size = iov[i].iov_len; ++ z->in.src = p->pages->block->host + p->pages->offset[i]; ++ z->in.size = page_size; + z->in.pos = 0; + + /* +@@ -261,7 +262,8 @@ static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp) + { + uint32_t in_size = p->next_packet_size; + uint32_t out_size = 0; +- uint32_t expected_size = p->pages->num * qemu_target_page_size(); ++ size_t page_size = qemu_target_page_size(); ++ uint32_t expected_size = p->pages->num * page_size; + uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; + struct zstd_data *z = p->data; + int ret; +@@ -283,10 +285,8 @@ static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp) + z->in.pos = 0; + + for (i = 0; i < p->pages->num; i++) { +- struct iovec *iov = &p->pages->iov[i]; +- +- z->out.dst = iov->iov_base; +- z->out.size = iov->iov_len; ++ z->out.dst = p->pages->block->host + p->pages->offset[i]; ++ z->out.size = page_size; + z->out.pos = 0; + + /* +@@ -300,8 +300,8 @@ static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp) + do { + ret = ZSTD_decompressStream(z->zds, &z->out, &z->in); + } while (ret > 0 && (z->in.size - z->in.pos > 0) +- && (z->out.pos < iov->iov_len)); +- if (ret > 0 && (z->out.pos < iov->iov_len)) { ++ && (z->out.pos < page_size)); ++ if (ret > 0 && (z->out.pos < page_size)) { + error_setg(errp, "multifd %d: decompressStream buffer too small", + p->id); + return -1; +-- +2.35.3 + diff --git a/SOURCES/kvm-multifd-Make-zstd-use-iov-s.patch b/SOURCES/kvm-multifd-Make-zstd-use-iov-s.patch new file mode 100644 index 0000000..af3e7fb --- /dev/null +++ b/SOURCES/kvm-multifd-Make-zstd-use-iov-s.patch @@ -0,0 +1,53 @@ +From bac5ce0b4d3552d6056045f201b4e50dd6204b31 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:24 -0300 +Subject: [PATCH 13/37] multifd: Make zstd use iov's +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [13/26] 4d7036fb32efdf088d23737b9710e6ad1a4654aa +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit 0a818b89eb8eaf79ae651405907d8110a0935cfd) +Signed-off-by: Leonardo Bras +--- + migration/multifd-zstd.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index 2185a83eac..4e60cdbc54 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -156,6 +156,9 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + return -1; + } + } ++ p->iov[p->iovs_num].iov_base = z->zbuff; ++ p->iov[p->iovs_num].iov_len = z->out.pos; ++ p->iovs_num++; + p->next_packet_size = z->out.pos; + p->flags |= MULTIFD_FLAG_ZSTD; + +@@ -175,10 +178,7 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + */ + static int zstd_send_write(MultiFDSendParams *p, uint32_t used, Error **errp) + { +- struct zstd_data *z = p->data; +- +- return qio_channel_write_all(p->c, (void *)z->zbuff, p->next_packet_size, +- errp); ++ return qio_channel_writev_all(p->c, p->iov, p->iovs_num, errp); + } + + /** +-- +2.35.3 + diff --git a/SOURCES/kvm-multifd-Move-iov-from-pages-to-params.patch b/SOURCES/kvm-multifd-Move-iov-from-pages-to-params.patch new file mode 100644 index 0000000..6a59707 --- /dev/null +++ b/SOURCES/kvm-multifd-Move-iov-from-pages-to-params.patch @@ -0,0 +1,190 @@ +From 1181a9cbcaf37a82aa7bf117ef209f554b8c4a71 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:24 -0300 +Subject: [PATCH 11/37] multifd: Move iov from pages to params +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [11/26] 24dff3ef68cf3327811242193502319ed3e3940a +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +This will allow us to reduce the number of system calls on the next patch. + +Signed-off-by: Juan Quintela +(cherry picked from commit 226468ba3dea950ab4bb0b729878dde25812da1c) +Signed-off-by: Leonardo Bras +--- + migration/multifd.c | 34 ++++++++++++++++++++++++---------- + migration/multifd.h | 8 ++++++-- + 2 files changed, 30 insertions(+), 12 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index d0d19470f9..5004f394aa 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -86,7 +86,16 @@ static void nocomp_send_cleanup(MultiFDSendParams *p, Error **errp) + */ + static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp) + { +- p->next_packet_size = p->pages->num * qemu_target_page_size(); ++ MultiFDPages_t *pages = p->pages; ++ size_t page_size = qemu_target_page_size(); ++ ++ for (int i = 0; i < p->pages->num; i++) { ++ p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i]; ++ p->iov[p->iovs_num].iov_len = page_size; ++ p->iovs_num++; ++ } ++ ++ p->next_packet_size = p->pages->num * page_size; + p->flags |= MULTIFD_FLAG_NOCOMP; + return 0; + } +@@ -104,7 +113,7 @@ static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp) + */ + static int nocomp_send_write(MultiFDSendParams *p, uint32_t used, Error **errp) + { +- return qio_channel_writev_all(p->c, p->pages->iov, used, errp); ++ return qio_channel_writev_all(p->c, p->iov, p->iovs_num, errp); + } + + /** +@@ -146,13 +155,18 @@ static void nocomp_recv_cleanup(MultiFDRecvParams *p) + static int nocomp_recv_pages(MultiFDRecvParams *p, Error **errp) + { + uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; ++ size_t page_size = qemu_target_page_size(); + + if (flags != MULTIFD_FLAG_NOCOMP) { + error_setg(errp, "multifd %u: flags received %x flags expected %x", + p->id, flags, MULTIFD_FLAG_NOCOMP); + return -1; + } +- return qio_channel_readv_all(p->c, p->pages->iov, p->pages->num, errp); ++ for (int i = 0; i < p->pages->num; i++) { ++ p->iov[i].iov_base = p->pages->block->host + p->pages->offset[i]; ++ p->iov[i].iov_len = page_size; ++ } ++ return qio_channel_readv_all(p->c, p->iov, p->pages->num, errp); + } + + static MultiFDMethods multifd_nocomp_ops = { +@@ -242,7 +256,6 @@ static MultiFDPages_t *multifd_pages_init(size_t size) + MultiFDPages_t *pages = g_new0(MultiFDPages_t, 1); + + pages->allocated = size; +- pages->iov = g_new0(struct iovec, size); + pages->offset = g_new0(ram_addr_t, size); + + return pages; +@@ -254,8 +267,6 @@ static void multifd_pages_clear(MultiFDPages_t *pages) + pages->allocated = 0; + pages->packet_num = 0; + pages->block = NULL; +- g_free(pages->iov); +- pages->iov = NULL; + g_free(pages->offset); + pages->offset = NULL; + g_free(pages); +@@ -365,8 +376,6 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + return -1; + } + p->pages->offset[i] = offset; +- p->pages->iov[i].iov_base = block->host + offset; +- p->pages->iov[i].iov_len = page_size; + } + + return 0; +@@ -470,8 +479,6 @@ int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset) + + if (pages->block == block) { + pages->offset[pages->num] = offset; +- pages->iov[pages->num].iov_base = block->host + offset; +- pages->iov[pages->num].iov_len = qemu_target_page_size(); + pages->num++; + + if (pages->num < pages->allocated) { +@@ -564,6 +571,8 @@ void multifd_save_cleanup(void) + p->packet_len = 0; + g_free(p->packet); + p->packet = NULL; ++ g_free(p->iov); ++ p->iov = NULL; + multifd_send_state->ops->send_cleanup(p, &local_err); + if (local_err) { + migrate_set_error(migrate_get_current(), local_err); +@@ -651,6 +660,7 @@ static void *multifd_send_thread(void *opaque) + uint32_t used = p->pages->num; + uint64_t packet_num = p->packet_num; + uint32_t flags = p->flags; ++ p->iovs_num = 0; + + if (used) { + ret = multifd_send_state->ops->send_prepare(p, &local_err); +@@ -919,6 +929,7 @@ int multifd_save_setup(Error **errp) + p->packet->version = cpu_to_be32(MULTIFD_VERSION); + p->name = g_strdup_printf("multifdsend_%d", i); + p->tls_hostname = g_strdup(s->hostname); ++ p->iov = g_new0(struct iovec, page_count); + socket_send_channel_create(multifd_new_send_channel_async, p); + } + +@@ -1018,6 +1029,8 @@ int multifd_load_cleanup(Error **errp) + p->packet_len = 0; + g_free(p->packet); + p->packet = NULL; ++ g_free(p->iov); ++ p->iov = NULL; + multifd_recv_state->ops->recv_cleanup(p); + } + qemu_sem_destroy(&multifd_recv_state->sem_sync); +@@ -1158,6 +1171,7 @@ int multifd_load_setup(Error **errp) + + sizeof(uint64_t) * page_count; + p->packet = g_malloc0(p->packet_len); + p->name = g_strdup_printf("multifdrecv_%d", i); ++ p->iov = g_new0(struct iovec, page_count); + } + + for (i = 0; i < thread_count; i++) { +diff --git a/migration/multifd.h b/migration/multifd.h +index e57adc783b..c3f18af364 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -62,8 +62,6 @@ typedef struct { + uint64_t packet_num; + /* offset of each page */ + ram_addr_t *offset; +- /* pointer to each page */ +- struct iovec *iov; + RAMBlock *block; + } MultiFDPages_t; + +@@ -110,6 +108,10 @@ typedef struct { + uint64_t num_pages; + /* syncs main thread and channels */ + QemuSemaphore sem_sync; ++ /* buffers to send */ ++ struct iovec *iov; ++ /* number of iovs used */ ++ uint32_t iovs_num; + /* used for compression methods */ + void *data; + } MultiFDSendParams; +@@ -149,6 +151,8 @@ typedef struct { + uint64_t num_pages; + /* syncs main thread and channels */ + QemuSemaphore sem_sync; ++ /* buffers to recv */ ++ struct iovec *iov; + /* used for de-compression methods */ + void *data; + } MultiFDRecvParams; +-- +2.35.3 + diff --git a/SOURCES/kvm-multifd-Remove-send_write-method.patch b/SOURCES/kvm-multifd-Remove-send_write-method.patch new file mode 100644 index 0000000..79fc649 --- /dev/null +++ b/SOURCES/kvm-multifd-Remove-send_write-method.patch @@ -0,0 +1,160 @@ +From 2952487c7e5ed14796fbffae0b964a35790d6850 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:24 -0300 +Subject: [PATCH 14/37] multifd: Remove send_write() method +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [14/26] 5fa59ffa09099fbc6da84e9a192ca71af52cc98f +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +Everything use now iov's. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit 468fcb5dd0c965e1af0da9efab09b1462631da18) +Signed-off-by: Leonardo Bras +--- + migration/multifd-zlib.c | 17 ----------------- + migration/multifd-zstd.c | 17 ----------------- + migration/multifd.c | 20 ++------------------ + migration/multifd.h | 2 -- + 4 files changed, 2 insertions(+), 54 deletions(-) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index 96475e096e..8ed29b9633 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -154,22 +154,6 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + return 0; + } + +-/** +- * zlib_send_write: do the actual write of the data +- * +- * Do the actual write of the comprresed buffer. +- * +- * Returns 0 for success or -1 for error +- * +- * @p: Params for the channel that we are using +- * @used: number of pages used +- * @errp: pointer to an error +- */ +-static int zlib_send_write(MultiFDSendParams *p, uint32_t used, Error **errp) +-{ +- return qio_channel_writev_all(p->c, p->iov, p->iovs_num, errp); +-} +- + /** + * zlib_recv_setup: setup receive side + * +@@ -312,7 +296,6 @@ static MultiFDMethods multifd_zlib_ops = { + .send_setup = zlib_send_setup, + .send_cleanup = zlib_send_cleanup, + .send_prepare = zlib_send_prepare, +- .send_write = zlib_send_write, + .recv_setup = zlib_recv_setup, + .recv_cleanup = zlib_recv_cleanup, + .recv_pages = zlib_recv_pages +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index 4e60cdbc54..25e1f517b5 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -165,22 +165,6 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + return 0; + } + +-/** +- * zstd_send_write: do the actual write of the data +- * +- * Do the actual write of the comprresed buffer. +- * +- * Returns 0 for success or -1 for error +- * +- * @p: Params for the channel that we are using +- * @used: number of pages used +- * @errp: pointer to an error +- */ +-static int zstd_send_write(MultiFDSendParams *p, uint32_t used, Error **errp) +-{ +- return qio_channel_writev_all(p->c, p->iov, p->iovs_num, errp); +-} +- + /** + * zstd_recv_setup: setup receive side + * +@@ -325,7 +309,6 @@ static MultiFDMethods multifd_zstd_ops = { + .send_setup = zstd_send_setup, + .send_cleanup = zstd_send_cleanup, + .send_prepare = zstd_send_prepare, +- .send_write = zstd_send_write, + .recv_setup = zstd_recv_setup, + .recv_cleanup = zstd_recv_cleanup, + .recv_pages = zstd_recv_pages +diff --git a/migration/multifd.c b/migration/multifd.c +index 5004f394aa..1e1551d78b 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -100,22 +100,6 @@ static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp) + return 0; + } + +-/** +- * nocomp_send_write: do the actual write of the data +- * +- * For no compression we just have to write the data. +- * +- * Returns 0 for success or -1 for error +- * +- * @p: Params for the channel that we are using +- * @used: number of pages used +- * @errp: pointer to an error +- */ +-static int nocomp_send_write(MultiFDSendParams *p, uint32_t used, Error **errp) +-{ +- return qio_channel_writev_all(p->c, p->iov, p->iovs_num, errp); +-} +- + /** + * nocomp_recv_setup: setup receive side + * +@@ -173,7 +157,6 @@ static MultiFDMethods multifd_nocomp_ops = { + .send_setup = nocomp_send_setup, + .send_cleanup = nocomp_send_cleanup, + .send_prepare = nocomp_send_prepare, +- .send_write = nocomp_send_write, + .recv_setup = nocomp_recv_setup, + .recv_cleanup = nocomp_recv_cleanup, + .recv_pages = nocomp_recv_pages +@@ -687,7 +670,8 @@ static void *multifd_send_thread(void *opaque) + } + + if (used) { +- ret = multifd_send_state->ops->send_write(p, used, &local_err); ++ ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num, ++ &local_err); + if (ret != 0) { + break; + } +diff --git a/migration/multifd.h b/migration/multifd.h +index c3f18af364..7496f951a7 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -164,8 +164,6 @@ typedef struct { + void (*send_cleanup)(MultiFDSendParams *p, Error **errp); + /* Prepare the send packet */ + int (*send_prepare)(MultiFDSendParams *p, Error **errp); +- /* Write the send packet */ +- int (*send_write)(MultiFDSendParams *p, uint32_t used, Error **errp); + /* Setup for receiving side */ + int (*recv_setup)(MultiFDRecvParams *p, Error **errp); + /* Cleanup for receiving side */ +-- +2.35.3 + diff --git a/SOURCES/kvm-multifd-Rename-used-field-to-num.patch b/SOURCES/kvm-multifd-Rename-used-field-to-num.patch new file mode 100644 index 0000000..24bdd8c --- /dev/null +++ b/SOURCES/kvm-multifd-Rename-used-field-to-num.patch @@ -0,0 +1,177 @@ +From 003ef20d11b33a7139fae6fbcf170188a07afc43 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:22 -0300 +Subject: [PATCH 02/37] multifd: Rename used field to num +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [2/26] 952283197ef89be4d61c7690bb6c3194e5c67217 +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +We will need to split it later in zero_num (number of zero pages) and +normal_num (number of normal pages). This name is better. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit 90a3d2f9d5f729147b2827c177932603ae6e2d55) +Signed-off-by: Leonardo Bras +--- + migration/multifd.c | 38 +++++++++++++++++++------------------- + migration/multifd.h | 2 +- + 2 files changed, 20 insertions(+), 20 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 8125d0015c..8ea86d81dc 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -252,7 +252,7 @@ static MultiFDPages_t *multifd_pages_init(size_t size) + + static void multifd_pages_clear(MultiFDPages_t *pages) + { +- pages->used = 0; ++ pages->num = 0; + pages->allocated = 0; + pages->packet_num = 0; + pages->block = NULL; +@@ -270,7 +270,7 @@ static void multifd_send_fill_packet(MultiFDSendParams *p) + + packet->flags = cpu_to_be32(p->flags); + packet->pages_alloc = cpu_to_be32(p->pages->allocated); +- packet->pages_used = cpu_to_be32(p->pages->used); ++ packet->pages_used = cpu_to_be32(p->pages->num); + packet->next_packet_size = cpu_to_be32(p->next_packet_size); + packet->packet_num = cpu_to_be64(p->packet_num); + +@@ -278,7 +278,7 @@ static void multifd_send_fill_packet(MultiFDSendParams *p) + strncpy(packet->ramblock, p->pages->block->idstr, 256); + } + +- for (i = 0; i < p->pages->used; i++) { ++ for (i = 0; i < p->pages->num; i++) { + /* there are architectures where ram_addr_t is 32 bit */ + uint64_t temp = p->pages->offset[i]; + +@@ -332,18 +332,18 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + p->pages = multifd_pages_init(packet->pages_alloc); + } + +- p->pages->used = be32_to_cpu(packet->pages_used); +- if (p->pages->used > packet->pages_alloc) { ++ p->pages->num = be32_to_cpu(packet->pages_used); ++ if (p->pages->num > packet->pages_alloc) { + error_setg(errp, "multifd: received packet " + "with %d pages and expected maximum pages are %d", +- p->pages->used, packet->pages_alloc) ; ++ p->pages->num, packet->pages_alloc) ; + return -1; + } + + p->next_packet_size = be32_to_cpu(packet->next_packet_size); + p->packet_num = be64_to_cpu(packet->packet_num); + +- if (p->pages->used == 0) { ++ if (p->pages->num == 0) { + return 0; + } + +@@ -356,7 +356,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + return -1; + } + +- for (i = 0; i < p->pages->used; i++) { ++ for (i = 0; i < p->pages->num; i++) { + uint64_t offset = be64_to_cpu(packet->offset[i]); + + if (offset > (block->used_length - page_size)) { +@@ -443,13 +443,13 @@ static int multifd_send_pages(QEMUFile *f) + } + qemu_mutex_unlock(&p->mutex); + } +- assert(!p->pages->used); ++ assert(!p->pages->num); + assert(!p->pages->block); + + p->packet_num = multifd_send_state->packet_num++; + multifd_send_state->pages = p->pages; + p->pages = pages; +- transferred = ((uint64_t) pages->used) * qemu_target_page_size() ++ transferred = ((uint64_t) pages->num) * qemu_target_page_size() + + p->packet_len; + qemu_file_update_transfer(f, transferred); + ram_counters.multifd_bytes += transferred; +@@ -469,12 +469,12 @@ int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset) + } + + if (pages->block == block) { +- pages->offset[pages->used] = offset; +- pages->iov[pages->used].iov_base = block->host + offset; +- pages->iov[pages->used].iov_len = qemu_target_page_size(); +- pages->used++; ++ pages->offset[pages->num] = offset; ++ pages->iov[pages->num].iov_base = block->host + offset; ++ pages->iov[pages->num].iov_len = qemu_target_page_size(); ++ pages->num++; + +- if (pages->used < pages->allocated) { ++ if (pages->num < pages->allocated) { + return 1; + } + } +@@ -586,7 +586,7 @@ void multifd_send_sync_main(QEMUFile *f) + if (!migrate_use_multifd()) { + return; + } +- if (multifd_send_state->pages->used) { ++ if (multifd_send_state->pages->num) { + if (multifd_send_pages(f) < 0) { + error_report("%s: multifd_send_pages fail", __func__); + return; +@@ -649,7 +649,7 @@ static void *multifd_send_thread(void *opaque) + qemu_mutex_lock(&p->mutex); + + if (p->pending_job) { +- uint32_t used = p->pages->used; ++ uint32_t used = p->pages->num; + uint64_t packet_num = p->packet_num; + flags = p->flags; + +@@ -665,7 +665,7 @@ static void *multifd_send_thread(void *opaque) + p->flags = 0; + p->num_packets++; + p->num_pages += used; +- p->pages->used = 0; ++ p->pages->num = 0; + p->pages->block = NULL; + qemu_mutex_unlock(&p->mutex); + +@@ -1091,7 +1091,7 @@ static void *multifd_recv_thread(void *opaque) + break; + } + +- used = p->pages->used; ++ used = p->pages->num; + flags = p->flags; + /* recv methods don't know how to handle the SYNC flag */ + p->flags &= ~MULTIFD_FLAG_SYNC; +diff --git a/migration/multifd.h b/migration/multifd.h +index 15c50ca0b2..86820dd028 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -55,7 +55,7 @@ typedef struct { + + typedef struct { + /* number of used pages */ +- uint32_t used; ++ uint32_t num; + /* number of allocated pages */ + uint32_t allocated; + /* global number of generated multifd packets */ +-- +2.35.3 + diff --git a/SOURCES/kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch b/SOURCES/kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch new file mode 100644 index 0000000..d54cce8 --- /dev/null +++ b/SOURCES/kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch @@ -0,0 +1,102 @@ +From 33a38fef5e889b45571228bde519746fd90d8877 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Wed, 18 May 2022 02:52:25 -0300 +Subject: [PATCH 22/37] multifd: Send header packet without flags if + zero-copy-send is enabled +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [22/26] 9abfee42b72f11911cf128519826d09cbd2f5bc3 +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +Since d48c3a0445 ("multifd: Use a single writev on the send side"), +sending the header packet and the memory pages happens in the same +writev, which can potentially make the migration faster. + +Using channel-socket as example, this works well with the default copying +mechanism of sendmsg(), but with zero-copy-send=true, it will cause +the migration to often break. + +This happens because the header packet buffer gets reused quite often, +and there is a high chance that by the time the MSG_ZEROCOPY mechanism get +to send the buffer, it has already changed, sending the wrong data and +causing the migration to abort. + +It means that, as it is, the buffer for the header packet is not suitable +for sending with MSG_ZEROCOPY. + +In order to enable zero copy for multifd, send the header packet on an +individual write(), without any flags, and the remanining pages with a +writev(), as it was happening before. This only changes how a migration +with zero-copy-send=true works, not changing any current behavior for +migrations with zero-copy-send=false. + +Signed-off-by: Leonardo Bras +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. Berrangé +Message-Id: <20220513062836.965425-8-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit b7dbdd8e76cd03453c234dbb9578d20969859d74) +Signed-off-by: Leonardo Bras +--- + migration/multifd.c | 22 +++++++++++++++++++--- + 1 file changed, 19 insertions(+), 3 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 1e34e01ebc..193f70cdba 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -624,6 +624,7 @@ static void *multifd_send_thread(void *opaque) + MultiFDSendParams *p = opaque; + Error *local_err = NULL; + int ret = 0; ++ bool use_zero_copy_send = migrate_use_zero_copy_send(); + + trace_multifd_send_thread_start(p->id); + rcu_register_thread(); +@@ -646,9 +647,14 @@ static void *multifd_send_thread(void *opaque) + if (p->pending_job) { + uint64_t packet_num = p->packet_num; + uint32_t flags = p->flags; +- p->iovs_num = 1; + p->normal_num = 0; + ++ if (use_zero_copy_send) { ++ p->iovs_num = 0; ++ } else { ++ p->iovs_num = 1; ++ } ++ + for (int i = 0; i < p->pages->num; i++) { + p->normal[p->normal_num] = p->pages->offset[i]; + p->normal_num++; +@@ -672,8 +678,18 @@ static void *multifd_send_thread(void *opaque) + trace_multifd_send(p->id, packet_num, p->normal_num, flags, + p->next_packet_size); + +- p->iov[0].iov_len = p->packet_len; +- p->iov[0].iov_base = p->packet; ++ if (use_zero_copy_send) { ++ /* Send header first, without zerocopy */ ++ ret = qio_channel_write_all(p->c, (void *)p->packet, ++ p->packet_len, &local_err); ++ if (ret != 0) { ++ break; ++ } ++ } else { ++ /* Send header using the same writev call */ ++ p->iov[0].iov_len = p->packet_len; ++ p->iov[0].iov_base = p->packet; ++ } + + ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num, + &local_err); +-- +2.35.3 + diff --git a/SOURCES/kvm-multifd-The-variable-is-only-used-inside-the-loop.patch b/SOURCES/kvm-multifd-The-variable-is-only-used-inside-the-loop.patch new file mode 100644 index 0000000..ef5e6d2 --- /dev/null +++ b/SOURCES/kvm-multifd-The-variable-is-only-used-inside-the-loop.patch @@ -0,0 +1,48 @@ +From 56cd14fc23c58707b9184da11f36d777bba6ce78 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:23 -0300 +Subject: [PATCH 04/37] multifd: The variable is only used inside the loop +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [4/26] 45d8bbde75ebbef6329c41ddb56db4526739f94f +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit 1943c11a62bd0741e5d9fbba78404fe47ebea820) +Signed-off-by: Leonardo Bras +--- + migration/multifd.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index cdeffdc4c5..ce7101cf9d 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -629,7 +629,6 @@ static void *multifd_send_thread(void *opaque) + MultiFDSendParams *p = opaque; + Error *local_err = NULL; + int ret = 0; +- uint32_t flags = 0; + + trace_multifd_send_thread_start(p->id); + rcu_register_thread(); +@@ -652,7 +651,7 @@ static void *multifd_send_thread(void *opaque) + if (p->pending_job) { + uint32_t used = p->pages->num; + uint64_t packet_num = p->packet_num; +- flags = p->flags; ++ uint32_t flags = p->flags; + + if (used) { + ret = multifd_send_state->ops->send_prepare(p, used, +-- +2.35.3 + diff --git a/SOURCES/kvm-multifd-Use-a-single-writev-on-the-send-side.patch b/SOURCES/kvm-multifd-Use-a-single-writev-on-the-send-side.patch new file mode 100644 index 0000000..b4f3036 --- /dev/null +++ b/SOURCES/kvm-multifd-Use-a-single-writev-on-the-send-side.patch @@ -0,0 +1,80 @@ +From 4051de396e02ea2c1911c842426318bcd97f93c7 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:24 -0300 +Subject: [PATCH 15/37] multifd: Use a single writev on the send side +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [15/26] c37063c813fc0ba695072117f272360e5c413803 +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +Until now, we wrote the packet header with write(), and the rest of the +pages with writev(). Just increase the size of the iovec and do a +single writev(). + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit d48c3a044537689866fe44e65d24c7d39a68868a) +Signed-off-by: Leonardo Bras +--- + migration/multifd.c | 20 ++++++++------------ + 1 file changed, 8 insertions(+), 12 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 1e1551d78b..d0f86542b1 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -643,7 +643,7 @@ static void *multifd_send_thread(void *opaque) + uint32_t used = p->pages->num; + uint64_t packet_num = p->packet_num; + uint32_t flags = p->flags; +- p->iovs_num = 0; ++ p->iovs_num = 1; + + if (used) { + ret = multifd_send_state->ops->send_prepare(p, &local_err); +@@ -663,20 +663,15 @@ static void *multifd_send_thread(void *opaque) + trace_multifd_send(p->id, packet_num, used, flags, + p->next_packet_size); + +- ret = qio_channel_write_all(p->c, (void *)p->packet, +- p->packet_len, &local_err); ++ p->iov[0].iov_len = p->packet_len; ++ p->iov[0].iov_base = p->packet; ++ ++ ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num, ++ &local_err); + if (ret != 0) { + break; + } + +- if (used) { +- ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num, +- &local_err); +- if (ret != 0) { +- break; +- } +- } +- + qemu_mutex_lock(&p->mutex); + p->pending_job--; + qemu_mutex_unlock(&p->mutex); +@@ -913,7 +908,8 @@ int multifd_save_setup(Error **errp) + p->packet->version = cpu_to_be32(MULTIFD_VERSION); + p->name = g_strdup_printf("multifdsend_%d", i); + p->tls_hostname = g_strdup(s->hostname); +- p->iov = g_new0(struct iovec, page_count); ++ /* We need one extra place for the packet header */ ++ p->iov = g_new0(struct iovec, page_count + 1); + socket_send_channel_create(multifd_new_send_channel_async, p); + } + +-- +2.35.3 + diff --git a/SOURCES/kvm-multifd-Use-normal-pages-array-on-the-send-side.patch b/SOURCES/kvm-multifd-Use-normal-pages-array-on-the-send-side.patch new file mode 100644 index 0000000..032dac2 --- /dev/null +++ b/SOURCES/kvm-multifd-Use-normal-pages-array-on-the-send-side.patch @@ -0,0 +1,261 @@ +From 3b57c876e1eaca34fb5bd9067553de945013d4be Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:24 -0300 +Subject: [PATCH 16/37] multifd: Use normal pages array on the send side +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [16/26] 1c48806474daf48fe93920ac361311af95c6a6f3 +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +We are only sending normal pages through multifd channels. +Later on this series, we are going to also send zero pages. +We are going to detect if a page is zero or non zero in the multifd +channel thread, not on the main thread. + +So we receive an array of pages page->offset[N] + +And we will end with: + +p->normal[N - zero_pages] +p->zero[zero_pages]. + +In this patch, we just copy all the pages in offset to normal. + +for (i = 0; i < pages->num; i++) { + p->narmal[p->normal_num] = pages->offset[i]; + p->normal_num++: +} + +Later in the series this becomes: + +for (i = 0; i < pages->num; i++) { + if (buffer_is_zero(page->offset[i])) { + p->zerol[p->zero_num] = pages->offset[i]; + p->zero_num++: + } else { + p->narmal[p->normal_num] = pages->offset[i]; + p->normal_num++: + } +} + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert + +--- + +Improving comment (dave) +Renaming num_normal_pages to total_normal_pages (peter) + +(cherry picked from commit 815956f03902980c771da64b17f7f791c1cb57b0) +Signed-off-by: Leonardo Bras +--- + migration/multifd-zlib.c | 6 +++--- + migration/multifd-zstd.c | 6 +++--- + migration/multifd.c | 30 +++++++++++++++++++----------- + migration/multifd.h | 8 ++++++-- + migration/trace-events | 4 ++-- + 5 files changed, 33 insertions(+), 21 deletions(-) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index 8ed29b9633..8508f26adf 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -108,16 +108,16 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + int ret; + uint32_t i; + +- for (i = 0; i < p->pages->num; i++) { ++ for (i = 0; i < p->normal_num; i++) { + uint32_t available = z->zbuff_len - out_size; + int flush = Z_NO_FLUSH; + +- if (i == p->pages->num - 1) { ++ if (i == p->normal_num - 1) { + flush = Z_SYNC_FLUSH; + } + + zs->avail_in = page_size; +- zs->next_in = p->pages->block->host + p->pages->offset[i]; ++ zs->next_in = p->pages->block->host + p->normal[i]; + + zs->avail_out = available; + zs->next_out = z->zbuff + out_size; +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index 25e1f517b5..693af3a140 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -123,13 +123,13 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + z->out.size = z->zbuff_len; + z->out.pos = 0; + +- for (i = 0; i < p->pages->num; i++) { ++ for (i = 0; i < p->normal_num; i++) { + ZSTD_EndDirective flush = ZSTD_e_continue; + +- if (i == p->pages->num - 1) { ++ if (i == p->normal_num - 1) { + flush = ZSTD_e_flush; + } +- z->in.src = p->pages->block->host + p->pages->offset[i]; ++ z->in.src = p->pages->block->host + p->normal[i]; + z->in.size = page_size; + z->in.pos = 0; + +diff --git a/migration/multifd.c b/migration/multifd.c +index d0f86542b1..3725226400 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -89,13 +89,13 @@ static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp) + MultiFDPages_t *pages = p->pages; + size_t page_size = qemu_target_page_size(); + +- for (int i = 0; i < p->pages->num; i++) { +- p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i]; ++ for (int i = 0; i < p->normal_num; i++) { ++ p->iov[p->iovs_num].iov_base = pages->block->host + p->normal[i]; + p->iov[p->iovs_num].iov_len = page_size; + p->iovs_num++; + } + +- p->next_packet_size = p->pages->num * page_size; ++ p->next_packet_size = p->normal_num * page_size; + p->flags |= MULTIFD_FLAG_NOCOMP; + return 0; + } +@@ -262,7 +262,7 @@ static void multifd_send_fill_packet(MultiFDSendParams *p) + + packet->flags = cpu_to_be32(p->flags); + packet->pages_alloc = cpu_to_be32(p->pages->allocated); +- packet->pages_used = cpu_to_be32(p->pages->num); ++ packet->pages_used = cpu_to_be32(p->normal_num); + packet->next_packet_size = cpu_to_be32(p->next_packet_size); + packet->packet_num = cpu_to_be64(p->packet_num); + +@@ -270,9 +270,9 @@ static void multifd_send_fill_packet(MultiFDSendParams *p) + strncpy(packet->ramblock, p->pages->block->idstr, 256); + } + +- for (i = 0; i < p->pages->num; i++) { ++ for (i = 0; i < p->normal_num; i++) { + /* there are architectures where ram_addr_t is 32 bit */ +- uint64_t temp = p->pages->offset[i]; ++ uint64_t temp = p->normal[i]; + + packet->offset[i] = cpu_to_be64(temp); + } +@@ -556,6 +556,8 @@ void multifd_save_cleanup(void) + p->packet = NULL; + g_free(p->iov); + p->iov = NULL; ++ g_free(p->normal); ++ p->normal = NULL; + multifd_send_state->ops->send_cleanup(p, &local_err); + if (local_err) { + migrate_set_error(migrate_get_current(), local_err); +@@ -640,12 +642,17 @@ static void *multifd_send_thread(void *opaque) + qemu_mutex_lock(&p->mutex); + + if (p->pending_job) { +- uint32_t used = p->pages->num; + uint64_t packet_num = p->packet_num; + uint32_t flags = p->flags; + p->iovs_num = 1; ++ p->normal_num = 0; ++ ++ for (int i = 0; i < p->pages->num; i++) { ++ p->normal[p->normal_num] = p->pages->offset[i]; ++ p->normal_num++; ++ } + +- if (used) { ++ if (p->normal_num) { + ret = multifd_send_state->ops->send_prepare(p, &local_err); + if (ret != 0) { + qemu_mutex_unlock(&p->mutex); +@@ -655,12 +662,12 @@ static void *multifd_send_thread(void *opaque) + multifd_send_fill_packet(p); + p->flags = 0; + p->num_packets++; +- p->num_pages += used; ++ p->total_normal_pages += p->normal_num; + p->pages->num = 0; + p->pages->block = NULL; + qemu_mutex_unlock(&p->mutex); + +- trace_multifd_send(p->id, packet_num, used, flags, ++ trace_multifd_send(p->id, packet_num, p->normal_num, flags, + p->next_packet_size); + + p->iov[0].iov_len = p->packet_len; +@@ -710,7 +717,7 @@ out: + qemu_mutex_unlock(&p->mutex); + + rcu_unregister_thread(); +- trace_multifd_send_thread_end(p->id, p->num_packets, p->num_pages); ++ trace_multifd_send_thread_end(p->id, p->num_packets, p->total_normal_pages); + + return NULL; + } +@@ -910,6 +917,7 @@ int multifd_save_setup(Error **errp) + p->tls_hostname = g_strdup(s->hostname); + /* We need one extra place for the packet header */ + p->iov = g_new0(struct iovec, page_count + 1); ++ p->normal = g_new0(ram_addr_t, page_count); + socket_send_channel_create(multifd_new_send_channel_async, p); + } + +diff --git a/migration/multifd.h b/migration/multifd.h +index 7496f951a7..7823199dbe 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -104,14 +104,18 @@ typedef struct { + /* thread local variables */ + /* packets sent through this channel */ + uint64_t num_packets; +- /* pages sent through this channel */ +- uint64_t num_pages; ++ /* non zero pages sent through this channel */ ++ uint64_t total_normal_pages; + /* syncs main thread and channels */ + QemuSemaphore sem_sync; + /* buffers to send */ + struct iovec *iov; + /* number of iovs used */ + uint32_t iovs_num; ++ /* Pages that are not zero */ ++ ram_addr_t *normal; ++ /* num of non zero pages */ ++ uint32_t normal_num; + /* used for compression methods */ + void *data; + } MultiFDSendParams; +diff --git a/migration/trace-events b/migration/trace-events +index 5172cb3b3d..171a83a55d 100644 +--- a/migration/trace-events ++++ b/migration/trace-events +@@ -124,13 +124,13 @@ multifd_recv_sync_main_wait(uint8_t id) "channel %u" + multifd_recv_terminate_threads(bool error) "error %d" + multifd_recv_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %u packets %" PRIu64 " pages %" PRIu64 + multifd_recv_thread_start(uint8_t id) "%u" +-multifd_send(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " pages %u flags 0x%x next packet size %u" ++multifd_send(uint8_t id, uint64_t packet_num, uint32_t normal, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " normal pages %u flags 0x%x next packet size %u" + multifd_send_error(uint8_t id) "channel %u" + multifd_send_sync_main(long packet_num) "packet num %ld" + multifd_send_sync_main_signal(uint8_t id) "channel %u" + multifd_send_sync_main_wait(uint8_t id) "channel %u" + multifd_send_terminate_threads(bool error) "error %d" +-multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %u packets %" PRIu64 " pages %" PRIu64 ++multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t normal_pages) "channel %u packets %" PRIu64 " normal pages %" PRIu64 + multifd_send_thread_start(uint8_t id) "%u" + multifd_tls_outgoing_handshake_start(void *ioc, void *tioc, const char *hostname) "ioc=%p tioc=%p hostname=%s" + multifd_tls_outgoing_handshake_error(void *ioc, const char *err) "ioc=%p err=%s" +-- +2.35.3 + diff --git a/SOURCES/kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch b/SOURCES/kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch new file mode 100644 index 0000000..7912266 --- /dev/null +++ b/SOURCES/kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch @@ -0,0 +1,163 @@ +From fce933410a5068220a5f29011a6d1a647e357a62 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Wed, 18 May 2022 02:52:25 -0300 +Subject: [PATCH 21/37] multifd: multifd_send_sync_main now returns negative on + error +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [21/26] b4e4f3663576aa87f3b2f66f1d38bad4f50bd4ac +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +Even though multifd_send_sync_main() currently emits error_reports, it's +callers don't really check it before continuing. + +Change multifd_send_sync_main() to return -1 on error and 0 on success. +Also change all it's callers to make use of this change and possibly fail +earlier. + +(This change is important to next patch on multifd zero copy +implementation, to make it sure an error in zero-copy flush does not go +unnoticed. + +Signed-off-by: Leonardo Bras +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Peter Xu +Message-Id: <20220513062836.965425-7-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 33d70973a3a6e8c6b62bcbc64d9e488961981007) +Signed-off-by: Leonardo Bras +--- + migration/multifd.c | 10 ++++++---- + migration/multifd.h | 2 +- + migration/ram.c | 29 ++++++++++++++++++++++------- + 3 files changed, 29 insertions(+), 12 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index e53811f04a..1e34e01ebc 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -573,17 +573,17 @@ void multifd_save_cleanup(void) + multifd_send_state = NULL; + } + +-void multifd_send_sync_main(QEMUFile *f) ++int multifd_send_sync_main(QEMUFile *f) + { + int i; + + if (!migrate_use_multifd()) { +- return; ++ return 0; + } + if (multifd_send_state->pages->num) { + if (multifd_send_pages(f) < 0) { + error_report("%s: multifd_send_pages fail", __func__); +- return; ++ return -1; + } + } + for (i = 0; i < migrate_multifd_channels(); i++) { +@@ -596,7 +596,7 @@ void multifd_send_sync_main(QEMUFile *f) + if (p->quit) { + error_report("%s: channel %d has already quit", __func__, i); + qemu_mutex_unlock(&p->mutex); +- return; ++ return -1; + } + + p->packet_num = multifd_send_state->packet_num++; +@@ -615,6 +615,8 @@ void multifd_send_sync_main(QEMUFile *f) + qemu_sem_wait(&p->sem_sync); + } + trace_multifd_send_sync_main(multifd_send_state->packet_num); ++ ++ return 0; + } + + static void *multifd_send_thread(void *opaque) +diff --git a/migration/multifd.h b/migration/multifd.h +index 7823199dbe..92de878155 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -22,7 +22,7 @@ int multifd_load_cleanup(Error **errp); + bool multifd_recv_all_channels_created(void); + bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp); + void multifd_recv_sync_main(void); +-void multifd_send_sync_main(QEMUFile *f); ++int multifd_send_sync_main(QEMUFile *f); + int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset); + + /* Multifd Compression flags */ +diff --git a/migration/ram.c b/migration/ram.c +index 863035d235..3e208efca7 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -2992,6 +2992,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) + { + RAMState **rsp = opaque; + RAMBlock *block; ++ int ret; + + if (compress_threads_save_setup()) { + return -1; +@@ -3026,7 +3027,11 @@ static int ram_save_setup(QEMUFile *f, void *opaque) + ram_control_before_iterate(f, RAM_CONTROL_SETUP); + ram_control_after_iterate(f, RAM_CONTROL_SETUP); + +- multifd_send_sync_main(f); ++ ret = multifd_send_sync_main(f); ++ if (ret < 0) { ++ return ret; ++ } ++ + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + qemu_fflush(f); + +@@ -3135,7 +3140,11 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) + out: + if (ret >= 0 + && migration_is_setup_or_active(migrate_get_current()->state)) { +- multifd_send_sync_main(rs->f); ++ ret = multifd_send_sync_main(rs->f); ++ if (ret < 0) { ++ return ret; ++ } ++ + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + qemu_fflush(f); + ram_counters.transferred += 8; +@@ -3193,13 +3202,19 @@ static int ram_save_complete(QEMUFile *f, void *opaque) + ram_control_after_iterate(f, RAM_CONTROL_FINISH); + } + +- if (ret >= 0) { +- multifd_send_sync_main(rs->f); +- qemu_put_be64(f, RAM_SAVE_FLAG_EOS); +- qemu_fflush(f); ++ if (ret < 0) { ++ return ret; + } + +- return ret; ++ ret = multifd_send_sync_main(rs->f); ++ if (ret < 0) { ++ return ret; ++ } ++ ++ qemu_put_be64(f, RAM_SAVE_FLAG_EOS); ++ qemu_fflush(f); ++ ++ return 0; + } + + static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size, +-- +2.35.3 + diff --git a/SOURCES/kvm-multifd-remove-used-parameter-from-send_prepare-meth.patch b/SOURCES/kvm-multifd-remove-used-parameter-from-send_prepare-meth.patch new file mode 100644 index 0000000..3f3b923 --- /dev/null +++ b/SOURCES/kvm-multifd-remove-used-parameter-from-send_prepare-meth.patch @@ -0,0 +1,135 @@ +From 5f53448092c944857a2b89138f22c5ab335d8250 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:23 -0300 +Subject: [PATCH 05/37] multifd: remove used parameter from send_prepare() + method +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [5/26] ad6360d19d65e8c332dcdc3d3234478639e03db8 +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +It is already there as p->pages->num. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit 02fb81043ecee338e4aeb8f5be09a46325dc5e43) +Signed-off-by: Leonardo Bras +--- + migration/multifd-zlib.c | 7 +++---- + migration/multifd-zstd.c | 7 +++---- + migration/multifd.c | 9 +++------ + migration/multifd.h | 2 +- + 4 files changed, 10 insertions(+), 15 deletions(-) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index f403d2f031..0c70a2dc78 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -96,10 +96,9 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp) + * Returns 0 for success or -1 for error + * + * @p: Params for the channel that we are using +- * @used: number of pages used + * @errp: pointer to an error + */ +-static int zlib_send_prepare(MultiFDSendParams *p, uint32_t used, Error **errp) ++static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + { + struct iovec *iov = p->pages->iov; + struct zlib_data *z = p->data; +@@ -108,11 +107,11 @@ static int zlib_send_prepare(MultiFDSendParams *p, uint32_t used, Error **errp) + int ret; + uint32_t i; + +- for (i = 0; i < used; i++) { ++ for (i = 0; i < p->pages->num; i++) { + uint32_t available = z->zbuff_len - out_size; + int flush = Z_NO_FLUSH; + +- if (i == used - 1) { ++ if (i == p->pages->num - 1) { + flush = Z_SYNC_FLUSH; + } + +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index 8d657f8860..466b370cad 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -109,10 +109,9 @@ static void zstd_send_cleanup(MultiFDSendParams *p, Error **errp) + * Returns 0 for success or -1 for error + * + * @p: Params for the channel that we are using +- * @used: number of pages used + * @errp: pointer to an error + */ +-static int zstd_send_prepare(MultiFDSendParams *p, uint32_t used, Error **errp) ++static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + { + struct iovec *iov = p->pages->iov; + struct zstd_data *z = p->data; +@@ -123,10 +122,10 @@ static int zstd_send_prepare(MultiFDSendParams *p, uint32_t used, Error **errp) + z->out.size = z->zbuff_len; + z->out.pos = 0; + +- for (i = 0; i < used; i++) { ++ for (i = 0; i < p->pages->num; i++) { + ZSTD_EndDirective flush = ZSTD_e_continue; + +- if (i == used - 1) { ++ if (i == p->pages->num - 1) { + flush = ZSTD_e_flush; + } + z->in.src = iov[i].iov_base; +diff --git a/migration/multifd.c b/migration/multifd.c +index ce7101cf9d..098ef8842c 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -82,13 +82,11 @@ static void nocomp_send_cleanup(MultiFDSendParams *p, Error **errp) + * Returns 0 for success or -1 for error + * + * @p: Params for the channel that we are using +- * @used: number of pages used + * @errp: pointer to an error + */ +-static int nocomp_send_prepare(MultiFDSendParams *p, uint32_t used, +- Error **errp) ++static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp) + { +- p->next_packet_size = used * qemu_target_page_size(); ++ p->next_packet_size = p->pages->num * qemu_target_page_size(); + p->flags |= MULTIFD_FLAG_NOCOMP; + return 0; + } +@@ -654,8 +652,7 @@ static void *multifd_send_thread(void *opaque) + uint32_t flags = p->flags; + + if (used) { +- ret = multifd_send_state->ops->send_prepare(p, used, +- &local_err); ++ ret = multifd_send_state->ops->send_prepare(p, &local_err); + if (ret != 0) { + qemu_mutex_unlock(&p->mutex); + break; +diff --git a/migration/multifd.h b/migration/multifd.h +index 86820dd028..7968cc5c20 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -159,7 +159,7 @@ typedef struct { + /* Cleanup for sending side */ + void (*send_cleanup)(MultiFDSendParams *p, Error **errp); + /* Prepare the send packet */ +- int (*send_prepare)(MultiFDSendParams *p, uint32_t used, Error **errp); ++ int (*send_prepare)(MultiFDSendParams *p, Error **errp); + /* Write the send packet */ + int (*send_write)(MultiFDSendParams *p, uint32_t used, Error **errp); + /* Setup for receiving side */ +-- +2.35.3 + diff --git a/SOURCES/kvm-multifd-remove-used-parameter-from-send_recv_pages-m.patch b/SOURCES/kvm-multifd-remove-used-parameter-from-send_recv_pages-m.patch new file mode 100644 index 0000000..02c5918 --- /dev/null +++ b/SOURCES/kvm-multifd-remove-used-parameter-from-send_recv_pages-m.patch @@ -0,0 +1,149 @@ +From 8cdedf86dc193673ea24516e7b44f8b4da5dd713 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:23 -0300 +Subject: [PATCH 06/37] multifd: remove used parameter from send_recv_pages() + method +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [6/26] 5c1a506e4178501a0894ea4e7ac919e1d4d4cc32 +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +It is already there as p->pages->num. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit 40a4bfe9d3f8ad35a9c3ffb4cbf7367e2777054b) +Signed-off-by: Leonardo Bras +--- + migration/multifd-zlib.c | 9 ++++----- + migration/multifd-zstd.c | 7 +++---- + migration/multifd.c | 7 +++---- + migration/multifd.h | 2 +- + 4 files changed, 11 insertions(+), 14 deletions(-) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index 0c70a2dc78..330fc021c5 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -235,17 +235,16 @@ static void zlib_recv_cleanup(MultiFDRecvParams *p) + * Returns 0 for success or -1 for error + * + * @p: Params for the channel that we are using +- * @used: number of pages used + * @errp: pointer to an error + */ +-static int zlib_recv_pages(MultiFDRecvParams *p, uint32_t used, Error **errp) ++static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp) + { + struct zlib_data *z = p->data; + z_stream *zs = &z->zs; + uint32_t in_size = p->next_packet_size; + /* we measure the change of total_out */ + uint32_t out_size = zs->total_out; +- uint32_t expected_size = used * qemu_target_page_size(); ++ uint32_t expected_size = p->pages->num * qemu_target_page_size(); + uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; + int ret; + int i; +@@ -264,12 +263,12 @@ static int zlib_recv_pages(MultiFDRecvParams *p, uint32_t used, Error **errp) + zs->avail_in = in_size; + zs->next_in = z->zbuff; + +- for (i = 0; i < used; i++) { ++ for (i = 0; i < p->pages->num; i++) { + struct iovec *iov = &p->pages->iov[i]; + int flush = Z_NO_FLUSH; + unsigned long start = zs->total_out; + +- if (i == used - 1) { ++ if (i == p->pages->num - 1) { + flush = Z_SYNC_FLUSH; + } + +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index 466b370cad..f0d1105792 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -255,14 +255,13 @@ static void zstd_recv_cleanup(MultiFDRecvParams *p) + * Returns 0 for success or -1 for error + * + * @p: Params for the channel that we are using +- * @used: number of pages used + * @errp: pointer to an error + */ +-static int zstd_recv_pages(MultiFDRecvParams *p, uint32_t used, Error **errp) ++static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp) + { + uint32_t in_size = p->next_packet_size; + uint32_t out_size = 0; +- uint32_t expected_size = used * qemu_target_page_size(); ++ uint32_t expected_size = p->pages->num * qemu_target_page_size(); + uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; + struct zstd_data *z = p->data; + int ret; +@@ -283,7 +282,7 @@ static int zstd_recv_pages(MultiFDRecvParams *p, uint32_t used, Error **errp) + z->in.size = in_size; + z->in.pos = 0; + +- for (i = 0; i < used; i++) { ++ for (i = 0; i < p->pages->num; i++) { + struct iovec *iov = &p->pages->iov[i]; + + z->out.dst = iov->iov_base; +diff --git a/migration/multifd.c b/migration/multifd.c +index 098ef8842c..55d99a8232 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -141,10 +141,9 @@ static void nocomp_recv_cleanup(MultiFDRecvParams *p) + * Returns 0 for success or -1 for error + * + * @p: Params for the channel that we are using +- * @used: number of pages used + * @errp: pointer to an error + */ +-static int nocomp_recv_pages(MultiFDRecvParams *p, uint32_t used, Error **errp) ++static int nocomp_recv_pages(MultiFDRecvParams *p, Error **errp) + { + uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; + +@@ -153,7 +152,7 @@ static int nocomp_recv_pages(MultiFDRecvParams *p, uint32_t used, Error **errp) + p->id, flags, MULTIFD_FLAG_NOCOMP); + return -1; + } +- return qio_channel_readv_all(p->c, p->pages->iov, used, errp); ++ return qio_channel_readv_all(p->c, p->pages->iov, p->pages->num, errp); + } + + static MultiFDMethods multifd_nocomp_ops = { +@@ -1099,7 +1098,7 @@ static void *multifd_recv_thread(void *opaque) + qemu_mutex_unlock(&p->mutex); + + if (used) { +- ret = multifd_recv_state->ops->recv_pages(p, used, &local_err); ++ ret = multifd_recv_state->ops->recv_pages(p, &local_err); + if (ret != 0) { + break; + } +diff --git a/migration/multifd.h b/migration/multifd.h +index 7968cc5c20..e57adc783b 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -167,7 +167,7 @@ typedef struct { + /* Cleanup for receiving side */ + void (*recv_cleanup)(MultiFDRecvParams *p); + /* Read all pages */ +- int (*recv_pages)(MultiFDRecvParams *p, uint32_t used, Error **errp); ++ int (*recv_pages)(MultiFDRecvParams *p, Error **errp); + } MultiFDMethods; + + void multifd_register_ops(int method, MultiFDMethods *ops); +-- +2.35.3 + diff --git a/SOURCES/kvm-numa-Enable-numa-for-SGX-EPC-sections.patch b/SOURCES/kvm-numa-Enable-numa-for-SGX-EPC-sections.patch new file mode 100644 index 0000000..68f7647 --- /dev/null +++ b/SOURCES/kvm-numa-Enable-numa-for-SGX-EPC-sections.patch @@ -0,0 +1,287 @@ +From 35bf6693fb5bba5a9d5fdf4a7fdac06ce574b83d Mon Sep 17 00:00:00 2001 +From: Yang Zhong +Date: Mon, 1 Nov 2021 12:20:05 -0400 +Subject: [PATCH 1/7] numa: Enable numa for SGX EPC sections + +RH-Author: Paul Lai +RH-MergeRequest: 111: numa: Enable numa for SGX EPC sections +RH-Commit: [1/5] c29297cbacc4cb65c9ac125db349a767aa2574af +RH-Bugzilla: 1518984 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Bandan Das +RH-Acked-by: Cornelia Huck + +The basic SGX did not enable numa for SGX EPC sections, which +result in all EPC sections located in numa node 0. This patch +enable SGX numa function in the guest and the EPC section can +work with RAM as one numa node. + +The Guest kernel related log: +[ 0.009981] ACPI: SRAT: Node 0 PXM 0 [mem 0x180000000-0x183ffffff] +[ 0.009982] ACPI: SRAT: Node 1 PXM 1 [mem 0x184000000-0x185bfffff] +The SRAT table can normally show SGX EPC sections menory info in different +numa nodes. + +The SGX EPC numa related command: + ...... + -m 4G,maxmem=20G \ + -smp sockets=2,cores=2 \ + -cpu host,+sgx-provisionkey \ + -object memory-backend-ram,size=2G,host-nodes=0,policy=bind,id=node0 \ + -object memory-backend-epc,id=mem0,size=64M,prealloc=on,host-nodes=0,policy=bind \ + -numa node,nodeid=0,cpus=0-1,memdev=node0 \ + -object memory-backend-ram,size=2G,host-nodes=1,policy=bind,id=node1 \ + -object memory-backend-epc,id=mem1,size=28M,prealloc=on,host-nodes=1,policy=bind \ + -numa node,nodeid=1,cpus=2-3,memdev=node1 \ + -M sgx-epc.0.memdev=mem0,sgx-epc.0.node=0,sgx-epc.1.memdev=mem1,sgx-epc.1.node=1 \ + ...... + +Signed-off-by: Yang Zhong +Message-Id: <20211101162009.62161-2-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 1105812382e1126d86dddc16b3700f8c79dc93d1) +Signed-off-by: Paul Lai +--- + hw/core/numa.c | 5 ++--- + hw/i386/acpi-build.c | 2 ++ + hw/i386/sgx-epc.c | 3 +++ + hw/i386/sgx-stub.c | 4 ++++ + hw/i386/sgx.c | 44 +++++++++++++++++++++++++++++++++++++++ + include/hw/i386/sgx-epc.h | 3 +++ + monitor/hmp-cmds.c | 1 + + qapi/machine.json | 10 ++++++++- + qemu-options.hx | 4 ++-- + 9 files changed, 70 insertions(+), 6 deletions(-) + +diff --git a/hw/core/numa.c b/hw/core/numa.c +index e6050b2273..1aa05dcf42 100644 +--- a/hw/core/numa.c ++++ b/hw/core/numa.c +@@ -784,9 +784,8 @@ static void numa_stat_memory_devices(NumaNodeMem node_mem[]) + break; + case MEMORY_DEVICE_INFO_KIND_SGX_EPC: + se = value->u.sgx_epc.data; +- /* TODO: once we support numa, assign to right node */ +- node_mem[0].node_mem += se->size; +- node_mem[0].node_plugged_mem += se->size; ++ node_mem[se->node].node_mem += se->size; ++ node_mem[se->node].node_plugged_mem = 0; + break; + default: + g_assert_not_reached(); +diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c +index 447ea35275..a4478e77b7 100644 +--- a/hw/i386/acpi-build.c ++++ b/hw/i386/acpi-build.c +@@ -2071,6 +2071,8 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) + nvdimm_build_srat(table_data); + } + ++ sgx_epc_build_srat(table_data); ++ + /* + * TODO: this part is not in ACPI spec and current linux kernel boots fine + * without these entries. But I recall there were issues the last time I +diff --git a/hw/i386/sgx-epc.c b/hw/i386/sgx-epc.c +index e508827e78..96b2940d75 100644 +--- a/hw/i386/sgx-epc.c ++++ b/hw/i386/sgx-epc.c +@@ -21,6 +21,7 @@ + + static Property sgx_epc_properties[] = { + DEFINE_PROP_UINT64(SGX_EPC_ADDR_PROP, SGXEPCDevice, addr, 0), ++ DEFINE_PROP_UINT32(SGX_EPC_NUMA_NODE_PROP, SGXEPCDevice, node, 0), + DEFINE_PROP_LINK(SGX_EPC_MEMDEV_PROP, SGXEPCDevice, hostmem, + TYPE_MEMORY_BACKEND_EPC, HostMemoryBackendEpc *), + DEFINE_PROP_END_OF_LIST(), +@@ -139,6 +140,8 @@ static void sgx_epc_md_fill_device_info(const MemoryDeviceState *md, + se->memaddr = epc->addr; + se->size = object_property_get_uint(OBJECT(epc), SGX_EPC_SIZE_PROP, + NULL); ++ se->node = object_property_get_uint(OBJECT(epc), SGX_EPC_NUMA_NODE_PROP, ++ NULL); + se->memdev = object_get_canonical_path(OBJECT(epc->hostmem)); + + info->u.sgx_epc.data = se; +diff --git a/hw/i386/sgx-stub.c b/hw/i386/sgx-stub.c +index c9b379e665..26833eb233 100644 +--- a/hw/i386/sgx-stub.c ++++ b/hw/i386/sgx-stub.c +@@ -6,6 +6,10 @@ + #include "qapi/error.h" + #include "qapi/qapi-commands-misc-target.h" + ++void sgx_epc_build_srat(GArray *table_data) ++{ ++} ++ + SGXInfo *qmp_query_sgx(Error **errp) + { + error_setg(errp, "SGX support is not compiled in"); +diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c +index 8fef3dd8fa..d04299904a 100644 +--- a/hw/i386/sgx.c ++++ b/hw/i386/sgx.c +@@ -23,6 +23,7 @@ + #include "sysemu/hw_accel.h" + #include "sysemu/reset.h" + #include ++#include "hw/acpi/aml-build.h" + + #define SGX_MAX_EPC_SECTIONS 8 + #define SGX_CPUID_EPC_INVALID 0x0 +@@ -36,6 +37,46 @@ + + #define RETRY_NUM 2 + ++static int sgx_epc_device_list(Object *obj, void *opaque) ++{ ++ GSList **list = opaque; ++ ++ if (object_dynamic_cast(obj, TYPE_SGX_EPC)) { ++ *list = g_slist_append(*list, DEVICE(obj)); ++ } ++ ++ object_child_foreach(obj, sgx_epc_device_list, opaque); ++ return 0; ++} ++ ++static GSList *sgx_epc_get_device_list(void) ++{ ++ GSList *list = NULL; ++ ++ object_child_foreach(qdev_get_machine(), sgx_epc_device_list, &list); ++ return list; ++} ++ ++void sgx_epc_build_srat(GArray *table_data) ++{ ++ GSList *device_list = sgx_epc_get_device_list(); ++ ++ for (; device_list; device_list = device_list->next) { ++ DeviceState *dev = device_list->data; ++ Object *obj = OBJECT(dev); ++ uint64_t addr, size; ++ int node; ++ ++ node = object_property_get_uint(obj, SGX_EPC_NUMA_NODE_PROP, ++ &error_abort); ++ addr = object_property_get_uint(obj, SGX_EPC_ADDR_PROP, &error_abort); ++ size = object_property_get_uint(obj, SGX_EPC_SIZE_PROP, &error_abort); ++ ++ build_srat_memory(table_data, addr, size, node, MEM_AFFINITY_ENABLED); ++ } ++ g_slist_free(device_list); ++} ++ + static uint64_t sgx_calc_section_metric(uint64_t low, uint64_t high) + { + return (low & MAKE_64BIT_MASK(12, 20)) + +@@ -226,6 +267,9 @@ void pc_machine_init_sgx_epc(PCMachineState *pcms) + /* set the memdev link with memory backend */ + object_property_parse(obj, SGX_EPC_MEMDEV_PROP, list->value->memdev, + &error_fatal); ++ /* set the numa node property for sgx epc object */ ++ object_property_set_uint(obj, SGX_EPC_NUMA_NODE_PROP, list->value->node, ++ &error_fatal); + object_property_set_bool(obj, "realized", true, &error_fatal); + object_unref(obj); + } +diff --git a/include/hw/i386/sgx-epc.h b/include/hw/i386/sgx-epc.h +index a6a65be854..581fac389a 100644 +--- a/include/hw/i386/sgx-epc.h ++++ b/include/hw/i386/sgx-epc.h +@@ -25,6 +25,7 @@ + #define SGX_EPC_ADDR_PROP "addr" + #define SGX_EPC_SIZE_PROP "size" + #define SGX_EPC_MEMDEV_PROP "memdev" ++#define SGX_EPC_NUMA_NODE_PROP "node" + + /** + * SGXEPCDevice: +@@ -38,6 +39,7 @@ typedef struct SGXEPCDevice { + + /* public */ + uint64_t addr; ++ uint32_t node; + HostMemoryBackendEpc *hostmem; + } SGXEPCDevice; + +@@ -56,6 +58,7 @@ typedef struct SGXEPCState { + } SGXEPCState; + + bool sgx_epc_get_section(int section_nr, uint64_t *addr, uint64_t *size); ++void sgx_epc_build_srat(GArray *table_data); + + static inline uint64_t sgx_epc_above_4g_end(SGXEPCState *sgx_epc) + { +diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c +index 9c91bf93e9..2669156b28 100644 +--- a/monitor/hmp-cmds.c ++++ b/monitor/hmp-cmds.c +@@ -1810,6 +1810,7 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict) + se->id ? se->id : ""); + monitor_printf(mon, " memaddr: 0x%" PRIx64 "\n", se->memaddr); + monitor_printf(mon, " size: %" PRIu64 "\n", se->size); ++ monitor_printf(mon, " node: %" PRId64 "\n", se->node); + monitor_printf(mon, " memdev: %s\n", se->memdev); + break; + default: +diff --git a/qapi/machine.json b/qapi/machine.json +index 067e3f5378..16e771affc 100644 +--- a/qapi/machine.json ++++ b/qapi/machine.json +@@ -1207,12 +1207,15 @@ + # + # @memdev: memory backend linked with device + # ++# @node: the numa node ++# + # Since: 6.2 + ## + { 'struct': 'SgxEPCDeviceInfo', + 'data': { '*id': 'str', + 'memaddr': 'size', + 'size': 'size', ++ 'node': 'int', + 'memdev': 'str' + } + } +@@ -1285,10 +1288,15 @@ + # + # @memdev: memory backend linked with device + # ++# @node: the numa node ++# + # Since: 6.2 + ## + { 'struct': 'SgxEPC', +- 'data': { 'memdev': 'str' } } ++ 'data': { 'memdev': 'str', ++ 'node': 'int' ++ } ++} + + ## + # @SgxEPCProperties: +diff --git a/qemu-options.hx b/qemu-options.hx +index 94c4a8dbaf..4b7798088b 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -127,11 +127,11 @@ SRST + ERST + + DEF("M", HAS_ARG, QEMU_OPTION_M, +- " sgx-epc.0.memdev=memid\n", ++ " sgx-epc.0.memdev=memid,sgx-epc.0.node=numaid\n", + QEMU_ARCH_ALL) + + SRST +-``sgx-epc.0.memdev=@var{memid}`` ++``sgx-epc.0.memdev=@var{memid},sgx-epc.0.node=@var{numaid}`` + Define an SGX EPC section. + ERST + +-- +2.27.0 + diff --git a/SOURCES/kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch b/SOURCES/kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch new file mode 100644 index 0000000..659dc22 --- /dev/null +++ b/SOURCES/kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch @@ -0,0 +1,210 @@ +From ea46a86ba6319ea98573c65af5186cd5399ab0ce Mon Sep 17 00:00:00 2001 +From: Yang Zhong +Date: Mon, 1 Nov 2021 12:20:07 -0400 +Subject: [PATCH 2/7] numa: Support SGX numa in the monitor and Libvirt + interfaces + +RH-Author: Paul Lai +RH-MergeRequest: 111: numa: Enable numa for SGX EPC sections +RH-Commit: [2/5] 403c4f98dccd023293cd3246081ae12f4782bed0 +RH-Bugzilla: 1518984 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Bandan Das +RH-Acked-by: Cornelia Huck + +Add the SGXEPCSection list into SGXInfo to show the multiple +SGX EPC sections detailed info, not the total size like before. +This patch can enable numa support for 'info sgx' command and +QMP interfaces. The new interfaces show each EPC section info +in one numa node. Libvirt can use QMP interface to get the +detailed host SGX EPC capabilities to decide how to allocate +host EPC sections to guest. + +(qemu) info sgx + SGX support: enabled + SGX1 support: enabled + SGX2 support: enabled + FLC support: enabled + NUMA node #0: size=67108864 + NUMA node #1: size=29360128 + +The QMP interface show: +(QEMU) query-sgx +{"return": {"sgx": true, "sgx2": true, "sgx1": true, "sections": \ +[{"node": 0, "size": 67108864}, {"node": 1, "size": 29360128}], "flc": true}} + +(QEMU) query-sgx-capabilities +{"return": {"sgx": true, "sgx2": true, "sgx1": true, "sections": \ +[{"node": 0, "size": 17070817280}, {"node": 1, "size": 17079205888}], "flc": true}} + +Signed-off-by: Yang Zhong +Message-Id: <20211101162009.62161-4-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 4755927ae12547c2e7cb22c5fa1b39038c6c11b1) +Signed-off-by: Paul Lai +--- + hw/i386/sgx.c | 51 +++++++++++++++++++++++++++++++++++-------- + qapi/misc-target.json | 19 ++++++++++++++-- + 2 files changed, 59 insertions(+), 11 deletions(-) + +diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c +index d04299904a..5de5dd0893 100644 +--- a/hw/i386/sgx.c ++++ b/hw/i386/sgx.c +@@ -83,11 +83,13 @@ static uint64_t sgx_calc_section_metric(uint64_t low, uint64_t high) + ((high & MAKE_64BIT_MASK(0, 20)) << 32); + } + +-static uint64_t sgx_calc_host_epc_section_size(void) ++static SGXEPCSectionList *sgx_calc_host_epc_sections(void) + { ++ SGXEPCSectionList *head = NULL, **tail = &head; ++ SGXEPCSection *section; + uint32_t i, type; + uint32_t eax, ebx, ecx, edx; +- uint64_t size = 0; ++ uint32_t j = 0; + + for (i = 0; i < SGX_MAX_EPC_SECTIONS; i++) { + host_cpuid(0x12, i + 2, &eax, &ebx, &ecx, &edx); +@@ -101,10 +103,13 @@ static uint64_t sgx_calc_host_epc_section_size(void) + break; + } + +- size += sgx_calc_section_metric(ecx, edx); ++ section = g_new0(SGXEPCSection, 1); ++ section->node = j++; ++ section->size = sgx_calc_section_metric(ecx, edx); ++ QAPI_LIST_APPEND(tail, section); + } + +- return size; ++ return head; + } + + static void sgx_epc_reset(void *opaque) +@@ -168,13 +173,35 @@ SGXInfo *qmp_query_sgx_capabilities(Error **errp) + info->sgx1 = eax & (1U << 0) ? true : false; + info->sgx2 = eax & (1U << 1) ? true : false; + +- info->section_size = sgx_calc_host_epc_section_size(); ++ info->sections = sgx_calc_host_epc_sections(); + + close(fd); + + return info; + } + ++static SGXEPCSectionList *sgx_get_epc_sections_list(void) ++{ ++ GSList *device_list = sgx_epc_get_device_list(); ++ SGXEPCSectionList *head = NULL, **tail = &head; ++ SGXEPCSection *section; ++ ++ for (; device_list; device_list = device_list->next) { ++ DeviceState *dev = device_list->data; ++ Object *obj = OBJECT(dev); ++ ++ section = g_new0(SGXEPCSection, 1); ++ section->node = object_property_get_uint(obj, SGX_EPC_NUMA_NODE_PROP, ++ &error_abort); ++ section->size = object_property_get_uint(obj, SGX_EPC_SIZE_PROP, ++ &error_abort); ++ QAPI_LIST_APPEND(tail, section); ++ } ++ g_slist_free(device_list); ++ ++ return head; ++} ++ + SGXInfo *qmp_query_sgx(Error **errp) + { + SGXInfo *info = NULL; +@@ -193,14 +220,13 @@ SGXInfo *qmp_query_sgx(Error **errp) + return NULL; + } + +- SGXEPCState *sgx_epc = &pcms->sgx_epc; + info = g_new0(SGXInfo, 1); + + info->sgx = true; + info->sgx1 = true; + info->sgx2 = true; + info->flc = true; +- info->section_size = sgx_epc->size; ++ info->sections = sgx_get_epc_sections_list(); + + return info; + } +@@ -208,6 +234,7 @@ SGXInfo *qmp_query_sgx(Error **errp) + void hmp_info_sgx(Monitor *mon, const QDict *qdict) + { + Error *err = NULL; ++ SGXEPCSectionList *section_list, *section; + g_autoptr(SGXInfo) info = qmp_query_sgx(&err); + + if (err) { +@@ -222,8 +249,14 @@ void hmp_info_sgx(Monitor *mon, const QDict *qdict) + info->sgx2 ? "enabled" : "disabled"); + monitor_printf(mon, "FLC support: %s\n", + info->flc ? "enabled" : "disabled"); +- monitor_printf(mon, "size: %" PRIu64 "\n", +- info->section_size); ++ ++ section_list = info->sections; ++ for (section = section_list; section; section = section->next) { ++ monitor_printf(mon, "NUMA node #%" PRId64 ": ", ++ section->value->node); ++ monitor_printf(mon, "size=%" PRIu64 "\n", ++ section->value->size); ++ } + } + + bool sgx_epc_get_section(int section_nr, uint64_t *addr, uint64_t *size) +diff --git a/qapi/misc-target.json b/qapi/misc-target.json +index 5aa2b95b7d..1022aa0184 100644 +--- a/qapi/misc-target.json ++++ b/qapi/misc-target.json +@@ -337,6 +337,21 @@ + 'if': 'TARGET_ARM' } + + ++## ++# @SGXEPCSection: ++# ++# Information about intel SGX EPC section info ++# ++# @node: the numa node ++# ++# @size: the size of epc section ++# ++# Since: 6.2 ++## ++{ 'struct': 'SGXEPCSection', ++ 'data': { 'node': 'int', ++ 'size': 'uint64'}} ++ + ## + # @SGXInfo: + # +@@ -350,7 +365,7 @@ + # + # @flc: true if FLC is supported + # +-# @section-size: The EPC section size for guest ++# @sections: The EPC sections info for guest + # + # Since: 6.2 + ## +@@ -359,7 +374,7 @@ + 'sgx1': 'bool', + 'sgx2': 'bool', + 'flc': 'bool', +- 'section-size': 'uint64'}, ++ 'sections': ['SGXEPCSection']}, + 'if': 'TARGET_I386' } + + ## +-- +2.27.0 + diff --git a/SOURCES/kvm-pc-bios-s390-ccw-Fix-booting-with-logical-block-size.patch b/SOURCES/kvm-pc-bios-s390-ccw-Fix-booting-with-logical-block-size.patch new file mode 100644 index 0000000..83fe9af --- /dev/null +++ b/SOURCES/kvm-pc-bios-s390-ccw-Fix-booting-with-logical-block-size.patch @@ -0,0 +1,63 @@ +From 115507e5e8b97993b50ea7b39d6d4bb493973e46 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 5 Aug 2022 11:42:14 +0200 +Subject: [PATCH 9/9] pc-bios/s390-ccw: Fix booting with logical block size < + physical block size + +RH-Author: Thomas Huth +RH-MergeRequest: 207: pc-bios/s390-ccw: Fix booting with logical block size < physical block size +RH-Commit: [1/1] ab22832592e0a48277bf7aca1b941a1be79aeab6 +RH-Bugzilla: 2112296 +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Claudio Imbrenda + +For accessing single blocks during boot, it's the logical block size that +matters. (Physical block sizes are rather interesting e.g. for creating +file systems with the correct alignment for speed reasons etc.). +So the s390-ccw bios has to use the logical block size for calculating +sector numbers during the boot phase, the "physical_block_exp" shift +value must not be taken into account. This change fixes the boot process +when the guest hast been installed on a disk where the logical block size +differs from the physical one, e.g. if the guest has been installed +like this: + + qemu-system-s390x -nographic -accel kvm -m 2G \ + -drive if=none,id=d1,file=fedora.iso,format=raw,media=cdrom \ + -device virtio-scsi -device scsi-cd,drive=d1 \ + -drive if=none,id=d2,file=test.qcow2,format=qcow2 + -device virtio-blk,drive=d2,physical_block_size=4096,logical_block_size=512 + +Linux correctly uses the logical block size of 512 for the installation, +but the s390-ccw bios tries to boot from a disk with 4096 block size so +far, as long as this patch has not been applied yet (well, it used to work +by accident in the past due to the virtio_assume_scsi() hack that used to +enforce 512 byte sectors on all virtio-block disks, but that hack has been +well removed in commit 5447de2619050a0a4d to fix other scenarios). + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2112296 +Message-Id: <20220805094214.285223-1-thuth@redhat.com> +Reviewed-by: Cornelia Huck +Reviewed-by: Eric Farman +Signed-off-by: Thomas Huth +(cherry picked from commit 393296de19650e1400ca265914cfdeb313725363) +--- + pc-bios/s390-ccw/virtio-blkdev.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c +index 8271c47296..794f99b42c 100644 +--- a/pc-bios/s390-ccw/virtio-blkdev.c ++++ b/pc-bios/s390-ccw/virtio-blkdev.c +@@ -173,7 +173,7 @@ int virtio_get_block_size(void) + + switch (vdev->senseid.cu_model) { + case VIRTIO_ID_BLOCK: +- return vdev->config.blk.blk_size << vdev->config.blk.physical_block_exp; ++ return vdev->config.blk.blk_size; + case VIRTIO_ID_SCSI: + return vdev->scsi_block_size; + } +-- +2.31.1 + diff --git a/SOURCES/kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch b/SOURCES/kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch new file mode 100644 index 0000000..89d8a91 --- /dev/null +++ b/SOURCES/kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch @@ -0,0 +1,180 @@ +From 0e7b71a3f0b3a2e1dba54f02efc15b02f337e031 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 12:29:50 +0200 +Subject: [PATCH 36/37] pc-bios/s390-ccw: Split virtio-scsi code from + virtio_blk_setup_device() + +RH-Author: Thomas Huth +RH-MergeRequest: 198: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [8/9] 8e24806a91c91b2e3603da88e5a22d96a91e8686 +RH-Bugzilla: 2098076 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098076 + +commit cf30b7c4a9b2c64518be8037c2e6670aacdb00b9 +Author: Thomas Huth +Date: Mon Jul 4 13:19:00 2022 +0200 + + pc-bios/s390-ccw: Split virtio-scsi code from virtio_blk_setup_device() + + The next patch is going to add more virtio-block specific code to + virtio_blk_setup_device(), and if the virtio-scsi code is also in + there, this is more cumbersome. And the calling function virtio_setup() + in main.c looks at the device type already anyway, so it's more + logical to separate the virtio-scsi stuff into a new function in + virtio-scsi.c instead. + + Message-Id: <20220704111903.62400-10-thuth@redhat.com> + Reviewed-by: Eric Farman + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/main.c | 24 +++++++++++++++++------- + pc-bios/s390-ccw/virtio-blkdev.c | 20 ++------------------ + pc-bios/s390-ccw/virtio-scsi.c | 19 ++++++++++++++++++- + pc-bios/s390-ccw/virtio-scsi.h | 2 +- + 4 files changed, 38 insertions(+), 27 deletions(-) + +diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c +index 5d2b7ba94d..13e1d8fdf7 100644 +--- a/pc-bios/s390-ccw/main.c ++++ b/pc-bios/s390-ccw/main.c +@@ -14,6 +14,7 @@ + #include "s390-ccw.h" + #include "cio.h" + #include "virtio.h" ++#include "virtio-scsi.h" + #include "dasd-ipl.h" + + char stack[PAGE_SIZE * 8] __attribute__((__aligned__(PAGE_SIZE))); +@@ -218,6 +219,7 @@ static int virtio_setup(void) + { + VDev *vdev = virtio_get_device(); + QemuIplParameters *early_qipl = (QemuIplParameters *)QIPL_ADDRESS; ++ int ret; + + memcpy(&qipl, early_qipl, sizeof(QemuIplParameters)); + +@@ -225,18 +227,26 @@ static int virtio_setup(void) + menu_setup(); + } + +- if (virtio_get_device_type() == VIRTIO_ID_NET) { ++ switch (vdev->senseid.cu_model) { ++ case VIRTIO_ID_NET: + sclp_print("Network boot device detected\n"); + vdev->netboot_start_addr = qipl.netboot_start_addr; +- } else { +- int ret = virtio_blk_setup_device(blk_schid); +- if (ret) { +- return ret; +- } ++ return 0; ++ case VIRTIO_ID_BLOCK: ++ ret = virtio_blk_setup_device(blk_schid); ++ break; ++ case VIRTIO_ID_SCSI: ++ ret = virtio_scsi_setup_device(blk_schid); ++ break; ++ default: ++ panic("\n! No IPL device available !\n"); ++ } ++ ++ if (!ret) { + IPL_assert(virtio_ipl_disk_is_valid(), "No valid IPL device detected"); + } + +- return 0; ++ return ret; + } + + static void ipl_boot_device(void) +diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c +index db1f7f44aa..c175b66a47 100644 +--- a/pc-bios/s390-ccw/virtio-blkdev.c ++++ b/pc-bios/s390-ccw/virtio-blkdev.c +@@ -222,27 +222,11 @@ uint64_t virtio_get_blocks(void) + int virtio_blk_setup_device(SubChannelId schid) + { + VDev *vdev = virtio_get_device(); +- int ret = 0; + + vdev->schid = schid; + virtio_setup_ccw(vdev); + +- switch (vdev->senseid.cu_model) { +- case VIRTIO_ID_BLOCK: +- sclp_print("Using virtio-blk.\n"); +- break; +- case VIRTIO_ID_SCSI: +- IPL_assert(vdev->config.scsi.sense_size == VIRTIO_SCSI_SENSE_SIZE, +- "Config: sense size mismatch"); +- IPL_assert(vdev->config.scsi.cdb_size == VIRTIO_SCSI_CDB_SIZE, +- "Config: CDB size mismatch"); ++ sclp_print("Using virtio-blk.\n"); + +- sclp_print("Using virtio-scsi.\n"); +- ret = virtio_scsi_setup(vdev); +- break; +- default: +- panic("\n! No IPL device available !\n"); +- } +- +- return ret; ++ return 0; + } +diff --git a/pc-bios/s390-ccw/virtio-scsi.c b/pc-bios/s390-ccw/virtio-scsi.c +index 2c8d0f3097..3b7069270c 100644 +--- a/pc-bios/s390-ccw/virtio-scsi.c ++++ b/pc-bios/s390-ccw/virtio-scsi.c +@@ -329,7 +329,7 @@ static void scsi_parse_capacity_report(void *data, + } + } + +-int virtio_scsi_setup(VDev *vdev) ++static int virtio_scsi_setup(VDev *vdev) + { + int retry_test_unit_ready = 3; + uint8_t data[256]; +@@ -430,3 +430,20 @@ int virtio_scsi_setup(VDev *vdev) + + return 0; + } ++ ++int virtio_scsi_setup_device(SubChannelId schid) ++{ ++ VDev *vdev = virtio_get_device(); ++ ++ vdev->schid = schid; ++ virtio_setup_ccw(vdev); ++ ++ IPL_assert(vdev->config.scsi.sense_size == VIRTIO_SCSI_SENSE_SIZE, ++ "Config: sense size mismatch"); ++ IPL_assert(vdev->config.scsi.cdb_size == VIRTIO_SCSI_CDB_SIZE, ++ "Config: CDB size mismatch"); ++ ++ sclp_print("Using virtio-scsi.\n"); ++ ++ return virtio_scsi_setup(vdev); ++} +diff --git a/pc-bios/s390-ccw/virtio-scsi.h b/pc-bios/s390-ccw/virtio-scsi.h +index 4b14c2c2f9..e6b6cd4815 100644 +--- a/pc-bios/s390-ccw/virtio-scsi.h ++++ b/pc-bios/s390-ccw/virtio-scsi.h +@@ -67,8 +67,8 @@ static inline bool virtio_scsi_response_ok(const VirtioScsiCmdResp *r) + return r->response == VIRTIO_SCSI_S_OK && r->status == CDB_STATUS_GOOD; + } + +-int virtio_scsi_setup(VDev *vdev); + int virtio_scsi_read_many(VDev *vdev, + ulong sector, void *load_addr, int sec_num); ++int virtio_scsi_setup_device(SubChannelId schid); + + #endif /* VIRTIO_SCSI_H */ +-- +2.35.3 + diff --git a/SOURCES/kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch b/SOURCES/kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch new file mode 100644 index 0000000..fd34b3d --- /dev/null +++ b/SOURCES/kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch @@ -0,0 +1,102 @@ +From 8433b2ba40d0618c7086da87685e1c51b6da3b11 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 12:29:50 +0200 +Subject: [PATCH 30/37] pc-bios/s390-ccw/bootmap: Improve the guessing logic in + zipl_load_vblk() + +RH-Author: Thomas Huth +RH-MergeRequest: 198: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [2/9] db1d2e7929352bec0e1a5d4cf3fb385bbe02304b +RH-Bugzilla: 2098076 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098076 + +commit 422865f6672ee1482b98d18321b55c1ecfb06c82 +Author: Thomas Huth +Date: Mon Jul 4 13:18:54 2022 +0200 + + pc-bios/s390-ccw/bootmap: Improve the guessing logic in zipl_load_vblk() + + The logic of trying an final ISO or ECKD boot on virtio-block devices is + very weird: Since the geometry hardly ever matches in virtio_disk_is_scsi(), + virtio_blk_setup_device() always sets a "guessed" disk geometry via + virtio_assume_scsi() (which is certainly also wrong in a lot of cases). + + zipl_load_vblk() then sees that there's been a "virtio_guessed_disk_nature" + and tries to fix up the geometry again via virtio_assume_iso9660() before + always trying to do ipl_iso_el_torito(). That's a very brain-twisting + way of attempting to boot from ISO images, which won't work anymore after + the following patches that will clean up the virtio_assume_scsi() mess + (and thus get rid of the "virtio_guessed_disk_nature" here). + + Let's try a better approach instead: ISO files always have a magic + string "CD001" at offset 0x8001 (see e.g. the ECMA-119 specification) + which we can use to decide whether we should try to boot in ISO 9660 + mode (which we should also try if we see a sector size of 2048). + + And if we were not able to boot in ISO mode here, the final boot attempt + before panicking is to boot in ECKD mode. Since this is our last boot + attempt anyway, simply always assume the ECKD geometry here (if the sector + size was not 4096 yet), so that we also do not depend on the guessed disk + geometry from virtio_blk_setup_device() here anymore. + + Message-Id: <20220704111903.62400-4-thuth@redhat.com> + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/bootmap.c | 27 +++++++++++++++++++++++---- + 1 file changed, 23 insertions(+), 4 deletions(-) + +diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c +index 56411ab3b6..994e59c0b0 100644 +--- a/pc-bios/s390-ccw/bootmap.c ++++ b/pc-bios/s390-ccw/bootmap.c +@@ -780,18 +780,37 @@ static void ipl_iso_el_torito(void) + } + } + ++/** ++ * Detect whether we're trying to boot from an .ISO image. ++ * These always have a signature string "CD001" at offset 0x8001. ++ */ ++static bool has_iso_signature(void) ++{ ++ int blksize = virtio_get_block_size(); ++ ++ if (!blksize || virtio_read(0x8000 / blksize, sec)) { ++ return false; ++ } ++ ++ return !memcmp("CD001", &sec[1], 5); ++} ++ + /*********************************************************************** + * Bus specific IPL sequences + */ + + static void zipl_load_vblk(void) + { +- if (virtio_guessed_disk_nature()) { +- virtio_assume_iso9660(); ++ int blksize = virtio_get_block_size(); ++ ++ if (blksize == VIRTIO_ISO_BLOCK_SIZE || has_iso_signature()) { ++ if (blksize != VIRTIO_ISO_BLOCK_SIZE) { ++ virtio_assume_iso9660(); ++ } ++ ipl_iso_el_torito(); + } +- ipl_iso_el_torito(); + +- if (virtio_guessed_disk_nature()) { ++ if (blksize != VIRTIO_DASD_DEFAULT_BLOCK_SIZE) { + sclp_print("Using guessed DASD geometry.\n"); + virtio_assume_eckd(); + } +-- +2.35.3 + diff --git a/SOURCES/kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch b/SOURCES/kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch new file mode 100644 index 0000000..84bf0ce --- /dev/null +++ b/SOURCES/kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch @@ -0,0 +1,56 @@ +From 8b05a4aa32e5ae6cdbc16a5350f6df35d2d79efc Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 12:29:50 +0200 +Subject: [PATCH 35/37] pc-bios/s390-ccw/virtio: Beautify the code for reading + virtqueue configuration + +RH-Author: Thomas Huth +RH-MergeRequest: 198: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [7/9] 52fb7fee7d7c46397f32e35bd5f92f82616dfb5c +RH-Bugzilla: 2098076 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098076 + +commit 070824885741f5d2a66626d3c4ecb2773c8e0552 +Author: Thomas Huth +Date: Mon Jul 4 13:18:59 2022 +0200 + + pc-bios/s390-ccw/virtio: Beautify the code for reading virtqueue configuration + + It looks nicer if we separate the run_ccw() from the IPL_assert() + statement, and the error message should talk about "virtio device" + instead of "block device", since this code is nowadays used for + non-block (i.e. network) devices, too. + + Message-Id: <20220704111903.62400-9-thuth@redhat.com> + Reviewed-by: Cornelia Huck + Reviewed-by: Eric Farman + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/virtio.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c +index d8c2b52710..f37510f312 100644 +--- a/pc-bios/s390-ccw/virtio.c ++++ b/pc-bios/s390-ccw/virtio.c +@@ -289,9 +289,8 @@ void virtio_setup_ccw(VDev *vdev) + .num = 0, + }; + +- IPL_assert( +- run_ccw(vdev, CCW_CMD_READ_VQ_CONF, &config, sizeof(config), false) == 0, +- "Could not get block device VQ configuration"); ++ rc = run_ccw(vdev, CCW_CMD_READ_VQ_CONF, &config, sizeof(config), false); ++ IPL_assert(rc == 0, "Could not get virtio device VQ configuration"); + info.num = config.num; + vring_init(&vdev->vrings[i], &info); + vdev->vrings[i].schid = vdev->schid; +-- +2.35.3 + diff --git a/SOURCES/kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch b/SOURCES/kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch new file mode 100644 index 0000000..9e9d8e6 --- /dev/null +++ b/SOURCES/kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch @@ -0,0 +1,63 @@ +From 511d05f31824b375057ba8dea3f0343ce6e1c1e8 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 12:29:50 +0200 +Subject: [PATCH 29/37] pc-bios/s390-ccw/virtio: Introduce a macro for the DASD + block size + +RH-Author: Thomas Huth +RH-MergeRequest: 198: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [1/9] 1053101fd5fb591131c567ff98c7d92b63a9dfa9 +RH-Bugzilla: 2098076 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098076 + +commit 1f2c2ee48e87ea743f8e23cc7569dd26c4cf9623 +Author: Thomas Huth +Date: Mon Jul 4 13:18:53 2022 +0200 + + pc-bios/s390-ccw/virtio: Introduce a macro for the DASD block size + + Use VIRTIO_DASD_DEFAULT_BLOCK_SIZE instead of the magic value 4096. + + Message-Id: <20220704111903.62400-3-thuth@redhat.com> + Reviewed-by: Eric Farman + Reviewed-by: Cornelia Huck + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/virtio-blkdev.c | 2 +- + pc-bios/s390-ccw/virtio.h | 1 + + 2 files changed, 2 insertions(+), 1 deletion(-) + +diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c +index 7d35050292..6483307630 100644 +--- a/pc-bios/s390-ccw/virtio-blkdev.c ++++ b/pc-bios/s390-ccw/virtio-blkdev.c +@@ -155,7 +155,7 @@ void virtio_assume_eckd(void) + vdev->config.blk.physical_block_exp = 0; + switch (vdev->senseid.cu_model) { + case VIRTIO_ID_BLOCK: +- vdev->config.blk.blk_size = 4096; ++ vdev->config.blk.blk_size = VIRTIO_DASD_DEFAULT_BLOCK_SIZE; + break; + case VIRTIO_ID_SCSI: + vdev->config.blk.blk_size = vdev->scsi_block_size; +diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h +index 19fceb6495..9e410bde6f 100644 +--- a/pc-bios/s390-ccw/virtio.h ++++ b/pc-bios/s390-ccw/virtio.h +@@ -198,6 +198,7 @@ extern int virtio_read_many(ulong sector, void *load_addr, int sec_num); + #define VIRTIO_SECTOR_SIZE 512 + #define VIRTIO_ISO_BLOCK_SIZE 2048 + #define VIRTIO_SCSI_BLOCK_SIZE 512 ++#define VIRTIO_DASD_DEFAULT_BLOCK_SIZE 4096 + + static inline ulong virtio_sector_adjust(ulong sector) + { +-- +2.35.3 + diff --git a/SOURCES/kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch b/SOURCES/kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch new file mode 100644 index 0000000..53f125a --- /dev/null +++ b/SOURCES/kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch @@ -0,0 +1,67 @@ +From a60940fb7ef026f3aa968e77389efa51ea648ddf Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 12:29:50 +0200 +Subject: [PATCH 34/37] pc-bios/s390-ccw/virtio: Read device config after + feature negotiation + +RH-Author: Thomas Huth +RH-MergeRequest: 198: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [6/9] 99ed8765d614207db19ded75d62c65171674d982 +RH-Bugzilla: 2098076 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098076 + +commit aa5c69ce99411c4886bcd051f288afc02b6d968d +Author: Thomas Huth +Date: Mon Jul 4 13:18:58 2022 +0200 + + pc-bios/s390-ccw/virtio: Read device config after feature negotiation + + Feature negotiation should be done first, since some fields in the + config area can depend on the negotiated features and thus should + rather be read afterwards. + + While we're at it, also adjust the error message here a little bit + (the code is nowadays used for non-block virtio devices, too). + + Message-Id: <20220704111903.62400-8-thuth@redhat.com> + Reviewed-by: Eric Farman + Reviewed-by: Cornelia Huck + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/virtio.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c +index 4e85a2eb82..d8c2b52710 100644 +--- a/pc-bios/s390-ccw/virtio.c ++++ b/pc-bios/s390-ccw/virtio.c +@@ -262,10 +262,6 @@ void virtio_setup_ccw(VDev *vdev) + rc = run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false); + IPL_assert(rc == 0, "Could not write DRIVER status to host"); + +- IPL_assert( +- run_ccw(vdev, CCW_CMD_READ_CONF, &vdev->config, cfg_size, false) == 0, +- "Could not get block device configuration"); +- + /* Feature negotiation */ + for (i = 0; i < ARRAY_SIZE(vdev->guest_features); i++) { + feats.features = 0; +@@ -278,6 +274,9 @@ void virtio_setup_ccw(VDev *vdev) + IPL_assert(rc == 0, "Could not set features bits"); + } + ++ rc = run_ccw(vdev, CCW_CMD_READ_CONF, &vdev->config, cfg_size, false); ++ IPL_assert(rc == 0, "Could not get virtio device configuration"); ++ + for (i = 0; i < vdev->nr_vqs; i++) { + VqInfo info = { + .queue = (unsigned long long) ring_area + (i * VIRTIO_RING_SIZE), +-- +2.35.3 + diff --git a/SOURCES/kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch b/SOURCES/kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch new file mode 100644 index 0000000..b25a352 --- /dev/null +++ b/SOURCES/kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch @@ -0,0 +1,93 @@ +From 5cf01cccb7501c801fa9f21a021bc9e7d1fc56e3 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 12:29:50 +0200 +Subject: [PATCH 33/37] pc-bios/s390-ccw/virtio: Set missing status bits while + initializing + +RH-Author: Thomas Huth +RH-MergeRequest: 198: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [5/9] 6072245f49c229518246b4a0d1be360331305bfa +RH-Bugzilla: 2098076 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098076 + +commit 175aa06a152ef6b58ba9b2e47a1296b024dea70c +Author: Thomas Huth +Date: Mon Jul 4 13:18:57 2022 +0200 + + pc-bios/s390-ccw/virtio: Set missing status bits while initializing + + According chapter "3.1.1 Driver Requirements: Device Initialization" + of the Virtio specification (v1.1), a driver for a device has to set + the ACKNOWLEDGE and DRIVER bits in the status field after resetting + the device. The s390-ccw bios skipped these steps so far and seems + like QEMU never cared. Anyway, it's better to follow the spec, so + let's set these bits now in the right spots, too. + + Message-Id: <20220704111903.62400-7-thuth@redhat.com> + Acked-by: Christian Borntraeger + Reviewed-by: Cornelia Huck + Reviewed-by: Eric Farman + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/virtio.c | 18 ++++++++++++++---- + 1 file changed, 14 insertions(+), 4 deletions(-) + +diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c +index 5d2c6e3381..4e85a2eb82 100644 +--- a/pc-bios/s390-ccw/virtio.c ++++ b/pc-bios/s390-ccw/virtio.c +@@ -220,7 +220,7 @@ int virtio_run(VDev *vdev, int vqid, VirtioCmd *cmd) + void virtio_setup_ccw(VDev *vdev) + { + int i, rc, cfg_size = 0; +- unsigned char status = VIRTIO_CONFIG_S_DRIVER_OK; ++ uint8_t status; + struct VirtioFeatureDesc { + uint32_t features; + uint8_t index; +@@ -234,6 +234,10 @@ void virtio_setup_ccw(VDev *vdev) + + run_ccw(vdev, CCW_CMD_VDEV_RESET, NULL, 0, false); + ++ status = VIRTIO_CONFIG_S_ACKNOWLEDGE; ++ rc = run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false); ++ IPL_assert(rc == 0, "Could not write ACKNOWLEDGE status to host"); ++ + switch (vdev->senseid.cu_model) { + case VIRTIO_ID_NET: + vdev->nr_vqs = 2; +@@ -253,6 +257,11 @@ void virtio_setup_ccw(VDev *vdev) + default: + panic("Unsupported virtio device\n"); + } ++ ++ status |= VIRTIO_CONFIG_S_DRIVER; ++ rc = run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false); ++ IPL_assert(rc == 0, "Could not write DRIVER status to host"); ++ + IPL_assert( + run_ccw(vdev, CCW_CMD_READ_CONF, &vdev->config, cfg_size, false) == 0, + "Could not get block device configuration"); +@@ -291,9 +300,10 @@ void virtio_setup_ccw(VDev *vdev) + run_ccw(vdev, CCW_CMD_SET_VQ, &info, sizeof(info), false) == 0, + "Cannot set VQ info"); + } +- IPL_assert( +- run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false) == 0, +- "Could not write status to host"); ++ ++ status |= VIRTIO_CONFIG_S_DRIVER_OK; ++ rc = run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false); ++ IPL_assert(rc == 0, "Could not write DRIVER_OK status to host"); + } + + bool virtio_is_supported(SubChannelId schid) +-- +2.35.3 + diff --git a/SOURCES/kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch b/SOURCES/kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch new file mode 100644 index 0000000..ff8aab3 --- /dev/null +++ b/SOURCES/kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch @@ -0,0 +1,101 @@ +From 5b3548c50e35729d724403b83e26579d31621367 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 12:29:50 +0200 +Subject: [PATCH 32/37] pc-bios/s390-ccw/virtio-blkdev: Remove + virtio_assume_scsi() + +RH-Author: Thomas Huth +RH-MergeRequest: 198: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [4/9] 5256c4e6f4d5c5aedf1bad3fee30dd3ad230a3dd +RH-Bugzilla: 2098076 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098076 + +commit 5447de2619050a0a4dd480b97f88a9b58da360d1 +Author: Thomas Huth +Date: Mon Jul 4 13:18:56 2022 +0200 + + pc-bios/s390-ccw/virtio-blkdev: Remove virtio_assume_scsi() + + The virtio_assume_scsi() function is very questionable: First, it + is only called for virtio-blk, and not for virtio-scsi, so the naming + is already quite confusing. Second, it is called if we detected a + "invalid" IPL disk, trying to fix it by blindly setting a sector + size of 512. This of course won't work in most cases since disks + might have a different sector size for a reason. + + Thus let's remove this strange function now. The calling code can + also be removed completely, since there is another spot in main.c + that does "IPL_assert(virtio_ipl_disk_is_valid(), ...)" to make + sure that we do not try to IPL from an invalid device. + + Message-Id: <20220704111903.62400-6-thuth@redhat.com> + Reviewed-by: Eric Farman + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/virtio-blkdev.c | 24 ------------------------ + pc-bios/s390-ccw/virtio.h | 1 - + 2 files changed, 25 deletions(-) + +diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c +index 7e13155589..db1f7f44aa 100644 +--- a/pc-bios/s390-ccw/virtio-blkdev.c ++++ b/pc-bios/s390-ccw/virtio-blkdev.c +@@ -112,23 +112,6 @@ VirtioGDN virtio_guessed_disk_nature(void) + return virtio_get_device()->guessed_disk_nature; + } + +-void virtio_assume_scsi(void) +-{ +- VDev *vdev = virtio_get_device(); +- +- switch (vdev->senseid.cu_model) { +- case VIRTIO_ID_BLOCK: +- vdev->guessed_disk_nature = VIRTIO_GDN_SCSI; +- vdev->config.blk.blk_size = VIRTIO_SCSI_BLOCK_SIZE; +- vdev->config.blk.physical_block_exp = 0; +- vdev->blk_factor = 1; +- break; +- case VIRTIO_ID_SCSI: +- vdev->scsi_block_size = VIRTIO_SCSI_BLOCK_SIZE; +- break; +- } +-} +- + void virtio_assume_iso9660(void) + { + VDev *vdev = virtio_get_device(); +@@ -247,13 +230,6 @@ int virtio_blk_setup_device(SubChannelId schid) + switch (vdev->senseid.cu_model) { + case VIRTIO_ID_BLOCK: + sclp_print("Using virtio-blk.\n"); +- if (!virtio_ipl_disk_is_valid()) { +- /* make sure all getters but blocksize return 0 for +- * invalid IPL disk +- */ +- memset(&vdev->config.blk, 0, sizeof(vdev->config.blk)); +- virtio_assume_scsi(); +- } + break; + case VIRTIO_ID_SCSI: + IPL_assert(vdev->config.scsi.sense_size == VIRTIO_SCSI_SENSE_SIZE, +diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h +index 241730effe..600ba5052b 100644 +--- a/pc-bios/s390-ccw/virtio.h ++++ b/pc-bios/s390-ccw/virtio.h +@@ -182,7 +182,6 @@ enum guessed_disk_nature_type { + typedef enum guessed_disk_nature_type VirtioGDN; + + VirtioGDN virtio_guessed_disk_nature(void); +-void virtio_assume_scsi(void); + void virtio_assume_eckd(void); + void virtio_assume_iso9660(void); + +-- +2.35.3 + diff --git a/SOURCES/kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch b/SOURCES/kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch new file mode 100644 index 0000000..ade5ff2 --- /dev/null +++ b/SOURCES/kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch @@ -0,0 +1,63 @@ +From 042e966a70789bd3ed450fa4f57016129a34672e Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 12:29:50 +0200 +Subject: [PATCH 37/37] pc-bios/s390-ccw/virtio-blkdev: Request the right + feature bits + +RH-Author: Thomas Huth +RH-MergeRequest: 198: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [9/9] f04835423d648b04f2187ef9890f2d1689e2b57e +RH-Bugzilla: 2098076 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098076 + +commit 9125a314cca4a1838b09305a87d8efb98f80ab67 +Author: Thomas Huth +Date: Mon Jul 4 13:19:01 2022 +0200 + + pc-bios/s390-ccw/virtio-blkdev: Request the right feature bits + + The virtio-blk code uses the block size and geometry fields in the + config area. According to the virtio-spec, these have to be negotiated + with the right feature bits during initialization, otherwise they + might not be available. QEMU is so far very forgiving and always + provides them, but we should not rely on this behavior, so let's + better request them properly via the VIRTIO_BLK_F_GEOMETRY and + VIRTIO_BLK_F_BLK_SIZE feature bits. + + Message-Id: <20220704111903.62400-11-thuth@redhat.com> + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/virtio-blkdev.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c +index c175b66a47..8271c47296 100644 +--- a/pc-bios/s390-ccw/virtio-blkdev.c ++++ b/pc-bios/s390-ccw/virtio-blkdev.c +@@ -13,6 +13,9 @@ + #include "virtio.h" + #include "virtio-scsi.h" + ++#define VIRTIO_BLK_F_GEOMETRY (1 << 4) ++#define VIRTIO_BLK_F_BLK_SIZE (1 << 6) ++ + static int virtio_blk_read_many(VDev *vdev, ulong sector, void *load_addr, + int sec_num) + { +@@ -223,6 +226,7 @@ int virtio_blk_setup_device(SubChannelId schid) + { + VDev *vdev = virtio_get_device(); + ++ vdev->guest_features[0] = VIRTIO_BLK_F_GEOMETRY | VIRTIO_BLK_F_BLK_SIZE; + vdev->schid = schid; + virtio_setup_ccw(vdev); + +-- +2.35.3 + diff --git a/SOURCES/kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch b/SOURCES/kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch new file mode 100644 index 0000000..1730dd3 --- /dev/null +++ b/SOURCES/kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch @@ -0,0 +1,124 @@ +From f09f2f12133073d6ccab3b2bd95717d435adc442 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 12:29:50 +0200 +Subject: [PATCH 31/37] pc-bios/s390-ccw/virtio-blkdev: Simplify/fix + virtio_ipl_disk_is_valid() + +RH-Author: Thomas Huth +RH-MergeRequest: 198: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [3/9] ca0b836a417ce5bbd26e489551f573d6b2fc9e94 +RH-Bugzilla: 2098076 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098076 + +commit bbf615f7b707f009ef8e757d170902ad33b90644 +Author: Thomas Huth +Date: Mon Jul 4 13:18:55 2022 +0200 + + pc-bios/s390-ccw/virtio-blkdev: Simplify/fix virtio_ipl_disk_is_valid() + + The s390-ccw bios fails to boot if the boot disk is a virtio-blk + disk with a sector size of 4096. For example: + + dasdfmt -b 4096 -d cdl -y -p -M quick /dev/dasdX + fdasd -a /dev/dasdX + install a guest onto /dev/dasdX1 using virtio-blk + qemu-system-s390x -nographic -hda /dev/dasdX1 + + The bios then bails out with: + + ! Cannot read block 0 ! + + Looking at virtio_ipl_disk_is_valid() and especially the function + virtio_disk_is_scsi(), it does not really make sense that we expect + only such a limited disk geometry (like a block size of 512) for + our boot disks. Let's relax the check and allow everything that + remotely looks like a sane disk. + + Message-Id: <20220704111903.62400-5-thuth@redhat.com> + Reviewed-by: Eric Farman + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/virtio-blkdev.c | 41 ++++++-------------------------- + pc-bios/s390-ccw/virtio.h | 2 -- + 2 files changed, 7 insertions(+), 36 deletions(-) + +diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c +index 6483307630..7e13155589 100644 +--- a/pc-bios/s390-ccw/virtio-blkdev.c ++++ b/pc-bios/s390-ccw/virtio-blkdev.c +@@ -166,46 +166,19 @@ void virtio_assume_eckd(void) + virtio_eckd_sectors_for_block_size(vdev->config.blk.blk_size); + } + +-bool virtio_disk_is_scsi(void) +-{ +- VDev *vdev = virtio_get_device(); +- +- if (vdev->guessed_disk_nature == VIRTIO_GDN_SCSI) { +- return true; +- } +- switch (vdev->senseid.cu_model) { +- case VIRTIO_ID_BLOCK: +- return (vdev->config.blk.geometry.heads == 255) +- && (vdev->config.blk.geometry.sectors == 63) +- && (virtio_get_block_size() == VIRTIO_SCSI_BLOCK_SIZE); +- case VIRTIO_ID_SCSI: +- return true; +- } +- return false; +-} +- +-bool virtio_disk_is_eckd(void) ++bool virtio_ipl_disk_is_valid(void) + { ++ int blksize = virtio_get_block_size(); + VDev *vdev = virtio_get_device(); +- const int block_size = virtio_get_block_size(); + +- if (vdev->guessed_disk_nature == VIRTIO_GDN_DASD) { ++ if (vdev->guessed_disk_nature == VIRTIO_GDN_SCSI || ++ vdev->guessed_disk_nature == VIRTIO_GDN_DASD) { + return true; + } +- switch (vdev->senseid.cu_model) { +- case VIRTIO_ID_BLOCK: +- return (vdev->config.blk.geometry.heads == 15) +- && (vdev->config.blk.geometry.sectors == +- virtio_eckd_sectors_for_block_size(block_size)); +- case VIRTIO_ID_SCSI: +- return false; +- } +- return false; +-} + +-bool virtio_ipl_disk_is_valid(void) +-{ +- return virtio_disk_is_scsi() || virtio_disk_is_eckd(); ++ return (vdev->senseid.cu_model == VIRTIO_ID_BLOCK || ++ vdev->senseid.cu_model == VIRTIO_ID_SCSI) && ++ blksize >= 512 && blksize <= 4096; + } + + int virtio_get_block_size(void) +diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h +index 9e410bde6f..241730effe 100644 +--- a/pc-bios/s390-ccw/virtio.h ++++ b/pc-bios/s390-ccw/virtio.h +@@ -186,8 +186,6 @@ void virtio_assume_scsi(void); + void virtio_assume_eckd(void); + void virtio_assume_iso9660(void); + +-extern bool virtio_disk_is_scsi(void); +-extern bool virtio_disk_is_eckd(void); + extern bool virtio_ipl_disk_is_valid(void); + extern int virtio_get_block_size(void); + extern uint8_t virtio_get_heads(void); +-- +2.35.3 + diff --git a/SOURCES/kvm-pci-expose-TYPE_XIO3130_DOWNSTREAM-name.patch b/SOURCES/kvm-pci-expose-TYPE_XIO3130_DOWNSTREAM-name.patch new file mode 100644 index 0000000..817f0ab --- /dev/null +++ b/SOURCES/kvm-pci-expose-TYPE_XIO3130_DOWNSTREAM-name.patch @@ -0,0 +1,83 @@ +From 7998e8aa78caa35c2ab2da44f9e29e21d7548c61 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 23 Mar 2022 13:21:40 -0400 +Subject: [PATCH 05/18] pci: expose TYPE_XIO3130_DOWNSTREAM name + +RH-Author: Jon Maloy +RH-MergeRequest: 134: pci: expose TYPE_XIO3130_DOWNSTREAM name +RH-Commit: [1/2] f09ddcaf686f22b545bf269f87787ebfc33fccda (jmaloy/qemu-kvm) +RH-Bugzilla: 2062610 +RH-Acked-by: Igor Mammedov +RH-Acked-by: Gerd Hoffmann + +BZ: https://bugzilla.redhat.com/2062610 +UPSTREAM: merged +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=44038138 + +commit c41481af9a5d0d463607cc45b45c510875570817 +Author: Igor Mammedov +Date: Tue Mar 1 10:11:58 2022 -0500 + + pci: expose TYPE_XIO3130_DOWNSTREAM name + + Type name will be used in followup patch for cast check + in pcihp code. + + Signed-off-by: Igor Mammedov + Message-Id: <20220301151200.3507298-2-imammedo@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. Tsirkin + +(cherry picked from commit c41481af9a5d0d463607cc45b45c510875570817) +Signed-off-by: Jon Maloy +--- + hw/pci-bridge/xio3130_downstream.c | 3 ++- + include/hw/pci-bridge/xio3130_downstream.h | 15 +++++++++++++++ + 2 files changed, 17 insertions(+), 1 deletion(-) + create mode 100644 include/hw/pci-bridge/xio3130_downstream.h + +diff --git a/hw/pci-bridge/xio3130_downstream.c b/hw/pci-bridge/xio3130_downstream.c +index 04aae72cd6..b17cafd359 100644 +--- a/hw/pci-bridge/xio3130_downstream.c ++++ b/hw/pci-bridge/xio3130_downstream.c +@@ -28,6 +28,7 @@ + #include "migration/vmstate.h" + #include "qapi/error.h" + #include "qemu/module.h" ++#include "hw/pci-bridge/xio3130_downstream.h" + + #define PCI_DEVICE_ID_TI_XIO3130D 0x8233 /* downstream port */ + #define XIO3130_REVISION 0x1 +@@ -173,7 +174,7 @@ static void xio3130_downstream_class_init(ObjectClass *klass, void *data) + } + + static const TypeInfo xio3130_downstream_info = { +- .name = "xio3130-downstream", ++ .name = TYPE_XIO3130_DOWNSTREAM, + .parent = TYPE_PCIE_SLOT, + .class_init = xio3130_downstream_class_init, + .interfaces = (InterfaceInfo[]) { +diff --git a/include/hw/pci-bridge/xio3130_downstream.h b/include/hw/pci-bridge/xio3130_downstream.h +new file mode 100644 +index 0000000000..1d10139aea +--- /dev/null ++++ b/include/hw/pci-bridge/xio3130_downstream.h +@@ -0,0 +1,15 @@ ++/* ++ * TI X3130 pci express downstream port switch ++ * ++ * Copyright (C) 2022 Igor Mammedov ++ * ++ * SPDX-License-Identifier: GPL-2.0-or-later ++ */ ++ ++#ifndef HW_PCI_BRIDGE_XIO3130_DOWNSTREAM_H ++#define HW_PCI_BRIDGE_XIO3130_DOWNSTREAM_H ++ ++#define TYPE_XIO3130_DOWNSTREAM "xio3130-downstream" ++ ++#endif ++ +-- +2.27.0 + diff --git a/SOURCES/kvm-physmem-add-missing-memory-barrier.patch b/SOURCES/kvm-physmem-add-missing-memory-barrier.patch new file mode 100644 index 0000000..f6a2137 --- /dev/null +++ b/SOURCES/kvm-physmem-add-missing-memory-barrier.patch @@ -0,0 +1,55 @@ +From 01c09f31978154f0d2fd699621ae958a8c3ea2a5 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:15:24 -0500 +Subject: [PATCH 08/13] physmem: add missing memory barrier + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2168472 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Eric Auger +RH-Acked-by: Paolo Bonzini +RH-Acked-by: David Hildenbrand +RH-Commit: [8/10] f6a9659f7cf40b78de6e85e4a7c06842273aa770 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472 + +commit 33828ca11da08436e1b32f3e79dabce3061a0427 +Author: Paolo Bonzini +Date: Fri Mar 3 14:36:32 2023 +0100 + + physmem: add missing memory barrier + + Reviewed-by: Richard Henderson + Reviewed-by: David Hildenbrand + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + softmmu/physmem.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/softmmu/physmem.c b/softmmu/physmem.c +index 4d0ef5f92f..2b96fad302 100644 +--- a/softmmu/physmem.c ++++ b/softmmu/physmem.c +@@ -3087,6 +3087,8 @@ void cpu_register_map_client(QEMUBH *bh) + qemu_mutex_lock(&map_client_list_lock); + client->bh = bh; + QLIST_INSERT_HEAD(&map_client_list, client, link); ++ /* Write map_client_list before reading in_use. */ ++ smp_mb(); + if (!qatomic_read(&bounce.in_use)) { + cpu_notify_map_clients_locked(); + } +@@ -3279,6 +3281,7 @@ void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len, + qemu_vfree(bounce.buffer); + bounce.buffer = NULL; + memory_region_unref(bounce.mr); ++ /* Clear in_use before reading map_client_list. */ + qatomic_mb_set(&bounce.in_use, false); + cpu_notify_map_clients(); + } +-- +2.37.3 + diff --git a/SOURCES/kvm-qapi-Cleanup-SGX-related-comments-and-restore-sectio.patch b/SOURCES/kvm-qapi-Cleanup-SGX-related-comments-and-restore-sectio.patch new file mode 100644 index 0000000..5ef458c --- /dev/null +++ b/SOURCES/kvm-qapi-Cleanup-SGX-related-comments-and-restore-sectio.patch @@ -0,0 +1,214 @@ +From d0cd7be4d347ebe118eb8f3f2fc2eb3e3eb77e3a Mon Sep 17 00:00:00 2001 +From: Yang Zhong +Date: Thu, 20 Jan 2022 17:31:04 -0500 +Subject: [PATCH 5/7] qapi: Cleanup SGX related comments and restore + @section-size +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paul Lai +RH-MergeRequest: 111: numa: Enable numa for SGX EPC sections +RH-Commit: [5/5] 497dbeaebb7b8f99f5f8a7de58000dcab0d0c22d +RH-Bugzilla: 1518984 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Bandan Das +RH-Acked-by: Cornelia Huck + +The SGX NUMA patches were merged into Qemu 7.0 release, we need +clarify detailed version history information and also change +some related comments, which make SGX related comments clearer. + +The QMP command schema promises backwards compatibility as standard. +We temporarily restore "@section-size", which can avoid incompatible +API breakage. The "@section-size" will be deprecated in 7.2 version. + +Suggested-by: Daniel P. Berrangé +Signed-off-by: Yang Zhong +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Philippe Mathieu-Daudé +Message-Id: <20220120223104.437161-1-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit a66bd91f030827742778a9e0da19fe55716b4a60) +Signed-off-by: Paul Lai +--- + docs/about/deprecated.rst | 13 +++++++++++++ + hw/i386/sgx.c | 11 +++++++++-- + qapi/machine.json | 4 ++-- + qapi/misc-target.json | 22 +++++++++++++++++----- + 4 files changed, 41 insertions(+), 9 deletions(-) + +diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst +index ff7488cb63..33925edf45 100644 +--- a/docs/about/deprecated.rst ++++ b/docs/about/deprecated.rst +@@ -270,6 +270,19 @@ accepted incorrect commands will return an error. Users should make sure that + all arguments passed to ``device_add`` are consistent with the documented + property types. + ++``query-sgx`` return value member ``section-size`` (since 7.0) ++'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' ++ ++Member ``section-size`` in return value elements with meta-type ``uint64`` is ++deprecated. Use ``sections`` instead. ++ ++ ++``query-sgx-capabilities`` return value member ``section-size`` (since 7.0) ++''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' ++ ++Member ``section-size`` in return value elements with meta-type ``uint64`` is ++deprecated. Use ``sections`` instead. ++ + System accelerators + ------------------- + +diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c +index 5de5dd0893..a2b318dd93 100644 +--- a/hw/i386/sgx.c ++++ b/hw/i386/sgx.c +@@ -83,7 +83,7 @@ static uint64_t sgx_calc_section_metric(uint64_t low, uint64_t high) + ((high & MAKE_64BIT_MASK(0, 20)) << 32); + } + +-static SGXEPCSectionList *sgx_calc_host_epc_sections(void) ++static SGXEPCSectionList *sgx_calc_host_epc_sections(uint64_t *size) + { + SGXEPCSectionList *head = NULL, **tail = &head; + SGXEPCSection *section; +@@ -106,6 +106,7 @@ static SGXEPCSectionList *sgx_calc_host_epc_sections(void) + section = g_new0(SGXEPCSection, 1); + section->node = j++; + section->size = sgx_calc_section_metric(ecx, edx); ++ *size += section->size; + QAPI_LIST_APPEND(tail, section); + } + +@@ -156,6 +157,7 @@ SGXInfo *qmp_query_sgx_capabilities(Error **errp) + { + SGXInfo *info = NULL; + uint32_t eax, ebx, ecx, edx; ++ uint64_t size = 0; + + int fd = qemu_open_old("/dev/sgx_vepc", O_RDWR); + if (fd < 0) { +@@ -173,7 +175,8 @@ SGXInfo *qmp_query_sgx_capabilities(Error **errp) + info->sgx1 = eax & (1U << 0) ? true : false; + info->sgx2 = eax & (1U << 1) ? true : false; + +- info->sections = sgx_calc_host_epc_sections(); ++ info->sections = sgx_calc_host_epc_sections(&size); ++ info->section_size = size; + + close(fd); + +@@ -220,12 +223,14 @@ SGXInfo *qmp_query_sgx(Error **errp) + return NULL; + } + ++ SGXEPCState *sgx_epc = &pcms->sgx_epc; + info = g_new0(SGXInfo, 1); + + info->sgx = true; + info->sgx1 = true; + info->sgx2 = true; + info->flc = true; ++ info->section_size = sgx_epc->size; + info->sections = sgx_get_epc_sections_list(); + + return info; +@@ -249,6 +254,8 @@ void hmp_info_sgx(Monitor *mon, const QDict *qdict) + info->sgx2 ? "enabled" : "disabled"); + monitor_printf(mon, "FLC support: %s\n", + info->flc ? "enabled" : "disabled"); ++ monitor_printf(mon, "size: %" PRIu64 "\n", ++ info->section_size); + + section_list = info->sections; + for (section = section_list; section; section = section->next) { +diff --git a/qapi/machine.json b/qapi/machine.json +index 16e771affc..a9f33d0f27 100644 +--- a/qapi/machine.json ++++ b/qapi/machine.json +@@ -1207,7 +1207,7 @@ + # + # @memdev: memory backend linked with device + # +-# @node: the numa node ++# @node: the numa node (Since: 7.0) + # + # Since: 6.2 + ## +@@ -1288,7 +1288,7 @@ + # + # @memdev: memory backend linked with device + # +-# @node: the numa node ++# @node: the numa node (Since: 7.0) + # + # Since: 6.2 + ## +diff --git a/qapi/misc-target.json b/qapi/misc-target.json +index 1022aa0184..4bc45d2474 100644 +--- a/qapi/misc-target.json ++++ b/qapi/misc-target.json +@@ -344,9 +344,9 @@ + # + # @node: the numa node + # +-# @size: the size of epc section ++# @size: the size of EPC section + # +-# Since: 6.2 ++# Since: 7.0 + ## + { 'struct': 'SGXEPCSection', + 'data': { 'node': 'int', +@@ -365,7 +365,13 @@ + # + # @flc: true if FLC is supported + # +-# @sections: The EPC sections info for guest ++# @section-size: The EPC section size for guest ++# Redundant with @sections. Just for backward compatibility. ++# ++# @sections: The EPC sections info for guest (Since: 7.0) ++# ++# Features: ++# @deprecated: Member @section-size is deprecated. Use @sections instead. + # + # Since: 6.2 + ## +@@ -374,6 +380,8 @@ + 'sgx1': 'bool', + 'sgx2': 'bool', + 'flc': 'bool', ++ 'section-size': { 'type': 'uint64', ++ 'features': [ 'deprecated' ] }, + 'sections': ['SGXEPCSection']}, + 'if': 'TARGET_I386' } + +@@ -390,7 +398,9 @@ + # + # -> { "execute": "query-sgx" } + # <- { "return": { "sgx": true, "sgx1" : true, "sgx2" : true, +-# "flc": true, "section-size" : 0 } } ++# "flc": true, "section-size" : 96468992, ++# "sections": [{"node": 0, "size": 67108864}, ++# {"node": 1, "size": 29360128}]} } + # + ## + { 'command': 'query-sgx', 'returns': 'SGXInfo', 'if': 'TARGET_I386' } +@@ -408,7 +418,9 @@ + # + # -> { "execute": "query-sgx-capabilities" } + # <- { "return": { "sgx": true, "sgx1" : true, "sgx2" : true, +-# "flc": true, "section-size" : 0 } } ++# "flc": true, "section-size" : 96468992, ++# "section" : [{"node": 0, "size": 67108864}, ++# {"node": 1, "size": 29360128}]} } + # + ## + { 'command': 'query-sgx-capabilities', 'returns': 'SGXInfo', 'if': 'TARGET_I386' } +-- +2.27.0 + diff --git a/SOURCES/kvm-qatomic-add-smp_mb__before-after_rmw.patch b/SOURCES/kvm-qatomic-add-smp_mb__before-after_rmw.patch new file mode 100644 index 0000000..3992f4e --- /dev/null +++ b/SOURCES/kvm-qatomic-add-smp_mb__before-after_rmw.patch @@ -0,0 +1,177 @@ +From e7d0e29d1962092af58d0445439671a6e1d91f71 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:10:33 -0500 +Subject: [PATCH 02/13] qatomic: add smp_mb__before/after_rmw() + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2168472 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Eric Auger +RH-Acked-by: Paolo Bonzini +RH-Acked-by: David Hildenbrand +RH-Commit: [2/10] 1f87eb3157abcf23f020881cedce42f76497f348 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472 + +commit ff00bed1897c3d27adc5b0cec6f6eeb5a7d13176 +Author: Paolo Bonzini +Date: Thu Mar 2 11:10:56 2023 +0100 + + qatomic: add smp_mb__before/after_rmw() + + On ARM, seqcst loads and stores (which QEMU does not use) are compiled + respectively as LDAR and STLR instructions. Even though LDAR is + also used for load-acquire operations, it also waits for all STLRs to + leave the store buffer. Thus, LDAR and STLR alone are load-acquire + and store-release operations, but LDAR also provides store-against-load + ordering as long as the previous store is a STLR. + + Compare this to ARMv7, where store-release is DMB+STR and load-acquire + is LDR+DMB, but an additional DMB is needed between store-seqcst and + load-seqcst (e.g. DMB+STR+DMB+LDR+DMB); or with x86, where MOV provides + load-acquire and store-release semantics and the two can be reordered. + + Likewise, on ARM sequentially consistent read-modify-write operations only + need to use LDAXR and STLXR respectively for the load and the store, while + on x86 they need to use the stronger LOCK prefix. + + In a strange twist of events, however, the _stronger_ semantics + of the ARM instructions can end up causing bugs on ARM, not on x86. + The problems occur when seqcst atomics are mixed with relaxed atomics. + + QEMU's atomics try to bridge the Linux API (that most of the developers + are familiar with) and the C11 API, and the two have a substantial + difference: + + - in Linux, strongly-ordered atomics such as atomic_add_return() affect + the global ordering of _all_ memory operations, including for example + READ_ONCE()/WRITE_ONCE() + + - in C11, sequentially consistent atomics (except for seq-cst fences) + only affect the ordering of sequentially consistent operations. + In particular, since relaxed loads are done with LDR on ARM, they are + not ordered against seqcst stores (which are done with STLR). + + QEMU implements high-level synchronization primitives with the idea that + the primitives contain the necessary memory barriers, and the callers can + use relaxed atomics (qatomic_read/qatomic_set) or even regular accesses. + This is very much incompatible with the C11 view that seqcst accesses + are only ordered against other seqcst accesses, and requires using seqcst + fences as in the following example: + + qatomic_set(&y, 1); qatomic_set(&x, 1); + smp_mb(); smp_mb(); + ... qatomic_read(&x) ... ... qatomic_read(&y) ... + + When a qatomic_*() read-modify write operation is used instead of one + or both stores, developers that are more familiar with the Linux API may + be tempted to omit the smp_mb(), which will work on x86 but not on ARM. + + This nasty difference between Linux and C11 read-modify-write operations + has already caused issues in util/async.c and more are being found. + Provide something similar to Linux smp_mb__before/after_atomic(); this + has the double function of documenting clearly why there is a memory + barrier, and avoiding a double barrier on x86 and s390x systems. + + The new macro can already be put to use in qatomic_mb_set(). + + Reviewed-by: Richard Henderson + Reviewed-by: David Hildenbrand + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + docs/devel/atomics.rst | 26 +++++++++++++++++++++----- + include/qemu/atomic.h | 17 ++++++++++++++++- + 2 files changed, 37 insertions(+), 6 deletions(-) + +diff --git a/docs/devel/atomics.rst b/docs/devel/atomics.rst +index 52baa0736d..10fbfc58bb 100644 +--- a/docs/devel/atomics.rst ++++ b/docs/devel/atomics.rst +@@ -25,7 +25,8 @@ provides macros that fall in three camps: + + - weak atomic access and manual memory barriers: ``qatomic_read()``, + ``qatomic_set()``, ``smp_rmb()``, ``smp_wmb()``, ``smp_mb()``, +- ``smp_mb_acquire()``, ``smp_mb_release()``, ``smp_read_barrier_depends()``; ++ ``smp_mb_acquire()``, ``smp_mb_release()``, ``smp_read_barrier_depends()``, ++ ``smp_mb__before_rmw()``, ``smp_mb__after_rmw()``; + + - sequentially consistent atomic access: everything else. + +@@ -470,7 +471,7 @@ and memory barriers, and the equivalents in QEMU: + sequential consistency. + + - in QEMU, ``qatomic_read()`` and ``qatomic_set()`` do not participate in +- the total ordering enforced by sequentially-consistent operations. ++ the ordering enforced by read-modify-write operations. + This is because QEMU uses the C11 memory model. The following example + is correct in Linux but not in QEMU: + +@@ -486,9 +487,24 @@ and memory barriers, and the equivalents in QEMU: + because the read of ``y`` can be moved (by either the processor or the + compiler) before the write of ``x``. + +- Fixing this requires an ``smp_mb()`` memory barrier between the write +- of ``x`` and the read of ``y``. In the common case where only one thread +- writes ``x``, it is also possible to write it like this: ++ Fixing this requires a full memory barrier between the write of ``x`` and ++ the read of ``y``. QEMU provides ``smp_mb__before_rmw()`` and ++ ``smp_mb__after_rmw()``; they act both as an optimization, ++ avoiding the memory barrier on processors where it is unnecessary, ++ and as a clarification of this corner case of the C11 memory model: ++ ++ +--------------------------------+ ++ | QEMU (correct) | ++ +================================+ ++ | :: | ++ | | ++ | a = qatomic_fetch_add(&x, 2);| ++ | smp_mb__after_rmw(); | ++ | b = qatomic_read(&y); | ++ +--------------------------------+ ++ ++ In the common case where only one thread writes ``x``, it is also possible ++ to write it like this: + + +--------------------------------+ + | QEMU (correct) | +diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h +index 112a29910b..7855443cab 100644 +--- a/include/qemu/atomic.h ++++ b/include/qemu/atomic.h +@@ -243,6 +243,20 @@ + #define smp_wmb() smp_mb_release() + #define smp_rmb() smp_mb_acquire() + ++/* ++ * SEQ_CST is weaker than the older __sync_* builtins and Linux ++ * kernel read-modify-write atomics. Provide a macro to obtain ++ * the same semantics. ++ */ ++#if !defined(QEMU_SANITIZE_THREAD) && \ ++ (defined(__i386__) || defined(__x86_64__) || defined(__s390x__)) ++# define smp_mb__before_rmw() signal_barrier() ++# define smp_mb__after_rmw() signal_barrier() ++#else ++# define smp_mb__before_rmw() smp_mb() ++# define smp_mb__after_rmw() smp_mb() ++#endif ++ + /* qatomic_mb_read/set semantics map Java volatile variables. They are + * less expensive on some platforms (notably POWER) than fully + * sequentially consistent operations. +@@ -257,7 +271,8 @@ + #if !defined(__SANITIZE_THREAD__) && \ + (defined(__i386__) || defined(__x86_64__) || defined(__s390x__)) + /* This is more efficient than a store plus a fence. */ +-# define qatomic_mb_set(ptr, i) ((void)qatomic_xchg(ptr, i)) ++# define qatomic_mb_set(ptr, i) \ ++ ({ (void)qatomic_xchg(ptr, i); smp_mb__after_rmw(); }) + #else + # define qatomic_mb_set(ptr, i) \ + ({ qatomic_store_release(ptr, i); smp_mb(); }) +-- +2.37.3 + diff --git a/SOURCES/kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch b/SOURCES/kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch new file mode 100644 index 0000000..8ed0d2e --- /dev/null +++ b/SOURCES/kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch @@ -0,0 +1,162 @@ +From 552e7c8ae2c6e281a72791aefa1729be86f96642 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Tue, 5 Apr 2022 15:46:52 +0200 +Subject: [PATCH 5/6] qcow2: Add errp to rebuild_refcount_structure() + +RH-Author: Hanna Reitz +RH-MergeRequest: 171: qcow2: Improve refcount structure rebuilding +RH-Commit: [3/4] 9dddd1d21383c4cbd528e5a0d42b0c2a7d87c8f6 +RH-Bugzilla: 1519071 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Eric Blake + +Instead of fprint()-ing error messages in rebuild_refcount_structure() +and its rebuild_refcounts_write_refblocks() helper, pass them through an +Error object to qcow2_check_refcounts() (which will then print it). + +Suggested-by: Eric Blake +Signed-off-by: Hanna Reitz +Message-Id: <20220405134652.19278-4-hreitz@redhat.com> +Reviewed-by: Eric Blake +(cherry picked from commit 0423f75351ab83b844a31349218b0eadd830e07a) +Signed-off-by: Hanna Reitz +--- + block/qcow2-refcount.c | 33 +++++++++++++++++++-------------- + 1 file changed, 19 insertions(+), 14 deletions(-) + +diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c +index 555d8ba5ac..09f8ef4927 100644 +--- a/block/qcow2-refcount.c ++++ b/block/qcow2-refcount.c +@@ -2462,7 +2462,8 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs, + static int rebuild_refcounts_write_refblocks( + BlockDriverState *bs, void **refcount_table, int64_t *nb_clusters, + int64_t first_cluster, int64_t end_cluster, +- uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr ++ uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr, ++ Error **errp + ) + { + BDRVQcow2State *s = bs->opaque; +@@ -2513,8 +2514,8 @@ static int rebuild_refcounts_write_refblocks( + nb_clusters, + &first_free_cluster); + if (refblock_offset < 0) { +- fprintf(stderr, "ERROR allocating refblock: %s\n", +- strerror(-refblock_offset)); ++ error_setg_errno(errp, -refblock_offset, ++ "ERROR allocating refblock"); + return refblock_offset; + } + +@@ -2536,6 +2537,7 @@ static int rebuild_refcounts_write_refblocks( + on_disk_reftable_entries * + REFTABLE_ENTRY_SIZE); + if (!on_disk_reftable) { ++ error_setg(errp, "ERROR allocating reftable memory"); + return -ENOMEM; + } + +@@ -2559,7 +2561,7 @@ static int rebuild_refcounts_write_refblocks( + ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset, + s->cluster_size, false); + if (ret < 0) { +- fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); ++ error_setg_errno(errp, -ret, "ERROR writing refblock"); + return ret; + } + +@@ -2575,7 +2577,7 @@ static int rebuild_refcounts_write_refblocks( + ret = bdrv_pwrite(bs->file, refblock_offset, on_disk_refblock, + s->cluster_size); + if (ret < 0) { +- fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); ++ error_setg_errno(errp, -ret, "ERROR writing refblock"); + return ret; + } + +@@ -2598,7 +2600,8 @@ static int rebuild_refcounts_write_refblocks( + static int rebuild_refcount_structure(BlockDriverState *bs, + BdrvCheckResult *res, + void **refcount_table, +- int64_t *nb_clusters) ++ int64_t *nb_clusters, ++ Error **errp) + { + BDRVQcow2State *s = bs->opaque; + int64_t reftable_offset = -1; +@@ -2649,7 +2652,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, + rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters, + 0, *nb_clusters, + &on_disk_reftable, +- &on_disk_reftable_entries); ++ &on_disk_reftable_entries, errp); + if (reftable_size_changed < 0) { + res->check_errors++; + ret = reftable_size_changed; +@@ -2673,8 +2676,8 @@ static int rebuild_refcount_structure(BlockDriverState *bs, + refcount_table, nb_clusters, + &first_free_cluster); + if (reftable_offset < 0) { +- fprintf(stderr, "ERROR allocating reftable: %s\n", +- strerror(-reftable_offset)); ++ error_setg_errno(errp, -reftable_offset, ++ "ERROR allocating reftable"); + res->check_errors++; + ret = reftable_offset; + goto fail; +@@ -2692,7 +2695,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, + reftable_start_cluster, + reftable_end_cluster, + &on_disk_reftable, +- &on_disk_reftable_entries); ++ &on_disk_reftable_entries, errp); + if (reftable_size_changed < 0) { + res->check_errors++; + ret = reftable_size_changed; +@@ -2722,7 +2725,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, + ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, reftable_length, + false); + if (ret < 0) { +- fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); ++ error_setg_errno(errp, -ret, "ERROR writing reftable"); + goto fail; + } + +@@ -2730,7 +2733,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, + ret = bdrv_pwrite(bs->file, reftable_offset, on_disk_reftable, + reftable_length); + if (ret < 0) { +- fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); ++ error_setg_errno(errp, -ret, "ERROR writing reftable"); + goto fail; + } + +@@ -2743,7 +2746,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, + &reftable_offset_and_clusters, + sizeof(reftable_offset_and_clusters)); + if (ret < 0) { +- fprintf(stderr, "ERROR setting reftable: %s\n", strerror(-ret)); ++ error_setg_errno(errp, -ret, "ERROR setting reftable"); + goto fail; + } + +@@ -2811,11 +2814,13 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, + if (rebuild && (fix & BDRV_FIX_ERRORS)) { + BdrvCheckResult old_res = *res; + int fresh_leaks = 0; ++ Error *local_err = NULL; + + fprintf(stderr, "Rebuilding refcount structure\n"); + ret = rebuild_refcount_structure(bs, res, &refcount_table, +- &nb_clusters); ++ &nb_clusters, &local_err); + if (ret < 0) { ++ error_report_err(local_err); + goto fail; + } + +-- +2.27.0 + diff --git a/SOURCES/kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch b/SOURCES/kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch new file mode 100644 index 0000000..a57bf63 --- /dev/null +++ b/SOURCES/kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch @@ -0,0 +1,67 @@ +From 06c73c4b57dd1f47f819d719a63eb39fbe799304 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 12 Jan 2023 20:14:51 +0100 +Subject: [PATCH 1/4] qcow2: Fix theoretical corruption in store_bitmap() error + path +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Kevin Wolf +RH-MergeRequest: 251: qemu-img: Fix exit code for errors closing the image +RH-Bugzilla: 2147617 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Commit: [1/4] d0a26bed7b16db41e7baee1f8f2b3ae54e52dd52 + +In order to write the bitmap table to the image file, it is converted to +big endian. If the write fails, it is passed to clear_bitmap_table() to +free all of the clusters it had allocated before. However, if we don't +convert it back to native endianness first, we'll free things at a wrong +offset. + +In practical terms, the offsets will be so high that we won't actually +free any allocated clusters, but just run into an error, but in theory +this can cause image corruption. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Message-Id: <20230112191454.169353-2-kwolf@redhat.com> +Reviewed-by: Hanna Czenczek +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Kevin Wolf +(cherry picked from commit b03dd9613bcf8fe948581b2b3585510cb525c382) +Signed-off-by: Kevin Wolf +--- + block/qcow2-bitmap.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c +index 8fb4731551..869069415c 100644 +--- a/block/qcow2-bitmap.c ++++ b/block/qcow2-bitmap.c +@@ -115,7 +115,7 @@ static int update_header_sync(BlockDriverState *bs) + return bdrv_flush(bs->file->bs); + } + +-static inline void bitmap_table_to_be(uint64_t *bitmap_table, size_t size) ++static inline void bitmap_table_bswap_be(uint64_t *bitmap_table, size_t size) + { + size_t i; + +@@ -1401,9 +1401,10 @@ static int store_bitmap(BlockDriverState *bs, Qcow2Bitmap *bm, Error **errp) + goto fail; + } + +- bitmap_table_to_be(tb, tb_size); ++ bitmap_table_bswap_be(tb, tb_size); + ret = bdrv_pwrite(bs->file, tb_offset, tb, tb_size * sizeof(tb[0])); + if (ret < 0) { ++ bitmap_table_bswap_be(tb, tb_size); + error_setg_errno(errp, -ret, "Failed to write bitmap '%s' to file", + bm_name); + goto fail; +-- +2.37.3 + diff --git a/SOURCES/kvm-qcow2-Improve-refcount-structure-rebuilding.patch b/SOURCES/kvm-qcow2-Improve-refcount-structure-rebuilding.patch new file mode 100644 index 0000000..efae75f --- /dev/null +++ b/SOURCES/kvm-qcow2-Improve-refcount-structure-rebuilding.patch @@ -0,0 +1,465 @@ +From be54c6206b0f0a19e0ffe6a058f4f97277027a17 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Tue, 5 Apr 2022 15:46:50 +0200 +Subject: [PATCH 3/6] qcow2: Improve refcount structure rebuilding + +RH-Author: Hanna Reitz +RH-MergeRequest: 171: qcow2: Improve refcount structure rebuilding +RH-Commit: [1/4] 0bb78f7735a0730204670ae5ec2e040ad1d23942 +RH-Bugzilla: 1519071 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Eric Blake + +When rebuilding the refcount structures (when qemu-img check -r found +errors with refcount = 0, but reference count > 0), the new refcount +table defaults to being put at the image file end[1]. There is no good +reason for that except that it means we will not have to rewrite any +refblocks we already wrote to disk. + +Changing the code to rewrite those refblocks is not too difficult, +though, so let us do that. That is beneficial for images on block +devices, where we cannot really write beyond the end of the image file. + +Use this opportunity to add extensive comments to the code, and refactor +it a bit, getting rid of the backwards-jumping goto. + +[1] Unless there is something allocated in the area pointed to by the + last refblock, so we have to write that refblock. In that case, we + try to put the reftable in there. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1519071 +Closes: https://gitlab.com/qemu-project/qemu/-/issues/941 +Reviewed-by: Eric Blake +Signed-off-by: Hanna Reitz +Message-Id: <20220405134652.19278-2-hreitz@redhat.com> +(cherry picked from commit a8c07ec287554dcefd33733f0e5888a281ddc95e) +Signed-off-by: Hanna Reitz +--- + block/qcow2-refcount.c | 332 +++++++++++++++++++++++++++++------------ + 1 file changed, 235 insertions(+), 97 deletions(-) + +diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c +index 4614572252..555d8ba5ac 100644 +--- a/block/qcow2-refcount.c ++++ b/block/qcow2-refcount.c +@@ -2435,111 +2435,140 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs, + } + + /* +- * Creates a new refcount structure based solely on the in-memory information +- * given through *refcount_table. All necessary allocations will be reflected +- * in that array. ++ * Helper function for rebuild_refcount_structure(). + * +- * On success, the old refcount structure is leaked (it will be covered by the +- * new refcount structure). ++ * Scan the range of clusters [first_cluster, end_cluster) for allocated ++ * clusters and write all corresponding refblocks to disk. The refblock ++ * and allocation data is taken from the in-memory refcount table ++ * *refcount_table[] (of size *nb_clusters), which is basically one big ++ * (unlimited size) refblock for the whole image. ++ * ++ * For these refblocks, clusters are allocated using said in-memory ++ * refcount table. Care is taken that these allocations are reflected ++ * in the refblocks written to disk. ++ * ++ * The refblocks' offsets are written into a reftable, which is ++ * *on_disk_reftable_ptr[] (of size *on_disk_reftable_entries_ptr). If ++ * that reftable is of insufficient size, it will be resized to fit. ++ * This reftable is not written to disk. ++ * ++ * (If *on_disk_reftable_ptr is not NULL, the entries within are assumed ++ * to point to existing valid refblocks that do not need to be allocated ++ * again.) ++ * ++ * Return whether the on-disk reftable array was resized (true/false), ++ * or -errno on error. + */ +-static int rebuild_refcount_structure(BlockDriverState *bs, +- BdrvCheckResult *res, +- void **refcount_table, +- int64_t *nb_clusters) ++static int rebuild_refcounts_write_refblocks( ++ BlockDriverState *bs, void **refcount_table, int64_t *nb_clusters, ++ int64_t first_cluster, int64_t end_cluster, ++ uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr ++ ) + { + BDRVQcow2State *s = bs->opaque; +- int64_t first_free_cluster = 0, reftable_offset = -1, cluster = 0; ++ int64_t cluster; + int64_t refblock_offset, refblock_start, refblock_index; +- uint32_t reftable_size = 0; +- uint64_t *on_disk_reftable = NULL; ++ int64_t first_free_cluster = 0; ++ uint64_t *on_disk_reftable = *on_disk_reftable_ptr; ++ uint32_t on_disk_reftable_entries = *on_disk_reftable_entries_ptr; + void *on_disk_refblock; +- int ret = 0; +- struct { +- uint64_t reftable_offset; +- uint32_t reftable_clusters; +- } QEMU_PACKED reftable_offset_and_clusters; +- +- qcow2_cache_empty(bs, s->refcount_block_cache); ++ bool reftable_grown = false; ++ int ret; + +-write_refblocks: +- for (; cluster < *nb_clusters; cluster++) { ++ for (cluster = first_cluster; cluster < end_cluster; cluster++) { ++ /* Check all clusters to find refblocks that contain non-zero entries */ + if (!s->get_refcount(*refcount_table, cluster)) { + continue; + } + ++ /* ++ * This cluster is allocated, so we need to create a refblock ++ * for it. The data we will write to disk is just the ++ * respective slice from *refcount_table, so it will contain ++ * accurate refcounts for all clusters belonging to this ++ * refblock. After we have written it, we will therefore skip ++ * all remaining clusters in this refblock. ++ */ ++ + refblock_index = cluster >> s->refcount_block_bits; + refblock_start = refblock_index << s->refcount_block_bits; + +- /* Don't allocate a cluster in a refblock already written to disk */ +- if (first_free_cluster < refblock_start) { +- first_free_cluster = refblock_start; +- } +- refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table, +- nb_clusters, &first_free_cluster); +- if (refblock_offset < 0) { +- fprintf(stderr, "ERROR allocating refblock: %s\n", +- strerror(-refblock_offset)); +- res->check_errors++; +- ret = refblock_offset; +- goto fail; +- } ++ if (on_disk_reftable_entries > refblock_index && ++ on_disk_reftable[refblock_index]) ++ { ++ /* ++ * We can get here after a `goto write_refblocks`: We have a ++ * reftable from a previous run, and the refblock is already ++ * allocated. No need to allocate it again. ++ */ ++ refblock_offset = on_disk_reftable[refblock_index]; ++ } else { ++ int64_t refblock_cluster_index; + +- if (reftable_size <= refblock_index) { +- uint32_t old_reftable_size = reftable_size; +- uint64_t *new_on_disk_reftable; ++ /* Don't allocate a cluster in a refblock already written to disk */ ++ if (first_free_cluster < refblock_start) { ++ first_free_cluster = refblock_start; ++ } ++ refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table, ++ nb_clusters, ++ &first_free_cluster); ++ if (refblock_offset < 0) { ++ fprintf(stderr, "ERROR allocating refblock: %s\n", ++ strerror(-refblock_offset)); ++ return refblock_offset; ++ } + +- reftable_size = ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE, +- s->cluster_size) / REFTABLE_ENTRY_SIZE; +- new_on_disk_reftable = g_try_realloc(on_disk_reftable, +- reftable_size * +- REFTABLE_ENTRY_SIZE); +- if (!new_on_disk_reftable) { +- res->check_errors++; +- ret = -ENOMEM; +- goto fail; ++ refblock_cluster_index = refblock_offset / s->cluster_size; ++ if (refblock_cluster_index >= end_cluster) { ++ /* ++ * We must write the refblock that holds this refblock's ++ * refcount ++ */ ++ end_cluster = refblock_cluster_index + 1; + } +- on_disk_reftable = new_on_disk_reftable; + +- memset(on_disk_reftable + old_reftable_size, 0, +- (reftable_size - old_reftable_size) * REFTABLE_ENTRY_SIZE); ++ if (on_disk_reftable_entries <= refblock_index) { ++ on_disk_reftable_entries = ++ ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE, ++ s->cluster_size) / REFTABLE_ENTRY_SIZE; ++ on_disk_reftable = ++ g_try_realloc(on_disk_reftable, ++ on_disk_reftable_entries * ++ REFTABLE_ENTRY_SIZE); ++ if (!on_disk_reftable) { ++ return -ENOMEM; ++ } + +- /* The offset we have for the reftable is now no longer valid; +- * this will leak that range, but we can easily fix that by running +- * a leak-fixing check after this rebuild operation */ +- reftable_offset = -1; +- } else { +- assert(on_disk_reftable); +- } +- on_disk_reftable[refblock_index] = refblock_offset; ++ memset(on_disk_reftable + *on_disk_reftable_entries_ptr, 0, ++ (on_disk_reftable_entries - ++ *on_disk_reftable_entries_ptr) * ++ REFTABLE_ENTRY_SIZE); + +- /* If this is apparently the last refblock (for now), try to squeeze the +- * reftable in */ +- if (refblock_index == (*nb_clusters - 1) >> s->refcount_block_bits && +- reftable_offset < 0) +- { +- uint64_t reftable_clusters = size_to_clusters(s, reftable_size * +- REFTABLE_ENTRY_SIZE); +- reftable_offset = alloc_clusters_imrt(bs, reftable_clusters, +- refcount_table, nb_clusters, +- &first_free_cluster); +- if (reftable_offset < 0) { +- fprintf(stderr, "ERROR allocating reftable: %s\n", +- strerror(-reftable_offset)); +- res->check_errors++; +- ret = reftable_offset; +- goto fail; ++ *on_disk_reftable_ptr = on_disk_reftable; ++ *on_disk_reftable_entries_ptr = on_disk_reftable_entries; ++ ++ reftable_grown = true; ++ } else { ++ assert(on_disk_reftable); + } ++ on_disk_reftable[refblock_index] = refblock_offset; + } + ++ /* Refblock is allocated, write it to disk */ ++ + ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset, + s->cluster_size, false); + if (ret < 0) { + fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); +- goto fail; ++ return ret; + } + +- /* The size of *refcount_table is always cluster-aligned, therefore the +- * write operation will not overflow */ ++ /* ++ * The refblock is simply a slice of *refcount_table. ++ * Note that the size of *refcount_table is always aligned to ++ * whole clusters, so the write operation will not result in ++ * out-of-bounds accesses. ++ */ + on_disk_refblock = (void *)((char *) *refcount_table + + refblock_index * s->cluster_size); + +@@ -2547,23 +2576,99 @@ write_refblocks: + s->cluster_size); + if (ret < 0) { + fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); +- goto fail; ++ return ret; + } + +- /* Go to the end of this refblock */ ++ /* This refblock is done, skip to its end */ + cluster = refblock_start + s->refcount_block_size - 1; + } + +- if (reftable_offset < 0) { +- uint64_t post_refblock_start, reftable_clusters; ++ return reftable_grown; ++} ++ ++/* ++ * Creates a new refcount structure based solely on the in-memory information ++ * given through *refcount_table (this in-memory information is basically just ++ * the concatenation of all refblocks). All necessary allocations will be ++ * reflected in that array. ++ * ++ * On success, the old refcount structure is leaked (it will be covered by the ++ * new refcount structure). ++ */ ++static int rebuild_refcount_structure(BlockDriverState *bs, ++ BdrvCheckResult *res, ++ void **refcount_table, ++ int64_t *nb_clusters) ++{ ++ BDRVQcow2State *s = bs->opaque; ++ int64_t reftable_offset = -1; ++ int64_t reftable_length = 0; ++ int64_t reftable_clusters; ++ int64_t refblock_index; ++ uint32_t on_disk_reftable_entries = 0; ++ uint64_t *on_disk_reftable = NULL; ++ int ret = 0; ++ int reftable_size_changed = 0; ++ struct { ++ uint64_t reftable_offset; ++ uint32_t reftable_clusters; ++ } QEMU_PACKED reftable_offset_and_clusters; ++ ++ qcow2_cache_empty(bs, s->refcount_block_cache); ++ ++ /* ++ * For each refblock containing entries, we try to allocate a ++ * cluster (in the in-memory refcount table) and write its offset ++ * into on_disk_reftable[]. We then write the whole refblock to ++ * disk (as a slice of the in-memory refcount table). ++ * This is done by rebuild_refcounts_write_refblocks(). ++ * ++ * Once we have scanned all clusters, we try to find space for the ++ * reftable. This will dirty the in-memory refcount table (i.e. ++ * make it differ from the refblocks we have already written), so we ++ * need to run rebuild_refcounts_write_refblocks() again for the ++ * range of clusters where the reftable has been allocated. ++ * ++ * This second run might make the reftable grow again, in which case ++ * we will need to allocate another space for it, which is why we ++ * repeat all this until the reftable stops growing. ++ * ++ * (This loop will terminate, because with every cluster the ++ * reftable grows, it can accomodate a multitude of more refcounts, ++ * so that at some point this must be able to cover the reftable ++ * and all refblocks describing it.) ++ * ++ * We then convert the reftable to big-endian and write it to disk. ++ * ++ * Note that we never free any reftable allocations. Doing so would ++ * needlessly complicate the algorithm: The eventual second check ++ * run we do will clean up all leaks we have caused. ++ */ ++ ++ reftable_size_changed = ++ rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters, ++ 0, *nb_clusters, ++ &on_disk_reftable, ++ &on_disk_reftable_entries); ++ if (reftable_size_changed < 0) { ++ res->check_errors++; ++ ret = reftable_size_changed; ++ goto fail; ++ } ++ ++ /* ++ * There was no reftable before, so rebuild_refcounts_write_refblocks() ++ * must have increased its size (from 0 to something). ++ */ ++ assert(reftable_size_changed); ++ ++ do { ++ int64_t reftable_start_cluster, reftable_end_cluster; ++ int64_t first_free_cluster = 0; ++ ++ reftable_length = on_disk_reftable_entries * REFTABLE_ENTRY_SIZE; ++ reftable_clusters = size_to_clusters(s, reftable_length); + +- post_refblock_start = ROUND_UP(*nb_clusters, s->refcount_block_size); +- reftable_clusters = +- size_to_clusters(s, reftable_size * REFTABLE_ENTRY_SIZE); +- /* Not pretty but simple */ +- if (first_free_cluster < post_refblock_start) { +- first_free_cluster = post_refblock_start; +- } + reftable_offset = alloc_clusters_imrt(bs, reftable_clusters, + refcount_table, nb_clusters, + &first_free_cluster); +@@ -2575,24 +2680,55 @@ write_refblocks: + goto fail; + } + +- goto write_refblocks; +- } ++ /* ++ * We need to update the affected refblocks, so re-run the ++ * write_refblocks loop for the reftable's range of clusters. ++ */ ++ assert(offset_into_cluster(s, reftable_offset) == 0); ++ reftable_start_cluster = reftable_offset / s->cluster_size; ++ reftable_end_cluster = reftable_start_cluster + reftable_clusters; ++ reftable_size_changed = ++ rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters, ++ reftable_start_cluster, ++ reftable_end_cluster, ++ &on_disk_reftable, ++ &on_disk_reftable_entries); ++ if (reftable_size_changed < 0) { ++ res->check_errors++; ++ ret = reftable_size_changed; ++ goto fail; ++ } ++ ++ /* ++ * If the reftable size has changed, we will need to find a new ++ * allocation, repeating the loop. ++ */ ++ } while (reftable_size_changed); + +- for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) { ++ /* The above loop must have run at least once */ ++ assert(reftable_offset >= 0); ++ ++ /* ++ * All allocations are done, all refblocks are written, convert the ++ * reftable to big-endian and write it to disk. ++ */ ++ ++ for (refblock_index = 0; refblock_index < on_disk_reftable_entries; ++ refblock_index++) ++ { + cpu_to_be64s(&on_disk_reftable[refblock_index]); + } + +- ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, +- reftable_size * REFTABLE_ENTRY_SIZE, ++ ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, reftable_length, + false); + if (ret < 0) { + fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); + goto fail; + } + +- assert(reftable_size < INT_MAX / REFTABLE_ENTRY_SIZE); ++ assert(reftable_length < INT_MAX); + ret = bdrv_pwrite(bs->file, reftable_offset, on_disk_reftable, +- reftable_size * REFTABLE_ENTRY_SIZE); ++ reftable_length); + if (ret < 0) { + fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); + goto fail; +@@ -2601,7 +2737,7 @@ write_refblocks: + /* Enter new reftable into the image header */ + reftable_offset_and_clusters.reftable_offset = cpu_to_be64(reftable_offset); + reftable_offset_and_clusters.reftable_clusters = +- cpu_to_be32(size_to_clusters(s, reftable_size * REFTABLE_ENTRY_SIZE)); ++ cpu_to_be32(reftable_clusters); + ret = bdrv_pwrite_sync(bs->file, + offsetof(QCowHeader, refcount_table_offset), + &reftable_offset_and_clusters, +@@ -2611,12 +2747,14 @@ write_refblocks: + goto fail; + } + +- for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) { ++ for (refblock_index = 0; refblock_index < on_disk_reftable_entries; ++ refblock_index++) ++ { + be64_to_cpus(&on_disk_reftable[refblock_index]); + } + s->refcount_table = on_disk_reftable; + s->refcount_table_offset = reftable_offset; +- s->refcount_table_size = reftable_size; ++ s->refcount_table_size = on_disk_reftable_entries; + update_max_refcount_table_index(s); + + return 0; +-- +2.27.0 + diff --git a/SOURCES/kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch b/SOURCES/kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch new file mode 100644 index 0000000..95933af --- /dev/null +++ b/SOURCES/kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch @@ -0,0 +1,75 @@ +From 2f03293910f3ac559f37d45c95325ae29638003a Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:15:14 -0500 +Subject: [PATCH 07/13] qemu-coroutine-lock: add smp_mb__after_rmw() + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2168472 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Eric Auger +RH-Acked-by: Paolo Bonzini +RH-Acked-by: David Hildenbrand +RH-Commit: [7/10] 9cf1b6d3b0dd154489e75ad54a3000ea58983960 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472 + +commit e3a3b6ec8169eab2feb241b4982585001512cd55 +Author: Paolo Bonzini +Date: Fri Mar 3 10:52:59 2023 +0100 + + qemu-coroutine-lock: add smp_mb__after_rmw() + + mutex->from_push and mutex->handoff in qemu-coroutine-lock implement + the familiar pattern: + + write a write b + smp_mb() smp_mb() + read b read a + + The memory barrier is required by the C memory model even after a + SEQ_CST read-modify-write operation such as QSLIST_INSERT_HEAD_ATOMIC. + Add it and avoid the unclear qatomic_mb_read() operation. + + Reviewed-by: Richard Henderson + Reviewed-by: David Hildenbrand + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + util/qemu-coroutine-lock.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c +index 2669403839..a03ed0e664 100644 +--- a/util/qemu-coroutine-lock.c ++++ b/util/qemu-coroutine-lock.c +@@ -206,10 +206,16 @@ static void coroutine_fn qemu_co_mutex_lock_slowpath(AioContext *ctx, + trace_qemu_co_mutex_lock_entry(mutex, self); + push_waiter(mutex, &w); + ++ /* ++ * Add waiter before reading mutex->handoff. Pairs with qatomic_mb_set ++ * in qemu_co_mutex_unlock. ++ */ ++ smp_mb__after_rmw(); ++ + /* This is the "Responsibility Hand-Off" protocol; a lock() picks from + * a concurrent unlock() the responsibility of waking somebody up. + */ +- old_handoff = qatomic_mb_read(&mutex->handoff); ++ old_handoff = qatomic_read(&mutex->handoff); + if (old_handoff && + has_waiters(mutex) && + qatomic_cmpxchg(&mutex->handoff, old_handoff, 0) == old_handoff) { +@@ -308,6 +314,7 @@ void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex) + } + + our_handoff = mutex->sequence; ++ /* Set handoff before checking for waiters. */ + qatomic_mb_set(&mutex->handoff, our_handoff); + if (!has_waiters(mutex)) { + /* The concurrent lock has not added itself yet, so it +-- +2.37.3 + diff --git a/SOURCES/kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch b/SOURCES/kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch new file mode 100644 index 0000000..ae9850e --- /dev/null +++ b/SOURCES/kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch @@ -0,0 +1,70 @@ +From 648193b48d8aeaded90fd657e3610d8040f505fc Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 12 Jan 2023 20:14:53 +0100 +Subject: [PATCH 3/4] qemu-img bitmap: Report errors while closing the image +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Kevin Wolf +RH-MergeRequest: 251: qemu-img: Fix exit code for errors closing the image +RH-Bugzilla: 2147617 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Commit: [3/4] 8e13e09564718a0badd03af84f036246a46a0eba + +blk_unref() can't report any errors that happen while closing the image. +For example, if qcow2 hits an -ENOSPC error while writing out dirty +bitmaps when it's closed, it prints error messages to stderr, but +'qemu-img bitmap' won't see any error return value and will therefore +look successful with exit code 0. + +In order to fix this, manually inactivate the image first before calling +blk_unref(). This already performs the operations that would be most +likely to fail while closing the image, but it can still return errors. + +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1330 +Signed-off-by: Kevin Wolf +Message-Id: <20230112191454.169353-4-kwolf@redhat.com> +Reviewed-by: Hanna Czenczek +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Kevin Wolf +(cherry picked from commit c5e477110dcb8ef4642dce399777c3dee68fa96c) +Signed-off-by: Kevin Wolf +--- + qemu-img.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/qemu-img.c b/qemu-img.c +index 18833f7d69..7d035c0c7f 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -4622,6 +4622,7 @@ static int img_bitmap(int argc, char **argv) + QSIMPLEQ_HEAD(, ImgBitmapAction) actions; + ImgBitmapAction *act, *act_next; + const char *op; ++ int inactivate_ret; + + QSIMPLEQ_INIT(&actions); + +@@ -4806,6 +4807,16 @@ static int img_bitmap(int argc, char **argv) + ret = 0; + + out: ++ /* ++ * Manually inactivate the images first because this way we can know whether ++ * an error occurred. blk_unref() doesn't tell us about failures. ++ */ ++ inactivate_ret = bdrv_inactivate_all(); ++ if (inactivate_ret < 0) { ++ error_report("Error while closing the image: %s", strerror(-inactivate_ret)); ++ ret = 1; ++ } ++ + blk_unref(src); + blk_unref(blk); + qemu_opts_del(opts); +-- +2.37.3 + diff --git a/SOURCES/kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch b/SOURCES/kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch new file mode 100644 index 0000000..32d3d9d --- /dev/null +++ b/SOURCES/kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch @@ -0,0 +1,67 @@ +From 2396df7fe527567e8e78761ef24ea1057ef6fa48 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 12 Jan 2023 20:14:52 +0100 +Subject: [PATCH 2/4] qemu-img commit: Report errors while closing the image + +RH-Author: Kevin Wolf +RH-MergeRequest: 251: qemu-img: Fix exit code for errors closing the image +RH-Bugzilla: 2147617 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Commit: [2/4] 28f95bf76d1d63e2b0bed0c2ba5206bd3e5ea4f8 + +blk_unref() can't report any errors that happen while closing the image. +For example, if qcow2 hits an -ENOSPC error while writing out dirty +bitmaps when it's closed, it prints error messages to stderr, but +'qemu-img commit' won't see any error return value and will therefore +look successful with exit code 0. + +In order to fix this, manually inactivate the image first before calling +blk_unref(). This already performs the operations that would be most +likely to fail while closing the image, but it can still return errors. + +Signed-off-by: Kevin Wolf +Message-Id: <20230112191454.169353-3-kwolf@redhat.com> +Reviewed-by: Hanna Czenczek +Signed-off-by: Kevin Wolf +(cherry picked from commit 44efba2d713aca076c411594d0c1a2b99155eeb3) +Signed-off-by: Kevin Wolf +--- + qemu-img.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/qemu-img.c b/qemu-img.c +index f036a1d428..18833f7d69 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -443,6 +443,11 @@ static BlockBackend *img_open(bool image_opts, + blk = img_open_file(filename, NULL, fmt, flags, writethrough, quiet, + force_share); + } ++ ++ if (blk) { ++ blk_set_force_allow_inactivate(blk); ++ } ++ + return blk; + } + +@@ -1110,6 +1115,14 @@ unref_backing: + done: + qemu_progress_end(); + ++ /* ++ * Manually inactivate the image first because this way we can know whether ++ * an error occurred. blk_unref() doesn't tell us about failures. ++ */ ++ ret = bdrv_inactivate_all(); ++ if (ret < 0 && !local_err) { ++ error_setg_errno(&local_err, -ret, "Error while closing the image"); ++ } + blk_unref(blk); + + if (local_err) { +-- +2.37.3 + diff --git a/SOURCES/kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch b/SOURCES/kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch new file mode 100644 index 0000000..31e60a3 --- /dev/null +++ b/SOURCES/kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch @@ -0,0 +1,166 @@ +From 7c6faae20638f58681df223e0ca44e0a6cb60d2d Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 12 Jan 2023 20:14:54 +0100 +Subject: [PATCH 4/4] qemu-iotests: Test qemu-img bitmap/commit exit code on + error + +RH-Author: Kevin Wolf +RH-MergeRequest: 251: qemu-img: Fix exit code for errors closing the image +RH-Bugzilla: 2147617 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella +RH-Commit: [4/4] fb2f9de98ddd2ee1d745119e4f15272ef44e0aae + +This tests that when an error happens while writing back bitmaps to the +image file in qcow2_inactivate(), 'qemu-img bitmap/commit' actually +return an error value in their exit code instead of making the operation +look successful to scripts. + +Signed-off-by: Kevin Wolf +Message-Id: <20230112191454.169353-5-kwolf@redhat.com> +Reviewed-by: Hanna Czenczek +Signed-off-by: Kevin Wolf +(cherry picked from commit 07a4e1f8e5418f36424cd57d5d061b090a238c65) +Signed-off-by: Kevin Wolf +--- + .../qemu-iotests/tests/qemu-img-close-errors | 96 +++++++++++++++++++ + .../tests/qemu-img-close-errors.out | 23 +++++ + 2 files changed, 119 insertions(+) + create mode 100755 tests/qemu-iotests/tests/qemu-img-close-errors + create mode 100644 tests/qemu-iotests/tests/qemu-img-close-errors.out + +diff --git a/tests/qemu-iotests/tests/qemu-img-close-errors b/tests/qemu-iotests/tests/qemu-img-close-errors +new file mode 100755 +index 0000000000..50bfb6cfa2 +--- /dev/null ++++ b/tests/qemu-iotests/tests/qemu-img-close-errors +@@ -0,0 +1,96 @@ ++#!/usr/bin/env bash ++# group: rw auto quick ++# ++# Check that errors while closing the image, in particular writing back dirty ++# bitmaps, is correctly reported with a failing qemu-img exit code. ++# ++# Copyright (C) 2023 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++# ++ ++# creator ++owner=kwolf@redhat.com ++ ++seq="$(basename $0)" ++echo "QA output created by $seq" ++ ++status=1 # failure is the default! ++ ++_cleanup() ++{ ++ _cleanup_test_img ++} ++trap "_cleanup; exit \$status" 0 1 2 3 15 ++ ++# get standard environment, filters and checks ++cd .. ++. ./common.rc ++. ./common.filter ++ ++_supported_fmt qcow2 ++_supported_proto file ++_supported_os Linux ++ ++size=1G ++ ++# The error we are going to use is ENOSPC. Depending on how many bitmaps we ++# create in the backing file (and therefore increase the used up space), we get ++# failures in different places. With a low number, only merging the bitmap ++# fails, whereas with a higher number, already 'qemu-img commit' fails. ++for max_bitmap in 6 7; do ++ echo ++ echo "=== Test with $max_bitmap bitmaps ===" ++ ++ TEST_IMG="$TEST_IMG.base" _make_test_img -q $size ++ for i in $(seq 1 $max_bitmap); do ++ $QEMU_IMG bitmap --add "$TEST_IMG.base" "stale-bitmap-$i" ++ done ++ ++ # Simulate a block device of 128 MB by resizing the image file accordingly ++ # and then enforcing the size with the raw driver ++ $QEMU_IO -f raw -c "truncate 128M" "$TEST_IMG.base" ++ BASE_JSON='json:{ ++ "driver": "qcow2", ++ "file": { ++ "driver": "raw", ++ "size": 134217728, ++ "file": { ++ "driver": "file", ++ "filename":"'"$TEST_IMG.base"'" ++ } ++ } ++ }' ++ ++ _make_test_img -q -b "$BASE_JSON" -F $IMGFMT ++ $QEMU_IMG bitmap --add "$TEST_IMG" "good-bitmap" ++ ++ $QEMU_IO -c 'write 0 126m' "$TEST_IMG" | _filter_qemu_io ++ ++ $QEMU_IMG commit -d "$TEST_IMG" 2>&1 | _filter_generated_node_ids ++ echo "qemu-img commit exit code: ${PIPESTATUS[0]}" ++ ++ $QEMU_IMG bitmap --add "$BASE_JSON" "good-bitmap" ++ echo "qemu-img bitmap --add exit code: $?" ++ ++ $QEMU_IMG bitmap --merge "good-bitmap" -b "$TEST_IMG" "$BASE_JSON" \ ++ "good-bitmap" 2>&1 | _filter_generated_node_ids ++ echo "qemu-img bitmap --merge exit code: ${PIPESTATUS[0]}" ++done ++ ++# success, all done ++echo "*** done" ++rm -f $seq.full ++status=0 ++ +diff --git a/tests/qemu-iotests/tests/qemu-img-close-errors.out b/tests/qemu-iotests/tests/qemu-img-close-errors.out +new file mode 100644 +index 0000000000..1bfe88f176 +--- /dev/null ++++ b/tests/qemu-iotests/tests/qemu-img-close-errors.out +@@ -0,0 +1,23 @@ ++QA output created by qemu-img-close-errors ++ ++=== Test with 6 bitmaps === ++wrote 132120576/132120576 bytes at offset 0 ++126 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++Image committed. ++qemu-img commit exit code: 0 ++qemu-img bitmap --add exit code: 0 ++qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'good-bitmap' to file: No space left on device ++qemu-img: Error while closing the image: Invalid argument ++qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'good-bitmap' to file: No space left on device ++qemu-img bitmap --merge exit code: 1 ++ ++=== Test with 7 bitmaps === ++wrote 132120576/132120576 bytes at offset 0 ++126 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'stale-bitmap-7' to file: No space left on device ++qemu-img: Lost persistent bitmaps during inactivation of node 'NODE_NAME': Failed to write bitmap 'stale-bitmap-7' to file: No space left on device ++qemu-img: Error while closing the image: Invalid argument ++qemu-img commit exit code: 1 ++qemu-img bitmap --add exit code: 0 ++qemu-img bitmap --merge exit code: 0 ++*** done +-- +2.37.3 + diff --git a/SOURCES/kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch b/SOURCES/kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch new file mode 100644 index 0000000..0051baf --- /dev/null +++ b/SOURCES/kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch @@ -0,0 +1,146 @@ +From d46ca52c3f42add549bd3790a41d06594821334e Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:10:57 -0500 +Subject: [PATCH 03/13] qemu-thread-posix: cleanup, fix, document QemuEvent + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2168472 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Eric Auger +RH-Acked-by: Paolo Bonzini +RH-Acked-by: David Hildenbrand +RH-Commit: [3/10] 746070c4d78c7f0a9ac4456d9aee69475acb8964 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472 + +commit 9586a1329f5dce6c1d7f4de53cf0536644d7e593 +Author: Paolo Bonzini +Date: Thu Mar 2 11:19:52 2023 +0100 + + qemu-thread-posix: cleanup, fix, document QemuEvent + + QemuEvent is currently broken on ARM due to missing memory barriers + after qatomic_*(). Apart from adding the memory barrier, a closer look + reveals some unpaired memory barriers too. Document more clearly what + is going on. + + Reviewed-by: Richard Henderson + Reviewed-by: David Hildenbrand + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + util/qemu-thread-posix.c | 69 ++++++++++++++++++++++++++++------------ + 1 file changed, 49 insertions(+), 20 deletions(-) + +diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c +index e1225b63bd..dd3b6d4670 100644 +--- a/util/qemu-thread-posix.c ++++ b/util/qemu-thread-posix.c +@@ -430,13 +430,21 @@ void qemu_event_destroy(QemuEvent *ev) + + void qemu_event_set(QemuEvent *ev) + { +- /* qemu_event_set has release semantics, but because it *loads* ++ assert(ev->initialized); ++ ++ /* ++ * Pairs with both qemu_event_reset() and qemu_event_wait(). ++ * ++ * qemu_event_set has release semantics, but because it *loads* + * ev->value we need a full memory barrier here. + */ +- assert(ev->initialized); + smp_mb(); + if (qatomic_read(&ev->value) != EV_SET) { +- if (qatomic_xchg(&ev->value, EV_SET) == EV_BUSY) { ++ int old = qatomic_xchg(&ev->value, EV_SET); ++ ++ /* Pairs with memory barrier in kernel futex_wait system call. */ ++ smp_mb__after_rmw(); ++ if (old == EV_BUSY) { + /* There were waiters, wake them up. */ + qemu_futex_wake(ev, INT_MAX); + } +@@ -445,18 +453,19 @@ void qemu_event_set(QemuEvent *ev) + + void qemu_event_reset(QemuEvent *ev) + { +- unsigned value; +- + assert(ev->initialized); +- value = qatomic_read(&ev->value); +- smp_mb_acquire(); +- if (value == EV_SET) { +- /* +- * If there was a concurrent reset (or even reset+wait), +- * do nothing. Otherwise change EV_SET->EV_FREE. +- */ +- qatomic_or(&ev->value, EV_FREE); +- } ++ ++ /* ++ * If there was a concurrent reset (or even reset+wait), ++ * do nothing. Otherwise change EV_SET->EV_FREE. ++ */ ++ qatomic_or(&ev->value, EV_FREE); ++ ++ /* ++ * Order reset before checking the condition in the caller. ++ * Pairs with the first memory barrier in qemu_event_set(). ++ */ ++ smp_mb__after_rmw(); + } + + void qemu_event_wait(QemuEvent *ev) +@@ -464,20 +473,40 @@ void qemu_event_wait(QemuEvent *ev) + unsigned value; + + assert(ev->initialized); +- value = qatomic_read(&ev->value); +- smp_mb_acquire(); ++ ++ /* ++ * qemu_event_wait must synchronize with qemu_event_set even if it does ++ * not go down the slow path, so this load-acquire is needed that ++ * synchronizes with the first memory barrier in qemu_event_set(). ++ * ++ * If we do go down the slow path, there is no requirement at all: we ++ * might miss a qemu_event_set() here but ultimately the memory barrier in ++ * qemu_futex_wait() will ensure the check is done correctly. ++ */ ++ value = qatomic_load_acquire(&ev->value); + if (value != EV_SET) { + if (value == EV_FREE) { + /* +- * Leave the event reset and tell qemu_event_set that there +- * are waiters. No need to retry, because there cannot be +- * a concurrent busy->free transition. After the CAS, the +- * event will be either set or busy. ++ * Leave the event reset and tell qemu_event_set that there are ++ * waiters. No need to retry, because there cannot be a concurrent ++ * busy->free transition. After the CAS, the event will be either ++ * set or busy. ++ * ++ * This cmpxchg doesn't have particular ordering requirements if it ++ * succeeds (moving the store earlier can only cause qemu_event_set() ++ * to issue _more_ wakeups), the failing case needs acquire semantics ++ * like the load above. + */ + if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) { + return; + } + } ++ ++ /* ++ * This is the final check for a concurrent set, so it does need ++ * a smp_mb() pairing with the second barrier of qemu_event_set(). ++ * The barrier is inside the FUTEX_WAIT system call. ++ */ + qemu_futex_wait(ev, EV_BUSY); + } + } +-- +2.37.3 + diff --git a/SOURCES/kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch b/SOURCES/kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch new file mode 100644 index 0000000..3b63378 --- /dev/null +++ b/SOURCES/kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch @@ -0,0 +1,162 @@ +From fa730378c42567e77eaf3e70983108f31f9001b9 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Thu, 9 Mar 2023 08:11:05 -0500 +Subject: [PATCH 04/13] qemu-thread-win32: cleanup, fix, document QemuEvent + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 263: qatomic: add smp_mb__before/after_rmw() +RH-Bugzilla: 2168472 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Eric Auger +RH-Acked-by: Paolo Bonzini +RH-Acked-by: David Hildenbrand +RH-Commit: [4/10] 43d5bd903b460d4c3c5793a456820e8c5c8521d9 + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168472 + +commit 6c5df4b48f0c52a61342ecb307a43f4c2a3565c4 +Author: Paolo Bonzini +Date: Thu Mar 2 11:22:50 2023 +0100 + + qemu-thread-win32: cleanup, fix, document QemuEvent + + QemuEvent is currently broken on ARM due to missing memory barriers + after qatomic_*(). Apart from adding the memory barrier, a closer look + reveals some unpaired memory barriers that are not really needed and + complicated the functions unnecessarily. Also, it is relying on + a memory barrier in ResetEvent(); the barrier _ought_ to be there + but there is really no documentation about it, so make it explicit. + + Reviewed-by: Richard Henderson + Reviewed-by: David Hildenbrand + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + util/qemu-thread-win32.c | 82 +++++++++++++++++++++++++++------------- + 1 file changed, 56 insertions(+), 26 deletions(-) + +diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c +index 52eb19f351..c10249bc2e 100644 +--- a/util/qemu-thread-win32.c ++++ b/util/qemu-thread-win32.c +@@ -246,12 +246,20 @@ void qemu_event_destroy(QemuEvent *ev) + void qemu_event_set(QemuEvent *ev) + { + assert(ev->initialized); +- /* qemu_event_set has release semantics, but because it *loads* ++ ++ /* ++ * Pairs with both qemu_event_reset() and qemu_event_wait(). ++ * ++ * qemu_event_set has release semantics, but because it *loads* + * ev->value we need a full memory barrier here. + */ + smp_mb(); + if (qatomic_read(&ev->value) != EV_SET) { +- if (qatomic_xchg(&ev->value, EV_SET) == EV_BUSY) { ++ int old = qatomic_xchg(&ev->value, EV_SET); ++ ++ /* Pairs with memory barrier after ResetEvent. */ ++ smp_mb__after_rmw(); ++ if (old == EV_BUSY) { + /* There were waiters, wake them up. */ + SetEvent(ev->event); + } +@@ -260,17 +268,19 @@ void qemu_event_set(QemuEvent *ev) + + void qemu_event_reset(QemuEvent *ev) + { +- unsigned value; +- + assert(ev->initialized); +- value = qatomic_read(&ev->value); +- smp_mb_acquire(); +- if (value == EV_SET) { +- /* If there was a concurrent reset (or even reset+wait), +- * do nothing. Otherwise change EV_SET->EV_FREE. +- */ +- qatomic_or(&ev->value, EV_FREE); +- } ++ ++ /* ++ * If there was a concurrent reset (or even reset+wait), ++ * do nothing. Otherwise change EV_SET->EV_FREE. ++ */ ++ qatomic_or(&ev->value, EV_FREE); ++ ++ /* ++ * Order reset before checking the condition in the caller. ++ * Pairs with the first memory barrier in qemu_event_set(). ++ */ ++ smp_mb__after_rmw(); + } + + void qemu_event_wait(QemuEvent *ev) +@@ -278,29 +288,49 @@ void qemu_event_wait(QemuEvent *ev) + unsigned value; + + assert(ev->initialized); +- value = qatomic_read(&ev->value); +- smp_mb_acquire(); ++ ++ /* ++ * qemu_event_wait must synchronize with qemu_event_set even if it does ++ * not go down the slow path, so this load-acquire is needed that ++ * synchronizes with the first memory barrier in qemu_event_set(). ++ * ++ * If we do go down the slow path, there is no requirement at all: we ++ * might miss a qemu_event_set() here but ultimately the memory barrier in ++ * qemu_futex_wait() will ensure the check is done correctly. ++ */ ++ value = qatomic_load_acquire(&ev->value); + if (value != EV_SET) { + if (value == EV_FREE) { +- /* qemu_event_set is not yet going to call SetEvent, but we are +- * going to do another check for EV_SET below when setting EV_BUSY. +- * At that point it is safe to call WaitForSingleObject. ++ /* ++ * Here the underlying kernel event is reset, but qemu_event_set is ++ * not yet going to call SetEvent. However, there will be another ++ * check for EV_SET below when setting EV_BUSY. At that point it ++ * is safe to call WaitForSingleObject. + */ + ResetEvent(ev->event); + +- /* Tell qemu_event_set that there are waiters. No need to retry +- * because there cannot be a concurrent busy->free transition. +- * After the CAS, the event will be either set or busy. ++ /* ++ * It is not clear whether ResetEvent provides this barrier; kernel ++ * APIs (KeResetEvent/KeClearEvent) do not. Better safe than sorry! ++ */ ++ smp_mb(); ++ ++ /* ++ * Leave the event reset and tell qemu_event_set that there are ++ * waiters. No need to retry, because there cannot be a concurrent ++ * busy->free transition. After the CAS, the event will be either ++ * set or busy. + */ + if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) { +- value = EV_SET; +- } else { +- value = EV_BUSY; ++ return; + } + } +- if (value == EV_BUSY) { +- WaitForSingleObject(ev->event, INFINITE); +- } ++ ++ /* ++ * ev->value is now EV_BUSY. Since we didn't observe EV_SET, ++ * qemu_event_set() must observe EV_BUSY and call SetEvent(). ++ */ ++ WaitForSingleObject(ev->event, INFINITE); + } + } + +-- +2.37.3 + diff --git a/SOURCES/kvm-redhat-Add-rhel8.6.0-machine-type-for-s390x.patch b/SOURCES/kvm-redhat-Add-rhel8.6.0-machine-type-for-s390x.patch new file mode 100644 index 0000000..edf8ec9 --- /dev/null +++ b/SOURCES/kvm-redhat-Add-rhel8.6.0-machine-type-for-s390x.patch @@ -0,0 +1,69 @@ +From 3541c9fc2c2dd5cf7dd583bc5645d82ea928d9e8 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 10 Dec 2021 10:07:40 +0100 +Subject: [PATCH 1/2] redhat: Add rhel8.6.0 machine type for s390x +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 90: Add rhel8.6.0 machine type for s390x +RH-Commit: [1/1] 91961fc52d708e6b30d7361fbab3572c5b5c1859 +RH-Bugzilla: 2005325 +RH-Acked-by: Greg Kurz +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2005325 + +The new machine type has better default values for the upcoming +"generation 16" mainframe. + +Signed-off-by: Thomas Huth +--- + hw/s390x/s390-virtio-ccw.c | 14 +++++++++++++- + 1 file changed, 13 insertions(+), 1 deletion(-) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index cf13c457d6..9795eb9406 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1103,10 +1103,21 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) + DEFINE_CCW_MACHINE(2_4, "2.4", false); + #endif + ++static void ccw_machine_rhel860_instance_options(MachineState *machine) ++{ ++} ++ ++static void ccw_machine_rhel860_class_options(MachineClass *mc) ++{ ++} ++DEFINE_CCW_MACHINE(rhel860, "rhel8.6.0", true); ++ + static void ccw_machine_rhel850_instance_options(MachineState *machine) + { + static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_0 }; + ++ ccw_machine_rhel860_instance_options(machine); ++ + s390_set_qemu_cpu_model(0x2964, 13, 2, qemu_cpu_feat); + + s390_cpudef_featoff_greater(16, 1, S390_FEAT_NNPA); +@@ -1118,10 +1129,11 @@ static void ccw_machine_rhel850_instance_options(MachineState *machine) + + static void ccw_machine_rhel850_class_options(MachineClass *mc) + { ++ ccw_machine_rhel860_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); + mc->smp_props.prefer_sockets = true; + } +-DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", true); ++DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", false); + + static void ccw_machine_rhel840_instance_options(MachineState *machine) + { +-- +2.27.0 + diff --git a/SOURCES/kvm-redhat-Define-pseries-rhel8.6.0-machine-type.patch b/SOURCES/kvm-redhat-Define-pseries-rhel8.6.0-machine-type.patch new file mode 100644 index 0000000..760a5fd --- /dev/null +++ b/SOURCES/kvm-redhat-Define-pseries-rhel8.6.0-machine-type.patch @@ -0,0 +1,76 @@ +From 300cdf7f5b8b34e111c5e4141684af7329be46d9 Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Mon, 13 Dec 2021 15:42:41 +0100 +Subject: [PATCH 2/2] redhat: Define pseries-rhel8.6.0 machine type +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Greg Kurz +RH-MergeRequest: 92: redhat: Define pseries-rhel8.6.0 machine type +RH-Commit: [1/1] 3c0f59d7ddf4bb22f382b5df7daa136730b9e866 +RH-Bugzilla: 2031041 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: David Gibson (Red Hat) +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Laurent Vivier + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2031041 + +BRANCH: rhel-8.6.0 + +UPSTREAM: RHEL only + +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=41989147 + +Signed-off-by: Greg Kurz +--- + hw/ppc/spapr.c | 18 +++++++++++++++--- + 1 file changed, 15 insertions(+), 3 deletions(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index 2f27888d8a..32cfe8f006 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -5170,6 +5170,19 @@ static void spapr_machine_rhel_default_class_options(MachineClass *mc) + mc->max_cpus = 384; + } + ++/* ++ * pseries-rhel8.6.0 ++ * like pseries-6.2 ++ */ ++ ++static void spapr_machine_rhel860_class_options(MachineClass *mc) ++{ ++ /* The default machine type must apply the RHEL specific defaults */ ++ spapr_machine_rhel_default_class_options(mc); ++} ++ ++DEFINE_SPAPR_MACHINE(rhel860, "rhel8.6.0", true); ++ + /* + * pseries-rhel8.5.0 + * like pseries-6.0 +@@ -5179,15 +5192,14 @@ static void spapr_machine_rhel850_class_options(MachineClass *mc) + { + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + +- /* The default machine type must apply the RHEL specific defaults */ +- spapr_machine_rhel_default_class_options(mc); ++ spapr_machine_rhel860_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, + hw_compat_rhel_8_5_len); + smc->pre_6_2_numa_affinity = true; + mc->smp_props.prefer_sockets = true; + } + +-DEFINE_SPAPR_MACHINE(rhel850, "rhel8.5.0", true); ++DEFINE_SPAPR_MACHINE(rhel850, "rhel8.5.0", false); + + /* + * pseries-rhel8.4.0 +-- +2.27.0 + diff --git a/SOURCES/kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch b/SOURCES/kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch new file mode 100644 index 0000000..2ceb4e4 --- /dev/null +++ b/SOURCES/kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch @@ -0,0 +1,111 @@ +From 21b19213328826327eba18199b790425659af7d8 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 Jul 2022 16:55:34 +0200 +Subject: [PATCH 1/3] redhat: Update linux-headers/linux/kvm.h to v5.18-rc6 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 220: s390x: Fix skey test in kvm_unit_test +RH-Bugzilla: 2124757 +RH-Acked-by: Thomas Huth +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck +RH-Commit: [1/2] e514a00305cb0caab9d3acc0efb325853daa6d51 + +Upstream Status: RHEL-only +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2124757 + +Based on upstream commit e4082063e47e9731dbeb1c26174c17f6038f577f +("linux-headers: Update to v5.18-rc6"), but this is focusing on +the file linux-headers/linux/kvm.h only (since the other changes +related to the VFIO renaming might break some stuff). + +Signed-off-by: Thomas Huth +(cherry picked from commit 71516db15469a02600932a5c1f0d4a9626a91193) +Signed-off-by: Cédric Le Goater +--- + linux-headers/linux/kvm.h | 27 +++++++++++++++++++++------ + 1 file changed, 21 insertions(+), 6 deletions(-) + +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index d232feaae9..0d05d02ee4 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -445,7 +445,11 @@ struct kvm_run { + #define KVM_SYSTEM_EVENT_RESET 2 + #define KVM_SYSTEM_EVENT_CRASH 3 + __u32 type; +- __u64 flags; ++ __u32 ndata; ++ union { ++ __u64 flags; ++ __u64 data[16]; ++ }; + } system_event; + /* KVM_EXIT_S390_STSI */ + struct { +@@ -562,9 +566,12 @@ struct kvm_s390_mem_op { + __u32 op; /* type of operation */ + __u64 buf; /* buffer in userspace */ + union { +- __u8 ar; /* the access register number */ ++ struct { ++ __u8 ar; /* the access register number */ ++ __u8 key; /* access key, ignored if flag unset */ ++ }; + __u32 sida_offset; /* offset into the sida */ +- __u8 reserved[32]; /* should be set to 0 */ ++ __u8 reserved[32]; /* ignored */ + }; + }; + /* types for kvm_s390_mem_op->op */ +@@ -572,9 +579,12 @@ struct kvm_s390_mem_op { + #define KVM_S390_MEMOP_LOGICAL_WRITE 1 + #define KVM_S390_MEMOP_SIDA_READ 2 + #define KVM_S390_MEMOP_SIDA_WRITE 3 ++#define KVM_S390_MEMOP_ABSOLUTE_READ 4 ++#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5 + /* flags for kvm_s390_mem_op->flags */ + #define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) + #define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) ++#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) + + /* for KVM_INTERRUPT */ + struct kvm_interrupt { +@@ -1134,6 +1144,12 @@ struct kvm_ppc_resize_hpt { + #define KVM_CAP_VM_GPA_BITS 207 + #define KVM_CAP_XSAVE2 208 + #define KVM_CAP_SYS_ATTRIBUTES 209 ++#define KVM_CAP_PPC_AIL_MODE_3 210 ++#define KVM_CAP_S390_MEM_OP_EXTENSION 211 ++#define KVM_CAP_PMU_CAPABILITY 212 ++#define KVM_CAP_DISABLE_QUIRKS2 213 ++/* #define KVM_CAP_VM_TSC_CONTROL 214 */ ++#define KVM_CAP_SYSTEM_EVENT_DATA 215 + + #ifdef KVM_CAP_IRQ_ROUTING + +@@ -1624,9 +1640,6 @@ struct kvm_enc_region { + #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) + #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) + +-/* Available with KVM_CAP_XSAVE2 */ +-#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) +- + struct kvm_s390_pv_sec_parm { + __u64 origin; + __u64 length; +@@ -1973,6 +1986,8 @@ struct kvm_dirty_gfn { + #define KVM_BUS_LOCK_DETECTION_OFF (1 << 0) + #define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1) + ++#define KVM_PMU_CAP_DISABLE (1 << 0) ++ + /** + * struct kvm_stats_header - Header of per vm/vcpu binary statistics data. + * @flags: Some extra information for header, always 0 for now. +-- +2.35.3 + diff --git a/SOURCES/kvm-rhel-machine-types-x86-set-prefer_sockets.patch b/SOURCES/kvm-rhel-machine-types-x86-set-prefer_sockets.patch new file mode 100644 index 0000000..d7bfc96 --- /dev/null +++ b/SOURCES/kvm-rhel-machine-types-x86-set-prefer_sockets.patch @@ -0,0 +1,52 @@ +From 0f0cbd57a8fe8f463941656f5bc75ae5754c3d2b Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 7 Dec 2021 18:39:47 +0000 +Subject: [PATCH 6/6] rhel machine types/x86: set prefer_sockets + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 96: Fixup x86 prefer_sockets +RH-Commit: [1/1] 29578bcc2f5d3408c155c155cdfa10b7a12faf4d +RH-Bugzilla: 2029582 +RH-Acked-by: Igor Mammedov +RH-Acked-by: quintela1 +RH-Acked-by: Cornelia Huck + +When I fixed up the machine types for 8.5 I missed the + prefer_sockets = true + +add them in; it looks like Power, ARM already have them, and I see them +in thuth's s390 patch. + +Signed-off-by: Dr. David Alan Gilbert +--- + hw/i386/pc_piix.c | 1 + + hw/i386/pc_q35.c | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 37fab00733..c30057c443 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1020,6 +1020,7 @@ static void pc_machine_rhel7_options(MachineClass *m) + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); + m->alias = "pc"; + m->is_default = 1; ++ m->smp_props.prefer_sockets = true; + } + + static void pc_init_rhel760(MachineState *machine) +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 78876e1101..f6e77bca0e 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -662,6 +662,7 @@ static void pc_q35_machine_rhel850_options(MachineClass *m) + hw_compat_rhel_8_5_len); + compat_props_add(m->compat_props, pc_rhel_8_5_compat, + pc_rhel_8_5_compat_len); ++ m->smp_props.prefer_sockets = true; + } + + DEFINE_PC_MACHINE(q35_rhel850, "pc-q35-rhel8.5.0", pc_q35_init_rhel850, +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-Add-KVM-PV-dump-interface.patch b/SOURCES/kvm-s390x-Add-KVM-PV-dump-interface.patch new file mode 100644 index 0000000..f42410d --- /dev/null +++ b/SOURCES/kvm-s390x-Add-KVM-PV-dump-interface.patch @@ -0,0 +1,124 @@ +From 95c229506a6e7261fce184488e880a94f9ba0789 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Mon, 17 Oct 2022 08:38:21 +0000 +Subject: [PATCH 40/42] s390x: Add KVM PV dump interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [40/41] 5df512a63b2ed17991489565b70f89f4efc0b639 + +Let's add a few bits of code which hide the new KVM PV dump API from +us via new functions. + +Signed-off-by: Janosch Frank +Reviewed-by: Janis Schoetterl-Glausch +Reviewed-by: Steffen Eiden +[ Marc-André: fix up for compilation issue ] +Signed-off-by: Marc-André Lureau +Message-Id: <20221017083822.43118-10-frankja@linux.ibm.com> +(cherry picked from commit 753ca06f4706cd6e57750a606afb08c5c5299643) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/pv.c | 51 +++++++++++++++++++++++++++++++++++++++++++ + include/hw/s390x/pv.h | 9 ++++++++ + 2 files changed, 60 insertions(+) + +diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c +index 4c012f2eeb..728ba24547 100644 +--- a/hw/s390x/pv.c ++++ b/hw/s390x/pv.c +@@ -175,6 +175,57 @@ bool kvm_s390_pv_info_basic_valid(void) + return info_valid; + } + ++static int s390_pv_dump_cmd(uint64_t subcmd, uint64_t uaddr, uint64_t gaddr, ++ uint64_t len) ++{ ++ struct kvm_s390_pv_dmp dmp = { ++ .subcmd = subcmd, ++ .buff_addr = uaddr, ++ .buff_len = len, ++ .gaddr = gaddr, ++ }; ++ int ret; ++ ++ ret = s390_pv_cmd(KVM_PV_DUMP, (void *)&dmp); ++ if (ret) { ++ error_report("KVM DUMP command %ld failed", subcmd); ++ } ++ return ret; ++} ++ ++int kvm_s390_dump_cpu(S390CPU *cpu, void *buff) ++{ ++ struct kvm_s390_pv_dmp dmp = { ++ .subcmd = KVM_PV_DUMP_CPU, ++ .buff_addr = (uint64_t)buff, ++ .gaddr = 0, ++ .buff_len = info_dump.dump_cpu_buffer_len, ++ }; ++ struct kvm_pv_cmd pv = { ++ .cmd = KVM_PV_DUMP, ++ .data = (uint64_t)&dmp, ++ }; ++ ++ return kvm_vcpu_ioctl(CPU(cpu), KVM_S390_PV_CPU_COMMAND, &pv); ++} ++ ++int kvm_s390_dump_init(void) ++{ ++ return s390_pv_dump_cmd(KVM_PV_DUMP_INIT, 0, 0, 0); ++} ++ ++int kvm_s390_dump_mem_state(uint64_t gaddr, size_t len, void *dest) ++{ ++ return s390_pv_dump_cmd(KVM_PV_DUMP_CONFIG_STOR_STATE, (uint64_t)dest, ++ gaddr, len); ++} ++ ++int kvm_s390_dump_completion_data(void *buff) ++{ ++ return s390_pv_dump_cmd(KVM_PV_DUMP_COMPLETE, (uint64_t)buff, 0, ++ info_dump.dump_config_finalize_len); ++} ++ + #define TYPE_S390_PV_GUEST "s390-pv-guest" + OBJECT_DECLARE_SIMPLE_TYPE(S390PVGuest, S390_PV_GUEST) + +diff --git a/include/hw/s390x/pv.h b/include/hw/s390x/pv.h +index e5ea0eca16..9360aa1091 100644 +--- a/include/hw/s390x/pv.h ++++ b/include/hw/s390x/pv.h +@@ -51,6 +51,10 @@ uint64_t kvm_s390_pv_dmp_get_size_cpu(void); + uint64_t kvm_s390_pv_dmp_get_size_mem_state(void); + uint64_t kvm_s390_pv_dmp_get_size_completion_data(void); + bool kvm_s390_pv_info_basic_valid(void); ++int kvm_s390_dump_init(void); ++int kvm_s390_dump_cpu(S390CPU *cpu, void *buff); ++int kvm_s390_dump_mem_state(uint64_t addr, size_t len, void *dest); ++int kvm_s390_dump_completion_data(void *buff); + #else /* CONFIG_KVM */ + static inline bool s390_is_pv(void) { return false; } + static inline int s390_pv_query_info(void) { return 0; } +@@ -66,6 +70,11 @@ static inline uint64_t kvm_s390_pv_dmp_get_size_cpu(void) { return 0; } + static inline uint64_t kvm_s390_pv_dmp_get_size_mem_state(void) { return 0; } + static inline uint64_t kvm_s390_pv_dmp_get_size_completion_data(void) { return 0; } + static inline bool kvm_s390_pv_info_basic_valid(void) { return false; } ++static inline int kvm_s390_dump_init(void) { return 0; } ++static inline int kvm_s390_dump_cpu(S390CPU *cpu, void *buff) { return 0; } ++static inline int kvm_s390_dump_mem_state(uint64_t addr, size_t len, ++ void *dest) { return 0; } ++static inline int kvm_s390_dump_completion_data(void *buff) { return 0; } + #endif /* CONFIG_KVM */ + + int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-Add-protected-dump-cap.patch b/SOURCES/kvm-s390x-Add-protected-dump-cap.patch new file mode 100644 index 0000000..94da295 --- /dev/null +++ b/SOURCES/kvm-s390x-Add-protected-dump-cap.patch @@ -0,0 +1,113 @@ +From 7634eed5aea61dc94f9a828c62ef3da9aeaa62ae Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Mon, 17 Oct 2022 08:38:18 +0000 +Subject: [PATCH 37/42] s390x: Add protected dump cap +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [37/41] 52e1e7bf1a00ce3a220d3db2f733a65548bfec6d + +Add a protected dump capability for later feature checking. + +Signed-off-by: Janosch Frank +Reviewed-by: Steffen Eiden +Reviewed-by: Thomas Huth +Reviewed-by: Janis Schoetterl-Glausch +Message-Id: <20221017083822.43118-7-frankja@linux.ibm.com> +[ Marc-André - Add missing stubs when !kvm ] +Signed-off-by: Marc-André Lureau +(cherry picked from commit ad3b2e693daac6ed92db7361236028851d37c77c) +Signed-off-by: Cédric Le Goater +--- + target/s390x/kvm/kvm.c | 7 +++++++ + target/s390x/kvm/kvm_s390x.h | 1 + + target/s390x/kvm/meson.build | 2 ++ + target/s390x/kvm/stubs.c | 12 ++++++++++++ + 4 files changed, 22 insertions(+) + create mode 100644 target/s390x/kvm/stubs.c + +diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c +index 30712487d4..d36b44f32a 100644 +--- a/target/s390x/kvm/kvm.c ++++ b/target/s390x/kvm/kvm.c +@@ -159,6 +159,7 @@ static int cap_hpage_1m; + static int cap_vcpu_resets; + static int cap_protected; + static int cap_zpci_op; ++static int cap_protected_dump; + + static bool mem_op_storage_key_support; + +@@ -365,6 +366,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + cap_vcpu_resets = kvm_check_extension(s, KVM_CAP_S390_VCPU_RESETS); + cap_protected = kvm_check_extension(s, KVM_CAP_S390_PROTECTED); + cap_zpci_op = kvm_check_extension(s, KVM_CAP_S390_ZPCI_OP); ++ cap_protected_dump = kvm_check_extension(s, KVM_CAP_S390_PROTECTED_DUMP); + + kvm_vm_enable_cap(s, KVM_CAP_S390_USER_SIGP, 0); + kvm_vm_enable_cap(s, KVM_CAP_S390_VECTOR_REGISTERS, 0); +@@ -2042,6 +2044,11 @@ int kvm_s390_assign_subch_ioeventfd(EventNotifier *notifier, uint32_t sch, + return kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &kick); + } + ++int kvm_s390_get_protected_dump(void) ++{ ++ return cap_protected_dump; ++} ++ + int kvm_s390_get_ri(void) + { + return cap_ri; +diff --git a/target/s390x/kvm/kvm_s390x.h b/target/s390x/kvm/kvm_s390x.h +index aaae8570de..f9785564d0 100644 +--- a/target/s390x/kvm/kvm_s390x.h ++++ b/target/s390x/kvm/kvm_s390x.h +@@ -26,6 +26,7 @@ int kvm_s390_set_cpu_state(S390CPU *cpu, uint8_t cpu_state); + void kvm_s390_vcpu_interrupt_pre_save(S390CPU *cpu); + int kvm_s390_vcpu_interrupt_post_load(S390CPU *cpu); + int kvm_s390_get_hpage_1m(void); ++int kvm_s390_get_protected_dump(void); + int kvm_s390_get_ri(void); + int kvm_s390_get_zpci_op(void); + int kvm_s390_get_clock(uint8_t *tod_high, uint64_t *tod_clock); +diff --git a/target/s390x/kvm/meson.build b/target/s390x/kvm/meson.build +index d1356356b1..aef52b6686 100644 +--- a/target/s390x/kvm/meson.build ++++ b/target/s390x/kvm/meson.build +@@ -1,6 +1,8 @@ + + s390x_ss.add(when: 'CONFIG_KVM', if_true: files( + 'kvm.c' ++), if_false: files( ++ 'stubs.c' + )) + + # Newer kernels on s390 check for an S390_PGSTE program header and +diff --git a/target/s390x/kvm/stubs.c b/target/s390x/kvm/stubs.c +new file mode 100644 +index 0000000000..5fd63b9a7e +--- /dev/null ++++ b/target/s390x/kvm/stubs.c +@@ -0,0 +1,12 @@ ++/* ++ * SPDX-License-Identifier: GPL-2.0-or-later ++ */ ++ ++#include "qemu/osdep.h" ++ ++#include "kvm_s390x.h" ++ ++int kvm_s390_get_protected_dump(void) ++{ ++ return false; ++} +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-Introduce-PV-query-interface.patch b/SOURCES/kvm-s390x-Introduce-PV-query-interface.patch new file mode 100644 index 0000000..dfb0169 --- /dev/null +++ b/SOURCES/kvm-s390x-Introduce-PV-query-interface.patch @@ -0,0 +1,174 @@ +From 760236b3633a8f532631256a899cab969e772196 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Mon, 17 Oct 2022 08:38:19 +0000 +Subject: [PATCH 38/42] s390x: Introduce PV query interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [38/41] 3090615d81ec6b9e4c306f7fc3709e1935ff5a79 + +Introduce an interface over which we can get information about UV data. + +Signed-off-by: Janosch Frank +Reviewed-by: Steffen Eiden +Reviewed-by: Janis Schoetterl-Glausch +Acked-by: Thomas Huth +Message-Id: <20221017083822.43118-8-frankja@linux.ibm.com> +(cherry picked from commit 03d83ecfae46bf5e0074cb5808043b30df34064b) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/pv.c | 61 ++++++++++++++++++++++++++++++++++++++ + hw/s390x/s390-virtio-ccw.c | 6 ++++ + include/hw/s390x/pv.h | 10 +++++++ + 3 files changed, 77 insertions(+) + +diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c +index 401b63d6cb..4c012f2eeb 100644 +--- a/hw/s390x/pv.c ++++ b/hw/s390x/pv.c +@@ -20,6 +20,11 @@ + #include "exec/confidential-guest-support.h" + #include "hw/s390x/ipl.h" + #include "hw/s390x/pv.h" ++#include "target/s390x/kvm/kvm_s390x.h" ++ ++static bool info_valid; ++static struct kvm_s390_pv_info_vm info_vm; ++static struct kvm_s390_pv_info_dump info_dump; + + static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data) + { +@@ -56,6 +61,42 @@ static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data) + } \ + } + ++int s390_pv_query_info(void) ++{ ++ struct kvm_s390_pv_info info = { ++ .header.id = KVM_PV_INFO_VM, ++ .header.len_max = sizeof(info.header) + sizeof(info.vm), ++ }; ++ int rc; ++ ++ /* Info API's first user is dump so they are bundled */ ++ if (!kvm_s390_get_protected_dump()) { ++ return 0; ++ } ++ ++ rc = s390_pv_cmd(KVM_PV_INFO, &info); ++ if (rc) { ++ error_report("KVM PV INFO cmd %x failed: %s", ++ info.header.id, strerror(-rc)); ++ return rc; ++ } ++ memcpy(&info_vm, &info.vm, sizeof(info.vm)); ++ ++ info.header.id = KVM_PV_INFO_DUMP; ++ info.header.len_max = sizeof(info.header) + sizeof(info.dump); ++ rc = s390_pv_cmd(KVM_PV_INFO, &info); ++ if (rc) { ++ error_report("KVM PV INFO cmd %x failed: %s", ++ info.header.id, strerror(-rc)); ++ return rc; ++ } ++ ++ memcpy(&info_dump, &info.dump, sizeof(info.dump)); ++ info_valid = true; ++ ++ return rc; ++} ++ + int s390_pv_vm_enable(void) + { + return s390_pv_cmd(KVM_PV_ENABLE, NULL); +@@ -114,6 +155,26 @@ void s390_pv_inject_reset_error(CPUState *cs) + env->regs[r1 + 1] = DIAG_308_RC_INVAL_FOR_PV; + } + ++uint64_t kvm_s390_pv_dmp_get_size_cpu(void) ++{ ++ return info_dump.dump_cpu_buffer_len; ++} ++ ++uint64_t kvm_s390_pv_dmp_get_size_completion_data(void) ++{ ++ return info_dump.dump_config_finalize_len; ++} ++ ++uint64_t kvm_s390_pv_dmp_get_size_mem_state(void) ++{ ++ return info_dump.dump_config_mem_buffer_per_1m; ++} ++ ++bool kvm_s390_pv_info_basic_valid(void) ++{ ++ return info_valid; ++} ++ + #define TYPE_S390_PV_GUEST "s390-pv-guest" + OBJECT_DECLARE_SIMPLE_TYPE(S390PVGuest, S390_PV_GUEST) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index bd80e72cf8..a9617ab79f 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -365,6 +365,12 @@ static int s390_machine_protect(S390CcwMachineState *ms) + + ms->pv = true; + ++ /* Will return 0 if API is not available since it's not vital */ ++ rc = s390_pv_query_info(); ++ if (rc) { ++ goto out_err; ++ } ++ + /* Set SE header and unpack */ + rc = s390_ipl_prepare_pv_header(); + if (rc) { +diff --git a/include/hw/s390x/pv.h b/include/hw/s390x/pv.h +index 1f1f545bfc..e5ea0eca16 100644 +--- a/include/hw/s390x/pv.h ++++ b/include/hw/s390x/pv.h +@@ -38,6 +38,7 @@ static inline bool s390_is_pv(void) + return ccw->pv; + } + ++int s390_pv_query_info(void); + int s390_pv_vm_enable(void); + void s390_pv_vm_disable(void); + int s390_pv_set_sec_parms(uint64_t origin, uint64_t length); +@@ -46,8 +47,13 @@ void s390_pv_prep_reset(void); + int s390_pv_verify(void); + void s390_pv_unshare(void); + void s390_pv_inject_reset_error(CPUState *cs); ++uint64_t kvm_s390_pv_dmp_get_size_cpu(void); ++uint64_t kvm_s390_pv_dmp_get_size_mem_state(void); ++uint64_t kvm_s390_pv_dmp_get_size_completion_data(void); ++bool kvm_s390_pv_info_basic_valid(void); + #else /* CONFIG_KVM */ + static inline bool s390_is_pv(void) { return false; } ++static inline int s390_pv_query_info(void) { return 0; } + static inline int s390_pv_vm_enable(void) { return 0; } + static inline void s390_pv_vm_disable(void) {} + static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) { return 0; } +@@ -56,6 +62,10 @@ static inline void s390_pv_prep_reset(void) {} + static inline int s390_pv_verify(void) { return 0; } + static inline void s390_pv_unshare(void) {} + static inline void s390_pv_inject_reset_error(CPUState *cs) {}; ++static inline uint64_t kvm_s390_pv_dmp_get_size_cpu(void) { return 0; } ++static inline uint64_t kvm_s390_pv_dmp_get_size_mem_state(void) { return 0; } ++static inline uint64_t kvm_s390_pv_dmp_get_size_completion_data(void) { return 0; } ++static inline bool kvm_s390_pv_info_basic_valid(void) { return false; } + #endif /* CONFIG_KVM */ + + int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-Register-TYPE_S390_CCW_MACHINE-properties-as-c.patch b/SOURCES/kvm-s390x-Register-TYPE_S390_CCW_MACHINE-properties-as-c.patch new file mode 100644 index 0000000..f150979 --- /dev/null +++ b/SOURCES/kvm-s390x-Register-TYPE_S390_CCW_MACHINE-properties-as-c.patch @@ -0,0 +1,209 @@ +From 429c4cc750affe82b89867668ff2515a8a66732e Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 18 Nov 2022 15:23:19 +0100 +Subject: [PATCH 2/3] s390x: Register TYPE_S390_CCW_MACHINE properties as class + properties +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 233: s390x: Document the "loadparm" machine property +RH-Bugzilla: 2128225 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Jon Maloy +RH-Commit: [2/2] 28a0086cb0e8be2535deafdd9115cadd7ff033f3 + +Currently, when running 'qemu-system-s390x -M s390-ccw-virtio,help' +the s390x-specific properties are not listed anymore. This happens +because since commit d8fb7d0969 ("vl: switch -M parsing to keyval") +the properties have to be defined at the class level and not at the +instance level anymore. Fix it on s390x now, too, by moving the +registration of the properties to the class level" + +Fixes: d8fb7d0969 ("vl: switch -M parsing to keyval") +Signed-off-by: Pierre Morel +Message-Id: <20221103170150.20789-2-pmorel@linux.ibm.com> +[thuth: Add patch description] +Signed-off-by: Thomas Huth +(cherry picked from commit 1fd396e32288bbf536483c74b68cb3ee86005a9f) + +Conflicts: + hw/s390x/s390-virtio-ccw.c + (dropped the "zpcii-disable" property code - it's not used in downstream) +Signed-off-by: Thomas Huth +--- + hw/s390x/s390-virtio-ccw.c | 117 +++++++++++++++++++++---------------- + 1 file changed, 67 insertions(+), 50 deletions(-) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index a9617ab79f..4a7cd21cac 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -42,6 +42,7 @@ + #include "sysemu/sysemu.h" + #include "hw/s390x/pv.h" + #include "migration/blocker.h" ++#include "qapi/visitor.h" + + static Error *pv_mig_blocker; + +@@ -588,38 +589,6 @@ static ram_addr_t s390_fixup_ram_size(ram_addr_t sz) + return newsz; + } + +-static void ccw_machine_class_init(ObjectClass *oc, void *data) +-{ +- MachineClass *mc = MACHINE_CLASS(oc); +- NMIClass *nc = NMI_CLASS(oc); +- HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); +- S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc); +- +- s390mc->ri_allowed = true; +- s390mc->cpu_model_allowed = true; +- s390mc->css_migration_enabled = true; +- s390mc->hpage_1m_allowed = true; +- mc->init = ccw_init; +- mc->reset = s390_machine_reset; +- mc->block_default_type = IF_VIRTIO; +- mc->no_cdrom = 1; +- mc->no_floppy = 1; +- mc->no_parallel = 1; +- mc->no_sdcard = 1; +- mc->max_cpus = S390_MAX_CPUS; +- mc->has_hotpluggable_cpus = true; +- assert(!mc->get_hotplug_handler); +- mc->get_hotplug_handler = s390_get_hotplug_handler; +- mc->cpu_index_to_instance_props = s390_cpu_index_to_props; +- mc->possible_cpu_arch_ids = s390_possible_cpu_arch_ids; +- /* it is overridden with 'host' cpu *in kvm_arch_init* */ +- mc->default_cpu_type = S390_CPU_TYPE_NAME("qemu"); +- hc->plug = s390_machine_device_plug; +- hc->unplug_request = s390_machine_device_unplug_request; +- nc->nmi_monitor_handler = s390_nmi; +- mc->default_ram_id = "s390.ram"; +-} +- + static inline bool machine_get_aes_key_wrap(Object *obj, Error **errp) + { + S390CcwMachineState *ms = S390_CCW_MACHINE(obj); +@@ -694,19 +663,29 @@ bool hpage_1m_allowed(void) + return get_machine_class()->hpage_1m_allowed; + } + +-static char *machine_get_loadparm(Object *obj, Error **errp) ++static void machine_get_loadparm(Object *obj, Visitor *v, ++ const char *name, void *opaque, ++ Error **errp) + { + S390CcwMachineState *ms = S390_CCW_MACHINE(obj); ++ char *str = g_strndup((char *) ms->loadparm, sizeof(ms->loadparm)); + +- /* make a NUL-terminated string */ +- return g_strndup((char *) ms->loadparm, sizeof(ms->loadparm)); ++ visit_type_str(v, name, &str, errp); ++ g_free(str); + } + +-static void machine_set_loadparm(Object *obj, const char *val, Error **errp) ++static void machine_set_loadparm(Object *obj, Visitor *v, ++ const char *name, void *opaque, ++ Error **errp) + { + S390CcwMachineState *ms = S390_CCW_MACHINE(obj); ++ char *val; + int i; + ++ if (!visit_type_str(v, name, &val, errp)) { ++ return; ++ } ++ + for (i = 0; i < sizeof(ms->loadparm) && val[i]; i++) { + uint8_t c = qemu_toupper(val[i]); /* mimic HMC */ + +@@ -724,29 +703,67 @@ static void machine_set_loadparm(Object *obj, const char *val, Error **errp) + ms->loadparm[i] = ' '; /* pad right with spaces */ + } + } +-static inline void s390_machine_initfn(Object *obj) ++ ++static void ccw_machine_class_init(ObjectClass *oc, void *data) + { +- object_property_add_bool(obj, "aes-key-wrap", +- machine_get_aes_key_wrap, +- machine_set_aes_key_wrap); +- object_property_set_description(obj, "aes-key-wrap", ++ MachineClass *mc = MACHINE_CLASS(oc); ++ NMIClass *nc = NMI_CLASS(oc); ++ HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); ++ S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc); ++ ++ s390mc->ri_allowed = true; ++ s390mc->cpu_model_allowed = true; ++ s390mc->css_migration_enabled = true; ++ s390mc->hpage_1m_allowed = true; ++ mc->init = ccw_init; ++ mc->reset = s390_machine_reset; ++ mc->block_default_type = IF_VIRTIO; ++ mc->no_cdrom = 1; ++ mc->no_floppy = 1; ++ mc->no_parallel = 1; ++ mc->no_sdcard = 1; ++ mc->max_cpus = S390_MAX_CPUS; ++ mc->has_hotpluggable_cpus = true; ++ assert(!mc->get_hotplug_handler); ++ mc->get_hotplug_handler = s390_get_hotplug_handler; ++ mc->cpu_index_to_instance_props = s390_cpu_index_to_props; ++ mc->possible_cpu_arch_ids = s390_possible_cpu_arch_ids; ++ /* it is overridden with 'host' cpu *in kvm_arch_init* */ ++ mc->default_cpu_type = S390_CPU_TYPE_NAME("qemu"); ++ hc->plug = s390_machine_device_plug; ++ hc->unplug_request = s390_machine_device_unplug_request; ++ nc->nmi_monitor_handler = s390_nmi; ++ mc->default_ram_id = "s390.ram"; ++ ++ object_class_property_add_bool(oc, "aes-key-wrap", ++ machine_get_aes_key_wrap, ++ machine_set_aes_key_wrap); ++ object_class_property_set_description(oc, "aes-key-wrap", + "enable/disable AES key wrapping using the CPACF wrapping key"); +- object_property_set_bool(obj, "aes-key-wrap", true, NULL); + +- object_property_add_bool(obj, "dea-key-wrap", +- machine_get_dea_key_wrap, +- machine_set_dea_key_wrap); +- object_property_set_description(obj, "dea-key-wrap", ++ object_class_property_add_bool(oc, "dea-key-wrap", ++ machine_get_dea_key_wrap, ++ machine_set_dea_key_wrap); ++ object_class_property_set_description(oc, "dea-key-wrap", + "enable/disable DEA key wrapping using the CPACF wrapping key"); +- object_property_set_bool(obj, "dea-key-wrap", true, NULL); +- object_property_add_str(obj, "loadparm", +- machine_get_loadparm, machine_set_loadparm); +- object_property_set_description(obj, "loadparm", ++ ++ object_class_property_add(oc, "loadparm", "loadparm", ++ machine_get_loadparm, machine_set_loadparm, ++ NULL, NULL); ++ object_class_property_set_description(oc, "loadparm", + "Up to 8 chars in set of [A-Za-z0-9. ] (lower case chars converted" + " to upper case) to pass to machine loader, boot manager," + " and guest kernel"); + } + ++static inline void s390_machine_initfn(Object *obj) ++{ ++ S390CcwMachineState *ms = S390_CCW_MACHINE(obj); ++ ++ ms->aes_key_wrap = true; ++ ms->dea_key_wrap = true; ++} ++ + static const TypeInfo ccw_machine_info = { + .name = TYPE_S390_CCW_MACHINE, + .parent = TYPE_MACHINE, +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-css-fix-PMCW-invalid-mask.patch b/SOURCES/kvm-s390x-css-fix-PMCW-invalid-mask.patch new file mode 100644 index 0000000..959eea9 --- /dev/null +++ b/SOURCES/kvm-s390x-css-fix-PMCW-invalid-mask.patch @@ -0,0 +1,58 @@ +From f3125f6379cbc070e9acaf58d0ec37972992744b Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Wed, 6 Apr 2022 10:56:26 +0200 +Subject: [PATCH 4/5] s390x/css: fix PMCW invalid mask + +RH-Author: Thomas Huth +RH-MergeRequest: 145: s390x/css: fix PMCW invalid mask +RH-Commit: [1/1] fbf192f651aa668af56ca5c77455595fcdb19508 +RH-Bugzilla: 2071070 +RH-Acked-by: Jon Maloy +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2071070 + +commit 2df59b73e0864f021f6179f32f7ed364f6d4f38d +Author: Nico Boehr +Date: Thu Dec 16 14:16:57 2021 +0100 + + s390x/css: fix PMCW invalid mask + + Previously, we required bits 5, 6 and 7 to be zero (0x07 == 0b111). But, + as per the principles of operation, bit 5 is ignored in MSCH and bits 0, + 1, 6 and 7 need to be zero. + + As both PMCW_FLAGS_MASK_INVALID and ioinst_schib_valid() are only used + by ioinst_handle_msch(), adjust the mask accordingly. + + Fixes: db1c8f53bfb1 ("s390: Channel I/O basic definitions.") + Signed-off-by: Nico Boehr + Reviewed-by: Pierre Morel + Reviewed-by: Halil Pasic + Reviewed-by: Janosch Frank + Reviewed-by: Cornelia Huck + Message-Id: <20211216131657.1057978-1-nrb@linux.ibm.com> + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + include/hw/s390x/ioinst.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/hw/s390x/ioinst.h b/include/hw/s390x/ioinst.h +index 3771fff9d4..ea8d0f2444 100644 +--- a/include/hw/s390x/ioinst.h ++++ b/include/hw/s390x/ioinst.h +@@ -107,7 +107,7 @@ QEMU_BUILD_BUG_MSG(sizeof(PMCW) != 28, "size of PMCW is wrong"); + #define PMCW_FLAGS_MASK_MP 0x0004 + #define PMCW_FLAGS_MASK_TF 0x0002 + #define PMCW_FLAGS_MASK_DNV 0x0001 +-#define PMCW_FLAGS_MASK_INVALID 0x0700 ++#define PMCW_FLAGS_MASK_INVALID 0xc300 + + #define PMCW_CHARS_MASK_ST 0x00e00000 + #define PMCW_CHARS_MASK_MBFC 0x00000004 +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-ipl-support-extended-kernel-command-line-size.patch b/SOURCES/kvm-s390x-ipl-support-extended-kernel-command-line-size.patch new file mode 100644 index 0000000..d62a45a --- /dev/null +++ b/SOURCES/kvm-s390x-ipl-support-extended-kernel-command-line-size.patch @@ -0,0 +1,97 @@ +From ddfee9d393af322938e4df466cd01b8f9570a1c9 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Tue, 5 Apr 2022 10:20:59 +0200 +Subject: [PATCH 1/6] s390x/ipl: support extended kernel command line size + +RH-Author: Thomas Huth +RH-MergeRequest: 144: s390x/ipl: support extended kernel command line size +RH-Commit: [1/1] be227e50af5dbe7802605f873db29ac5358aa196 +RH-Bugzilla: 2043830 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +Bugzilla: http://bugzilla.redhat.com/2043830 + +commit b2173046a64beed76715f310f98538f159276af1 +Author: Marc Hartmayer +Date: Mon Nov 22 12:29:09 2021 +0100 + + s390x/ipl: support extended kernel command line size + + In the past s390 used a fixed command line length of 896 bytes. This has changed + with the Linux commit 5ecb2da660ab ("s390: support command lines longer than 896 + bytes"). There is now a parm area indicating the maximum command line size. This + parm area has always been initialized to zero, so with older kernels this field + would read zero and we must then assume that only 896 bytes are available. + + Signed-off-by: Marc Hartmayer + Reviewed-by: David Hildenbrand + Reviewed-by: Christian Borntraeger + Acked-by: Viktor Mihajlovski + Message-Id: <20211122112909.18138-1-mhartmay@linux.ibm.com> + [thuth: Cosmetic fixes, and use PRIu64 instead of %lu] + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + hw/s390x/ipl.c | 27 +++++++++++++++++++++++---- + 1 file changed, 23 insertions(+), 4 deletions(-) + +diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c +index 7ddca0127f..eb7fc4c4ae 100644 +--- a/hw/s390x/ipl.c ++++ b/hw/s390x/ipl.c +@@ -37,8 +37,9 @@ + + #define KERN_IMAGE_START 0x010000UL + #define LINUX_MAGIC_ADDR 0x010008UL ++#define KERN_PARM_AREA_SIZE_ADDR 0x010430UL + #define KERN_PARM_AREA 0x010480UL +-#define KERN_PARM_AREA_SIZE 0x000380UL ++#define LEGACY_KERN_PARM_AREA_SIZE 0x000380UL + #define INITRD_START 0x800000UL + #define INITRD_PARM_START 0x010408UL + #define PARMFILE_START 0x001000UL +@@ -110,6 +111,21 @@ static uint64_t bios_translate_addr(void *opaque, uint64_t srcaddr) + return srcaddr + dstaddr; + } + ++static uint64_t get_max_kernel_cmdline_size(void) ++{ ++ uint64_t *size_ptr = rom_ptr(KERN_PARM_AREA_SIZE_ADDR, sizeof(*size_ptr)); ++ ++ if (size_ptr) { ++ uint64_t size; ++ ++ size = be64_to_cpu(*size_ptr); ++ if (size) { ++ return size; ++ } ++ } ++ return LEGACY_KERN_PARM_AREA_SIZE; ++} ++ + static void s390_ipl_realize(DeviceState *dev, Error **errp) + { + MachineState *ms = MACHINE(qdev_get_machine()); +@@ -197,10 +213,13 @@ static void s390_ipl_realize(DeviceState *dev, Error **errp) + ipl->start_addr = KERN_IMAGE_START; + /* Overwrite parameters in the kernel image, which are "rom" */ + if (parm_area) { +- if (cmdline_size > KERN_PARM_AREA_SIZE) { ++ uint64_t max_cmdline_size = get_max_kernel_cmdline_size(); ++ ++ if (cmdline_size > max_cmdline_size) { + error_setg(errp, +- "kernel command line exceeds maximum size: %zu > %lu", +- cmdline_size, KERN_PARM_AREA_SIZE); ++ "kernel command line exceeds maximum size:" ++ " %zu > %" PRIu64, ++ cmdline_size, max_cmdline_size); + return; + } + +-- +2.27.0 + diff --git a/SOURCES/kvm-s390x-pci-RPCIT-second-pass-when-mappings-exhausted.patch b/SOURCES/kvm-s390x-pci-RPCIT-second-pass-when-mappings-exhausted.patch new file mode 100644 index 0000000..2e90869 --- /dev/null +++ b/SOURCES/kvm-s390x-pci-RPCIT-second-pass-when-mappings-exhausted.patch @@ -0,0 +1,114 @@ +From 2f0febd6813c4ad7f52e43afb3ecce7aef3557e6 Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 28 Oct 2022 15:47:56 -0400 +Subject: [PATCH 08/11] s390x/pci: RPCIT second pass when mappings exhausted +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 250: s390x/pci: reset ISM passthrough devices on shutdown and system reset +RH-Bugzilla: 2163713 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/4] 0b4500b9247725b1ef0b290bb85392300a618cac + +If we encounter a new mapping while the number of available DMA entries +in vfio is 0, we are currently skipping that mapping which is a problem +if we manage to free up DMA space after that within the same RPCIT -- +we will return to the guest with CC0 and have not mapped everything +within the specified range. This issue was uncovered while testing +changes to the s390 linux kernel iommu/dma code, where a different +usage pattern was employed (new mappings start at the end of the +aperture and work back towards the front, making us far more likely +to encounter new mappings before invalidated mappings during a +global refresh). + +Fix this by tracking whether any mappings were skipped due to vfio +DMA limit hitting 0; when this occurs, we still continue the range +and unmap/map anything we can - then we must re-run the range again +to pickup anything that was missed. This must occur in a loop until +all requests are satisfied (success) or we detect that we are still +unable to complete all mappings (return ZPCI_RPCIT_ST_INSUFF_RES). + +Link: https://lore.kernel.org/linux-s390/20221019144435.369902-1-schnelle@linux.ibm.com/ +Fixes: 37fa32de70 ("s390x/pci: Honor DMA limits set by vfio") +Reported-by: Niklas Schnelle +Signed-off-by: Matthew Rosato +Message-Id: <20221028194758.204007-2-mjrosato@linux.ibm.com> +Reviewed-by: Eric Farman +Signed-off-by: Thomas Huth +(cherry picked from commit 4a8d21ba50fc8625c3bd51dab903872952f95718) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-inst.c | 29 ++++++++++++++++++++++------- + 1 file changed, 22 insertions(+), 7 deletions(-) + +diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c +index 20a9bcc7af..7cc4bcf850 100644 +--- a/hw/s390x/s390-pci-inst.c ++++ b/hw/s390x/s390-pci-inst.c +@@ -677,8 +677,9 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) + S390PCIBusDevice *pbdev; + S390PCIIOMMU *iommu; + S390IOTLBEntry entry; +- hwaddr start, end; ++ hwaddr start, end, sstart; + uint32_t dma_avail; ++ bool again; + + if (env->psw.mask & PSW_MASK_PSTATE) { + s390_program_interrupt(env, PGM_PRIVILEGED, ra); +@@ -691,7 +692,7 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) + } + + fh = env->regs[r1] >> 32; +- start = env->regs[r2]; ++ sstart = start = env->regs[r2]; + end = start + env->regs[r2 + 1]; + + pbdev = s390_pci_find_dev_by_fh(s390_get_phb(), fh); +@@ -732,6 +733,9 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) + goto err; + } + ++ retry: ++ start = sstart; ++ again = false; + while (start < end) { + error = s390_guest_io_table_walk(iommu->g_iota, start, &entry); + if (error) { +@@ -739,13 +743,24 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) + } + + start += entry.len; +- while (entry.iova < start && entry.iova < end && +- (dma_avail > 0 || entry.perm == IOMMU_NONE)) { +- dma_avail = s390_pci_update_iotlb(iommu, &entry); +- entry.iova += TARGET_PAGE_SIZE; +- entry.translated_addr += TARGET_PAGE_SIZE; ++ while (entry.iova < start && entry.iova < end) { ++ if (dma_avail > 0 || entry.perm == IOMMU_NONE) { ++ dma_avail = s390_pci_update_iotlb(iommu, &entry); ++ entry.iova += TARGET_PAGE_SIZE; ++ entry.translated_addr += TARGET_PAGE_SIZE; ++ } else { ++ /* ++ * We are unable to make a new mapping at this time, continue ++ * on and hopefully free up more space. Then attempt another ++ * pass. ++ */ ++ again = true; ++ break; ++ } + } + } ++ if (again && dma_avail > 0) ++ goto retry; + err: + if (error) { + pbdev->state = ZPCI_FS_ERROR; +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-pci-add-routine-to-get-host-function-handle-fr.patch b/SOURCES/kvm-s390x-pci-add-routine-to-get-host-function-handle-fr.patch new file mode 100644 index 0000000..88716f5 --- /dev/null +++ b/SOURCES/kvm-s390x-pci-add-routine-to-get-host-function-handle-fr.patch @@ -0,0 +1,178 @@ +From 8020177f1c40da2a9ca09fa20dc90eda65739671 Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 2 Sep 2022 13:27:31 -0400 +Subject: [PATCH 06/42] s390x/pci: add routine to get host function handle from + CLP info +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [6/41] 8ab652cf4095e61f5f55726d41111de227d452e7 + +In order to interface with the underlying host zPCI device, we need +to know its function handle. Add a routine to grab this from the +vfio CLP capabilities chain. + +Signed-off-by: Matthew Rosato +Reviewed-by: Pierre Morel +Message-Id: <20220902172737.170349-3-mjrosato@linux.ibm.com> +[thuth: Replace free(info) with g_free(info)] +Signed-off-by: Thomas Huth +(cherry picked from commit 21fa15298d88db2050a713cdf79c10cb0e09146f) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-vfio.c | 83 ++++++++++++++++++++++++++------ + include/hw/s390x/s390-pci-vfio.h | 5 ++ + 2 files changed, 72 insertions(+), 16 deletions(-) + +diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c +index 6f80a47e29..08bcc55e85 100644 +--- a/hw/s390x/s390-pci-vfio.c ++++ b/hw/s390x/s390-pci-vfio.c +@@ -124,6 +124,27 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev, + pbdev->zpci_fn.pft = 0; + } + ++static bool get_host_fh(S390PCIBusDevice *pbdev, struct vfio_device_info *info, ++ uint32_t *fh) ++{ ++ struct vfio_info_cap_header *hdr; ++ struct vfio_device_info_cap_zpci_base *cap; ++ VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); ++ ++ hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_BASE); ++ ++ /* Can only get the host fh with version 2 or greater */ ++ if (hdr == NULL || hdr->version < 2) { ++ trace_s390_pci_clp_cap(vpci->vbasedev.name, ++ VFIO_DEVICE_INFO_CAP_ZPCI_BASE); ++ return false; ++ } ++ cap = (void *) hdr; ++ ++ *fh = cap->fh; ++ return true; ++} ++ + static void s390_pci_read_group(S390PCIBusDevice *pbdev, + struct vfio_device_info *info) + { +@@ -217,25 +238,13 @@ static void s390_pci_read_pfip(S390PCIBusDevice *pbdev, + memcpy(pbdev->zpci_fn.pfip, cap->pfip, CLP_PFIP_NR_SEGMENTS); + } + +-/* +- * This function will issue the VFIO_DEVICE_GET_INFO ioctl and look for +- * capabilities that contain information about CLP features provided by the +- * underlying host. +- * On entry, defaults have already been placed into the guest CLP response +- * buffers. On exit, defaults will have been overwritten for any CLP features +- * found in the capability chain; defaults will remain for any CLP features not +- * found in the chain. +- */ +-void s390_pci_get_clp_info(S390PCIBusDevice *pbdev) ++static struct vfio_device_info *get_device_info(S390PCIBusDevice *pbdev, ++ uint32_t argsz) + { +- g_autofree struct vfio_device_info *info = NULL; ++ struct vfio_device_info *info = g_malloc0(argsz); + VFIOPCIDevice *vfio_pci; +- uint32_t argsz; + int fd; + +- argsz = sizeof(*info); +- info = g_malloc0(argsz); +- + vfio_pci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); + fd = vfio_pci->vbasedev.fd; + +@@ -250,7 +259,8 @@ retry: + + if (ioctl(fd, VFIO_DEVICE_GET_INFO, info)) { + trace_s390_pci_clp_dev_info(vfio_pci->vbasedev.name); +- return; ++ g_free(info); ++ return NULL; + } + + if (info->argsz > argsz) { +@@ -259,6 +269,47 @@ retry: + goto retry; + } + ++ return info; ++} ++ ++/* ++ * Get the host function handle from the vfio CLP capabilities chain. Returns ++ * true if a fh value was placed into the provided buffer. Returns false ++ * if a fh could not be obtained (ioctl failed or capabilitiy version does ++ * not include the fh) ++ */ ++bool s390_pci_get_host_fh(S390PCIBusDevice *pbdev, uint32_t *fh) ++{ ++ g_autofree struct vfio_device_info *info = NULL; ++ ++ assert(fh); ++ ++ info = get_device_info(pbdev, sizeof(*info)); ++ if (!info) { ++ return false; ++ } ++ ++ return get_host_fh(pbdev, info, fh); ++} ++ ++/* ++ * This function will issue the VFIO_DEVICE_GET_INFO ioctl and look for ++ * capabilities that contain information about CLP features provided by the ++ * underlying host. ++ * On entry, defaults have already been placed into the guest CLP response ++ * buffers. On exit, defaults will have been overwritten for any CLP features ++ * found in the capability chain; defaults will remain for any CLP features not ++ * found in the chain. ++ */ ++void s390_pci_get_clp_info(S390PCIBusDevice *pbdev) ++{ ++ g_autofree struct vfio_device_info *info = NULL; ++ ++ info = get_device_info(pbdev, sizeof(*info)); ++ if (!info) { ++ return; ++ } ++ + /* + * Find the CLP features provided and fill in the guest CLP responses. + * Always call s390_pci_read_base first as information from this could +diff --git a/include/hw/s390x/s390-pci-vfio.h b/include/hw/s390x/s390-pci-vfio.h +index ff708aef50..ae1b126ff7 100644 +--- a/include/hw/s390x/s390-pci-vfio.h ++++ b/include/hw/s390x/s390-pci-vfio.h +@@ -20,6 +20,7 @@ bool s390_pci_update_dma_avail(int fd, unsigned int *avail); + S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s, + S390PCIBusDevice *pbdev); + void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt); ++bool s390_pci_get_host_fh(S390PCIBusDevice *pbdev, uint32_t *fh); + void s390_pci_get_clp_info(S390PCIBusDevice *pbdev); + #else + static inline bool s390_pci_update_dma_avail(int fd, unsigned int *avail) +@@ -33,6 +34,10 @@ static inline S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s, + } + static inline void s390_pci_end_dma_count(S390pciState *s, + S390PCIDMACount *cnt) { } ++static inline bool s390_pci_get_host_fh(S390PCIBusDevice *pbdev, uint32_t *fh) ++{ ++ return false; ++} + static inline void s390_pci_get_clp_info(S390PCIBusDevice *pbdev) { } + #endif + +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-pci-add-supported-DT-information-to-clp-respon.patch b/SOURCES/kvm-s390x-pci-add-supported-DT-information-to-clp-respon.patch new file mode 100644 index 0000000..563f782 --- /dev/null +++ b/SOURCES/kvm-s390x-pci-add-supported-DT-information-to-clp-respon.patch @@ -0,0 +1,99 @@ +From de6319fe0ce09297beae5ff4636c03217abe6f26 Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 3 Dec 2021 09:27:06 -0500 +Subject: [PATCH 04/42] s390x/pci: add supported DT information to clp response +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [4/41] 275668f6d38fbc1dfa2f1aa8f58b2c319de2657d + +The DTSM is a mask that specifies which I/O Address Translation designation +types are supported. Today QEMU only supports DT=1. + +Signed-off-by: Matthew Rosato +Reviewed-by: Eric Farman +Reviewed-by: Pierre Morel +Message-Id: <20211203142706.427279-5-mjrosato@linux.ibm.com> +Signed-off-by: Thomas Huth +(cherry picked from commit ac6aa30ac47b2abaf142f76de46374da2a98f6e7) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-bus.c | 1 + + hw/s390x/s390-pci-inst.c | 1 + + hw/s390x/s390-pci-vfio.c | 1 + + include/hw/s390x/s390-pci-bus.h | 1 + + include/hw/s390x/s390-pci-clp.h | 3 ++- + 5 files changed, 6 insertions(+), 1 deletion(-) + +diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c +index 1b51a72838..01b58ebc70 100644 +--- a/hw/s390x/s390-pci-bus.c ++++ b/hw/s390x/s390-pci-bus.c +@@ -782,6 +782,7 @@ static void s390_pci_init_default_group(void) + resgrp->i = 128; + resgrp->maxstbl = 128; + resgrp->version = 0; ++ resgrp->dtsm = ZPCI_DTSM; + } + + static void set_pbdev_info(S390PCIBusDevice *pbdev) +diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c +index 07bab85ce5..6d400d4147 100644 +--- a/hw/s390x/s390-pci-inst.c ++++ b/hw/s390x/s390-pci-inst.c +@@ -329,6 +329,7 @@ int clp_service_call(S390CPU *cpu, uint8_t r2, uintptr_t ra) + stw_p(&resgrp->i, group->zpci_group.i); + stw_p(&resgrp->maxstbl, group->zpci_group.maxstbl); + resgrp->version = group->zpci_group.version; ++ resgrp->dtsm = group->zpci_group.dtsm; + stw_p(&resgrp->hdr.rsp, CLP_RC_OK); + break; + } +diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c +index 2a153fa8c9..6f80a47e29 100644 +--- a/hw/s390x/s390-pci-vfio.c ++++ b/hw/s390x/s390-pci-vfio.c +@@ -160,6 +160,7 @@ static void s390_pci_read_group(S390PCIBusDevice *pbdev, + resgrp->i = cap->noi; + resgrp->maxstbl = cap->maxstbl; + resgrp->version = cap->version; ++ resgrp->dtsm = ZPCI_DTSM; + } + } + +diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h +index 2727e7bdef..da3cde2bb4 100644 +--- a/include/hw/s390x/s390-pci-bus.h ++++ b/include/hw/s390x/s390-pci-bus.h +@@ -37,6 +37,7 @@ + #define ZPCI_MAX_UID 0xffff + #define UID_UNDEFINED 0 + #define UID_CHECKING_ENABLED 0x01 ++#define ZPCI_DTSM 0x40 + + OBJECT_DECLARE_SIMPLE_TYPE(S390pciState, S390_PCI_HOST_BRIDGE) + OBJECT_DECLARE_SIMPLE_TYPE(S390PCIBus, S390_PCI_BUS) +diff --git a/include/hw/s390x/s390-pci-clp.h b/include/hw/s390x/s390-pci-clp.h +index 96b8e3f133..cc8c8662b8 100644 +--- a/include/hw/s390x/s390-pci-clp.h ++++ b/include/hw/s390x/s390-pci-clp.h +@@ -163,7 +163,8 @@ typedef struct ClpRspQueryPciGrp { + uint8_t fr; + uint16_t maxstbl; + uint16_t mui; +- uint64_t reserved3; ++ uint8_t dtsm; ++ uint8_t reserved3[7]; + uint64_t dasm; /* dma address space mask */ + uint64_t msia; /* MSI address */ + uint64_t reserved4; +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-pci-coalesce-unmap-operations.patch b/SOURCES/kvm-s390x-pci-coalesce-unmap-operations.patch new file mode 100644 index 0000000..baa0a9c --- /dev/null +++ b/SOURCES/kvm-s390x-pci-coalesce-unmap-operations.patch @@ -0,0 +1,125 @@ +From b972c5a2763a91024725c147cf1691ed8e180c7c Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 28 Oct 2022 15:47:57 -0400 +Subject: [PATCH 09/11] s390x/pci: coalesce unmap operations +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 250: s390x/pci: reset ISM passthrough devices on shutdown and system reset +RH-Bugzilla: 2163713 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/4] 7b5ee38eca565f5a7cbede4b9883ba3a508fb46c + +Currently, each unmapped page is handled as an individual iommu +region notification. Attempt to group contiguous unmap operations +into fewer notifications to reduce overhead. + +Signed-off-by: Matthew Rosato +Message-Id: <20221028194758.204007-3-mjrosato@linux.ibm.com> +Reviewed-by: Eric Farman +Signed-off-by: Thomas Huth +(cherry picked from commit ef536007c3301bbd6a787e4c2210ea289adaa6f0) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-inst.c | 51 ++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 51 insertions(+) + +diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c +index 7cc4bcf850..66e764f901 100644 +--- a/hw/s390x/s390-pci-inst.c ++++ b/hw/s390x/s390-pci-inst.c +@@ -640,6 +640,8 @@ static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu, + } + g_hash_table_remove(iommu->iotlb, &entry->iova); + inc_dma_avail(iommu); ++ /* Don't notify the iommu yet, maybe we can bundle contiguous unmaps */ ++ goto out; + } else { + if (cache) { + if (cache->perm == entry->perm && +@@ -663,15 +665,44 @@ static uint32_t s390_pci_update_iotlb(S390PCIIOMMU *iommu, + dec_dma_avail(iommu); + } + ++ /* ++ * All associated iotlb entries have already been cleared, trigger the ++ * unmaps. ++ */ + memory_region_notify_iommu(&iommu->iommu_mr, 0, event); + + out: + return iommu->dma_limit ? iommu->dma_limit->avail : 1; + } + ++static void s390_pci_batch_unmap(S390PCIIOMMU *iommu, uint64_t iova, ++ uint64_t len) ++{ ++ uint64_t remain = len, start = iova, end = start + len - 1, mask, size; ++ IOMMUTLBEvent event = { ++ .type = IOMMU_NOTIFIER_UNMAP, ++ .entry = { ++ .target_as = &address_space_memory, ++ .translated_addr = 0, ++ .perm = IOMMU_NONE, ++ }, ++ }; ++ ++ while (remain >= TARGET_PAGE_SIZE) { ++ mask = dma_aligned_pow2_mask(start, end, 64); ++ size = mask + 1; ++ event.entry.iova = start; ++ event.entry.addr_mask = mask; ++ memory_region_notify_iommu(&iommu->iommu_mr, 0, event); ++ start += size; ++ remain -= size; ++ } ++} ++ + int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) + { + CPUS390XState *env = &cpu->env; ++ uint64_t iova, coalesce = 0; + uint32_t fh; + uint16_t error = 0; + S390PCIBusDevice *pbdev; +@@ -742,6 +773,21 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) + break; + } + ++ /* ++ * If this is an unmap of a PTE, let's try to coalesce multiple unmaps ++ * into as few notifier events as possible. ++ */ ++ if (entry.perm == IOMMU_NONE && entry.len == TARGET_PAGE_SIZE) { ++ if (coalesce == 0) { ++ iova = entry.iova; ++ } ++ coalesce += entry.len; ++ } else if (coalesce > 0) { ++ /* Unleash the coalesced unmap before processing a new map */ ++ s390_pci_batch_unmap(iommu, iova, coalesce); ++ coalesce = 0; ++ } ++ + start += entry.len; + while (entry.iova < start && entry.iova < end) { + if (dma_avail > 0 || entry.perm == IOMMU_NONE) { +@@ -759,6 +805,11 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2, uintptr_t ra) + } + } + } ++ if (coalesce) { ++ /* Unleash the coalesced unmap before finishing rpcit */ ++ s390_pci_batch_unmap(iommu, iova, coalesce); ++ coalesce = 0; ++ } + if (again && dma_avail > 0) + goto retry; + err: +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-pci-don-t-fence-interpreted-devices-without-MS.patch b/SOURCES/kvm-s390x-pci-don-t-fence-interpreted-devices-without-MS.patch new file mode 100644 index 0000000..4403658 --- /dev/null +++ b/SOURCES/kvm-s390x-pci-don-t-fence-interpreted-devices-without-MS.patch @@ -0,0 +1,60 @@ +From 5bd57d8ac3a4e75337eae81a3623b4dc2b417e2f Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 2 Sep 2022 13:27:33 -0400 +Subject: [PATCH 08/42] s390x/pci: don't fence interpreted devices without + MSI-X +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [8/41] 52bad4368e9494c43133338b386dc0cc159aeedc + +Lack of MSI-X support is not an issue for interpreted passthrough +devices, so let's let these in. This will allow, for example, ISM +devices to be passed through -- but only when interpretation is +available and being used. + +Signed-off-by: Matthew Rosato +Reviewed-by: Thomas Huth +Reviewed-by: Pierre Morel +Message-Id: <20220902172737.170349-5-mjrosato@linux.ibm.com> +Signed-off-by: Thomas Huth +(cherry picked from commit 15d0e7942d3b31ff71d8e0e8cec3a8203214f19b) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-bus.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c +index 18bfae0465..07c7c155e3 100644 +--- a/hw/s390x/s390-pci-bus.c ++++ b/hw/s390x/s390-pci-bus.c +@@ -881,6 +881,10 @@ static int s390_pci_msix_init(S390PCIBusDevice *pbdev) + + static void s390_pci_msix_free(S390PCIBusDevice *pbdev) + { ++ if (pbdev->msix.entries == 0) { ++ return; ++ } ++ + memory_region_del_subregion(&pbdev->iommu->mr, &pbdev->msix_notify_mr); + object_unparent(OBJECT(&pbdev->msix_notify_mr)); + } +@@ -1093,7 +1097,7 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + pbdev->interp = false; + } + +- if (s390_pci_msix_init(pbdev)) { ++ if (s390_pci_msix_init(pbdev) && !pbdev->interp) { + error_setg(errp, "MSI-X support is mandatory " + "in the S390 architecture"); + return; +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-pci-don-t-use-hard-coded-dma-range-in-reg_ioat.patch b/SOURCES/kvm-s390x-pci-don-t-use-hard-coded-dma-range-in-reg_ioat.patch new file mode 100644 index 0000000..c97b587 --- /dev/null +++ b/SOURCES/kvm-s390x-pci-don-t-use-hard-coded-dma-range-in-reg_ioat.patch @@ -0,0 +1,77 @@ +From 67ebb71d56e95adf185ab4971939e31c4c899863 Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 3 Dec 2021 09:27:04 -0500 +Subject: [PATCH 02/42] s390x/pci: don't use hard-coded dma range in reg_ioat +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [2/41] c7897321f9848ef8f115130832774bbcd6724f03 + +Instead use the values from clp info, they will either be the hard-coded +values or what came from the host driver via vfio. + +Fixes: 9670ee752727 ("s390x/pci: use a PCI Function structure") +Signed-off-by: Matthew Rosato +Reviewed-by: Eric Farman +Reviewed-by: Pierre Morel +Message-Id: <20211203142706.427279-3-mjrosato@linux.ibm.com> +Signed-off-by: Thomas Huth +(cherry picked from commit df7ce0a94d9283f0656b4bc0f21566973ff649a3) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-inst.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c +index 1c8ad91175..11b7f6bfa1 100644 +--- a/hw/s390x/s390-pci-inst.c ++++ b/hw/s390x/s390-pci-inst.c +@@ -916,9 +916,10 @@ int pci_dereg_irqs(S390PCIBusDevice *pbdev) + return 0; + } + +-static int reg_ioat(CPUS390XState *env, S390PCIIOMMU *iommu, ZpciFib fib, ++static int reg_ioat(CPUS390XState *env, S390PCIBusDevice *pbdev, ZpciFib fib, + uintptr_t ra) + { ++ S390PCIIOMMU *iommu = pbdev->iommu; + uint64_t pba = ldq_p(&fib.pba); + uint64_t pal = ldq_p(&fib.pal); + uint64_t g_iota = ldq_p(&fib.iota); +@@ -927,7 +928,7 @@ static int reg_ioat(CPUS390XState *env, S390PCIIOMMU *iommu, ZpciFib fib, + + pba &= ~0xfff; + pal |= 0xfff; +- if (pba > pal || pba < ZPCI_SDMA_ADDR || pal > ZPCI_EDMA_ADDR) { ++ if (pba > pal || pba < pbdev->zpci_fn.sdma || pal > pbdev->zpci_fn.edma) { + s390_program_interrupt(env, PGM_OPERAND, ra); + return -EINVAL; + } +@@ -1125,7 +1126,7 @@ int mpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba, uint8_t ar, + } else if (pbdev->iommu->enabled) { + cc = ZPCI_PCI_LS_ERR; + s390_set_status_code(env, r1, ZPCI_MOD_ST_SEQUENCE); +- } else if (reg_ioat(env, pbdev->iommu, fib, ra)) { ++ } else if (reg_ioat(env, pbdev, fib, ra)) { + cc = ZPCI_PCI_LS_ERR; + s390_set_status_code(env, r1, ZPCI_MOD_ST_INSUF_RES); + } +@@ -1150,7 +1151,7 @@ int mpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba, uint8_t ar, + s390_set_status_code(env, r1, ZPCI_MOD_ST_SEQUENCE); + } else { + pci_dereg_ioat(pbdev->iommu); +- if (reg_ioat(env, pbdev->iommu, fib, ra)) { ++ if (reg_ioat(env, pbdev, fib, ra)) { + cc = ZPCI_PCI_LS_ERR; + s390_set_status_code(env, r1, ZPCI_MOD_ST_INSUF_RES); + } +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-pci-enable-adapter-event-notification-for-inte.patch b/SOURCES/kvm-s390x-pci-enable-adapter-event-notification-for-inte.patch new file mode 100644 index 0000000..c36c575 --- /dev/null +++ b/SOURCES/kvm-s390x-pci-enable-adapter-event-notification-for-inte.patch @@ -0,0 +1,265 @@ +From 362fae654bbae03741003e565fb95d73d8c0025f Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 2 Sep 2022 13:27:34 -0400 +Subject: [PATCH 09/42] s390x/pci: enable adapter event notification for + interpreted devices +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [9/41] 771975c436c7cb608e0e9e40edd732ac310beb69 + +Use the associated kvm ioctl operation to enable adapter event notification +and forwarding for devices when requested. This feature will be set up +with or without firmware assist based upon the 'forwarding_assist' setting. + +Signed-off-by: Matthew Rosato +Message-Id: <20220902172737.170349-6-mjrosato@linux.ibm.com> +[thuth: Rename "forwarding_assist" property to "forwarding-assist"] +Signed-off-by: Thomas Huth +(cherry picked from commit d0bc7091c2013ad2fa164100cf7b17962370e8ab) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-bus.c | 20 ++++++++++++++--- + hw/s390x/s390-pci-inst.c | 40 +++++++++++++++++++++++++++++++-- + hw/s390x/s390-pci-kvm.c | 30 +++++++++++++++++++++++++ + include/hw/s390x/s390-pci-bus.h | 1 + + include/hw/s390x/s390-pci-kvm.h | 14 ++++++++++++ + 5 files changed, 100 insertions(+), 5 deletions(-) + +diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c +index 07c7c155e3..cd152ce711 100644 +--- a/hw/s390x/s390-pci-bus.c ++++ b/hw/s390x/s390-pci-bus.c +@@ -190,7 +190,10 @@ void s390_pci_sclp_deconfigure(SCCB *sccb) + rc = SCLP_RC_NO_ACTION_REQUIRED; + break; + default: +- if (pbdev->summary_ind) { ++ if (pbdev->interp && (pbdev->fh & FH_MASK_ENABLE)) { ++ /* Interpreted devices were using interrupt forwarding */ ++ s390_pci_kvm_aif_disable(pbdev); ++ } else if (pbdev->summary_ind) { + pci_dereg_irqs(pbdev); + } + if (pbdev->iommu->enabled) { +@@ -1082,6 +1085,7 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + } else { + DPRINTF("zPCI interpretation facilities missing.\n"); + pbdev->interp = false; ++ pbdev->forwarding_assist = false; + } + } + pbdev->iommu->dma_limit = s390_pci_start_dma_count(s, pbdev); +@@ -1090,11 +1094,13 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + if (!pbdev->interp) { + /* Do vfio passthrough but intercept for I/O */ + pbdev->fh |= FH_SHM_VFIO; ++ pbdev->forwarding_assist = false; + } + } else { + pbdev->fh |= FH_SHM_EMUL; + /* Always intercept emulated devices */ + pbdev->interp = false; ++ pbdev->forwarding_assist = false; + } + + if (s390_pci_msix_init(pbdev) && !pbdev->interp) { +@@ -1244,7 +1250,10 @@ static void s390_pcihost_reset(DeviceState *dev) + /* Process all pending unplug requests */ + QTAILQ_FOREACH_SAFE(pbdev, &s->zpci_devs, link, next) { + if (pbdev->unplug_requested) { +- if (pbdev->summary_ind) { ++ if (pbdev->interp && (pbdev->fh & FH_MASK_ENABLE)) { ++ /* Interpreted devices were using interrupt forwarding */ ++ s390_pci_kvm_aif_disable(pbdev); ++ } else if (pbdev->summary_ind) { + pci_dereg_irqs(pbdev); + } + if (pbdev->iommu->enabled) { +@@ -1382,7 +1391,10 @@ static void s390_pci_device_reset(DeviceState *dev) + break; + } + +- if (pbdev->summary_ind) { ++ if (pbdev->interp && (pbdev->fh & FH_MASK_ENABLE)) { ++ /* Interpreted devices were using interrupt forwarding */ ++ s390_pci_kvm_aif_disable(pbdev); ++ } else if (pbdev->summary_ind) { + pci_dereg_irqs(pbdev); + } + if (pbdev->iommu->enabled) { +@@ -1428,6 +1440,8 @@ static Property s390_pci_device_properties[] = { + DEFINE_PROP_S390_PCI_FID("fid", S390PCIBusDevice, fid), + DEFINE_PROP_STRING("target", S390PCIBusDevice, target), + DEFINE_PROP_BOOL("interpret", S390PCIBusDevice, interp, true), ++ DEFINE_PROP_BOOL("forwarding-assist", S390PCIBusDevice, forwarding_assist, ++ true), + DEFINE_PROP_END_OF_LIST(), + }; + +diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c +index 651ec38635..20a9bcc7af 100644 +--- a/hw/s390x/s390-pci-inst.c ++++ b/hw/s390x/s390-pci-inst.c +@@ -1066,6 +1066,32 @@ static void fmb_update(void *opaque) + timer_mod(pbdev->fmb_timer, t + pbdev->pci_group->zpci_group.mui); + } + ++static int mpcifc_reg_int_interp(S390PCIBusDevice *pbdev, ZpciFib *fib) ++{ ++ int rc; ++ ++ rc = s390_pci_kvm_aif_enable(pbdev, fib, pbdev->forwarding_assist); ++ if (rc) { ++ DPRINTF("Failed to enable interrupt forwarding\n"); ++ return rc; ++ } ++ ++ return 0; ++} ++ ++static int mpcifc_dereg_int_interp(S390PCIBusDevice *pbdev, ZpciFib *fib) ++{ ++ int rc; ++ ++ rc = s390_pci_kvm_aif_disable(pbdev); ++ if (rc) { ++ DPRINTF("Failed to disable interrupt forwarding\n"); ++ return rc; ++ } ++ ++ return 0; ++} ++ + int mpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba, uint8_t ar, + uintptr_t ra) + { +@@ -1120,7 +1146,12 @@ int mpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba, uint8_t ar, + + switch (oc) { + case ZPCI_MOD_FC_REG_INT: +- if (pbdev->summary_ind) { ++ if (pbdev->interp) { ++ if (mpcifc_reg_int_interp(pbdev, &fib)) { ++ cc = ZPCI_PCI_LS_ERR; ++ s390_set_status_code(env, r1, ZPCI_MOD_ST_SEQUENCE); ++ } ++ } else if (pbdev->summary_ind) { + cc = ZPCI_PCI_LS_ERR; + s390_set_status_code(env, r1, ZPCI_MOD_ST_SEQUENCE); + } else if (reg_irqs(env, pbdev, fib)) { +@@ -1129,7 +1160,12 @@ int mpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba, uint8_t ar, + } + break; + case ZPCI_MOD_FC_DEREG_INT: +- if (!pbdev->summary_ind) { ++ if (pbdev->interp) { ++ if (mpcifc_dereg_int_interp(pbdev, &fib)) { ++ cc = ZPCI_PCI_LS_ERR; ++ s390_set_status_code(env, r1, ZPCI_MOD_ST_SEQUENCE); ++ } ++ } else if (!pbdev->summary_ind) { + cc = ZPCI_PCI_LS_ERR; + s390_set_status_code(env, r1, ZPCI_MOD_ST_SEQUENCE); + } else { +diff --git a/hw/s390x/s390-pci-kvm.c b/hw/s390x/s390-pci-kvm.c +index 0f16104a74..9134fe185f 100644 +--- a/hw/s390x/s390-pci-kvm.c ++++ b/hw/s390x/s390-pci-kvm.c +@@ -11,12 +11,42 @@ + + #include "qemu/osdep.h" + ++#include ++ + #include "kvm/kvm_s390x.h" + #include "hw/s390x/pv.h" ++#include "hw/s390x/s390-pci-bus.h" + #include "hw/s390x/s390-pci-kvm.h" ++#include "hw/s390x/s390-pci-inst.h" + #include "cpu_models.h" + + bool s390_pci_kvm_interp_allowed(void) + { + return kvm_s390_get_zpci_op() && !s390_is_pv(); + } ++ ++int s390_pci_kvm_aif_enable(S390PCIBusDevice *pbdev, ZpciFib *fib, bool assist) ++{ ++ struct kvm_s390_zpci_op args = { ++ .fh = pbdev->fh, ++ .op = KVM_S390_ZPCIOP_REG_AEN, ++ .u.reg_aen.ibv = fib->aibv, ++ .u.reg_aen.sb = fib->aisb, ++ .u.reg_aen.noi = FIB_DATA_NOI(fib->data), ++ .u.reg_aen.isc = FIB_DATA_ISC(fib->data), ++ .u.reg_aen.sbo = FIB_DATA_AISBO(fib->data), ++ .u.reg_aen.flags = (assist) ? 0 : KVM_S390_ZPCIOP_REGAEN_HOST ++ }; ++ ++ return kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args); ++} ++ ++int s390_pci_kvm_aif_disable(S390PCIBusDevice *pbdev) ++{ ++ struct kvm_s390_zpci_op args = { ++ .fh = pbdev->fh, ++ .op = KVM_S390_ZPCIOP_DEREG_AEN ++ }; ++ ++ return kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args); ++} +diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h +index a9843dfe97..5b09f0cf2f 100644 +--- a/include/hw/s390x/s390-pci-bus.h ++++ b/include/hw/s390x/s390-pci-bus.h +@@ -351,6 +351,7 @@ struct S390PCIBusDevice { + bool pci_unplug_request_processed; + bool unplug_requested; + bool interp; ++ bool forwarding_assist; + QTAILQ_ENTRY(S390PCIBusDevice) link; + }; + +diff --git a/include/hw/s390x/s390-pci-kvm.h b/include/hw/s390x/s390-pci-kvm.h +index 80a2e7d0ca..933814a402 100644 +--- a/include/hw/s390x/s390-pci-kvm.h ++++ b/include/hw/s390x/s390-pci-kvm.h +@@ -12,13 +12,27 @@ + #ifndef HW_S390_PCI_KVM_H + #define HW_S390_PCI_KVM_H + ++#include "hw/s390x/s390-pci-bus.h" ++#include "hw/s390x/s390-pci-inst.h" ++ + #ifdef CONFIG_KVM + bool s390_pci_kvm_interp_allowed(void); ++int s390_pci_kvm_aif_enable(S390PCIBusDevice *pbdev, ZpciFib *fib, bool assist); ++int s390_pci_kvm_aif_disable(S390PCIBusDevice *pbdev); + #else + static inline bool s390_pci_kvm_interp_allowed(void) + { + return false; + } ++static inline int s390_pci_kvm_aif_enable(S390PCIBusDevice *pbdev, ZpciFib *fib, ++ bool assist) ++{ ++ return -EINVAL; ++} ++static inline int s390_pci_kvm_aif_disable(S390PCIBusDevice *pbdev) ++{ ++ return -EINVAL; ++} + #endif + + #endif +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-pci-enable-for-load-store-interpretation.patch b/SOURCES/kvm-s390x-pci-enable-for-load-store-interpretation.patch new file mode 100644 index 0000000..56f228b --- /dev/null +++ b/SOURCES/kvm-s390x-pci-enable-for-load-store-interpretation.patch @@ -0,0 +1,319 @@ +From 62fbb66d18f598d0896164383aab465e093fb0c1 Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 2 Sep 2022 13:27:32 -0400 +Subject: [PATCH 07/42] s390x/pci: enable for load/store interpretation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [7/41] 3a96e901e295bb9e0c530638c45b5da5d60c00bd + +If the ZPCI_OP ioctl reports that is is available and usable, then the +underlying KVM host will enable load/store intepretation for any guest +device without a SHM bit in the guest function handle. For a device that +will be using interpretation support, ensure the guest function handle +matches the host function handle; this value is re-checked every time the +guest issues a SET PCI FN to enable the guest device as it is the only +opportunity to reflect function handle changes. + +By default, unless interpret=off is specified, interpretation support will +always be assumed and exploited if the necessary ioctl and features are +available on the host kernel. When these are unavailable, we will silently +revert to the interception model; this allows existing guest configurations +to work unmodified on hosts with and without zPCI interpretation support, +allowing QEMU to choose the best support model available. + +Signed-off-by: Matthew Rosato +Acked-by: Thomas Huth +Message-Id: <20220902172737.170349-4-mjrosato@linux.ibm.com> +Signed-off-by: Thomas Huth +(cherry picked from commit dd1d5fd9684beeb0c14c39f497ef2aa9ac683aa7) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/meson.build | 1 + + hw/s390x/s390-pci-bus.c | 66 ++++++++++++++++++++++++++++++++- + hw/s390x/s390-pci-inst.c | 16 ++++++++ + hw/s390x/s390-pci-kvm.c | 22 +++++++++++ + include/hw/s390x/s390-pci-bus.h | 1 + + include/hw/s390x/s390-pci-kvm.h | 24 ++++++++++++ + target/s390x/kvm/kvm.c | 7 ++++ + target/s390x/kvm/kvm_s390x.h | 1 + + 8 files changed, 137 insertions(+), 1 deletion(-) + create mode 100644 hw/s390x/s390-pci-kvm.c + create mode 100644 include/hw/s390x/s390-pci-kvm.h + +diff --git a/hw/s390x/meson.build b/hw/s390x/meson.build +index 28484256ec..6e6e47fcda 100644 +--- a/hw/s390x/meson.build ++++ b/hw/s390x/meson.build +@@ -23,6 +23,7 @@ s390x_ss.add(when: 'CONFIG_KVM', if_true: files( + 's390-skeys-kvm.c', + 's390-stattrib-kvm.c', + 'pv.c', ++ 's390-pci-kvm.c', + )) + s390x_ss.add(when: 'CONFIG_TCG', if_true: files( + 'tod-tcg.c', +diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c +index 01b58ebc70..18bfae0465 100644 +--- a/hw/s390x/s390-pci-bus.c ++++ b/hw/s390x/s390-pci-bus.c +@@ -16,6 +16,7 @@ + #include "qapi/visitor.h" + #include "hw/s390x/s390-pci-bus.h" + #include "hw/s390x/s390-pci-inst.h" ++#include "hw/s390x/s390-pci-kvm.h" + #include "hw/s390x/s390-pci-vfio.h" + #include "hw/pci/pci_bus.h" + #include "hw/qdev-properties.h" +@@ -971,12 +972,51 @@ static void s390_pci_update_subordinate(PCIDevice *dev, uint32_t nr) + } + } + ++static int s390_pci_interp_plug(S390pciState *s, S390PCIBusDevice *pbdev) ++{ ++ uint32_t idx, fh; ++ ++ if (!s390_pci_get_host_fh(pbdev, &fh)) { ++ return -EPERM; ++ } ++ ++ /* ++ * The host device is already in an enabled state, but we always present ++ * the initial device state to the guest as disabled (ZPCI_FS_DISABLED). ++ * Therefore, mask off the enable bit from the passthrough handle until ++ * the guest issues a CLP SET PCI FN later to enable the device. ++ */ ++ pbdev->fh = fh & ~FH_MASK_ENABLE; ++ ++ /* Next, see if the idx is already in-use */ ++ idx = pbdev->fh & FH_MASK_INDEX; ++ if (pbdev->idx != idx) { ++ if (s390_pci_find_dev_by_idx(s, idx)) { ++ return -EINVAL; ++ } ++ /* ++ * Update the idx entry with the passed through idx ++ * If the relinquished idx is lower than next_idx, use it ++ * to replace next_idx ++ */ ++ g_hash_table_remove(s->zpci_table, &pbdev->idx); ++ if (idx < s->next_idx) { ++ s->next_idx = idx; ++ } ++ pbdev->idx = idx; ++ g_hash_table_insert(s->zpci_table, &pbdev->idx, pbdev); ++ } ++ ++ return 0; ++} ++ + static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) + { + S390pciState *s = S390_PCI_HOST_BRIDGE(hotplug_dev); + PCIDevice *pdev = NULL; + S390PCIBusDevice *pbdev = NULL; ++ int rc; + + if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) { + PCIBridge *pb = PCI_BRIDGE(dev); +@@ -1022,12 +1062,35 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + set_pbdev_info(pbdev); + + if (object_dynamic_cast(OBJECT(dev), "vfio-pci")) { +- pbdev->fh |= FH_SHM_VFIO; ++ /* ++ * By default, interpretation is always requested; if the available ++ * facilities indicate it is not available, fallback to the ++ * interception model. ++ */ ++ if (pbdev->interp) { ++ if (s390_pci_kvm_interp_allowed()) { ++ rc = s390_pci_interp_plug(s, pbdev); ++ if (rc) { ++ error_setg(errp, "Plug failed for zPCI device in " ++ "interpretation mode: %d", rc); ++ return; ++ } ++ } else { ++ DPRINTF("zPCI interpretation facilities missing.\n"); ++ pbdev->interp = false; ++ } ++ } + pbdev->iommu->dma_limit = s390_pci_start_dma_count(s, pbdev); + /* Fill in CLP information passed via the vfio region */ + s390_pci_get_clp_info(pbdev); ++ if (!pbdev->interp) { ++ /* Do vfio passthrough but intercept for I/O */ ++ pbdev->fh |= FH_SHM_VFIO; ++ } + } else { + pbdev->fh |= FH_SHM_EMUL; ++ /* Always intercept emulated devices */ ++ pbdev->interp = false; + } + + if (s390_pci_msix_init(pbdev)) { +@@ -1360,6 +1423,7 @@ static Property s390_pci_device_properties[] = { + DEFINE_PROP_UINT16("uid", S390PCIBusDevice, uid, UID_UNDEFINED), + DEFINE_PROP_S390_PCI_FID("fid", S390PCIBusDevice, fid), + DEFINE_PROP_STRING("target", S390PCIBusDevice, target), ++ DEFINE_PROP_BOOL("interpret", S390PCIBusDevice, interp, true), + DEFINE_PROP_END_OF_LIST(), + }; + +diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c +index 6d400d4147..651ec38635 100644 +--- a/hw/s390x/s390-pci-inst.c ++++ b/hw/s390x/s390-pci-inst.c +@@ -18,6 +18,8 @@ + #include "sysemu/hw_accel.h" + #include "hw/s390x/s390-pci-inst.h" + #include "hw/s390x/s390-pci-bus.h" ++#include "hw/s390x/s390-pci-kvm.h" ++#include "hw/s390x/s390-pci-vfio.h" + #include "hw/s390x/tod.h" + + #ifndef DEBUG_S390PCI_INST +@@ -246,6 +248,20 @@ int clp_service_call(S390CPU *cpu, uint8_t r2, uintptr_t ra) + goto out; + } + ++ /* ++ * Take this opportunity to make sure we still have an accurate ++ * host fh. It's possible part of the handle changed while the ++ * device was disabled to the guest (e.g. vfio hot reset for ++ * ISM during plug) ++ */ ++ if (pbdev->interp) { ++ /* Take this opportunity to make sure we are sync'd with host */ ++ if (!s390_pci_get_host_fh(pbdev, &pbdev->fh) || ++ !(pbdev->fh & FH_MASK_ENABLE)) { ++ stw_p(&ressetpci->hdr.rsp, CLP_RC_SETPCIFN_FH); ++ goto out; ++ } ++ } + pbdev->fh |= FH_MASK_ENABLE; + pbdev->state = ZPCI_FS_ENABLED; + stl_p(&ressetpci->fh, pbdev->fh); +diff --git a/hw/s390x/s390-pci-kvm.c b/hw/s390x/s390-pci-kvm.c +new file mode 100644 +index 0000000000..0f16104a74 +--- /dev/null ++++ b/hw/s390x/s390-pci-kvm.c +@@ -0,0 +1,22 @@ ++/* ++ * s390 zPCI KVM interfaces ++ * ++ * Copyright 2022 IBM Corp. ++ * Author(s): Matthew Rosato ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or (at ++ * your option) any later version. See the COPYING file in the top-level ++ * directory. ++ */ ++ ++#include "qemu/osdep.h" ++ ++#include "kvm/kvm_s390x.h" ++#include "hw/s390x/pv.h" ++#include "hw/s390x/s390-pci-kvm.h" ++#include "cpu_models.h" ++ ++bool s390_pci_kvm_interp_allowed(void) ++{ ++ return kvm_s390_get_zpci_op() && !s390_is_pv(); ++} +diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h +index da3cde2bb4..a9843dfe97 100644 +--- a/include/hw/s390x/s390-pci-bus.h ++++ b/include/hw/s390x/s390-pci-bus.h +@@ -350,6 +350,7 @@ struct S390PCIBusDevice { + IndAddr *indicator; + bool pci_unplug_request_processed; + bool unplug_requested; ++ bool interp; + QTAILQ_ENTRY(S390PCIBusDevice) link; + }; + +diff --git a/include/hw/s390x/s390-pci-kvm.h b/include/hw/s390x/s390-pci-kvm.h +new file mode 100644 +index 0000000000..80a2e7d0ca +--- /dev/null ++++ b/include/hw/s390x/s390-pci-kvm.h +@@ -0,0 +1,24 @@ ++/* ++ * s390 PCI KVM interfaces ++ * ++ * Copyright 2022 IBM Corp. ++ * Author(s): Matthew Rosato ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or (at ++ * your option) any later version. See the COPYING file in the top-level ++ * directory. ++ */ ++ ++#ifndef HW_S390_PCI_KVM_H ++#define HW_S390_PCI_KVM_H ++ ++#ifdef CONFIG_KVM ++bool s390_pci_kvm_interp_allowed(void); ++#else ++static inline bool s390_pci_kvm_interp_allowed(void) ++{ ++ return false; ++} ++#endif ++ ++#endif +diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c +index ba04997da1..30712487d4 100644 +--- a/target/s390x/kvm/kvm.c ++++ b/target/s390x/kvm/kvm.c +@@ -158,6 +158,7 @@ static int cap_ri; + static int cap_hpage_1m; + static int cap_vcpu_resets; + static int cap_protected; ++static int cap_zpci_op; + + static bool mem_op_storage_key_support; + +@@ -363,6 +364,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + cap_s390_irq = kvm_check_extension(s, KVM_CAP_S390_INJECT_IRQ); + cap_vcpu_resets = kvm_check_extension(s, KVM_CAP_S390_VCPU_RESETS); + cap_protected = kvm_check_extension(s, KVM_CAP_S390_PROTECTED); ++ cap_zpci_op = kvm_check_extension(s, KVM_CAP_S390_ZPCI_OP); + + kvm_vm_enable_cap(s, KVM_CAP_S390_USER_SIGP, 0); + kvm_vm_enable_cap(s, KVM_CAP_S390_VECTOR_REGISTERS, 0); +@@ -2579,3 +2581,8 @@ bool kvm_arch_cpu_check_are_resettable(void) + { + return true; + } ++ ++int kvm_s390_get_zpci_op(void) ++{ ++ return cap_zpci_op; ++} +diff --git a/target/s390x/kvm/kvm_s390x.h b/target/s390x/kvm/kvm_s390x.h +index 05a5e1e6f4..aaae8570de 100644 +--- a/target/s390x/kvm/kvm_s390x.h ++++ b/target/s390x/kvm/kvm_s390x.h +@@ -27,6 +27,7 @@ void kvm_s390_vcpu_interrupt_pre_save(S390CPU *cpu); + int kvm_s390_vcpu_interrupt_post_load(S390CPU *cpu); + int kvm_s390_get_hpage_1m(void); + int kvm_s390_get_ri(void); ++int kvm_s390_get_zpci_op(void); + int kvm_s390_get_clock(uint8_t *tod_high, uint64_t *tod_clock); + int kvm_s390_get_clock_ext(uint8_t *tod_high, uint64_t *tod_clock); + int kvm_s390_set_clock(uint8_t tod_high, uint64_t tod_clock); +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-pci-let-intercept-devices-have-separate-PCI-gr.patch b/SOURCES/kvm-s390x-pci-let-intercept-devices-have-separate-PCI-gr.patch new file mode 100644 index 0000000..2778225 --- /dev/null +++ b/SOURCES/kvm-s390x-pci-let-intercept-devices-have-separate-PCI-gr.patch @@ -0,0 +1,192 @@ +From b98a5bc4c21284dd0a8a1c86b91af81fcb75f060 Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 2 Sep 2022 13:27:35 -0400 +Subject: [PATCH 10/42] s390x/pci: let intercept devices have separate PCI + groups +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [10/41] 1545bdcd2e21386afa9869f0414e96eecb62647d + +Let's use the reserved pool of simulated PCI groups to allow intercept +devices to have separate groups from interpreted devices as some group +values may be different. If we run out of simulated PCI groups, subsequent +intercept devices just get the default group. +Furthermore, if we encounter any PCI groups from hostdevs that are marked +as simulated, let's just assign them to the default group to avoid +conflicts between host simulated groups and our own simulated groups. + +Signed-off-by: Matthew Rosato +Reviewed-by: Pierre Morel +Message-Id: <20220902172737.170349-7-mjrosato@linux.ibm.com> +Signed-off-by: Thomas Huth +(cherry picked from commit 30dcf4f7fd23bef7d72a2454c60881710fd4c785) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-bus.c | 19 ++++++++++++++-- + hw/s390x/s390-pci-vfio.c | 40 ++++++++++++++++++++++++++++++--- + include/hw/s390x/s390-pci-bus.h | 6 ++++- + 3 files changed, 59 insertions(+), 6 deletions(-) + +diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c +index cd152ce711..d8b1e44a02 100644 +--- a/hw/s390x/s390-pci-bus.c ++++ b/hw/s390x/s390-pci-bus.c +@@ -748,13 +748,14 @@ static void s390_pci_iommu_free(S390pciState *s, PCIBus *bus, int32_t devfn) + object_unref(OBJECT(iommu)); + } + +-S390PCIGroup *s390_group_create(int id) ++S390PCIGroup *s390_group_create(int id, int host_id) + { + S390PCIGroup *group; + S390pciState *s = s390_get_phb(); + + group = g_new0(S390PCIGroup, 1); + group->id = id; ++ group->host_id = host_id; + QTAILQ_INSERT_TAIL(&s->zpci_groups, group, link); + return group; + } +@@ -772,12 +773,25 @@ S390PCIGroup *s390_group_find(int id) + return NULL; + } + ++S390PCIGroup *s390_group_find_host_sim(int host_id) ++{ ++ S390PCIGroup *group; ++ S390pciState *s = s390_get_phb(); ++ ++ QTAILQ_FOREACH(group, &s->zpci_groups, link) { ++ if (group->id >= ZPCI_SIM_GRP_START && group->host_id == host_id) { ++ return group; ++ } ++ } ++ return NULL; ++} ++ + static void s390_pci_init_default_group(void) + { + S390PCIGroup *group; + ClpRspQueryPciGrp *resgrp; + +- group = s390_group_create(ZPCI_DEFAULT_FN_GRP); ++ group = s390_group_create(ZPCI_DEFAULT_FN_GRP, ZPCI_DEFAULT_FN_GRP); + resgrp = &group->zpci_group; + resgrp->fr = 1; + resgrp->dasm = 0; +@@ -825,6 +839,7 @@ static void s390_pcihost_realize(DeviceState *dev, Error **errp) + NULL, g_free); + s->zpci_table = g_hash_table_new_full(g_int_hash, g_int_equal, NULL, NULL); + s->bus_no = 0; ++ s->next_sim_grp = ZPCI_SIM_GRP_START; + QTAILQ_INIT(&s->pending_sei); + QTAILQ_INIT(&s->zpci_devs); + QTAILQ_INIT(&s->zpci_dma_limit); +diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c +index 08bcc55e85..338f436e87 100644 +--- a/hw/s390x/s390-pci-vfio.c ++++ b/hw/s390x/s390-pci-vfio.c +@@ -150,13 +150,18 @@ static void s390_pci_read_group(S390PCIBusDevice *pbdev, + { + struct vfio_info_cap_header *hdr; + struct vfio_device_info_cap_zpci_group *cap; ++ S390pciState *s = s390_get_phb(); + ClpRspQueryPciGrp *resgrp; + VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); ++ uint8_t start_gid = pbdev->zpci_fn.pfgid; + + hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_GROUP); + +- /* If capability not provided, just use the default group */ +- if (hdr == NULL) { ++ /* ++ * If capability not provided or the underlying hostdev is simulated, just ++ * use the default group. ++ */ ++ if (hdr == NULL || pbdev->zpci_fn.pfgid >= ZPCI_SIM_GRP_START) { + trace_s390_pci_clp_cap(vpci->vbasedev.name, + VFIO_DEVICE_INFO_CAP_ZPCI_GROUP); + pbdev->zpci_fn.pfgid = ZPCI_DEFAULT_FN_GRP; +@@ -165,11 +170,40 @@ static void s390_pci_read_group(S390PCIBusDevice *pbdev, + } + cap = (void *) hdr; + ++ /* ++ * For an intercept device, let's use an existing simulated group if one ++ * one was already created for other intercept devices in this group. ++ * If not, create a new simulated group if any are still available. ++ * If all else fails, just fall back on the default group. ++ */ ++ if (!pbdev->interp) { ++ pbdev->pci_group = s390_group_find_host_sim(pbdev->zpci_fn.pfgid); ++ if (pbdev->pci_group) { ++ /* Use existing simulated group */ ++ pbdev->zpci_fn.pfgid = pbdev->pci_group->id; ++ return; ++ } else { ++ if (s->next_sim_grp == ZPCI_DEFAULT_FN_GRP) { ++ /* All out of simulated groups, use default */ ++ trace_s390_pci_clp_cap(vpci->vbasedev.name, ++ VFIO_DEVICE_INFO_CAP_ZPCI_GROUP); ++ pbdev->zpci_fn.pfgid = ZPCI_DEFAULT_FN_GRP; ++ pbdev->pci_group = s390_group_find(ZPCI_DEFAULT_FN_GRP); ++ return; ++ } else { ++ /* We can assign a new simulated group */ ++ pbdev->zpci_fn.pfgid = s->next_sim_grp; ++ s->next_sim_grp++; ++ /* Fall through to create the new sim group using CLP info */ ++ } ++ } ++ } ++ + /* See if the PCI group is already defined, create if not */ + pbdev->pci_group = s390_group_find(pbdev->zpci_fn.pfgid); + + if (!pbdev->pci_group) { +- pbdev->pci_group = s390_group_create(pbdev->zpci_fn.pfgid); ++ pbdev->pci_group = s390_group_create(pbdev->zpci_fn.pfgid, start_gid); + + resgrp = &pbdev->pci_group->zpci_group; + if (cap->flags & VFIO_DEVICE_INFO_ZPCI_FLAG_REFRESH) { +diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h +index 5b09f0cf2f..0605fcea24 100644 +--- a/include/hw/s390x/s390-pci-bus.h ++++ b/include/hw/s390x/s390-pci-bus.h +@@ -315,13 +315,16 @@ typedef struct ZpciFmb { + QEMU_BUILD_BUG_MSG(offsetof(ZpciFmb, fmt0) != 48, "padding in ZpciFmb"); + + #define ZPCI_DEFAULT_FN_GRP 0xFF ++#define ZPCI_SIM_GRP_START 0xF0 + typedef struct S390PCIGroup { + ClpRspQueryPciGrp zpci_group; + int id; ++ int host_id; + QTAILQ_ENTRY(S390PCIGroup) link; + } S390PCIGroup; +-S390PCIGroup *s390_group_create(int id); ++S390PCIGroup *s390_group_create(int id, int host_id); + S390PCIGroup *s390_group_find(int id); ++S390PCIGroup *s390_group_find_host_sim(int host_id); + + struct S390PCIBusDevice { + DeviceState qdev; +@@ -370,6 +373,7 @@ struct S390pciState { + QTAILQ_HEAD(, S390PCIBusDevice) zpci_devs; + QTAILQ_HEAD(, S390PCIDMACount) zpci_dma_limit; + QTAILQ_HEAD(, S390PCIGroup) zpci_groups; ++ uint8_t next_sim_grp; + }; + + S390pciState *s390_get_phb(void); +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-pci-reflect-proper-maxstbl-for-groups-of-inter.patch b/SOURCES/kvm-s390x-pci-reflect-proper-maxstbl-for-groups-of-inter.patch new file mode 100644 index 0000000..bbd9612 --- /dev/null +++ b/SOURCES/kvm-s390x-pci-reflect-proper-maxstbl-for-groups-of-inter.patch @@ -0,0 +1,52 @@ +From 65f90bfccf7500978879c15104a79de58173a06b Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 2 Sep 2022 13:27:36 -0400 +Subject: [PATCH 11/42] s390x/pci: reflect proper maxstbl for groups of + interpreted devices +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [11/41] 9ac2f5dedef3d743ef621525eef222a3e09d63b3 + +The maximum supported store block length might be different depending +on whether the instruction is interpretively executed (firmware-reported +maximum) or handled via userspace intercept (host kernel API maximum). +Choose the best available value during group creation. + +Signed-off-by: Matthew Rosato +Reviewed-by: Pierre Morel +Message-Id: <20220902172737.170349-8-mjrosato@linux.ibm.com> +Signed-off-by: Thomas Huth +(cherry picked from commit 9ee8f7e46a7d42ede69a4780200129bf1acb0d01) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-vfio.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c +index 338f436e87..2aefa508a0 100644 +--- a/hw/s390x/s390-pci-vfio.c ++++ b/hw/s390x/s390-pci-vfio.c +@@ -213,7 +213,11 @@ static void s390_pci_read_group(S390PCIBusDevice *pbdev, + resgrp->msia = cap->msi_addr; + resgrp->mui = cap->mui; + resgrp->i = cap->noi; +- resgrp->maxstbl = cap->maxstbl; ++ if (pbdev->interp && hdr->version >= 2) { ++ resgrp->maxstbl = cap->imaxstbl; ++ } else { ++ resgrp->maxstbl = cap->maxstbl; ++ } + resgrp->version = cap->version; + resgrp->dtsm = ZPCI_DTSM; + } +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch b/SOURCES/kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch new file mode 100644 index 0000000..215c5dd --- /dev/null +++ b/SOURCES/kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch @@ -0,0 +1,147 @@ +From 9ec96a236be84e34b16681e658d3910fc3877a44 Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 9 Dec 2022 14:57:00 -0500 +Subject: [PATCH 11/11] s390x/pci: reset ISM passthrough devices on shutdown + and system reset +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 250: s390x/pci: reset ISM passthrough devices on shutdown and system reset +RH-Bugzilla: 2163713 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Commit: [4/4] c857d022c7c2f43cdeb66c4f6acfd9272c925b35 + +ISM device firmware stores unique state information that can +can cause a wholesale unmap of the associated IOMMU (e.g. when +we get a termination signal for QEMU) to trigger firmware errors +because firmware believes we are attempting to invalidate entries +that are still in-use by the guest OS (when in fact that guest is +in the process of being terminated or rebooted). +To alleviate this, register both a shutdown notifier (for unexpected +termination cases e.g. virsh destroy) as well as a reset callback +(for cases like guest OS reboot). For each of these scenarios, trigger +PCI device reset; this is enough to indicate to firmware that the IOMMU +is no longer in-use by the guest OS, making it safe to invalidate any +associated IOMMU entries. + +Fixes: 15d0e7942d3b ("s390x/pci: don't fence interpreted devices without MSI-X") +Signed-off-by: Matthew Rosato +Message-Id: <20221209195700.263824-1-mjrosato@linux.ibm.com> +Reviewed-by: Eric Farman +[thuth: Adjusted the hunk in s390-pci-vfio.c due to different context] +Signed-off-by: Thomas Huth +(cherry picked from commit 03451953c79e6b31f7860ee0c35b28e181d573c1) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-bus.c | 28 ++++++++++++++++++++++++++++ + hw/s390x/s390-pci-vfio.c | 2 ++ + include/hw/s390x/s390-pci-bus.h | 5 +++++ + 3 files changed, 35 insertions(+) + +diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c +index d8b1e44a02..2d92848b0f 100644 +--- a/hw/s390x/s390-pci-bus.c ++++ b/hw/s390x/s390-pci-bus.c +@@ -24,6 +24,8 @@ + #include "hw/pci/msi.h" + #include "qemu/error-report.h" + #include "qemu/module.h" ++#include "sysemu/reset.h" ++#include "sysemu/runstate.h" + + #ifndef DEBUG_S390PCI_BUS + #define DEBUG_S390PCI_BUS 0 +@@ -150,10 +152,30 @@ out: + psccb->header.response_code = cpu_to_be16(rc); + } + ++static void s390_pci_shutdown_notifier(Notifier *n, void *opaque) ++{ ++ S390PCIBusDevice *pbdev = container_of(n, S390PCIBusDevice, ++ shutdown_notifier); ++ ++ pci_device_reset(pbdev->pdev); ++} ++ ++static void s390_pci_reset_cb(void *opaque) ++{ ++ S390PCIBusDevice *pbdev = opaque; ++ ++ pci_device_reset(pbdev->pdev); ++} ++ + static void s390_pci_perform_unplug(S390PCIBusDevice *pbdev) + { + HotplugHandler *hotplug_ctrl; + ++ if (pbdev->pft == ZPCI_PFT_ISM) { ++ notifier_remove(&pbdev->shutdown_notifier); ++ qemu_unregister_reset(s390_pci_reset_cb, pbdev); ++ } ++ + /* Unplug the PCI device */ + if (pbdev->pdev) { + DeviceState *pdev = DEVICE(pbdev->pdev); +@@ -1111,6 +1133,12 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + pbdev->fh |= FH_SHM_VFIO; + pbdev->forwarding_assist = false; + } ++ /* Register shutdown notifier and reset callback for ISM devices */ ++ if (pbdev->pft == ZPCI_PFT_ISM) { ++ pbdev->shutdown_notifier.notify = s390_pci_shutdown_notifier; ++ qemu_register_shutdown_notifier(&pbdev->shutdown_notifier); ++ qemu_register_reset(s390_pci_reset_cb, pbdev); ++ } + } else { + pbdev->fh |= FH_SHM_EMUL; + /* Always intercept emulated devices */ +diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c +index 99806e2a84..69af35f4fe 100644 +--- a/hw/s390x/s390-pci-vfio.c ++++ b/hw/s390x/s390-pci-vfio.c +@@ -124,6 +124,8 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev, + /* The following values remain 0 until we support other FMB formats */ + pbdev->zpci_fn.fmbl = 0; + pbdev->zpci_fn.pft = 0; ++ /* Store function type separately for type-specific behavior */ ++ pbdev->pft = cap->pft; + + /* + * If appropriate, reduce the size of the supported DMA aperture reported +diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h +index 1c46e3a269..e0a9f9385b 100644 +--- a/include/hw/s390x/s390-pci-bus.h ++++ b/include/hw/s390x/s390-pci-bus.h +@@ -39,6 +39,9 @@ + #define UID_CHECKING_ENABLED 0x01 + #define ZPCI_DTSM 0x40 + ++/* zPCI Function Types */ ++#define ZPCI_PFT_ISM 5 ++ + OBJECT_DECLARE_SIMPLE_TYPE(S390pciState, S390_PCI_HOST_BRIDGE) + OBJECT_DECLARE_SIMPLE_TYPE(S390PCIBus, S390_PCI_BUS) + OBJECT_DECLARE_SIMPLE_TYPE(S390PCIBusDevice, S390_PCI_DEVICE) +@@ -344,6 +347,7 @@ struct S390PCIBusDevice { + uint16_t noi; + uint16_t maxstbl; + uint8_t sum; ++ uint8_t pft; + S390PCIGroup *pci_group; + ClpRspQueryPci zpci_fn; + S390MsixInfo msix; +@@ -352,6 +356,7 @@ struct S390PCIBusDevice { + MemoryRegion msix_notify_mr; + IndAddr *summary_ind; + IndAddr *indicator; ++ Notifier shutdown_notifier; + bool pci_unplug_request_processed; + bool unplug_requested; + bool interp; +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch b/SOURCES/kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch new file mode 100644 index 0000000..e1df69f --- /dev/null +++ b/SOURCES/kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch @@ -0,0 +1,91 @@ +From a0b6c21b555566eb6bc38643269d14c82dfd0226 Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 28 Oct 2022 15:47:58 -0400 +Subject: [PATCH 10/11] s390x/pci: shrink DMA aperture to be bound by vfio DMA + limit +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 250: s390x/pci: reset ISM passthrough devices on shutdown and system reset +RH-Bugzilla: 2163713 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/4] aa241dd250ad5e696b67c87dddc31ee5aaee9c0e + +Currently, s390x-pci performs accounting against the vfio DMA +limit and triggers the guest to clean up mappings when the limit +is reached. Let's go a step further and also limit the size of +the supported DMA aperture reported to the guest based upon the +initial vfio DMA limit reported for the container (if less than +than the size reported by the firmware/host zPCI layer). This +avoids processing sections of the guest DMA table during global +refresh that, for common use cases, will never be used anway, and +makes exhausting the vfio DMA limit due to mismatch between guest +aperture size and host limit far less likely and more indicitive +of an error. + +Signed-off-by: Matthew Rosato +Message-Id: <20221028194758.204007-4-mjrosato@linux.ibm.com> +Reviewed-by: Eric Farman +Signed-off-by: Thomas Huth +(cherry picked from commit df202e3ff3fccb49868e08f20d0bda86cb953fbe) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-vfio.c | 11 +++++++++++ + include/hw/s390x/s390-pci-bus.h | 1 + + 2 files changed, 12 insertions(+) + +diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c +index 2aefa508a0..99806e2a84 100644 +--- a/hw/s390x/s390-pci-vfio.c ++++ b/hw/s390x/s390-pci-vfio.c +@@ -84,6 +84,7 @@ S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s, + cnt->users = 1; + cnt->avail = avail; + QTAILQ_INSERT_TAIL(&s->zpci_dma_limit, cnt, link); ++ pbdev->iommu->max_dma_limit = avail; + return cnt; + } + +@@ -103,6 +104,7 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev, + struct vfio_info_cap_header *hdr; + struct vfio_device_info_cap_zpci_base *cap; + VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); ++ uint64_t vfio_size; + + hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_BASE); + +@@ -122,6 +124,15 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev, + /* The following values remain 0 until we support other FMB formats */ + pbdev->zpci_fn.fmbl = 0; + pbdev->zpci_fn.pft = 0; ++ ++ /* ++ * If appropriate, reduce the size of the supported DMA aperture reported ++ * to the guest based upon the vfio DMA limit. ++ */ ++ vfio_size = pbdev->iommu->max_dma_limit << TARGET_PAGE_BITS; ++ if (vfio_size < (cap->end_dma - cap->start_dma + 1)) { ++ pbdev->zpci_fn.edma = cap->start_dma + vfio_size - 1; ++ } + } + + static bool get_host_fh(S390PCIBusDevice *pbdev, struct vfio_device_info *info, +diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h +index 0605fcea24..1c46e3a269 100644 +--- a/include/hw/s390x/s390-pci-bus.h ++++ b/include/hw/s390x/s390-pci-bus.h +@@ -278,6 +278,7 @@ struct S390PCIIOMMU { + uint64_t g_iota; + uint64_t pba; + uint64_t pal; ++ uint64_t max_dma_limit; + GHashTable *iotlb; + S390PCIDMACount *dma_limit; + }; +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-pci-use-a-reserved-ID-for-the-default-PCI-grou.patch b/SOURCES/kvm-s390x-pci-use-a-reserved-ID-for-the-default-PCI-grou.patch new file mode 100644 index 0000000..399f115 --- /dev/null +++ b/SOURCES/kvm-s390x-pci-use-a-reserved-ID-for-the-default-PCI-grou.patch @@ -0,0 +1,49 @@ +From 55294fc4a955491f1fd947e4d98bd6df832c88ba Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 3 Dec 2021 09:27:03 -0500 +Subject: [PATCH 01/42] s390x/pci: use a reserved ID for the default PCI group +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [1/41] ad3ed38dec95acf0da04d7669fe772d798d039fc + +The current default PCI group being used can technically collide with a +real group ID passed from a hostdev. Let's instead use a group ID that +comes from a special pool (0xF0-0xFF) that is architected to be reserved +for simulated devices. + +Fixes: 28dc86a072 ("s390x/pci: use a PCI Group structure") +Signed-off-by: Matthew Rosato +Reviewed-by: Eric Farman +Reviewed-by: Pierre Morel +Message-Id: <20211203142706.427279-2-mjrosato@linux.ibm.com> +Signed-off-by: Thomas Huth +(cherry picked from commit b2892a2b9d45d25b909108ca633d19f9d8d673f5) +Signed-off-by: Cédric Le Goater +--- + include/hw/s390x/s390-pci-bus.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h +index aa891c178d..2727e7bdef 100644 +--- a/include/hw/s390x/s390-pci-bus.h ++++ b/include/hw/s390x/s390-pci-bus.h +@@ -313,7 +313,7 @@ typedef struct ZpciFmb { + } ZpciFmb; + QEMU_BUILD_BUG_MSG(offsetof(ZpciFmb, fmt0) != 48, "padding in ZpciFmb"); + +-#define ZPCI_DEFAULT_FN_GRP 0x20 ++#define ZPCI_DEFAULT_FN_GRP 0xFF + typedef struct S390PCIGroup { + ClpRspQueryPciGrp zpci_group; + int id; +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-pci-use-the-passthrough-measurement-update-int.patch b/SOURCES/kvm-s390x-pci-use-the-passthrough-measurement-update-int.patch new file mode 100644 index 0000000..2fda07a --- /dev/null +++ b/SOURCES/kvm-s390x-pci-use-the-passthrough-measurement-update-int.patch @@ -0,0 +1,59 @@ +From c251a7a16e776072b9c7bbc4e61cfa4f52599b0a Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 3 Dec 2021 09:27:05 -0500 +Subject: [PATCH 03/42] s390x/pci: use the passthrough measurement update + interval +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [3/41] bc31ea731fe64e51522f1202e65528311397b919 + +We may have gotten a measurement update interval from the underlying host +via vfio -- Use it to set the interval via which we update the function +measurement block. + +Fixes: 28dc86a072 ("s390x/pci: use a PCI Group structure") +Signed-off-by: Matthew Rosato +Reviewed-by: Eric Farman +Reviewed-by: Pierre Morel +Message-Id: <20211203142706.427279-4-mjrosato@linux.ibm.com> +Signed-off-by: Thomas Huth +(cherry picked from commit cb6d6a3e6aa1226b67fd218953dcb3866c3a6845) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-inst.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c +index 11b7f6bfa1..07bab85ce5 100644 +--- a/hw/s390x/s390-pci-inst.c ++++ b/hw/s390x/s390-pci-inst.c +@@ -1046,7 +1046,7 @@ static void fmb_update(void *opaque) + sizeof(pbdev->fmb.last_update))) { + return; + } +- timer_mod(pbdev->fmb_timer, t + DEFAULT_MUI); ++ timer_mod(pbdev->fmb_timer, t + pbdev->pci_group->zpci_group.mui); + } + + int mpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba, uint8_t ar, +@@ -1204,7 +1204,8 @@ int mpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba, uint8_t ar, + } + pbdev->fmb_addr = fmb_addr; + timer_mod(pbdev->fmb_timer, +- qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + DEFAULT_MUI); ++ qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + ++ pbdev->pci_group->zpci_group.mui); + break; + } + default: +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-pv-Add-dump-support.patch b/SOURCES/kvm-s390x-pv-Add-dump-support.patch new file mode 100644 index 0000000..dec84fd --- /dev/null +++ b/SOURCES/kvm-s390x-pv-Add-dump-support.patch @@ -0,0 +1,445 @@ +From 86aeb4fd7ff9395afba574e422d83f990ce1f047 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Mon, 17 Oct 2022 08:38:22 +0000 +Subject: [PATCH 41/42] s390x: pv: Add dump support +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [41/41] 2731c2329276e76013e3b3df21e9743bc74edd2b + +Sometimes dumping a guest from the outside is the only way to get the +data that is needed. This can be the case if a dumping mechanism like +KDUMP hasn't been configured or data needs to be fetched at a specific +point. Dumping a protected guest from the outside without help from +fw/hw doesn't yield sufficient data to be useful. Hence we now +introduce PV dump support. + +The PV dump support works by integrating the firmware into the dump +process. New Ultravisor calls are used to initiate the dump process, +dump cpu data, dump memory state and lastly complete the dump process. +The UV calls are exposed by KVM via the new KVM_PV_DUMP command and +its subcommands. The guest's data is fully encrypted and can only be +decrypted by the entity that owns the customer communication key for +the dumped guest. Also dumping needs to be allowed via a flag in the +SE header. + +On the QEMU side of things we store the PV dump data in the newly +introduced architecture ELF sections (storage state and completion +data) and the cpu notes (for cpu dump data). + +Users can use the zgetdump tool to convert the encrypted QEMU dump to an +unencrypted one. + +Signed-off-by: Janosch Frank +Reviewed-by: Steffen Eiden +Message-Id: <20221017083822.43118-11-frankja@linux.ibm.com> +(cherry picked from commit 113d8f4e95cf0450bea421263de6ec016c779ad0) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 12 +- + include/sysemu/dump.h | 5 + + target/s390x/arch_dump.c | 262 +++++++++++++++++++++++++++++++++++---- + 3 files changed, 246 insertions(+), 33 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 4aa8fb64d2..5dee060b73 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -709,9 +709,9 @@ static void dump_begin(DumpState *s, Error **errp) + write_elf_notes(s, errp); + } + +-static int64_t dump_filtered_memblock_size(GuestPhysBlock *block, +- int64_t filter_area_start, +- int64_t filter_area_length) ++int64_t dump_filtered_memblock_size(GuestPhysBlock *block, ++ int64_t filter_area_start, ++ int64_t filter_area_length) + { + int64_t size, left, right; + +@@ -729,9 +729,9 @@ static int64_t dump_filtered_memblock_size(GuestPhysBlock *block, + return size; + } + +-static int64_t dump_filtered_memblock_start(GuestPhysBlock *block, +- int64_t filter_area_start, +- int64_t filter_area_length) ++int64_t dump_filtered_memblock_start(GuestPhysBlock *block, ++ int64_t filter_area_start, ++ int64_t filter_area_length) + { + if (filter_area_length) { + /* return -1 if the block is not within filter area */ +diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h +index 38ccac7190..4ffed0b659 100644 +--- a/include/sysemu/dump.h ++++ b/include/sysemu/dump.h +@@ -215,4 +215,9 @@ typedef struct DumpState { + uint16_t cpu_to_dump16(DumpState *s, uint16_t val); + uint32_t cpu_to_dump32(DumpState *s, uint32_t val); + uint64_t cpu_to_dump64(DumpState *s, uint64_t val); ++ ++int64_t dump_filtered_memblock_size(GuestPhysBlock *block, int64_t filter_area_start, ++ int64_t filter_area_length); ++int64_t dump_filtered_memblock_start(GuestPhysBlock *block, int64_t filter_area_start, ++ int64_t filter_area_length); + #endif +diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c +index f60a14920d..a2329141e8 100644 +--- a/target/s390x/arch_dump.c ++++ b/target/s390x/arch_dump.c +@@ -12,11 +12,13 @@ + */ + + #include "qemu/osdep.h" ++#include "qemu/units.h" + #include "cpu.h" + #include "s390x-internal.h" + #include "elf.h" + #include "sysemu/dump.h" +- ++#include "hw/s390x/pv.h" ++#include "kvm/kvm_s390x.h" + + struct S390xUserRegsStruct { + uint64_t psw[2]; +@@ -76,9 +78,16 @@ typedef struct noteStruct { + uint64_t todcmp; + uint32_t todpreg; + uint64_t ctrs[16]; ++ uint8_t dynamic[1]; /* ++ * Would be a flexible array member, if ++ * that was legal inside a union. Real ++ * size comes from PV info interface. ++ */ + } contents; + } QEMU_PACKED Note; + ++static bool pv_dump_initialized; ++ + static void s390x_write_elf64_prstatus(Note *note, S390CPU *cpu, int id) + { + int i; +@@ -177,28 +186,39 @@ static void s390x_write_elf64_prefix(Note *note, S390CPU *cpu, int id) + note->contents.prefix = cpu_to_be32((uint32_t)(cpu->env.psa)); + } + ++static void s390x_write_elf64_pv(Note *note, S390CPU *cpu, int id) ++{ ++ note->hdr.n_type = cpu_to_be32(NT_S390_PV_CPU_DATA); ++ if (!pv_dump_initialized) { ++ return; ++ } ++ kvm_s390_dump_cpu(cpu, ¬e->contents.dynamic); ++} + + typedef struct NoteFuncDescStruct { + int contents_size; ++ uint64_t (*note_size_func)(void); /* NULL for non-dynamic sized contents */ + void (*note_contents_func)(Note *note, S390CPU *cpu, int id); ++ bool pvonly; + } NoteFuncDesc; + + static const NoteFuncDesc note_core[] = { +- {sizeof_field(Note, contents.prstatus), s390x_write_elf64_prstatus}, +- {sizeof_field(Note, contents.fpregset), s390x_write_elf64_fpregset}, +- { 0, NULL} ++ {sizeof_field(Note, contents.prstatus), NULL, s390x_write_elf64_prstatus, false}, ++ {sizeof_field(Note, contents.fpregset), NULL, s390x_write_elf64_fpregset, false}, ++ { 0, NULL, NULL, false} + }; + + static const NoteFuncDesc note_linux[] = { +- {sizeof_field(Note, contents.prefix), s390x_write_elf64_prefix}, +- {sizeof_field(Note, contents.ctrs), s390x_write_elf64_ctrs}, +- {sizeof_field(Note, contents.timer), s390x_write_elf64_timer}, +- {sizeof_field(Note, contents.todcmp), s390x_write_elf64_todcmp}, +- {sizeof_field(Note, contents.todpreg), s390x_write_elf64_todpreg}, +- {sizeof_field(Note, contents.vregslo), s390x_write_elf64_vregslo}, +- {sizeof_field(Note, contents.vregshi), s390x_write_elf64_vregshi}, +- {sizeof_field(Note, contents.gscb), s390x_write_elf64_gscb}, +- { 0, NULL} ++ {sizeof_field(Note, contents.prefix), NULL, s390x_write_elf64_prefix, false}, ++ {sizeof_field(Note, contents.ctrs), NULL, s390x_write_elf64_ctrs, false}, ++ {sizeof_field(Note, contents.timer), NULL, s390x_write_elf64_timer, false}, ++ {sizeof_field(Note, contents.todcmp), NULL, s390x_write_elf64_todcmp, false}, ++ {sizeof_field(Note, contents.todpreg), NULL, s390x_write_elf64_todpreg, false}, ++ {sizeof_field(Note, contents.vregslo), NULL, s390x_write_elf64_vregslo, false}, ++ {sizeof_field(Note, contents.vregshi), NULL, s390x_write_elf64_vregshi, false}, ++ {sizeof_field(Note, contents.gscb), NULL, s390x_write_elf64_gscb, false}, ++ {0, kvm_s390_pv_dmp_get_size_cpu, s390x_write_elf64_pv, true}, ++ { 0, NULL, NULL, false} + }; + + static int s390x_write_elf64_notes(const char *note_name, +@@ -207,22 +227,41 @@ static int s390x_write_elf64_notes(const char *note_name, + DumpState *s, + const NoteFuncDesc *funcs) + { +- Note note; ++ Note note, *notep; + const NoteFuncDesc *nf; +- int note_size; ++ int note_size, content_size; + int ret = -1; + + assert(strlen(note_name) < sizeof(note.name)); + + for (nf = funcs; nf->note_contents_func; nf++) { +- memset(¬e, 0, sizeof(note)); +- note.hdr.n_namesz = cpu_to_be32(strlen(note_name) + 1); +- note.hdr.n_descsz = cpu_to_be32(nf->contents_size); +- g_strlcpy(note.name, note_name, sizeof(note.name)); +- (*nf->note_contents_func)(¬e, cpu, id); ++ notep = ¬e; ++ if (nf->pvonly && !s390_is_pv()) { ++ continue; ++ } ++ ++ content_size = nf->note_size_func ? nf->note_size_func() : nf->contents_size; ++ note_size = sizeof(note) - sizeof(notep->contents) + content_size; ++ ++ /* Notes with dynamic sizes need to allocate a note */ ++ if (nf->note_size_func) { ++ notep = g_malloc(note_size); ++ } ++ ++ memset(notep, 0, sizeof(note)); + +- note_size = sizeof(note) - sizeof(note.contents) + nf->contents_size; +- ret = f(¬e, note_size, s); ++ /* Setup note header data */ ++ notep->hdr.n_descsz = cpu_to_be32(content_size); ++ notep->hdr.n_namesz = cpu_to_be32(strlen(note_name) + 1); ++ g_strlcpy(notep->name, note_name, sizeof(notep->name)); ++ ++ /* Get contents and write them out */ ++ (*nf->note_contents_func)(notep, cpu, id); ++ ret = f(notep, note_size, s); ++ ++ if (nf->note_size_func) { ++ g_free(notep); ++ } + + if (ret < 0) { + return -1; +@@ -247,13 +286,179 @@ int s390_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, + return s390x_write_elf64_notes("LINUX", f, cpu, cpuid, s, note_linux); + } + ++/* PV dump section size functions */ ++static uint64_t get_mem_state_size_from_len(uint64_t len) ++{ ++ return (len / (MiB)) * kvm_s390_pv_dmp_get_size_mem_state(); ++} ++ ++static uint64_t get_size_mem_state(DumpState *s) ++{ ++ return get_mem_state_size_from_len(s->total_size); ++} ++ ++static uint64_t get_size_completion_data(DumpState *s) ++{ ++ return kvm_s390_pv_dmp_get_size_completion_data(); ++} ++ ++/* PV dump section data functions*/ ++static int get_data_completion(DumpState *s, uint8_t *buff) ++{ ++ int rc; ++ ++ if (!pv_dump_initialized) { ++ return 0; ++ } ++ rc = kvm_s390_dump_completion_data(buff); ++ if (!rc) { ++ pv_dump_initialized = false; ++ } ++ return rc; ++} ++ ++static int get_mem_state(DumpState *s, uint8_t *buff) ++{ ++ int64_t memblock_size, memblock_start; ++ GuestPhysBlock *block; ++ uint64_t off; ++ int rc; ++ ++ QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { ++ memblock_start = dump_filtered_memblock_start(block, s->filter_area_begin, ++ s->filter_area_length); ++ if (memblock_start == -1) { ++ continue; ++ } ++ ++ memblock_size = dump_filtered_memblock_size(block, s->filter_area_begin, ++ s->filter_area_length); ++ ++ off = get_mem_state_size_from_len(block->target_start); ++ ++ rc = kvm_s390_dump_mem_state(block->target_start, ++ get_mem_state_size_from_len(memblock_size), ++ buff + off); ++ if (rc) { ++ return rc; ++ } ++ } ++ ++ return 0; ++} ++ ++static struct sections { ++ uint64_t (*sections_size_func)(DumpState *s); ++ int (*sections_contents_func)(DumpState *s, uint8_t *buff); ++ char sctn_str[12]; ++} sections[] = { ++ { get_size_mem_state, get_mem_state, "pv_mem_meta"}, ++ { get_size_completion_data, get_data_completion, "pv_compl"}, ++ {NULL , NULL, ""} ++}; ++ ++static uint64_t arch_sections_write_hdr(DumpState *s, uint8_t *buff) ++{ ++ Elf64_Shdr *shdr = (void *)buff; ++ struct sections *sctn = sections; ++ uint64_t off = s->section_offset; ++ ++ if (!pv_dump_initialized) { ++ return 0; ++ } ++ ++ for (; sctn->sections_size_func; off += shdr->sh_size, sctn++, shdr++) { ++ memset(shdr, 0, sizeof(*shdr)); ++ shdr->sh_type = SHT_PROGBITS; ++ shdr->sh_offset = off; ++ shdr->sh_size = sctn->sections_size_func(s); ++ shdr->sh_name = s->string_table_buf->len; ++ g_array_append_vals(s->string_table_buf, sctn->sctn_str, sizeof(sctn->sctn_str)); ++ } ++ ++ return (uintptr_t)shdr - (uintptr_t)buff; ++} ++ ++ ++/* Add arch specific number of sections and their respective sizes */ ++static void arch_sections_add(DumpState *s) ++{ ++ struct sections *sctn = sections; ++ ++ /* ++ * We only do a PV dump if we are running a PV guest, KVM supports ++ * the dump API and we got valid dump length information. ++ */ ++ if (!s390_is_pv() || !kvm_s390_get_protected_dump() || ++ !kvm_s390_pv_info_basic_valid()) { ++ return; ++ } ++ ++ /* ++ * Start the UV dump process by doing the initialize dump call via ++ * KVM as the proxy. ++ */ ++ if (!kvm_s390_dump_init()) { ++ pv_dump_initialized = true; ++ } else { ++ /* ++ * Dump init failed, maybe the guest owner disabled dumping. ++ * We'll continue the non-PV dump process since this is no ++ * reason to crash qemu. ++ */ ++ return; ++ } ++ ++ for (; sctn->sections_size_func; sctn++) { ++ s->shdr_num += 1; ++ s->elf_section_data_size += sctn->sections_size_func(s); ++ } ++} ++ ++/* ++ * After the PV dump has been initialized, the CPU data has been ++ * fetched and memory has been dumped, we need to grab the tweak data ++ * and the completion data. ++ */ ++static int arch_sections_write(DumpState *s, uint8_t *buff) ++{ ++ struct sections *sctn = sections; ++ int rc; ++ ++ if (!pv_dump_initialized) { ++ return -EINVAL; ++ } ++ ++ for (; sctn->sections_size_func; sctn++) { ++ rc = sctn->sections_contents_func(s, buff); ++ buff += sctn->sections_size_func(s); ++ if (rc) { ++ return rc; ++ } ++ } ++ return 0; ++} ++ + int cpu_get_dump_info(ArchDumpInfo *info, + const struct GuestPhysBlockList *guest_phys_blocks) + { + info->d_machine = EM_S390; + info->d_endian = ELFDATA2MSB; + info->d_class = ELFCLASS64; +- ++ /* ++ * This is evaluated for each dump so we can freely switch ++ * between PV and non-PV. ++ */ ++ if (s390_is_pv() && kvm_s390_get_protected_dump() && ++ kvm_s390_pv_info_basic_valid()) { ++ info->arch_sections_add_fn = *arch_sections_add; ++ info->arch_sections_write_hdr_fn = *arch_sections_write_hdr; ++ info->arch_sections_write_fn = *arch_sections_write; ++ } else { ++ info->arch_sections_add_fn = NULL; ++ info->arch_sections_write_hdr_fn = NULL; ++ info->arch_sections_write_fn = NULL; ++ } + return 0; + } + +@@ -261,7 +466,7 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus) + { + int name_size = 8; /* "LINUX" or "CORE" + pad */ + size_t elf_note_size = 0; +- int note_head_size; ++ int note_head_size, content_size; + const NoteFuncDesc *nf; + + assert(class == ELFCLASS64); +@@ -270,12 +475,15 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus) + note_head_size = sizeof(Elf64_Nhdr); + + for (nf = note_core; nf->note_contents_func; nf++) { +- elf_note_size = elf_note_size + note_head_size + name_size + +- nf->contents_size; ++ elf_note_size = elf_note_size + note_head_size + name_size + nf->contents_size; + } + for (nf = note_linux; nf->note_contents_func; nf++) { ++ if (nf->pvonly && !s390_is_pv()) { ++ continue; ++ } ++ content_size = nf->contents_size ? nf->contents_size : nf->note_size_func(); + elf_note_size = elf_note_size + note_head_size + name_size + +- nf->contents_size; ++ content_size; + } + + return (elf_note_size) * nr_cpus; +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-s390-virtio-ccw-Switch-off-zPCI-enhancements-o.patch b/SOURCES/kvm-s390x-s390-virtio-ccw-Switch-off-zPCI-enhancements-o.patch new file mode 100644 index 0000000..71dc7dc --- /dev/null +++ b/SOURCES/kvm-s390x-s390-virtio-ccw-Switch-off-zPCI-enhancements-o.patch @@ -0,0 +1,55 @@ +From c7b14d3af7c73a3bf0c00911b85f202840d9b414 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Mon, 7 Nov 2022 17:13:49 +0100 +Subject: [PATCH 12/42] s390x/s390-virtio-ccw: Switch off zPCI enhancements on + older machines +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [12/41] 61e32bab6d68ee9abd6a0751944af82e002b05b4 + +zPCI enhancement features (interpretation and forward assist) were +recently introduced to improve performance on PCI passthrough devices. +To maintain the same behaviour on older Z machines, deactivate the +features with the associated properties. + +Signed-off-by: Cédric Le Goater +Message-Id: <20221107161349.1032730-3-clg@kaod.org> +Reviewed-by: Matthew Rosato +Signed-off-by: Thomas Huth +(cherry picked from commit d3d1a406127f7da482eafbdc871c120c2770bb91) +[ clg: applied zPCI restrictions to rhel8.5.0 machine and below ] +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-virtio-ccw.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index bec270598b..bd80e72cf8 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1130,8 +1130,14 @@ static void ccw_machine_rhel850_instance_options(MachineState *machine) + + static void ccw_machine_rhel850_class_options(MachineClass *mc) + { ++ static GlobalProperty compat[] = { ++ { TYPE_S390_PCI_DEVICE, "interpret", "off", }, ++ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", }, ++ }; ++ + ccw_machine_rhel860_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); ++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + mc->smp_props.prefer_sockets = true; + } + DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", false); +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x-tod-kvm-don-t-save-restore-the-TOD-in-PV-guest.patch b/SOURCES/kvm-s390x-tod-kvm-don-t-save-restore-the-TOD-in-PV-guest.patch new file mode 100644 index 0000000..b07c902 --- /dev/null +++ b/SOURCES/kvm-s390x-tod-kvm-don-t-save-restore-the-TOD-in-PV-guest.patch @@ -0,0 +1,70 @@ +From 90fcfe4b07afc5299c5ee69fa663ca46b597fd4b Mon Sep 17 00:00:00 2001 +From: Nico Boehr +Date: Wed, 12 Oct 2022 14:32:29 +0200 +Subject: [PATCH] s390x/tod-kvm: don't save/restore the TOD in PV guests +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 244: s390x/tod-kvm: don't save/restore the TOD in PV guests +RH-Bugzilla: 2155448 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck +RH-Acked-by: Thomas Huth +RH-Commit: [1/1] 3cb3154dd7c1549c54cf8c0483b5f23b235f6db3 + +Under PV, the guest's TOD clock is under control of the ultravisor and the +hypervisor cannot change it. + +With upcoming kernel changes[1], the Linux kernel will reject QEMU's +request to adjust the guest's clock in this case, so don't attempt to set +the clock. + +This avoids the following warning message on save/restore of a PV guest: + +warning: Unable to set KVM guest TOD clock: Operation not supported + +[1] https://lore.kernel.org/all/20221011160712.928239-2-nrb@linux.ibm.com/ + +Fixes: c3347ed0d2ee ("s390x: protvirt: Support unpack facility") +Signed-off-by: Nico Boehr +Message-Id: <20221012123229.1196007-1-nrb@linux.ibm.com> +[thuth: Add curly braces] +Signed-off-by: Thomas Huth +(cherry picked from commit 38621181ae3cbec62e3490fbc14f6ac01642d07a) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/tod-kvm.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/hw/s390x/tod-kvm.c b/hw/s390x/tod-kvm.c +index ec855811ae..c804c979b5 100644 +--- a/hw/s390x/tod-kvm.c ++++ b/hw/s390x/tod-kvm.c +@@ -13,6 +13,7 @@ + #include "qemu/module.h" + #include "sysemu/runstate.h" + #include "hw/s390x/tod.h" ++#include "hw/s390x/pv.h" + #include "kvm/kvm_s390x.h" + + static void kvm_s390_get_tod_raw(S390TOD *tod, Error **errp) +@@ -84,6 +85,14 @@ static void kvm_s390_tod_vm_state_change(void *opaque, bool running, + S390TODState *td = opaque; + Error *local_err = NULL; + ++ /* ++ * Under PV, the clock is under ultravisor control, hence we cannot restore ++ * it on resume. ++ */ ++ if (s390_is_pv()) { ++ return; ++ } ++ + if (running && td->stopped) { + /* Set the old TOD when running the VM - start the TOD clock. */ + kvm_s390_set_tod_raw(&td->base, &local_err); +-- +2.37.3 + diff --git a/SOURCES/kvm-s390x.conf b/SOURCES/kvm-s390x.conf new file mode 100644 index 0000000..d82b818 --- /dev/null +++ b/SOURCES/kvm-s390x.conf @@ -0,0 +1,19 @@ +# User changes in this file are preserved across upgrades. +# +# Setting "modprobe kvm nested=1" only enables Nested Virtualization until +# the next reboot or module reload. Uncomment the option below to enable +# the feature permanently. +# +#options kvm nested=1 +# +# +# Setting "modprobe kvm hpage=1" only enables Huge Page Backing (1MB) +# support until the next reboot or module reload. Uncomment the option +# below to enable the feature permanently. +# +# Note: - Incompatible with "nested=1". Loading the module will fail. +# - Dirty page logging will be performed on a 1MB (not 4KB) basis, +# which can result in a lot of data having to be transferred during +# migration, and therefore taking very long to converge. +# +#options kvm hpage=1 diff --git a/SOURCES/kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch b/SOURCES/kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch new file mode 100644 index 0000000..0fc1a71 --- /dev/null +++ b/SOURCES/kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch @@ -0,0 +1,97 @@ +From a9a4dfdd6312e192e9134d46edfac4c1b1bfa63d Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 22 Aug 2022 14:53:20 +0200 +Subject: [PATCH] scsi-generic: Fix emulated block limits VPD page + +RH-Author: Kevin Wolf +RH-MergeRequest: 212: scsi-generic: Fix emulated block limits VPD page +RH-Commit: [1/1] d3ba6b2e03039043716ddc6b7d4a424d92249081 +RH-Bugzilla: 2120279 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Reitz +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Stefan Hajnoczi + +Commits 01ef8185b80 amd 24b36e9813e updated the way that the maximum +transfer length is calculated for patching block limits VPD page in an +INQUIRY response. + +The same updates also need to be made for the case where the host device +does not support the block limits VPD page at all and we emulate the +whole page. + +Without this fix, on host block devices a maximum transfer length of +(INT_MAX - sector_size) bytes is advertised to the guest, resulting in +I/O errors when a request that exceeds the host limits is made by the +guest. (Prior to commit 24b36e9813e, this code path would use the +max_transfer value from the host instead of INT_MAX, but still miss the +fix from 01ef8185b80 where max_transfer is also capped to max_iov +host pages, so it would be less wrong, but still wrong.) + +Cc: qemu-stable@nongnu.org +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2096251 +Fixes: 01ef8185b809af9d287e1a03a3f9d8ea8231118a +Fixes: 24b36e9813ec15da7db62e3b3621730710c5f020 +Signed-off-by: Kevin Wolf +Message-Id: <20220822125320.48257-1-kwolf@redhat.com> +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Kevin Wolf +(cherry picked from commit 51e15194b0a091e5c40aab2eb234a1d36c5c58ee) + +Resolved conflict: qemu_real_host_page_size() is a getter function in +current upstream, but still just a public global variable downstream. + +Signed-off-by: Kevin Wolf +--- + hw/scsi/scsi-generic.c | 21 ++++++++++++++------- + 1 file changed, 14 insertions(+), 7 deletions(-) + +diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c +index 0306ccc7b1..3742899839 100644 +--- a/hw/scsi/scsi-generic.c ++++ b/hw/scsi/scsi-generic.c +@@ -147,6 +147,18 @@ static int execute_command(BlockBackend *blk, + return 0; + } + ++static uint64_t calculate_max_transfer(SCSIDevice *s) ++{ ++ uint64_t max_transfer = blk_get_max_hw_transfer(s->conf.blk); ++ uint32_t max_iov = blk_get_max_hw_iov(s->conf.blk); ++ ++ assert(max_transfer); ++ max_transfer = MIN_NON_ZERO(max_transfer, ++ max_iov * qemu_real_host_page_size); ++ ++ return max_transfer / s->blocksize; ++} ++ + static int scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s, int len) + { + uint8_t page, page_idx; +@@ -179,12 +191,7 @@ static int scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s, int len) + (r->req.cmd.buf[1] & 0x01)) { + page = r->req.cmd.buf[2]; + if (page == 0xb0) { +- uint64_t max_transfer = blk_get_max_hw_transfer(s->conf.blk); +- uint32_t max_iov = blk_get_max_hw_iov(s->conf.blk); +- +- assert(max_transfer); +- max_transfer = MIN_NON_ZERO(max_transfer, max_iov * qemu_real_host_page_size) +- / s->blocksize; ++ uint64_t max_transfer = calculate_max_transfer(s); + stl_be_p(&r->buf[8], max_transfer); + /* Also take care of the opt xfer len. */ + stl_be_p(&r->buf[12], +@@ -230,7 +237,7 @@ static int scsi_generic_emulate_block_limits(SCSIGenericReq *r, SCSIDevice *s) + uint8_t buf[64]; + + SCSIBlockLimits bl = { +- .max_io_sectors = blk_get_max_transfer(s->conf.blk) / s->blocksize ++ .max_io_sectors = calculate_max_transfer(s), + }; + + memset(r->buf, 0, r->buflen); +-- +2.35.3 + diff --git a/SOURCES/kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch b/SOURCES/kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch new file mode 100644 index 0000000..1343855 --- /dev/null +++ b/SOURCES/kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch @@ -0,0 +1,176 @@ +From df836ee4b4e2a69cca5042a3a9daf2c41dc2aa58 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 21 Feb 2023 16:22:16 -0500 +Subject: [PATCH 11/13] scsi: protect req->aiocb with AioContext lock + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 264: scsi: protect req->aiocb with AioContext lock +RH-Bugzilla: 2090990 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Commit: [1/3] e6a6d4109713e0fd6d6c515535c66196fea98688 + +If requests are being processed in the IOThread when a SCSIDevice is +unplugged, scsi_device_purge_requests() -> scsi_req_cancel_async() races +with I/O completion callbacks. Both threads load and store req->aiocb. +This can lead to assert(r->req.aiocb == NULL) failures and undefined +behavior. + +Protect r->req.aiocb with the AioContext lock to prevent the race. + +Reviewed-by: Eric Blake +Reviewed-by: Kevin Wolf +Signed-off-by: Stefan Hajnoczi +Message-Id: <20230221212218.1378734-2-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 7b7fc3d0102dafe8eb44802493036a526e921a71) +Signed-off-by: Stefan Hajnoczi +--- + hw/scsi/scsi-disk.c | 23 ++++++++++++++++------- + hw/scsi/scsi-generic.c | 11 ++++++----- + 2 files changed, 22 insertions(+), 12 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index d4914178ea..179ce22c4a 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -270,9 +270,11 @@ static void scsi_aio_complete(void *opaque, int ret) + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + ++ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + if (scsi_disk_req_check_error(r, ret, true)) { + goto done; + } +@@ -354,10 +356,11 @@ static void scsi_dma_complete(void *opaque, int ret) + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + ++ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); + if (ret < 0) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); + } else { +@@ -390,10 +393,11 @@ static void scsi_read_complete(void *opaque, int ret) + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + ++ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); + if (ret < 0) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); + } else { +@@ -443,10 +447,11 @@ static void scsi_do_read_cb(void *opaque, int ret) + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + ++ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + assert (r->req.aiocb != NULL); + r->req.aiocb = NULL; + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); + if (ret < 0) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); + } else { +@@ -527,10 +532,11 @@ static void scsi_write_complete(void * opaque, int ret) + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + ++ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + assert (r->req.aiocb != NULL); + r->req.aiocb = NULL; + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); + if (ret < 0) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); + } else { +@@ -1659,10 +1665,11 @@ static void scsi_unmap_complete(void *opaque, int ret) + SCSIDiskReq *r = data->r; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + ++ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); + if (scsi_disk_req_check_error(r, ret, true)) { + scsi_req_unref(&r->req); + g_free(data); +@@ -1738,9 +1745,11 @@ static void scsi_write_same_complete(void *opaque, int ret) + SCSIDiskReq *r = data->r; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + ++ aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); ++ + if (scsi_disk_req_check_error(r, ret, true)) { + goto done; + } +diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c +index 3742899839..a1a40df64b 100644 +--- a/hw/scsi/scsi-generic.c ++++ b/hw/scsi/scsi-generic.c +@@ -111,10 +111,11 @@ static void scsi_command_complete(void *opaque, int ret) + SCSIGenericReq *r = (SCSIGenericReq *)opaque; + SCSIDevice *s = r->req.dev; + ++ aio_context_acquire(blk_get_aio_context(s->conf.blk)); ++ + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +- aio_context_acquire(blk_get_aio_context(s->conf.blk)); + scsi_command_complete_noio(r, ret); + aio_context_release(blk_get_aio_context(s->conf.blk)); + } +@@ -269,11 +270,11 @@ static void scsi_read_complete(void * opaque, int ret) + SCSIDevice *s = r->req.dev; + int len; + ++ aio_context_acquire(blk_get_aio_context(s->conf.blk)); ++ + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +- aio_context_acquire(blk_get_aio_context(s->conf.blk)); +- + if (ret || r->req.io_canceled) { + scsi_command_complete_noio(r, ret); + goto done; +@@ -387,11 +388,11 @@ static void scsi_write_complete(void * opaque, int ret) + + trace_scsi_generic_write_complete(ret); + ++ aio_context_acquire(blk_get_aio_context(s->conf.blk)); ++ + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +- aio_context_acquire(blk_get_aio_context(s->conf.blk)); +- + if (ret || r->req.io_canceled) { + scsi_command_complete_noio(r, ret); + goto done; +-- +2.37.3 + diff --git a/SOURCES/kvm-setup b/SOURCES/kvm-setup new file mode 100644 index 0000000..3bfedf6 --- /dev/null +++ b/SOURCES/kvm-setup @@ -0,0 +1,49 @@ +#! /bin/bash + +kvm_setup_powerpc () { + if grep '^platform[[:space:]]*:[[:space:]]*PowerNV' /proc/cpuinfo > /dev/null; then + # PowerNV platform, which is KVM HV capable + + if [ -z "$SUBCORES" ]; then + SUBCORES=1 + fi + + # Step 1. Load the KVM HVmodule + if ! modprobe -b kvm_hv; then + return + fi + + # On POWER8 a host core can only run threads of a single + # guest, meaning that SMT must be disabled on the host in + # order to run KVM guests. (Also applieds to POWER7, but we + # don't support that). + # + # POWER9 doesn't have this limitation (though it will for hash + # guests on radix host when that's implemented). So, only set + # up subcores and disable SMT for POWER*. + if grep '^cpu[[:space:]]*:[[:space:]]*POWER8' /proc/cpuinfo > /dev/null; then + # Step 2. Configure subcore mode + /usr/sbin/ppc64_cpu --subcores-per-core=$SUBCORES + + # Step 3. Disable SMT (multithreading) + /usr/sbin/ppc64_cpu --smt=off + fi + fi +} + +kvm_setup_s390x () { + if grep -q "^features.*sie" /proc/cpuinfo; then + modprobe kvm + fi +} + +case $(uname -m) in + ppc64|ppc64le) + kvm_setup_powerpc + ;; + s390x) + kvm_setup_s390x + ;; +esac + +exit 0 diff --git a/SOURCES/kvm-setup.service b/SOURCES/kvm-setup.service new file mode 100644 index 0000000..9c4bf97 --- /dev/null +++ b/SOURCES/kvm-setup.service @@ -0,0 +1,14 @@ +[Unit] +Description=Perform system configuration to prepare system to run KVM guests +# Offlining CPUs can cause irqbalance to throw warnings if it's running +Before=irqbalance.service +# libvirtd reads CPU topology at startup, so change it before +Before=libvirtd.service + +[Service] +Type=oneshot +EnvironmentFile=-/etc/sysconfig/kvm +ExecStart=/usr/lib/systemd/kvm-setup + +[Install] +WantedBy=multi-user.target diff --git a/SOURCES/kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch b/SOURCES/kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch new file mode 100644 index 0000000..c6fcf61 --- /dev/null +++ b/SOURCES/kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch @@ -0,0 +1,131 @@ +From afe1a63fe0cf863e024889edd82b9a380bfa8230 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= +Date: Wed, 5 Jan 2022 12:38:47 +0000 +Subject: [PATCH 2/6] softmmu: fix device deletion events with -device JSON + syntax +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Kevin Wolf +RH-MergeRequest: 103: Fix hot unplug of devices created with -device JSON syntax +RH-Commit: [1/1] 64cbc78bcb46bdb24d5f589ceb5ad598c388e447 +RH-Bugzilla: 2033279 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Thomas Huth +RH-Acked-by: Jano Tomko +RH-Acked-by: Daniel P. Berrangé + +The -device JSON syntax impl leaks a reference on the created +DeviceState instance. As a result when you hot-unplug the +device, the device_finalize method won't be called and thus +it will fail to emit the required DEVICE_DELETED event. + +A 'json-cli' feature was previously added against the +'device_add' QMP command QAPI schema to indicated to mgmt +apps that -device supported JSON syntax. Given the hotplug +bug that feature flag is not usable for its purpose, so +we add a new 'json-cli-hotplug' feature to indicate the +-device supports JSON without breaking hotplug. + +Fixes: 5dacda5167560b3af8eadbce5814f60ba44b467e +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/802 +Signed-off-by: Daniel P. Berrangé +Message-Id: <20220105123847.4047954-2-berrange@redhat.com> +Reviewed-by: Laurent Vivier +Tested-by: Ján Tomko +Reviewed-by: Thomas Huth +Signed-off-by: Kevin Wolf +(cherry picked from commit 64b4529a432507ee84a924be69a03432639e87ba) +Signed-off-by: Kevin Wolf +--- + qapi/qdev.json | 5 ++++- + softmmu/vl.c | 4 +++- + tests/qtest/device-plug-test.c | 19 +++++++++++++++++++ + 3 files changed, 26 insertions(+), 2 deletions(-) + +diff --git a/qapi/qdev.json b/qapi/qdev.json +index 69656b14df..26cd10106b 100644 +--- a/qapi/qdev.json ++++ b/qapi/qdev.json +@@ -44,6 +44,9 @@ + # @json-cli: If present, the "-device" command line option supports JSON + # syntax with a structure identical to the arguments of this + # command. ++# @json-cli-hotplug: If present, the "-device" command line option supports JSON ++# syntax without the reference counting leak that broke ++# hot-unplug + # + # Notes: + # +@@ -74,7 +77,7 @@ + { 'command': 'device_add', + 'data': {'driver': 'str', '*bus': 'str', '*id': 'str'}, + 'gen': false, # so we can get the additional arguments +- 'features': ['json-cli'] } ++ 'features': ['json-cli', 'json-cli-hotplug'] } + + ## + # @device_del: +diff --git a/softmmu/vl.c b/softmmu/vl.c +index d46b8fb4ab..b3829e2edd 100644 +--- a/softmmu/vl.c ++++ b/softmmu/vl.c +@@ -2690,6 +2690,7 @@ static void qemu_create_cli_devices(void) + qemu_opts_foreach(qemu_find_opts("device"), + device_init_func, NULL, &error_fatal); + QTAILQ_FOREACH(opt, &device_opts, next) { ++ DeviceState *dev; + loc_push_restore(&opt->loc); + /* + * TODO Eventually we should call qmp_device_add() here to make sure it +@@ -2698,7 +2699,8 @@ static void qemu_create_cli_devices(void) + * from the start, so call qdev_device_add_from_qdict() directly for + * now. + */ +- qdev_device_add_from_qdict(opt->opts, true, &error_fatal); ++ dev = qdev_device_add_from_qdict(opt->opts, true, &error_fatal); ++ object_unref(OBJECT(dev)); + loc_pop(&opt->loc); + } + rom_reset_order_override(); +diff --git a/tests/qtest/device-plug-test.c b/tests/qtest/device-plug-test.c +index 559d47727a..ad79bd4c14 100644 +--- a/tests/qtest/device-plug-test.c ++++ b/tests/qtest/device-plug-test.c +@@ -77,6 +77,23 @@ static void test_pci_unplug_request(void) + qtest_quit(qtest); + } + ++static void test_pci_unplug_json_request(void) ++{ ++ QTestState *qtest = qtest_initf( ++ "-device '{\"driver\": \"virtio-mouse-pci\", \"id\": \"dev0\"}'"); ++ ++ /* ++ * Request device removal. As the guest is not running, the request won't ++ * be processed. However during system reset, the removal will be ++ * handled, removing the device. ++ */ ++ device_del(qtest, "dev0"); ++ system_reset(qtest); ++ wait_device_deleted_event(qtest, "dev0"); ++ ++ qtest_quit(qtest); ++} ++ + static void test_ccw_unplug(void) + { + QTestState *qtest = qtest_initf("-device virtio-balloon-ccw,id=dev0"); +@@ -145,6 +162,8 @@ int main(int argc, char **argv) + */ + qtest_add_func("/device-plug/pci-unplug-request", + test_pci_unplug_request); ++ qtest_add_func("/device-plug/pci-unplug-json-request", ++ test_pci_unplug_json_request); + + if (!strcmp(arch, "s390x")) { + qtest_add_func("/device-plug/ccw-unplug", +-- +2.27.0 + diff --git a/SOURCES/kvm-softmmu-physmem-Introduce-MemTxAttrs-memory-field-an.patch b/SOURCES/kvm-softmmu-physmem-Introduce-MemTxAttrs-memory-field-an.patch new file mode 100644 index 0000000..519c48d --- /dev/null +++ b/SOURCES/kvm-softmmu-physmem-Introduce-MemTxAttrs-memory-field-an.patch @@ -0,0 +1,175 @@ +From fe4e22b9ccf2eb55d61eccf5050fb7aeafb5fe20 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 13 Apr 2022 14:51:06 -0400 +Subject: [PATCH 3/3] softmmu/physmem: Introduce MemTxAttrs::memory field and + MEMTX_ACCESS_ERROR +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 151: hw/intc/arm_gicv3: Check for !MEMTX_OK instead of MEMTX_ERROR +RH-Commit: [3/3] b1ebc1e99f21ba0b9eccb284e260b56c7a8e64d8 (jmaloy/qemu-kvm) +RH-Bugzilla: 1999236 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Peter Xu + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1999236 +Upstream: Merged +CVE: CVE-2021-3750 +Conflicts: memalign.h has not been introduced in this version. Instead, + we include osdep.h where the function prototypes are to be + found. + +commit 3ab6fdc91b72e156da22848f0003ff4225690ced +Author: Philippe Mathieu-Daudé +Date: Wed Dec 15 19:24:21 2021 +0100 + + softmmu/physmem: Introduce MemTxAttrs::memory field and MEMTX_ACCESS_ERROR + + Add the 'memory' bit to the memory attributes to restrict bus + controller accesses to memories. + + Introduce flatview_access_allowed() to check bus permission + before running any bus transaction. + + Have read/write accessors return MEMTX_ACCESS_ERROR if an access is + restricted. + + There is no change for the default case where 'memory' is not set. + + Signed-off-by: Philippe Mathieu-Daudé + Message-Id: <20211215182421.418374-4-philmd@redhat.com> + Reviewed-by: Richard Henderson + Reviewed-by: Stefan Hajnoczi + [thuth: Replaced MEMTX_BUS_ERROR with MEMTX_ACCESS_ERROR, remove "inline"] + Signed-off-by: Thomas Huth + +(cherry picked from commit 3ab6fdc91b72e156da22848f0003ff4225690ced) +Signed-off-by: Jon Maloy +--- + include/exec/memattrs.h | 9 +++++++++ + softmmu/physmem.c | 45 +++++++++++++++++++++++++++++++++++++++-- + 2 files changed, 52 insertions(+), 2 deletions(-) + +diff --git a/include/exec/memattrs.h b/include/exec/memattrs.h +index 95f2d20d55..9fb98bc1ef 100644 +--- a/include/exec/memattrs.h ++++ b/include/exec/memattrs.h +@@ -35,6 +35,14 @@ typedef struct MemTxAttrs { + unsigned int secure:1; + /* Memory access is usermode (unprivileged) */ + unsigned int user:1; ++ /* ++ * Bus interconnect and peripherals can access anything (memories, ++ * devices) by default. By setting the 'memory' bit, bus transaction ++ * are restricted to "normal" memories (per the AMBA documentation) ++ * versus devices. Access to devices will be logged and rejected ++ * (see MEMTX_ACCESS_ERROR). ++ */ ++ unsigned int memory:1; + /* Requester ID (for MSI for example) */ + unsigned int requester_id:16; + /* Invert endianness for this page */ +@@ -66,6 +74,7 @@ typedef struct MemTxAttrs { + #define MEMTX_OK 0 + #define MEMTX_ERROR (1U << 0) /* device returned an error */ + #define MEMTX_DECODE_ERROR (1U << 1) /* nothing at that address */ ++#define MEMTX_ACCESS_ERROR (1U << 2) /* access denied */ + typedef uint32_t MemTxResult; + + #endif +diff --git a/softmmu/physmem.c b/softmmu/physmem.c +index 483a31be81..4d0ef5f92f 100644 +--- a/softmmu/physmem.c ++++ b/softmmu/physmem.c +@@ -41,6 +41,8 @@ + #include "qemu/config-file.h" + #include "qemu/error-report.h" + #include "qemu/qemu-print.h" ++#include "qemu/log.h" ++#include "qemu/osdep.h" + #include "exec/memory.h" + #include "exec/ioport.h" + #include "sysemu/dma.h" +@@ -2759,6 +2761,33 @@ static bool prepare_mmio_access(MemoryRegion *mr) + return release_lock; + } + ++/** ++ * flatview_access_allowed ++ * @mr: #MemoryRegion to be accessed ++ * @attrs: memory transaction attributes ++ * @addr: address within that memory region ++ * @len: the number of bytes to access ++ * ++ * Check if a memory transaction is allowed. ++ * ++ * Returns: true if transaction is allowed, false if denied. ++ */ ++static bool flatview_access_allowed(MemoryRegion *mr, MemTxAttrs attrs, ++ hwaddr addr, hwaddr len) ++{ ++ if (likely(!attrs.memory)) { ++ return true; ++ } ++ if (memory_region_is_ram(mr)) { ++ return true; ++ } ++ qemu_log_mask(LOG_GUEST_ERROR, ++ "Invalid access to non-RAM device at " ++ "addr 0x%" HWADDR_PRIX ", size %" HWADDR_PRIu ", " ++ "region '%s'\n", addr, len, memory_region_name(mr)); ++ return false; ++} ++ + /* Called within RCU critical section. */ + static MemTxResult flatview_write_continue(FlatView *fv, hwaddr addr, + MemTxAttrs attrs, +@@ -2773,7 +2802,10 @@ static MemTxResult flatview_write_continue(FlatView *fv, hwaddr addr, + const uint8_t *buf = ptr; + + for (;;) { +- if (!memory_access_is_direct(mr, true)) { ++ if (!flatview_access_allowed(mr, attrs, addr1, l)) { ++ result |= MEMTX_ACCESS_ERROR; ++ /* Keep going. */ ++ } else if (!memory_access_is_direct(mr, true)) { + release_lock |= prepare_mmio_access(mr); + l = memory_access_size(mr, l, addr1); + /* XXX: could force current_cpu to NULL to avoid +@@ -2818,6 +2850,9 @@ static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs, + + l = len; + mr = flatview_translate(fv, addr, &addr1, &l, true, attrs); ++ if (!flatview_access_allowed(mr, attrs, addr, len)) { ++ return MEMTX_ACCESS_ERROR; ++ } + return flatview_write_continue(fv, addr, attrs, buf, len, + addr1, l, mr); + } +@@ -2836,7 +2871,10 @@ MemTxResult flatview_read_continue(FlatView *fv, hwaddr addr, + + fuzz_dma_read_cb(addr, len, mr); + for (;;) { +- if (!memory_access_is_direct(mr, false)) { ++ if (!flatview_access_allowed(mr, attrs, addr1, l)) { ++ result |= MEMTX_ACCESS_ERROR; ++ /* Keep going. */ ++ } else if (!memory_access_is_direct(mr, false)) { + /* I/O case */ + release_lock |= prepare_mmio_access(mr); + l = memory_access_size(mr, l, addr1); +@@ -2879,6 +2917,9 @@ static MemTxResult flatview_read(FlatView *fv, hwaddr addr, + + l = len; + mr = flatview_translate(fv, addr, &addr1, &l, false, attrs); ++ if (!flatview_access_allowed(mr, attrs, addr, len)) { ++ return MEMTX_ACCESS_ERROR; ++ } + return flatview_read_continue(fv, addr, attrs, buf, len, + addr1, l, mr); + } +-- +2.27.0 + diff --git a/SOURCES/kvm-softmmu-physmem-Simplify-flatview_write-and-address_.patch b/SOURCES/kvm-softmmu-physmem-Simplify-flatview_write-and-address_.patch new file mode 100644 index 0000000..62f7037 --- /dev/null +++ b/SOURCES/kvm-softmmu-physmem-Simplify-flatview_write-and-address_.patch @@ -0,0 +1,80 @@ +From 916423392b46167c6683b0240610bb5a745590da Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 13 Apr 2022 14:51:06 -0400 +Subject: [PATCH 2/3] softmmu/physmem: Simplify flatview_write and + address_space_access_valid +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 151: hw/intc/arm_gicv3: Check for !MEMTX_OK instead of MEMTX_ERROR +RH-Commit: [2/3] daabe41eefd5c519def592e374fa368e32a680d3 (jmaloy/qemu-kvm) +RH-Bugzilla: 1999236 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Peter Xu + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1999236 +Upstream: Merged +CVE: CVE-2021-3750 + +commit 58e74682baf4e1ad26b064d8c02e5bc99c75c5d9 +Author: Philippe Mathieu-Daudé +Date: Wed Dec 15 19:24:20 2021 +0100 + + softmmu/physmem: Simplify flatview_write and address_space_access_valid + + Remove unuseful local 'result' variables. + + Reviewed-by: Peter Xu + Reviewed-by: David Hildenbrand + Reviewed-by: Alexander Bulekov + Reviewed-by: Stefan Hajnoczi + Signed-off-by: Philippe Mathieu-Daudé + Message-Id: <20211215182421.418374-3-philmd@redhat.com> + Signed-off-by: Thomas Huth + +(cherry picked from commit 58e74682baf4e1ad26b064d8c02e5bc99c75c5d9) +Signed-off-by: Jon Maloy +--- + softmmu/physmem.c | 11 +++-------- + 1 file changed, 3 insertions(+), 8 deletions(-) + +diff --git a/softmmu/physmem.c b/softmmu/physmem.c +index 3524c04c2a..483a31be81 100644 +--- a/softmmu/physmem.c ++++ b/softmmu/physmem.c +@@ -2815,14 +2815,11 @@ static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs, + hwaddr l; + hwaddr addr1; + MemoryRegion *mr; +- MemTxResult result = MEMTX_OK; + + l = len; + mr = flatview_translate(fv, addr, &addr1, &l, true, attrs); +- result = flatview_write_continue(fv, addr, attrs, buf, len, +- addr1, l, mr); +- +- return result; ++ return flatview_write_continue(fv, addr, attrs, buf, len, ++ addr1, l, mr); + } + + /* Called within RCU critical section. */ +@@ -3119,12 +3116,10 @@ bool address_space_access_valid(AddressSpace *as, hwaddr addr, + MemTxAttrs attrs) + { + FlatView *fv; +- bool result; + + RCU_READ_LOCK_GUARD(); + fv = address_space_to_flatview(as); +- result = flatview_access_valid(fv, addr, len, is_write, attrs); +- return result; ++ return flatview_access_valid(fv, addr, len, is_write, attrs); + } + + static hwaddr +-- +2.27.0 + diff --git a/SOURCES/kvm-target-arm-kvm-Retry-KVM_CREATE_VM-call-if-it-fails-.patch b/SOURCES/kvm-target-arm-kvm-Retry-KVM_CREATE_VM-call-if-it-fails-.patch new file mode 100644 index 0000000..94ff608 --- /dev/null +++ b/SOURCES/kvm-target-arm-kvm-Retry-KVM_CREATE_VM-call-if-it-fails-.patch @@ -0,0 +1,65 @@ +From 3014c7c11b6e64433fe9f3c463bd91e318ac96b6 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 9 Nov 2022 18:41:18 -0500 +Subject: [PATCH 2/2] target/arm/kvm: Retry KVM_CREATE_VM call if it fails + EINTR + +RH-Author: Jon Maloy +RH-MergeRequest: 228: qemu-kvm: backport some aarch64 fixes +RH-Bugzilla: 2132609 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Igor Mammedov +RH-Acked-by: Eric Auger +RH-Acked-by: Gavin Shan +RH-Commit: [2/2] 8494bbfb3fcd8693f56312f984d2964d1ca275c2 (jmaloy/qemu-kvm) + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2132609 +Upstream: Merged + +commit bbde13cd14ad4eec18529ce0bf5876058464e124 +Author: Peter Maydell +Date: Fri Sep 30 12:38:24 2022 +0100 + + target/arm/kvm: Retry KVM_CREATE_VM call if it fails EINTR + + Occasionally the KVM_CREATE_VM ioctl can return EINTR, even though + there is no pending signal to be taken. In commit 94ccff13382055 + we added a retry-on-EINTR loop to the KVM_CREATE_VM call in the + generic KVM code. Adopt the same approach for the use of the + ioctl in the Arm-specific KVM code (where we use it to create a + scratch VM for probing for various things). + + For more information, see the mailing list thread: + https://lore.kernel.org/qemu-devel/8735e0s1zw.wl-maz@kernel.org/ + + Reported-by: Vitaly Chikunov + Signed-off-by: Peter Maydell + Reviewed-by: Vitaly Chikunov + Reviewed-by: Eric Auger + Acked-by: Marc Zyngier + Message-id: 20220930113824.1933293-1-peter.maydell@linaro.org + +(cherry picked from commit bbde13cd14ad4eec18529ce0bf5876058464e124) +Signed-off-by: Jon Maloy +--- + target/arm/kvm.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index bbf1ce7ba3..1ae4e51055 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -80,7 +80,9 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try, + if (max_vm_pa_size < 0) { + max_vm_pa_size = 0; + } +- vmfd = ioctl(kvmfd, KVM_CREATE_VM, max_vm_pa_size); ++ do { ++ vmfd = ioctl(kvmfd, KVM_CREATE_VM, max_vm_pa_size); ++ } while (vmfd == -1 && errno == EINTR); + if (vmfd < 0) { + goto err; + } +-- +2.37.3 + diff --git a/SOURCES/kvm-target-i386-kvm-do-not-access-uninitialized-variable.patch b/SOURCES/kvm-target-i386-kvm-do-not-access-uninitialized-variable.patch new file mode 100644 index 0000000..04db85f --- /dev/null +++ b/SOURCES/kvm-target-i386-kvm-do-not-access-uninitialized-variable.patch @@ -0,0 +1,73 @@ +From 688c9f386635544dbc468171a32fbc84f0c9224e Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 18 Mar 2022 16:23:47 +0100 +Subject: [PATCH 12/24] target/i386: kvm: do not access uninitialized variable + on older kernels + +RH-Author: Paul Lai +RH-MergeRequest: 176: Enable KVM AMX support +RH-Commit: [12/13] 776fac1e7d1aa16ec5f4d99ddad3039eab8212af +RH-Bugzilla: 1916415 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Igor Mammedov +RH-Acked-by: Paolo Bonzini + +KVM support for AMX includes a new system attribute, KVM_X86_XCOMP_GUEST_SUPP. +Commit 19db68ca68 ("x86: Grant AMX permission for guest", 2022-03-15) however +did not fully consider the behavior on older kernels. First, it warns +too aggressively. Second, it invokes the KVM_GET_DEVICE_ATTR ioctl +unconditionally and then uses the "bitmask" variable, which remains +uninitialized if the ioctl fails. Third, kvm_ioctl returns -errno rather +than -1 on errors. + +While at it, explain why the ioctl is needed and KVM_GET_SUPPORTED_CPUID +is not enough. + +Signed-off-by: Paolo Bonzini +(cherry picked from commit 3ec5ad40081b14af28496198b4d08dbe13386790) +Signed-off-by: Paul Lai +--- + target/i386/kvm/kvm.c | 17 +++++++++++++---- + 1 file changed, 13 insertions(+), 4 deletions(-) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index b1128b0e07..bd439e56ad 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -409,6 +409,12 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, + } + } else if (function == 0xd && index == 0 && + (reg == R_EAX || reg == R_EDX)) { ++ /* ++ * The value returned by KVM_GET_SUPPORTED_CPUID does not include ++ * features that still have to be enabled with the arch_prctl ++ * system call. QEMU needs the full value, which is retrieved ++ * with KVM_GET_DEVICE_ATTR. ++ */ + struct kvm_device_attr attr = { + .group = 0, + .attr = KVM_X86_XCOMP_GUEST_SUPP, +@@ -417,13 +423,16 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, + + bool sys_attr = kvm_check_extension(s, KVM_CAP_SYS_ATTRIBUTES); + if (!sys_attr) { +- warn_report("cannot get sys attribute capabilities %d", sys_attr); ++ return ret; + } + + int rc = kvm_ioctl(s, KVM_GET_DEVICE_ATTR, &attr); +- if (rc == -1 && (errno == ENXIO || errno == EINVAL)) { +- warn_report("KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) " +- "error: %d", rc); ++ if (rc < 0) { ++ if (rc != -ENXIO) { ++ warn_report("KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) " ++ "error: %d", rc); ++ } ++ return ret; + } + ret = (reg == R_EAX) ? bitmask : bitmask >> 32; + } else if (function == 0x80000001 && reg == R_ECX) { +-- +2.35.3 + diff --git a/SOURCES/kvm-target-i386-properly-reset-TSC-on-reset.patch b/SOURCES/kvm-target-i386-properly-reset-TSC-on-reset.patch new file mode 100644 index 0000000..47ce2af --- /dev/null +++ b/SOURCES/kvm-target-i386-properly-reset-TSC-on-reset.patch @@ -0,0 +1,83 @@ +From 416de21d11540a927cceb533bf54ce28ffa15ad6 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Thu, 24 Mar 2022 09:21:41 +0100 +Subject: [PATCH 2/3] target/i386: properly reset TSC on reset + +RH-Author: Paolo Bonzini +RH-MergeRequest: 172: target/i386: properly reset TSC on reset +RH-Commit: [1/1] 7008bc5d02ad0a2d8b78259459d22d8f0986c989 +RH-Bugzilla: 2070417 +RH-Acked-by: Marcelo Tosatti +RH-Acked-by: Igor Mammedov +RH-Acked-by: Vitaly Kuznetsov + +Some versions of Windows hang on reboot if their TSC value is greater +than 2^54. The calibration of the Hyper-V reference time overflows +and fails; as a result the processors' clock sources are out of sync. + +The issue is that the TSC _should_ be reset to 0 on CPU reset and +QEMU tries to do that. However, KVM special cases writing 0 to the +TSC and thinks that QEMU is trying to hot-plug a CPU, which is +correct the first time through but not later. Thwart this valiant +effort and reset the TSC to 1 instead, but only if the CPU has been +run once. + +For this to work, env->tsc has to be moved to the part of CPUArchState +that is not zeroed at the beginning of x86_cpu_reset. + +Reported-by: Vadim Rozenfeld +Supersedes: <20220324082346.72180-1-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 5286c3662294119dc2dd1e9296757337211451f6) +--- + target/i386/cpu.c | 13 +++++++++++++ + target/i386/cpu.h | 2 +- + 2 files changed, 14 insertions(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 6e25d13339..dd6935b1dd 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -5871,6 +5871,19 @@ static void x86_cpu_reset(DeviceState *dev) + env->xstate_bv = 0; + + env->pat = 0x0007040600070406ULL; ++ ++ if (kvm_enabled()) { ++ /* ++ * KVM handles TSC = 0 specially and thinks we are hot-plugging ++ * a new CPU, use 1 instead to force a reset. ++ */ ++ if (env->tsc != 0) { ++ env->tsc = 1; ++ } ++ } else { ++ env->tsc = 0; ++ } ++ + env->msr_ia32_misc_enable = MSR_IA32_MISC_ENABLE_DEFAULT; + if (env->features[FEAT_1_ECX] & CPUID_EXT_MONITOR) { + env->msr_ia32_misc_enable |= MSR_IA32_MISC_ENABLE_MWAIT; +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 04f2b790c9..c6a6c871f1 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -1510,7 +1510,6 @@ typedef struct CPUX86State { + target_ulong kernelgsbase; + #endif + +- uint64_t tsc; + uint64_t tsc_adjust; + uint64_t tsc_deadline; + uint64_t tsc_aux; +@@ -1660,6 +1659,7 @@ typedef struct CPUX86State { + int64_t tsc_khz; + int64_t user_tsc_khz; /* for sanity check only */ + uint64_t apic_bus_freq; ++ uint64_t tsc; + #if defined(CONFIG_KVM) || defined(CONFIG_HVF) + void *xsave_buf; + uint32_t xsave_buf_len; +-- +2.35.1 + diff --git a/SOURCES/kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch b/SOURCES/kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch new file mode 100644 index 0000000..188c6bf --- /dev/null +++ b/SOURCES/kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch @@ -0,0 +1,50 @@ +From e1870dec813fa6f8482f4f27b7a9bef8c1584b6b Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Tue, 14 Feb 2023 14:48:37 +0100 +Subject: [PATCH 3/3] target/s390x/arch_dump: Fix memory corruption in + s390x_write_elf64_notes() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 260: target/s390x/arch_dump: Fix memory corruption in s390x_write_elf64_notes() +RH-Bugzilla: 2168187 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Cornelia Huck +RH-Commit: [1/1] 67b71ed720a1f03d5bda9119969ea95fc4a6106d + +Bugzilla: https://bugzilla.redhat.com/2168187 +Upstream-Status: Posted (and reviewed, but not merged yet) + +"note_size" can be smaller than sizeof(note), so unconditionally calling +memset(notep, 0, sizeof(note)) could cause a memory corruption here in +case notep has been allocated dynamically, thus let's use note_size as +length argument for memset() instead. + +Fixes: 113d8f4e95 ("s390x: pv: Add dump support") +Message-Id: <20230214141056.680969-1-thuth@redhat.com> +Reviewed-by: Janosch Frank +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Thomas Huth +--- + target/s390x/arch_dump.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c +index a2329141e8..a7c44ba49d 100644 +--- a/target/s390x/arch_dump.c ++++ b/target/s390x/arch_dump.c +@@ -248,7 +248,7 @@ static int s390x_write_elf64_notes(const char *note_name, + notep = g_malloc(note_size); + } + +- memset(notep, 0, sizeof(note)); ++ memset(notep, 0, note_size); + + /* Setup note header data */ + notep->hdr.n_descsz = cpu_to_be32(content_size); +-- +2.37.3 + diff --git a/SOURCES/kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch b/SOURCES/kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch new file mode 100644 index 0000000..cfc1e4a --- /dev/null +++ b/SOURCES/kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch @@ -0,0 +1,106 @@ +From 8d1a60069cddcc69ef1a6f50f2b55343de348b57 Mon Sep 17 00:00:00 2001 +From: Janis Schoetterl-Glausch +Date: Fri, 6 May 2022 17:39:56 +0200 +Subject: [PATCH 2/3] target/s390x: kvm: Honor storage keys during emulation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 220: s390x: Fix skey test in kvm_unit_test +RH-Bugzilla: 2124757 +RH-Acked-by: Thomas Huth +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck +RH-Commit: [2/2] 980dbb4eba8d2f1da7cf4113230d0a6483cffc4f + +Storage key controlled protection is currently not honored when +emulating instructions. +If available, enable key protection for the MEM_OP ioctl, thereby +enabling it for the s390_cpu_virt_mem_* functions, when using kvm. +As a result, the emulation of the following instructions honors storage +keys: + +* CLP + The Synch I/O CLP command would need special handling in order + to support storage keys, but is currently not supported. +* CHSC + Performing commands asynchronously would require special + handling, but commands are currently always synchronous. +* STSI +* TSCH + Must (and does) not change channel if terminated due to + protection. +* MSCH + Suppressed on protection, works because fetching instruction. +* SSCH + Suppressed on protection, works because fetching instruction. +* STSCH +* STCRW + Suppressed on protection, this works because no partial store is + possible, because the operand cannot span multiple pages. +* PCISTB +* MPCIFC +* STPCIFC + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2124757 + +Signed-off-by: Janis Schoetterl-Glausch +Message-Id: <20220506153956.2217601-3-scgl@linux.ibm.com> +Signed-off-by: Thomas Huth +(cherry picked from commit 54354861d21b69ec0781f43e67b8d4f6edad7e3f) +Signed-off-by: Cédric Le Goater +--- + target/s390x/kvm/kvm.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c +index c52434985b..ba04997da1 100644 +--- a/target/s390x/kvm/kvm.c ++++ b/target/s390x/kvm/kvm.c +@@ -152,12 +152,15 @@ const KVMCapabilityInfo kvm_arch_required_capabilities[] = { + static int cap_sync_regs; + static int cap_async_pf; + static int cap_mem_op; ++static int cap_mem_op_extension; + static int cap_s390_irq; + static int cap_ri; + static int cap_hpage_1m; + static int cap_vcpu_resets; + static int cap_protected; + ++static bool mem_op_storage_key_support; ++ + static int active_cmma; + + static int kvm_s390_query_mem_limit(uint64_t *memory_limit) +@@ -355,6 +358,8 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + cap_sync_regs = kvm_check_extension(s, KVM_CAP_SYNC_REGS); + cap_async_pf = kvm_check_extension(s, KVM_CAP_ASYNC_PF); + cap_mem_op = kvm_check_extension(s, KVM_CAP_S390_MEM_OP); ++ cap_mem_op_extension = kvm_check_extension(s, KVM_CAP_S390_MEM_OP_EXTENSION); ++ mem_op_storage_key_support = cap_mem_op_extension > 0; + cap_s390_irq = kvm_check_extension(s, KVM_CAP_S390_INJECT_IRQ); + cap_vcpu_resets = kvm_check_extension(s, KVM_CAP_S390_VCPU_RESETS); + cap_protected = kvm_check_extension(s, KVM_CAP_S390_PROTECTED); +@@ -843,6 +848,7 @@ int kvm_s390_mem_op(S390CPU *cpu, vaddr addr, uint8_t ar, void *hostbuf, + : KVM_S390_MEMOP_LOGICAL_READ, + .buf = (uint64_t)hostbuf, + .ar = ar, ++ .key = (cpu->env.psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY, + }; + int ret; + +@@ -852,6 +858,9 @@ int kvm_s390_mem_op(S390CPU *cpu, vaddr addr, uint8_t ar, void *hostbuf, + if (!hostbuf) { + mem_op.flags |= KVM_S390_MEMOP_F_CHECK_ONLY; + } ++ if (mem_op_storage_key_support) { ++ mem_op.flags |= KVM_S390_MEMOP_F_SKEY_PROTECTION; ++ } + + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_S390_MEM_OP, &mem_op); + if (ret < 0) { +-- +2.35.3 + diff --git a/SOURCES/kvm-tests-acpi-SLIC-update-expected-blobs.patch b/SOURCES/kvm-tests-acpi-SLIC-update-expected-blobs.patch new file mode 100644 index 0000000..4d5fc35 --- /dev/null +++ b/SOURCES/kvm-tests-acpi-SLIC-update-expected-blobs.patch @@ -0,0 +1,47 @@ +From 0f5984bd89d481bf2494d4b3c36ef80350f44811 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 30 Mar 2022 14:52:34 -0400 +Subject: [PATCH 12/18] tests: acpi: SLIC: update expected blobs + +RH-Author: Jon Maloy +RH-MergeRequest: 141: acpi: fix QEMU crash when started with SLIC table +RH-Commit: [4/10] ca28e5c57f9eb432e5ad6b1cb7ef646a86890dd5 (jmaloy/qemu-kvm) +RH-Bugzilla: 2062611 +RH-Acked-by: Igor Mammedov + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2062611 +Upstream: Merged + +commit c8adb4d222c42951a9d0367e5f5d4e1f5e2c9ad7 +Author: Igor Mammedov +Date: Mon Dec 27 14:31:20 2021 -0500 + + tests: acpi: SLIC: update expected blobs + + Signed-off-by: Igor Mammedov + Message-Id: <20211227193120.1084176-5-imammedo@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. Tsirkin + +(cherry picked from commit c8adb4d222c42951a9d0367e5f5d4e1f5e2c9ad7) +Signed-off-by: Jon Maloy +--- + tests/data/acpi/q35/FACP.slic | Bin 244 -> 244 bytes + tests/data/acpi/q35/SLIC.slic | Bin 0 -> 36 bytes + tests/qtest/bios-tables-test-allowed-diff.h | 2 -- + 3 files changed, 2 deletions(-) + +literal 0 +HcmV?d00001 + +diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h +index 49dbf8fa3e..dfb8523c8b 100644 +--- a/tests/qtest/bios-tables-test-allowed-diff.h ++++ b/tests/qtest/bios-tables-test-allowed-diff.h +@@ -1,3 +1 @@ + /* List of comma-separated changed AML files to ignore */ +-"tests/data/acpi/q35/FACP.slic", +-"tests/data/acpi/q35/SLIC.slic", +-- +2.27.0 + diff --git a/SOURCES/kvm-tests-acpi-add-SLIC-table-test.patch b/SOURCES/kvm-tests-acpi-add-SLIC-table-test.patch new file mode 100644 index 0000000..9e54a7f --- /dev/null +++ b/SOURCES/kvm-tests-acpi-add-SLIC-table-test.patch @@ -0,0 +1,76 @@ +From 341715473c2a71f11a3888420a0caecf27ed4eb5 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 30 Mar 2022 14:52:34 -0400 +Subject: [PATCH 11/18] tests: acpi: add SLIC table test + +RH-Author: Jon Maloy +RH-MergeRequest: 141: acpi: fix QEMU crash when started with SLIC table +RH-Commit: [3/10] baac9b82c16a50eb4640fd7146775c9d507c7b21 (jmaloy/qemu-kvm) +RH-Bugzilla: 2062611 +RH-Acked-by: Igor Mammedov + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2062611 +Upstream: Merged + +commit 11edfabee443b149468a82b5efc88c96d1d259ec +Author: Igor Mammedov +Date: Mon Dec 27 14:31:19 2021 -0500 + + tests: acpi: add SLIC table test + + When user uses '-acpitable' to add SLIC table, some ACPI + tables (FADT) will change its 'Oem ID'/'Oem Table ID' fields to + match that of SLIC. Test makes sure thati QEMU handles + those fields correctly when SLIC table is added with + '-acpitable' option. + + Conflicts: tests/qtest/bios-tables-test.c + due to missing 39d7554b2009 ("tests/acpi: add test case for VIOT") + + Signed-off-by: Igor Mammedov + Message-Id: <20211227193120.1084176-4-imammedo@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. Tsirkin + +(cherry picked from commit 11edfabee443b149468a82b5efc88c96d1d259ec) +Signed-off-by: Jon Maloy +--- + tests/qtest/bios-tables-test.c | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c +index 16d8304cde..e159b71136 100644 +--- a/tests/qtest/bios-tables-test.c ++++ b/tests/qtest/bios-tables-test.c +@@ -1467,6 +1467,20 @@ static void test_acpi_virt_tcg(void) + free_test_data(&data); + } + ++static void test_acpi_q35_slic(void) ++{ ++ test_data data = { ++ .machine = MACHINE_Q35, ++ .variant = ".slic", ++ }; ++ ++ test_acpi_one("-acpitable sig=SLIC,oem_id='CRASH ',oem_table_id='ME'," ++ "oem_rev=00002210,asl_compiler_id='qemu'," ++ "asl_compiler_rev=00000000,data=/dev/null", ++ &data); ++ free_test_data(&data); ++} ++ + static void test_oem_fields(test_data *data) + { + int i; +@@ -1641,6 +1655,7 @@ int main(int argc, char *argv[]) + qtest_add_func("acpi/q35/kvm/xapic", test_acpi_q35_kvm_xapic); + qtest_add_func("acpi/q35/kvm/dmar", test_acpi_q35_kvm_dmar); + } ++ qtest_add_func("acpi/q35/slic", test_acpi_q35_slic); + } else if (strcmp(arch, "aarch64") == 0) { + if (has_tcg) { + qtest_add_func("acpi/virt", test_acpi_virt_tcg); +-- +2.27.0 + diff --git a/SOURCES/kvm-tests-acpi-manually-pad-OEM_ID-OEM_TABLE_ID-for-test.patch b/SOURCES/kvm-tests-acpi-manually-pad-OEM_ID-OEM_TABLE_ID-for-test.patch new file mode 100644 index 0000000..05a6838 --- /dev/null +++ b/SOURCES/kvm-tests-acpi-manually-pad-OEM_ID-OEM_TABLE_ID-for-test.patch @@ -0,0 +1,84 @@ +From d94b3278c84cf7451489631d804a6b5cbd28a59d Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 30 Mar 2022 14:52:34 -0400 +Subject: [PATCH 13/18] tests: acpi: manually pad OEM_ID/OEM_TABLE_ID for + test_oem_fields() test + +RH-Author: Jon Maloy +RH-MergeRequest: 141: acpi: fix QEMU crash when started with SLIC table +RH-Commit: [5/10] 4ec8c738acec178c2f005f189b0c2a77a7af4088 (jmaloy/qemu-kvm) +RH-Bugzilla: 2062611 +RH-Acked-by: Igor Mammedov + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2062611 +Upstream: Merged + +commit a849522f726767022203ef2b6c395ea19facb866 +Author: Igor Mammedov +Date: Wed Jan 12 08:03:29 2022 -0500 + + tests: acpi: manually pad OEM_ID/OEM_TABLE_ID for test_oem_fields() test + + The next commit will revert OEM fields padding with whitespace to + padding with '\0' as it was before [1]. As result test_oem_fields() will + fail due to unexpectedly smaller ID sizes read from QEMU ACPI tables. + + Pad OEM_ID/OEM_TABLE_ID manually with spaces so that values the test + puts on QEMU CLI and expected values match. + + 1) 602b458201 ("acpi: Permit OEM ID and OEM table ID fields to be changed") + Signed-off-by: Igor Mammedov + Message-Id: <20220112130332.1648664-2-imammedo@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. Tsirkin + +(cherry picked from commit a849522f726767022203ef2b6c395ea19facb866) +Signed-off-by: Jon Maloy +--- + tests/qtest/bios-tables-test.c | 15 ++++++--------- + 1 file changed, 6 insertions(+), 9 deletions(-) + +diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c +index e159b71136..348fdbd202 100644 +--- a/tests/qtest/bios-tables-test.c ++++ b/tests/qtest/bios-tables-test.c +@@ -71,9 +71,10 @@ + + #define ACPI_REBUILD_EXPECTED_AML "TEST_ACPI_REBUILD_AML" + +-#define OEM_ID "TEST" +-#define OEM_TABLE_ID "OEM" +-#define OEM_TEST_ARGS "-machine x-oem-id="OEM_ID",x-oem-table-id="OEM_TABLE_ID ++#define OEM_ID "TEST " ++#define OEM_TABLE_ID "OEM " ++#define OEM_TEST_ARGS "-machine x-oem-id='" OEM_ID "',x-oem-table-id='" \ ++ OEM_TABLE_ID "'" + + typedef struct { + bool tcg_only; +@@ -1484,11 +1485,7 @@ static void test_acpi_q35_slic(void) + static void test_oem_fields(test_data *data) + { + int i; +- char oem_id[6]; +- char oem_table_id[8]; + +- strpadcpy(oem_id, sizeof oem_id, OEM_ID, ' '); +- strpadcpy(oem_table_id, sizeof oem_table_id, OEM_TABLE_ID, ' '); + for (i = 0; i < data->tables->len; ++i) { + AcpiSdtTable *sdt; + +@@ -1498,8 +1495,8 @@ static void test_oem_fields(test_data *data) + continue; + } + +- g_assert(memcmp(sdt->aml + 10, oem_id, 6) == 0); +- g_assert(memcmp(sdt->aml + 16, oem_table_id, 8) == 0); ++ g_assert(memcmp(sdt->aml + 10, OEM_ID, 6) == 0); ++ g_assert(memcmp(sdt->aml + 16, OEM_TABLE_ID, 8) == 0); + } + } + +-- +2.27.0 + diff --git a/SOURCES/kvm-tests-acpi-test-short-OEM_ID-OEM_TABLE_ID-values-in-.patch b/SOURCES/kvm-tests-acpi-test-short-OEM_ID-OEM_TABLE_ID-values-in-.patch new file mode 100644 index 0000000..66d62e5 --- /dev/null +++ b/SOURCES/kvm-tests-acpi-test-short-OEM_ID-OEM_TABLE_ID-values-in-.patch @@ -0,0 +1,77 @@ +From 485bf2eb8edabd4553d995d5e32224df1e510aa2 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 30 Mar 2022 14:52:34 -0400 +Subject: [PATCH 17/18] tests: acpi: test short OEM_ID/OEM_TABLE_ID values in + test_oem_fields() + +RH-Author: Jon Maloy +RH-MergeRequest: 141: acpi: fix QEMU crash when started with SLIC table +RH-Commit: [9/10] 31339223fb6c6cc32185b9fdaac76f2709b17ad6 (jmaloy/qemu-kvm) +RH-Bugzilla: 2062611 +RH-Acked-by: Igor Mammedov + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2062611 +Upstream: Merged + +commit 408ca92634770de5eac7965ed97c6260e770f2e7 +Author: Igor Mammedov +Date: Fri Jan 14 09:26:41 2022 -0500 + + tests: acpi: test short OEM_ID/OEM_TABLE_ID values in test_oem_fields() + + Previous patch [1] added explicit whitespace padding to OEM_ID/OEM_TABLE_ID + values used in test_oem_fields() testcase to avoid false positive and + bisection issues when QEMU is switched to \0' padding. As result + testcase ceased to test values that were shorter than max possible + length values. + + Update testcase to make sure that it's testing shorter IDs like it + used to before [2]. + + 1) "tests: acpi: manually pad OEM_ID/OEM_TABLE_ID for test_oem_fields() test" + 2) 602b458201 ("acpi: Permit OEM ID and OEM table ID fields to be changed") + + Signed-off-by: Igor Mammedov + Message-Id: <20220114142641.1727679-1-imammedo@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. Tsirkin + +(cherry picked from commit 408ca92634770de5eac7965ed97c6260e770f2e7) +Signed-off-by: Jon Maloy +--- + tests/qtest/bios-tables-test.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c +index 348fdbd202..515a647490 100644 +--- a/tests/qtest/bios-tables-test.c ++++ b/tests/qtest/bios-tables-test.c +@@ -71,10 +71,10 @@ + + #define ACPI_REBUILD_EXPECTED_AML "TEST_ACPI_REBUILD_AML" + +-#define OEM_ID "TEST " +-#define OEM_TABLE_ID "OEM " +-#define OEM_TEST_ARGS "-machine x-oem-id='" OEM_ID "',x-oem-table-id='" \ +- OEM_TABLE_ID "'" ++#define OEM_ID "TEST" ++#define OEM_TABLE_ID "OEM" ++#define OEM_TEST_ARGS "-machine x-oem-id=" OEM_ID ",x-oem-table-id=" \ ++ OEM_TABLE_ID + + typedef struct { + bool tcg_only; +@@ -1495,8 +1495,8 @@ static void test_oem_fields(test_data *data) + continue; + } + +- g_assert(memcmp(sdt->aml + 10, OEM_ID, 6) == 0); +- g_assert(memcmp(sdt->aml + 16, OEM_TABLE_ID, 8) == 0); ++ g_assert(strncmp((char *)sdt->aml + 10, OEM_ID, 6) == 0); ++ g_assert(strncmp((char *)sdt->aml + 16, OEM_TABLE_ID, 8) == 0); + } + } + +-- +2.27.0 + diff --git a/SOURCES/kvm-tests-acpi-update-expected-blobs.patch b/SOURCES/kvm-tests-acpi-update-expected-blobs.patch new file mode 100644 index 0000000..8f300c4 --- /dev/null +++ b/SOURCES/kvm-tests-acpi-update-expected-blobs.patch @@ -0,0 +1,58 @@ +From 4785d2a77fbea681975e5c48ae6a1be49058e089 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 30 Mar 2022 14:52:34 -0400 +Subject: [PATCH 16/18] tests: acpi: update expected blobs + +RH-Author: Jon Maloy +RH-MergeRequest: 141: acpi: fix QEMU crash when started with SLIC table +RH-Commit: [8/10] e069c5de88f34393d65d32b60380865832820302 (jmaloy/qemu-kvm) +RH-Bugzilla: 2062611 +RH-Acked-by: Igor Mammedov + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2062611 +Upstream: Merged + +commit 5adc3aba875416b0e077d8a29ddd0357883746f4 +Author: Igor Mammedov +Date: Wed Jan 12 08:03:32 2022 -0500 + + tests: acpi: update expected blobs + + Expected changes caused by previous commit: + + nvdimm ssdt (q35/pc/virt): + - * OEM Table ID "NVDIMM " + + * OEM Table ID "NVDIMM" + + SLIC test FADT (tests/data/acpi/q35/FACP.slic): + -[010h 0016 8] Oem Table ID : "ME " + +[010h 0016 8] Oem Table ID : "ME" + + Signed-off-by: Igor Mammedov + Message-Id: <20220112130332.1648664-5-imammedo@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. Tsirkin + +(cherry picked from commit 5adc3aba875416b0e077d8a29ddd0357883746f4) +Signed-off-by: Jon Maloy +--- + tests/data/acpi/pc/SSDT.dimmpxm | Bin 734 -> 734 bytes + tests/data/acpi/q35/FACP.slic | Bin 244 -> 244 bytes + tests/data/acpi/q35/SSDT.dimmpxm | Bin 734 -> 734 bytes + tests/data/acpi/virt/SSDT.memhp | Bin 736 -> 736 bytes + tests/qtest/bios-tables-test-allowed-diff.h | 4 ---- + 5 files changed, 4 deletions(-) + +diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h +index 7faa8f53be..dfb8523c8b 100644 +--- a/tests/qtest/bios-tables-test-allowed-diff.h ++++ b/tests/qtest/bios-tables-test-allowed-diff.h +@@ -1,5 +1 @@ + /* List of comma-separated changed AML files to ignore */ +-"tests/data/acpi/virt/SSDT.memhp", +-"tests/data/acpi/pc/SSDT.dimmpxm", +-"tests/data/acpi/q35/SSDT.dimmpxm", +-"tests/data/acpi/q35/FACP.slic", +-- +2.27.0 + diff --git a/SOURCES/kvm-tests-acpi-whitelist-expected-blobs-before-changing-.patch b/SOURCES/kvm-tests-acpi-whitelist-expected-blobs-before-changing-.patch new file mode 100644 index 0000000..4a1b350 --- /dev/null +++ b/SOURCES/kvm-tests-acpi-whitelist-expected-blobs-before-changing-.patch @@ -0,0 +1,47 @@ +From 4e6482073df85db5982aa03ab0355e632b7157fc Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 30 Mar 2022 14:52:34 -0400 +Subject: [PATCH 10/18] tests: acpi: whitelist expected blobs before changing + them + +RH-Author: Jon Maloy +RH-MergeRequest: 141: acpi: fix QEMU crash when started with SLIC table +RH-Commit: [2/10] c664ecad30ca9c13025a63bb31ae7b80fd63e4df (jmaloy/qemu-kvm) +RH-Bugzilla: 2062611 +RH-Acked-by: Igor Mammedov + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2062611 +Upstream: Merged + +commit e71f6ab9d93a7d01e833647e7010c1079c4cef30 +Author: Igor Mammedov +Date: Mon Dec 27 14:31:18 2021 -0500 + + tests: acpi: whitelist expected blobs before changing them + + Signed-off-by: Igor Mammedov + Message-Id: <20211227193120.1084176-3-imammedo@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. Tsirkin + +(cherry picked from commit e71f6ab9d93a7d01e833647e7010c1079c4cef30) +Signed-off-by: Jon Maloy +--- + tests/data/acpi/q35/FACP.slic | Bin 0 -> 244 bytes + tests/data/acpi/q35/SLIC.slic | 0 + tests/qtest/bios-tables-test-allowed-diff.h | 2 ++ + 3 files changed, 2 insertions(+) + create mode 100644 tests/data/acpi/q35/FACP.slic + create mode 100644 tests/data/acpi/q35/SLIC.slic + +diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h +index dfb8523c8b..49dbf8fa3e 100644 +--- a/tests/qtest/bios-tables-test-allowed-diff.h ++++ b/tests/qtest/bios-tables-test-allowed-diff.h +@@ -1 +1,3 @@ + /* List of comma-separated changed AML files to ignore */ ++"tests/data/acpi/q35/FACP.slic", ++"tests/data/acpi/q35/SLIC.slic", +-- +2.27.0 + diff --git a/SOURCES/kvm-tests-acpi-whitelist-nvdimm-s-SSDT-and-FACP.slic-exp.patch b/SOURCES/kvm-tests-acpi-whitelist-nvdimm-s-SSDT-and-FACP.slic-exp.patch new file mode 100644 index 0000000..30289c7 --- /dev/null +++ b/SOURCES/kvm-tests-acpi-whitelist-nvdimm-s-SSDT-and-FACP.slic-exp.patch @@ -0,0 +1,57 @@ +From a132a22e316121cf00ff733afb1ad1dc313e14b3 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 30 Mar 2022 14:52:34 -0400 +Subject: [PATCH 14/18] tests: acpi: whitelist nvdimm's SSDT and FACP.slic + expected blobs + +RH-Author: Jon Maloy +RH-MergeRequest: 141: acpi: fix QEMU crash when started with SLIC table +RH-Commit: [6/10] 3f3a929cde82f228da1e4bc66e4c869467c0289c (jmaloy/qemu-kvm) +RH-Bugzilla: 2062611 +RH-Acked-by: Igor Mammedov + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2062611 +Upstream: Merged + +commit d1e4a4654154925eddf0fc449fa9c92b806b9c8c +Author: Igor Mammedov +Date: Wed Jan 12 08:03:30 2022 -0500 + + tests: acpi: whitelist nvdimm's SSDT and FACP.slic expected blobs + + The next commit will revert OEM fields whitespace padding to + padding with '\0' as it was before [1]. That will change OEM + Table ID for: + * SSDT.*: where it was padded from 6 characters to 8 + * FACP.slic: where it was padded from 2 characters to 8 + after reverting whitespace padding, it will be replaced with + '\0' which effectively will shorten OEM table ID to 6 and 2 + characters. + + Whitelist affected tables before introducing the change. + + 1) 602b458201 ("acpi: Permit OEM ID and OEM table ID fields to be changed") + Signed-off-by: Igor Mammedov + Message-Id: <20220112130332.1648664-3-imammedo@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. Tsirkin + +(cherry picked from commit d1e4a4654154925eddf0fc449fa9c92b806b9c8c) +Signed-off-by: Jon Maloy +--- + tests/qtest/bios-tables-test-allowed-diff.h | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h +index dfb8523c8b..7faa8f53be 100644 +--- a/tests/qtest/bios-tables-test-allowed-diff.h ++++ b/tests/qtest/bios-tables-test-allowed-diff.h +@@ -1 +1,5 @@ + /* List of comma-separated changed AML files to ignore */ ++"tests/data/acpi/virt/SSDT.memhp", ++"tests/data/acpi/pc/SSDT.dimmpxm", ++"tests/data/acpi/q35/SSDT.dimmpxm", ++"tests/data/acpi/q35/FACP.slic", +-- +2.27.0 + diff --git a/SOURCES/kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch b/SOURCES/kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch new file mode 100644 index 0000000..4c04458 --- /dev/null +++ b/SOURCES/kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch @@ -0,0 +1,120 @@ +From 24af433728429578e586d179e27451b7d4a46cba Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Thu, 18 Nov 2021 12:57:33 +0100 +Subject: [PATCH 3/3] tests/qtest/fdc-test: Add a regression test for + CVE-2021-3507 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 194: hw/block/fdc: Prevent end-of-track overrun (CVE-2021-3507) +RH-Commit: [2/2] 31ec71276b521b06d4142fffa88a3fa4d1494d92 (jmaloy/qemu-kvm) +RH-Bugzilla: 1951521 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Thomas Huth +RH-Acked-by: Hanna Reitz + +Add the reproducer from https://gitlab.com/qemu-project/qemu/-/issues/339 + +Without the previous commit, when running 'make check-qtest-i386' +with QEMU configured with '--enable-sanitizers' we get: + + ==4028352==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x619000062a00 at pc 0x5626d03c491a bp 0x7ffdb4199410 sp 0x7ffdb4198bc0 + READ of size 786432 at 0x619000062a00 thread T0 + #0 0x5626d03c4919 in __asan_memcpy (qemu-system-i386+0x1e65919) + #1 0x5626d1c023cc in flatview_write_continue softmmu/physmem.c:2787:13 + #2 0x5626d1bf0c0f in flatview_write softmmu/physmem.c:2822:14 + #3 0x5626d1bf0798 in address_space_write softmmu/physmem.c:2914:18 + #4 0x5626d1bf0f37 in address_space_rw softmmu/physmem.c:2924:16 + #5 0x5626d1bf14c8 in cpu_physical_memory_rw softmmu/physmem.c:2933:5 + #6 0x5626d0bd5649 in cpu_physical_memory_write include/exec/cpu-common.h:82:5 + #7 0x5626d0bd0a07 in i8257_dma_write_memory hw/dma/i8257.c:452:9 + #8 0x5626d09f825d in fdctrl_transfer_handler hw/block/fdc.c:1616:13 + #9 0x5626d0a048b4 in fdctrl_start_transfer hw/block/fdc.c:1539:13 + #10 0x5626d09f4c3e in fdctrl_write_data hw/block/fdc.c:2266:13 + #11 0x5626d09f22f7 in fdctrl_write hw/block/fdc.c:829:9 + #12 0x5626d1c20bc5 in portio_write softmmu/ioport.c:207:17 + + 0x619000062a00 is located 0 bytes to the right of 512-byte region [0x619000062800,0x619000062a00) + allocated by thread T0 here: + #0 0x5626d03c66ec in posix_memalign (qemu-system-i386+0x1e676ec) + #1 0x5626d2b988d4 in qemu_try_memalign util/oslib-posix.c:210:11 + #2 0x5626d2b98b0c in qemu_memalign util/oslib-posix.c:226:27 + #3 0x5626d09fbaf0 in fdctrl_realize_common hw/block/fdc.c:2341:20 + #4 0x5626d0a150ed in isabus_fdc_realize hw/block/fdc-isa.c:113:5 + #5 0x5626d2367935 in device_set_realized hw/core/qdev.c:531:13 + + SUMMARY: AddressSanitizer: heap-buffer-overflow (qemu-system-i386+0x1e65919) in __asan_memcpy + Shadow bytes around the buggy address: + 0x0c32800044f0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa + 0x0c3280004500: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 0x0c3280004510: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 0x0c3280004520: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 0x0c3280004530: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + =>0x0c3280004540:[fa]fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa + 0x0c3280004550: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa + 0x0c3280004560: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa + 0x0c3280004570: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa + 0x0c3280004580: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa + 0x0c3280004590: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd + Shadow byte legend (one shadow byte represents 8 application bytes): + Addressable: 00 + Heap left redzone: fa + Freed heap region: fd + ==4028352==ABORTING + +[ kwolf: Added snapshot=on to prevent write file lock failure ] + +Reported-by: Alexander Bulekov +Signed-off-by: Philippe Mathieu-Daudé +Reviewed-by: Alexander Bulekov +Signed-off-by: Kevin Wolf +(cherry picked from commit 46609b90d9e3a6304def11038a76b58ff43f77bc) +Signed-off-by: Jon Maloy +--- + tests/qtest/fdc-test.c | 21 +++++++++++++++++++++ + 1 file changed, 21 insertions(+) + +diff --git a/tests/qtest/fdc-test.c b/tests/qtest/fdc-test.c +index 8f6eee84a4..6f5850354f 100644 +--- a/tests/qtest/fdc-test.c ++++ b/tests/qtest/fdc-test.c +@@ -583,6 +583,26 @@ static void test_cve_2021_20196(void) + qtest_quit(s); + } + ++static void test_cve_2021_3507(void) ++{ ++ QTestState *s; ++ ++ s = qtest_initf("-nographic -m 32M -nodefaults " ++ "-drive file=%s,format=raw,if=floppy,snapshot=on", ++ test_image); ++ qtest_outl(s, 0x9, 0x0a0206); ++ qtest_outw(s, 0x3f4, 0x1600); ++ qtest_outw(s, 0x3f4, 0x0000); ++ qtest_outw(s, 0x3f4, 0x0000); ++ qtest_outw(s, 0x3f4, 0x0000); ++ qtest_outw(s, 0x3f4, 0x0200); ++ qtest_outw(s, 0x3f4, 0x0200); ++ qtest_outw(s, 0x3f4, 0x0000); ++ qtest_outw(s, 0x3f4, 0x0000); ++ qtest_outw(s, 0x3f4, 0x0000); ++ qtest_quit(s); ++} ++ + int main(int argc, char **argv) + { + int fd; +@@ -614,6 +634,7 @@ int main(int argc, char **argv) + qtest_add_func("/fdc/read_no_dma_19", test_read_no_dma_19); + qtest_add_func("/fdc/fuzz-registers", fuzz_registers); + qtest_add_func("/fdc/fuzz/cve_2021_20196", test_cve_2021_20196); ++ qtest_add_func("/fdc/fuzz/cve_2021_3507", test_cve_2021_3507); + + ret = g_test_run(); + +-- +2.35.3 + diff --git a/SOURCES/kvm-ui-cursor-fix-integer-overflow-in-cursor_alloc-CVE-2.patch b/SOURCES/kvm-ui-cursor-fix-integer-overflow-in-cursor_alloc-CVE-2.patch new file mode 100644 index 0000000..813d0ca --- /dev/null +++ b/SOURCES/kvm-ui-cursor-fix-integer-overflow-in-cursor_alloc-CVE-2.patch @@ -0,0 +1,105 @@ +From 87a318f0b8758f940a316831a77b6ebebca42b19 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 4 May 2022 10:35:17 -0400 +Subject: [PATCH 3/3] ui/cursor: fix integer overflow in cursor_alloc + (CVE-2021-4206) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 180: ui/cursor: fix integer overflow in cursor_alloc (CVE-2021-4206) +RH-Commit: [1/1] 7ad711347bc6248dc5aefa45401ca74448dee5e5 (jmaloy/qemu-kvm) +RH-Bugzilla: 2040734 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Mauro Matteo Cascella +RH-Acked-by: Gerd Hoffmann + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2040734 +Upstream: Merged +CVE: CVE-2021-4206 + +commit fa892e9abb728e76afcf27323ab29c57fb0fe7aa +Author: Mauro Matteo Cascella +Date: Thu Apr 7 10:17:12 2022 +0200 + + ui/cursor: fix integer overflow in cursor_alloc (CVE-2021-4206) + + Prevent potential integer overflow by limiting 'width' and 'height' to + 512x512. Also change 'datasize' type to size_t. Refer to security + advisory https://starlabs.sg/advisories/22-4206/ for more information. + + Fixes: CVE-2021-4206 + Signed-off-by: Mauro Matteo Cascella + Reviewed-by: Marc-André Lureau + Message-Id: <20220407081712.345609-1-mcascell@redhat.com> + Signed-off-by: Gerd Hoffmann + +(cherry picked from commit fa892e9abb728e76afcf27323ab29c57fb0fe7aa) +Signed-off-by: Jon Maloy +--- + hw/display/qxl-render.c | 7 +++++++ + hw/display/vmware_vga.c | 2 ++ + ui/cursor.c | 8 +++++++- + 3 files changed, 16 insertions(+), 1 deletion(-) + +diff --git a/hw/display/qxl-render.c b/hw/display/qxl-render.c +index 237ed293ba..ca217004bf 100644 +--- a/hw/display/qxl-render.c ++++ b/hw/display/qxl-render.c +@@ -247,6 +247,13 @@ static QEMUCursor *qxl_cursor(PCIQXLDevice *qxl, QXLCursor *cursor, + size_t size; + + c = cursor_alloc(cursor->header.width, cursor->header.height); ++ ++ if (!c) { ++ qxl_set_guest_bug(qxl, "%s: cursor %ux%u alloc error", __func__, ++ cursor->header.width, cursor->header.height); ++ goto fail; ++ } ++ + c->hot_x = cursor->header.hot_spot_x; + c->hot_y = cursor->header.hot_spot_y; + switch (cursor->header.type) { +diff --git a/hw/display/vmware_vga.c b/hw/display/vmware_vga.c +index e2969a6c81..2b81d6122f 100644 +--- a/hw/display/vmware_vga.c ++++ b/hw/display/vmware_vga.c +@@ -509,6 +509,8 @@ static inline void vmsvga_cursor_define(struct vmsvga_state_s *s, + int i, pixels; + + qc = cursor_alloc(c->width, c->height); ++ assert(qc != NULL); ++ + qc->hot_x = c->hot_x; + qc->hot_y = c->hot_y; + switch (c->bpp) { +diff --git a/ui/cursor.c b/ui/cursor.c +index 1d62ddd4d0..835f0802f9 100644 +--- a/ui/cursor.c ++++ b/ui/cursor.c +@@ -46,6 +46,8 @@ static QEMUCursor *cursor_parse_xpm(const char *xpm[]) + + /* parse pixel data */ + c = cursor_alloc(width, height); ++ assert(c != NULL); ++ + for (pixel = 0, y = 0; y < height; y++, line++) { + for (x = 0; x < height; x++, pixel++) { + idx = xpm[line][x]; +@@ -91,7 +93,11 @@ QEMUCursor *cursor_builtin_left_ptr(void) + QEMUCursor *cursor_alloc(int width, int height) + { + QEMUCursor *c; +- int datasize = width * height * sizeof(uint32_t); ++ size_t datasize = width * height * sizeof(uint32_t); ++ ++ if (width > 512 || height > 512) { ++ return NULL; ++ } + + c = g_malloc0(sizeof(QEMUCursor) + datasize); + c->width = width; +-- +2.35.1 + diff --git a/SOURCES/kvm-ui-vnc-clipboard-fix-integer-underflow-in-vnc_client.patch b/SOURCES/kvm-ui-vnc-clipboard-fix-integer-underflow-in-vnc_client.patch new file mode 100644 index 0000000..9ade171 --- /dev/null +++ b/SOURCES/kvm-ui-vnc-clipboard-fix-integer-underflow-in-vnc_client.patch @@ -0,0 +1,80 @@ +From d3602e5afa1e90c5e33625fc528db7f96195bada Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Mon, 7 Nov 2022 19:59:46 -0500 +Subject: [PATCH 42/42] ui/vnc-clipboard: fix integer underflow in + vnc_client_cut_text_ext +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 227: ui/vnc-clipboard: fix integer underflow in vnc_client_cut_text_ext +RH-Bugzilla: 2129760 +RH-Acked-by: Mauro Matteo Cascella +RH-Acked-by: Marc-André Lureau +RH-Acked-by: Thomas Huth +RH-Acked-by: Gerd Hoffmann +RH-Commit: [1/1] ac19a6c0777e308061bcb6d1de5cc9beaa105a3a (jmaloy/qemu-kvm) + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2129760 +CVE: CVE-2022-3165 +Upstream: Merged + +commit d307040b18bfcb1393b910f1bae753d5c12a4dc7 +Author: Mauro Matteo Cascella +Date: Sun Sep 25 22:45:11 2022 +0200 + + ui/vnc-clipboard: fix integer underflow in vnc_client_cut_text_ext + + Extended ClientCutText messages start with a 4-byte header. If len < 4, + an integer underflow occurs in vnc_client_cut_text_ext. The result is + used to decompress data in a while loop in inflate_buffer, leading to + CPU consumption and denial of service. Prevent this by checking dlen in + protocol_client_msg. + + Fixes: CVE-2022-3165 + Fixes: 0bf41cab93e5 ("ui/vnc: clipboard support") + Reported-by: TangPeng + Signed-off-by: Mauro Matteo Cascella + Message-Id: <20220925204511.1103214-1-mcascell@redhat.com> + Signed-off-by: Gerd Hoffmann + +(cherry picked from commit d307040b18bfcb1393b910f1bae753d5c12a4dc7) +Signed-off-by: Jon Maloy +--- + ui/vnc.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +diff --git a/ui/vnc.c b/ui/vnc.c +index af02522e84..a14b6861be 100644 +--- a/ui/vnc.c ++++ b/ui/vnc.c +@@ -2442,8 +2442,8 @@ static int protocol_client_msg(VncState *vs, uint8_t *data, size_t len) + if (len == 1) { + return 8; + } ++ uint32_t dlen = abs(read_s32(data, 4)); + if (len == 8) { +- uint32_t dlen = abs(read_s32(data, 4)); + if (dlen > (1 << 20)) { + error_report("vnc: client_cut_text msg payload has %u bytes" + " which exceeds our limit of 1MB.", dlen); +@@ -2456,8 +2456,13 @@ static int protocol_client_msg(VncState *vs, uint8_t *data, size_t len) + } + + if (read_s32(data, 4) < 0) { +- vnc_client_cut_text_ext(vs, abs(read_s32(data, 4)), +- read_u32(data, 8), data + 12); ++ if (dlen < 4) { ++ error_report("vnc: malformed payload (header less than 4 bytes)" ++ " in extended clipboard pseudo-encoding."); ++ vnc_client_error(vs); ++ break; ++ } ++ vnc_client_cut_text_ext(vs, dlen, read_u32(data, 8), data + 12); + break; + } + vnc_client_cut_text(vs, read_u32(data, 4), data + 8); +-- +2.37.3 + diff --git a/SOURCES/kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch b/SOURCES/kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch new file mode 100644 index 0000000..ae80c9d --- /dev/null +++ b/SOURCES/kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch @@ -0,0 +1,90 @@ +From ffdf44cb8b4c743e7ab0ce46d62687d7178f9a49 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Fri, 18 Nov 2022 20:26:54 -0500 +Subject: [PATCH 3/3] ui/vnc.c: Fixed a deadlock bug. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 234: ui/vnc.c: Fixed a deadlock bug. +RH-Bugzilla: 2141896 +RH-Acked-by: Gerd Hoffmann +RH-Acked-by: Marc-André Lureau +RH-Acked-by: Dr. David Alan Gilbert +RH-Commit: [1/1] d3d1d28d7b621a8ae8a593a5bd5303fa7951c17c (jmaloy/qemu-kvm) + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2141896 +Upstream: Merged + +commit 1dbbe6f172810026c51dc84ed927a3cc23017949 +Author: Rao Lei +Date: Wed Jan 5 10:08:08 2022 +0800 + + ui/vnc.c: Fixed a deadlock bug. + + The GDB statck is as follows: + (gdb) bt + 0 __lll_lock_wait (futex=futex@entry=0x56211df20360, private=0) at lowlevellock.c:52 + 1 0x00007f263caf20a3 in __GI___pthread_mutex_lock (mutex=0x56211df20360) at ../nptl/pthread_mutex_lock.c:80 + 2 0x000056211a757364 in qemu_mutex_lock_impl (mutex=0x56211df20360, file=0x56211a804857 "../ui/vnc-jobs.h", line=60) + at ../util/qemu-thread-posix.c:80 + 3 0x000056211a0ef8c7 in vnc_lock_output (vs=0x56211df14200) at ../ui/vnc-jobs.h:60 + 4 0x000056211a0efcb7 in vnc_clipboard_send (vs=0x56211df14200, count=1, dwords=0x7ffdf1701338) at ../ui/vnc-clipboard.c:138 + 5 0x000056211a0f0129 in vnc_clipboard_notify (notifier=0x56211df244c8, data=0x56211dd1bbf0) at ../ui/vnc-clipboard.c:209 + 6 0x000056211a75dde8 in notifier_list_notify (list=0x56211afa17d0 , data=0x56211dd1bbf0) at ../util/notify.c:39 + 7 0x000056211a0bf0e6 in qemu_clipboard_update (info=0x56211dd1bbf0) at ../ui/clipboard.c:50 + 8 0x000056211a0bf05d in qemu_clipboard_peer_release (peer=0x56211df244c0, selection=QEMU_CLIPBOARD_SELECTION_CLIPBOARD) + at ../ui/clipboard.c:41 + 9 0x000056211a0bef9b in qemu_clipboard_peer_unregister (peer=0x56211df244c0) at ../ui/clipboard.c:19 + 10 0x000056211a0d45f3 in vnc_disconnect_finish (vs=0x56211df14200) at ../ui/vnc.c:1358 + 11 0x000056211a0d4c9d in vnc_client_read (vs=0x56211df14200) at ../ui/vnc.c:1611 + 12 0x000056211a0d4df8 in vnc_client_io (ioc=0x56211ce70690, condition=G_IO_IN, opaque=0x56211df14200) at ../ui/vnc.c:1649 + 13 0x000056211a5b976c in qio_channel_fd_source_dispatch + (source=0x56211ce50a00, callback=0x56211a0d4d71 , user_data=0x56211df14200) at ../io/channel-watch.c:84 + 14 0x00007f263ccede8e in g_main_context_dispatch () at /lib/x86_64-linux-gnu/libglib-2.0.so.0 + 15 0x000056211a77d4a1 in glib_pollfds_poll () at ../util/main-loop.c:232 + 16 0x000056211a77d51f in os_host_main_loop_wait (timeout=958545) at ../util/main-loop.c:255 + 17 0x000056211a77d630 in main_loop_wait (nonblocking=0) at ../util/main-loop.c:531 + 18 0x000056211a45bc8e in qemu_main_loop () at ../softmmu/runstate.c:726 + 19 0x000056211a0b45fa in main (argc=69, argv=0x7ffdf1701778, envp=0x7ffdf17019a8) at ../softmmu/main.c:50 + + From the call trace, we can see it is a deadlock bug. + vnc_disconnect_finish will acquire the output_mutex. + But, the output_mutex will be acquired again in vnc_clipboard_send. + Repeated locking will cause deadlock. So, I move + qemu_clipboard_peer_unregister() behind vnc_unlock_output(); + Fixes: 0bf41cab93e ("ui/vnc: clipboard support") + Signed-off-by: Lei Rao + Reviewed-by: Marc-André Lureau + Message-Id: <20220105020808.597325-1-lei.rao@intel.com> + Signed-off-by: Gerd Hoffmann + +(cherry picked from commit 1dbbe6f172810026c51dc84ed927a3cc23017949) +Signed-off-by: Jon Maloy +--- + ui/vnc.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/ui/vnc.c b/ui/vnc.c +index a14b6861be..76372ca1de 100644 +--- a/ui/vnc.c ++++ b/ui/vnc.c +@@ -1354,12 +1354,12 @@ void vnc_disconnect_finish(VncState *vs) + /* last client gone */ + vnc_update_server_surface(vs->vd); + } ++ vnc_unlock_output(vs); ++ + if (vs->cbpeer.update.notify) { + qemu_clipboard_peer_unregister(&vs->cbpeer); + } + +- vnc_unlock_output(vs); +- + qemu_mutex_destroy(&vs->output_mutex); + if (vs->bh != NULL) { + qemu_bh_delete(vs->bh); +-- +2.37.3 + diff --git a/SOURCES/kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch b/SOURCES/kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch new file mode 100644 index 0000000..c3dbcf9 --- /dev/null +++ b/SOURCES/kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch @@ -0,0 +1,56 @@ +From 9a62319b973ec33f9ccbeeae7f2f3b4b31db0c26 Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:15 -0700 +Subject: [PATCH 17/24] vhost-net: fix improper cleanup in vhost_net_start +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jason Wang +RH-MergeRequest: 187: Multiqueue fixes for vhost-vDPA +RH-Commit: [4/7] bebe7990a12e901fbb84e5e4b7a62744d75c9d9e +RH-Bugzilla: 2069946 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +vhost_net_start() missed a corresponding stop_one() upon error from +vhost_set_vring_enable(). While at it, make the error handling for +err_start more robust. No real issue was found due to this though. + +Signed-off-by: Si-Wei Liu +Acked-by: Jason Wang +Message-Id: <1651890498-24478-5-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 6f3910b5eee00b8cc959e94659c0d524c482a418) +Signed-off-by: Jason Wang +--- + hw/net/vhost_net.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c +index 30379d2ca4..d6d7c51f62 100644 +--- a/hw/net/vhost_net.c ++++ b/hw/net/vhost_net.c +@@ -381,6 +381,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, + r = vhost_set_vring_enable(peer, peer->vring_enable); + + if (r < 0) { ++ vhost_net_stop_one(get_vhost_net(peer), dev); + goto err_start; + } + } +@@ -390,7 +391,8 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, + + err_start: + while (--i >= 0) { +- peer = qemu_get_peer(ncs , i); ++ peer = qemu_get_peer(ncs, i < data_queue_pairs ? ++ i : n->max_queue_pairs); + vhost_net_stop_one(get_vhost_net(peer), dev); + } + e = k->set_guest_notifiers(qbus->parent, total_notifiers, false); +-- +2.35.3 + diff --git a/SOURCES/kvm-vhost-vdpa-backend-feature-should-set-only-once.patch b/SOURCES/kvm-vhost-vdpa-backend-feature-should-set-only-once.patch new file mode 100644 index 0000000..ef700fd --- /dev/null +++ b/SOURCES/kvm-vhost-vdpa-backend-feature-should-set-only-once.patch @@ -0,0 +1,58 @@ +From 01270bb66a4f7897a4fd06ba248eeeb41dc47571 Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:16 -0700 +Subject: [PATCH 18/24] vhost-vdpa: backend feature should set only once +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jason Wang +RH-MergeRequest: 187: Multiqueue fixes for vhost-vDPA +RH-Commit: [5/7] 0ab13542cf25c129dc403db95c7db12cdb012744 +RH-Bugzilla: 2069946 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +The vhost_vdpa_one_time_request() branch in +vhost_vdpa_set_backend_cap() incorrectly sends down +ioctls on vhost_dev with non-zero index. This may +end up with multiple VHOST_SET_BACKEND_FEATURES +ioctl calls sent down on the vhost-vdpa fd that is +shared between all these vhost_dev's. + +To fix it, send down ioctl only once via the first +vhost_dev with index 0. Toggle the polarity of the +vhost_vdpa_one_time_request() test should do the +trick. + +Fixes: 4d191cfdc7de ("vhost-vdpa: classify one time request") +Signed-off-by: Si-Wei Liu +Reviewed-by: Stefano Garzarella +Acked-by: Jason Wang +Acked-by: Eugenio Pérez +Message-Id: <1651890498-24478-6-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 6aee7e4233f6467f69531fcd352adff028f3f5ea) +Signed-off-by: Jason Wang +--- + hw/virtio/vhost-vdpa.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 78da48a333..a9be24776a 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -525,7 +525,7 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) + + features &= f; + +- if (vhost_vdpa_one_time_request(dev)) { ++ if (!vhost_vdpa_one_time_request(dev)) { + r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); + if (r) { + return -EFAULT; +-- +2.35.3 + diff --git a/SOURCES/kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch b/SOURCES/kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch new file mode 100644 index 0000000..bbc1c85 --- /dev/null +++ b/SOURCES/kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch @@ -0,0 +1,126 @@ +From c8cb46fa93a3ccad6f3e183045b270f28eed7b12 Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:17 -0700 +Subject: [PATCH 19/24] vhost-vdpa: change name and polarity for + vhost_vdpa_one_time_request() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jason Wang +RH-MergeRequest: 187: Multiqueue fixes for vhost-vDPA +RH-Commit: [6/7] 727ab0bb813f073e8cd2f7e68a9acda60c2cb33d +RH-Bugzilla: 2069946 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +The name vhost_vdpa_one_time_request() was confusing. No +matter whatever it returns, its typical occurrence had +always been at requests that only need to be applied once. +And the name didn't suggest what it actually checks for. +Change it to vhost_vdpa_first_dev() with polarity flipped +for better readibility of code. That way it is able to +reflect what the check is really about. + +This call is applicable to request which performs operation +only once, before queues are set up, and usually at the beginning +of the caller function. Document the requirement for it in place. + +Conflicts: hw/virtio/vhost-vdpa.c since we don't have shadow virtqueue +suport. + +Signed-off-by: Si-Wei Liu +Message-Id: <1651890498-24478-7-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Stefano Garzarella +Acked-by: Jason Wang +(cherry picked from commit d71b0609fc04217e28d17009f04d74b08be6f466) +Signed-off-by: Jason Wang +--- + hw/virtio/vhost-vdpa.c | 23 +++++++++++++++-------- + 1 file changed, 15 insertions(+), 8 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index a9be24776a..38bbcb3c18 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -319,11 +319,18 @@ static void vhost_vdpa_get_iova_range(struct vhost_vdpa *v) + v->iova_range.last); + } + +-static bool vhost_vdpa_one_time_request(struct vhost_dev *dev) ++/* ++ * The use of this function is for requests that only need to be ++ * applied once. Typically such request occurs at the beginning ++ * of operation, and before setting up queues. It should not be ++ * used for request that performs operation until all queues are ++ * set, which would need to check dev->vq_index_end instead. ++ */ ++static bool vhost_vdpa_first_dev(struct vhost_dev *dev) + { + struct vhost_vdpa *v = dev->opaque; + +- return v->index != 0; ++ return v->index == 0; + } + + static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) +@@ -351,7 +358,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) + + vhost_vdpa_get_iova_range(v); + +- if (vhost_vdpa_one_time_request(dev)) { ++ if (!vhost_vdpa_first_dev(dev)) { + return 0; + } + +@@ -468,7 +475,7 @@ static int vhost_vdpa_memslots_limit(struct vhost_dev *dev) + static int vhost_vdpa_set_mem_table(struct vhost_dev *dev, + struct vhost_memory *mem) + { +- if (vhost_vdpa_one_time_request(dev)) { ++ if (!vhost_vdpa_first_dev(dev)) { + return 0; + } + +@@ -496,7 +503,7 @@ static int vhost_vdpa_set_features(struct vhost_dev *dev, + { + int ret; + +- if (vhost_vdpa_one_time_request(dev)) { ++ if (!vhost_vdpa_first_dev(dev)) { + return 0; + } + +@@ -525,7 +532,7 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) + + features &= f; + +- if (!vhost_vdpa_one_time_request(dev)) { ++ if (vhost_vdpa_first_dev(dev)) { + r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); + if (r) { + return -EFAULT; +@@ -670,7 +677,7 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) + static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base, + struct vhost_log *log) + { +- if (vhost_vdpa_one_time_request(dev)) { ++ if (!vhost_vdpa_first_dev(dev)) { + return 0; + } + +@@ -739,7 +746,7 @@ static int vhost_vdpa_get_features(struct vhost_dev *dev, + + static int vhost_vdpa_set_owner(struct vhost_dev *dev) + { +- if (vhost_vdpa_one_time_request(dev)) { ++ if (!vhost_vdpa_first_dev(dev)) { + return 0; + } + +-- +2.35.3 + diff --git a/SOURCES/kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch b/SOURCES/kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch new file mode 100644 index 0000000..68c7d5f --- /dev/null +++ b/SOURCES/kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch @@ -0,0 +1,48 @@ +From c10ef6f79d4a4c8ccc5901b25234501c621e4e04 Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:14 -0700 +Subject: [PATCH 16/24] vhost-vdpa: fix improper cleanup in net_init_vhost_vdpa +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jason Wang +RH-MergeRequest: 187: Multiqueue fixes for vhost-vDPA +RH-Commit: [3/7] b3b658dcb4695defe1fdb199570fb984291e8e21 +RH-Bugzilla: 2069946 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +... such that no memory leaks on dangling net clients in case of +error. + +Signed-off-by: Si-Wei Liu +Acked-by: Jason Wang +Message-Id: <1651890498-24478-4-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 9bd055073e375c8a0d7ebce925e05d914d69fc7f) +Signed-off-by: Jason Wang +--- + net/vhost-vdpa.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 25dd6dd975..814f704687 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -306,7 +306,9 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + + err: + if (i) { +- qemu_del_net_client(ncs[0]); ++ for (i--; i >= 0; i--) { ++ qemu_del_net_client(ncs[i]); ++ } + } + qemu_close(vdpa_device_fd); + g_free(ncs); +-- +2.35.3 + diff --git a/SOURCES/kvm-vhost-vsock-detach-the-virqueue-element-in-case-of-e.patch b/SOURCES/kvm-vhost-vsock-detach-the-virqueue-element-in-case-of-e.patch new file mode 100644 index 0000000..50013c9 --- /dev/null +++ b/SOURCES/kvm-vhost-vsock-detach-the-virqueue-element-in-case-of-e.patch @@ -0,0 +1,76 @@ +From ff4e95d8652dadfed09913c7968514a2a7f36591 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Thu, 14 Apr 2022 10:38:26 -0400 +Subject: [PATCH 2/2] vhost-vsock: detach the virqueue element in case of error + +RH-Author: Jon Maloy +RH-MergeRequest: 153: vhost-vsock: detach the virqueue element in case of error +RH-Commit: [1/1] 024dbc9073fddbe89a8ae8eb201f5bc674bffb64 (jmaloy/qemu-kvm) +RH-Bugzilla: 2063262 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2063262 +Upstream: Merged +CVE: CVE-2022-26354 + +commit 8d1b247f3748ac4078524130c6d7ae42b6140aaf +Author: Stefano Garzarella +Date: Mon Feb 28 10:50:58 2022 +0100 + + vhost-vsock: detach the virqueue element in case of error + + In vhost_vsock_common_send_transport_reset(), if an element popped from + the virtqueue is invalid, we should call virtqueue_detach_element() to + detach it from the virtqueue before freeing its memory. + + Fixes: fc0b9b0e1c ("vhost-vsock: add virtio sockets device") + Fixes: CVE-2022-26354 + Cc: qemu-stable@nongnu.org + Reported-by: VictorV + Signed-off-by: Stefano Garzarella + Message-Id: <20220228095058.27899-1-sgarzare@redhat.com> + Reviewed-by: Stefan Hajnoczi + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. Tsirkin + +(cherry picked from commit 8d1b247f3748ac4078524130c6d7ae42b6140aaf) +Signed-off-by: Jon Maloy +--- + hw/virtio/vhost-vsock-common.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c +index 3f3771274e..ed706681ac 100644 +--- a/hw/virtio/vhost-vsock-common.c ++++ b/hw/virtio/vhost-vsock-common.c +@@ -153,19 +153,23 @@ static void vhost_vsock_common_send_transport_reset(VHostVSockCommon *vvc) + if (elem->out_num) { + error_report("invalid vhost-vsock event virtqueue element with " + "out buffers"); +- goto out; ++ goto err; + } + + if (iov_from_buf(elem->in_sg, elem->in_num, 0, + &event, sizeof(event)) != sizeof(event)) { + error_report("vhost-vsock event virtqueue element is too short"); +- goto out; ++ goto err; + } + + virtqueue_push(vq, elem, sizeof(event)); + virtio_notify(VIRTIO_DEVICE(vvc), vq); + +-out: ++ g_free(elem); ++ return; ++ ++err: ++ virtqueue_detach_element(vq, elem, 0); + g_free(elem); + } + +-- +2.27.0 + diff --git a/SOURCES/kvm-virtio-fix-feature-negotiation-for-ACCESS_PLATFORM.patch b/SOURCES/kvm-virtio-fix-feature-negotiation-for-ACCESS_PLATFORM.patch new file mode 100644 index 0000000..9af491f --- /dev/null +++ b/SOURCES/kvm-virtio-fix-feature-negotiation-for-ACCESS_PLATFORM.patch @@ -0,0 +1,102 @@ +From 56e2aef97e750ffdc572dcecbfc31314728d37a9 Mon Sep 17 00:00:00 2001 +From: Halil Pasic +Date: Mon, 7 Mar 2022 12:29:39 +0100 +Subject: [PATCH 2/2] virtio: fix feature negotiation for ACCESS_PLATFORM +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 224: virtiofs on s390 secure execution +RH-Bugzilla: 2116302 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Cédric Le Goater +RH-Commit: [2/2] 264d3bdbbde985f16ed6f5a1786547c25fb8cc04 + +Unlike most virtio features ACCESS_PLATFORM is considered mandatory by +QEMU, i.e. the driver must accept it if offered by the device. The +virtio specification says that the driver SHOULD accept the +ACCESS_PLATFORM feature if offered, and that the device MAY fail to +operate if ACCESS_PLATFORM was offered but not negotiated. + +While a SHOULD ain't exactly a MUST, we are certainly allowed to fail +the device when the driver fences ACCESS_PLATFORM. With commit +2943b53f68 ("virtio: force VIRTIO_F_IOMMU_PLATFORM") we already made the +decision to do so whenever the get_dma_as() callback is implemented (by +the bus), which in practice means for the entirety of virtio-pci. + +That means, if the device needs to translate I/O addresses, then +ACCESS_PLATFORM is mandatory. The aforementioned commit tells us in the +commit message that this is for security reasons. More precisely if we +were to allow a less then trusted driver (e.g. an user-space driver, or +a nested guest) to make the device bypass the IOMMU by not negotiating +ACCESS_PLATFORM, then the guest kernel would have no ability to +control/police (by programming the IOMMU) what pieces of guest memory +the driver may manipulate using the device. Which would break security +assumptions within the guest. + +If ACCESS_PLATFORM is offered not because we want the device to utilize +an IOMMU and do address translation, but because the device does not +have access to the entire guest RAM, and needs the driver to grant +access to the bits it needs access to (e.g. confidential guest support), +we still require the guest to have the corresponding logic and to accept +ACCESS_PLATFORM. If the driver does not accept ACCESS_PLATFORM, then +things are bound to go wrong, and we may see failures much less graceful +than failing the device because the driver didn't negotiate +ACCESS_PLATFORM. + +So let us make ACCESS_PLATFORM mandatory for the driver regardless +of whether the get_dma_as() callback is implemented or not. + +Signed-off-by: Halil Pasic +Fixes: 2943b53f68 ("virtio: force VIRTIO_F_IOMMU_PLATFORM") + +Message-Id: <20220307112939.2780117-1-pasic@linux.ibm.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Cornelia Huck +(cherry picked from commit 06134e2bc35dc21543d4cbcf31f858c03d383442) +--- + hw/virtio/virtio-bus.c | 22 ++++++++++++++-------- + 1 file changed, 14 insertions(+), 8 deletions(-) + +diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c +index 0f69d1c742..d7ec023adf 100644 +--- a/hw/virtio/virtio-bus.c ++++ b/hw/virtio/virtio-bus.c +@@ -78,17 +78,23 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp) + return; + } + +- vdev_has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); +- if (klass->get_dma_as != NULL && has_iommu) { ++ vdev->dma_as = &address_space_memory; ++ if (has_iommu) { ++ vdev_has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); ++ /* ++ * Present IOMMU_PLATFORM to the driver iff iommu_plattform=on and ++ * device operational. If the driver does not accept IOMMU_PLATFORM ++ * we fail the device. ++ */ + virtio_add_feature(&vdev->host_features, VIRTIO_F_IOMMU_PLATFORM); +- vdev->dma_as = klass->get_dma_as(qbus->parent); +- if (!vdev_has_iommu && vdev->dma_as != &address_space_memory) { +- error_setg(errp, ++ if (klass->get_dma_as) { ++ vdev->dma_as = klass->get_dma_as(qbus->parent); ++ if (!vdev_has_iommu && vdev->dma_as != &address_space_memory) { ++ error_setg(errp, + "iommu_platform=true is not supported by the device"); +- return; ++ return; ++ } + } +- } else { +- vdev->dma_as = &address_space_memory; + } + } + +-- +2.37.3 + diff --git a/SOURCES/kvm-virtio-fix-the-condition-for-iommu_platform-not-supp.patch b/SOURCES/kvm-virtio-fix-the-condition-for-iommu_platform-not-supp.patch new file mode 100644 index 0000000..b5632e1 --- /dev/null +++ b/SOURCES/kvm-virtio-fix-the-condition-for-iommu_platform-not-supp.patch @@ -0,0 +1,115 @@ +From c731ffdf9faee74e9522dff06e61cda817902088 Mon Sep 17 00:00:00 2001 +From: Halil Pasic +Date: Mon, 7 Feb 2022 12:28:57 +0100 +Subject: [PATCH 1/2] virtio: fix the condition for iommu_platform not + supported +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 224: virtiofs on s390 secure execution +RH-Bugzilla: 2116302 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Cédric Le Goater +RH-Commit: [1/2] d7edc7e3905a04644c9ff44b0d36122c72068e08 + +The commit 04ceb61a40 ("virtio: Fail if iommu_platform is requested, but +unsupported") claims to fail the device hotplug when iommu_platform +is requested, but not supported by the (vhost) device. On the first +glance the condition for detecting that situation looks perfect, but +because a certain peculiarity of virtio_platform it ain't. + +In fact the aforementioned commit introduces a regression. It breaks +virtio-fs support for Secure Execution, and most likely also for AMD SEV +or any other confidential guest scenario that relies encrypted guest +memory. The same also applies to any other vhost device that does not +support _F_ACCESS_PLATFORM. + +The peculiarity is that iommu_platform and _F_ACCESS_PLATFORM collates +"device can not access all of the guest RAM" and "iova != gpa, thus +device needs to translate iova". + +Confidential guest technologies currently rely on the device/hypervisor +offering _F_ACCESS_PLATFORM, so that, after the feature has been +negotiated, the guest grants access to the portions of memory the +device needs to see. So in for confidential guests, generally, +_F_ACCESS_PLATFORM is about the restricted access to memory, but not +about the addresses used being something else than guest physical +addresses. + +This is the very reason for which commit f7ef7e6e3b ("vhost: correctly +turn on VIRTIO_F_IOMMU_PLATFORM") fences _F_ACCESS_PLATFORM from the +vhost device that does not need it, because on the vhost interface it +only means "I/O address translation is needed". + +This patch takes inspiration from f7ef7e6e3b ("vhost: correctly turn on +VIRTIO_F_IOMMU_PLATFORM"), and uses the same condition for detecting the +situation when _F_ACCESS_PLATFORM is requested, but no I/O translation +by the device, and thus no device capability is needed. In this +situation claiming that the device does not support iommu_plattform=on +is counter-productive. So let us stop doing that! + +Signed-off-by: Halil Pasic +Reported-by: Jakob Naucke +Fixes: 04ceb61a40 ("virtio: Fail if iommu_platform is requested, but +unsupported") +Acked-by: Cornelia Huck +Reviewed-by: Daniel Henrique Barboza +Tested-by: Daniel Henrique Barboza +Cc: Kevin Wolf +Cc: qemu-stable@nongnu.org + +Message-Id: <20220207112857.607829-1-pasic@linux.ibm.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Acked-by: Jason Wang +(cherry picked from commit e65902a913bf31ba79a83a3bd3621108b85cf645) +--- + hw/virtio/virtio-bus.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c +index d23db98c56..0f69d1c742 100644 +--- a/hw/virtio/virtio-bus.c ++++ b/hw/virtio/virtio-bus.c +@@ -48,6 +48,7 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp) + VirtioBusClass *klass = VIRTIO_BUS_GET_CLASS(bus); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); + bool has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); ++ bool vdev_has_iommu; + Error *local_err = NULL; + + DPRINTF("%s: plug device.\n", qbus->name); +@@ -69,11 +70,6 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp) + return; + } + +- if (has_iommu && !virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) { +- error_setg(errp, "iommu_platform=true is not supported by the device"); +- return; +- } +- + if (klass->device_plugged != NULL) { + klass->device_plugged(qbus->parent, &local_err); + } +@@ -82,9 +78,15 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp) + return; + } + ++ vdev_has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); + if (klass->get_dma_as != NULL && has_iommu) { + virtio_add_feature(&vdev->host_features, VIRTIO_F_IOMMU_PLATFORM); + vdev->dma_as = klass->get_dma_as(qbus->parent); ++ if (!vdev_has_iommu && vdev->dma_as != &address_space_memory) { ++ error_setg(errp, ++ "iommu_platform=true is not supported by the device"); ++ return; ++ } + } else { + vdev->dma_as = &address_space_memory; + } +-- +2.37.3 + diff --git a/SOURCES/kvm-virtio-gpu-do-not-byteswap-padding.patch b/SOURCES/kvm-virtio-gpu-do-not-byteswap-padding.patch new file mode 100644 index 0000000..dc723bd --- /dev/null +++ b/SOURCES/kvm-virtio-gpu-do-not-byteswap-padding.patch @@ -0,0 +1,48 @@ +From e118a451dc1ed68f1371a5d8e042120542be6d31 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Thu, 11 Nov 2021 12:06:00 +0100 +Subject: [PATCH 01/24] virtio-gpu: do not byteswap padding +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paul Lai +RH-MergeRequest: 176: Enable KVM AMX support +RH-Commit: [1/13] 12714f53820b7632e7fc0a8a3bf8eb4a64f41750 +RH-Bugzilla: 1916415 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Igor Mammedov +RH-Acked-by: Paolo Bonzini + +In Linux 5.16, the padding of struct virtio_gpu_ctrl_hdr has become a +single-byte field followed by a uint8_t[3] array of padding bytes, +and virtio_gpu_ctrl_hdr_bswap does not compile anymore. + +Signed-off-by: Paolo Bonzini +Acked-by: Cornelia Huck +Reviewed-by: Alex Bennée +Reviewed-by: Michael S. Tsirkin +Reviewed-by: Philippe Mathieu-Daudé +Message-Id: <20211111110604.207376-2-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit a4663f1a5506626175fc64c86e52135587c36872) +Signed-off-by: Paul Lai +--- + include/hw/virtio/virtio-gpu-bswap.h | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/include/hw/virtio/virtio-gpu-bswap.h b/include/hw/virtio/virtio-gpu-bswap.h +index e2bee8f595..5faac0d8d5 100644 +--- a/include/hw/virtio/virtio-gpu-bswap.h ++++ b/include/hw/virtio/virtio-gpu-bswap.h +@@ -24,7 +24,6 @@ virtio_gpu_ctrl_hdr_bswap(struct virtio_gpu_ctrl_hdr *hdr) + le32_to_cpus(&hdr->flags); + le64_to_cpus(&hdr->fence_id); + le32_to_cpus(&hdr->ctx_id); +- le32_to_cpus(&hdr->padding); + } + + static inline void +-- +2.35.3 + diff --git a/SOURCES/kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch b/SOURCES/kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch new file mode 100644 index 0000000..f23f38c --- /dev/null +++ b/SOURCES/kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch @@ -0,0 +1,143 @@ +From 39cdd781c885b0695f8830a33420caa9e9b0bd50 Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:13 -0700 +Subject: [PATCH 15/24] virtio-net: align ctrl_vq index for non-mq guest for + vhost_vdpa +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jason Wang +RH-MergeRequest: 187: Multiqueue fixes for vhost-vDPA +RH-Commit: [2/7] 2647cf59f3dd1e3d8af2d12c01e06ae26fbc1dc2 +RH-Bugzilla: 2069946 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +With MQ enabled vdpa device and non-MQ supporting guest e.g. +booting vdpa with mq=on over OVMF of single vqp, below assert +failure is seen: + +../hw/virtio/vhost-vdpa.c:560: vhost_vdpa_get_vq_index: Assertion `idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs' failed. + +0 0x00007f8ce3ff3387 in raise () at /lib64/libc.so.6 +1 0x00007f8ce3ff4a78 in abort () at /lib64/libc.so.6 +2 0x00007f8ce3fec1a6 in __assert_fail_base () at /lib64/libc.so.6 +3 0x00007f8ce3fec252 in () at /lib64/libc.so.6 +4 0x0000558f52d79421 in vhost_vdpa_get_vq_index (dev=, idx=) at ../hw/virtio/vhost-vdpa.c:563 +5 0x0000558f52d79421 in vhost_vdpa_get_vq_index (dev=, idx=) at ../hw/virtio/vhost-vdpa.c:558 +6 0x0000558f52d7329a in vhost_virtqueue_mask (hdev=0x558f55c01800, vdev=0x558f568f91f0, n=2, mask=) at ../hw/virtio/vhost.c:1557 +7 0x0000558f52c6b89a in virtio_pci_set_guest_notifier (d=d@entry=0x558f568f0f60, n=n@entry=2, assign=assign@entry=true, with_irqfd=with_irqfd@entry=false) + at ../hw/virtio/virtio-pci.c:974 +8 0x0000558f52c6c0d8 in virtio_pci_set_guest_notifiers (d=0x558f568f0f60, nvqs=3, assign=true) at ../hw/virtio/virtio-pci.c:1019 +9 0x0000558f52bf091d in vhost_net_start (dev=dev@entry=0x558f568f91f0, ncs=0x558f56937cd0, data_queue_pairs=data_queue_pairs@entry=1, cvq=cvq@entry=1) + at ../hw/net/vhost_net.c:361 +10 0x0000558f52d4e5e7 in virtio_net_set_status (status=, n=0x558f568f91f0) at ../hw/net/virtio-net.c:289 +11 0x0000558f52d4e5e7 in virtio_net_set_status (vdev=0x558f568f91f0, status=15 '\017') at ../hw/net/virtio-net.c:370 +12 0x0000558f52d6c4b2 in virtio_set_status (vdev=vdev@entry=0x558f568f91f0, val=val@entry=15 '\017') at ../hw/virtio/virtio.c:1945 +13 0x0000558f52c69eff in virtio_pci_common_write (opaque=0x558f568f0f60, addr=, val=, size=) at ../hw/virtio/virtio-pci.c:1292 +14 0x0000558f52d15d6e in memory_region_write_accessor (mr=0x558f568f19d0, addr=20, value=, size=1, shift=, mask=, attrs=...) + at ../softmmu/memory.c:492 +15 0x0000558f52d127de in access_with_adjusted_size (addr=addr@entry=20, value=value@entry=0x7f8cdbffe748, size=size@entry=1, access_size_min=, access_size_max=, access_fn=0x558f52d15cf0 , mr=0x558f568f19d0, attrs=...) at ../softmmu/memory.c:554 +16 0x0000558f52d157ef in memory_region_dispatch_write (mr=mr@entry=0x558f568f19d0, addr=20, data=, op=, attrs=attrs@entry=...) + at ../softmmu/memory.c:1504 +17 0x0000558f52d078e7 in flatview_write_continue (fv=fv@entry=0x7f8accbc3b90, addr=addr@entry=103079215124, attrs=..., ptr=ptr@entry=0x7f8ce6300028, len=len@entry=1, addr1=, l=, mr=0x558f568f19d0) at /home/opc/qemu-upstream/include/qemu/host-utils.h:165 +18 0x0000558f52d07b06 in flatview_write (fv=0x7f8accbc3b90, addr=103079215124, attrs=..., buf=0x7f8ce6300028, len=1) at ../softmmu/physmem.c:2822 +19 0x0000558f52d0b36b in address_space_write (as=, addr=, attrs=..., buf=buf@entry=0x7f8ce6300028, len=) + at ../softmmu/physmem.c:2914 +20 0x0000558f52d0b3da in address_space_rw (as=, addr=, attrs=..., + attrs@entry=..., buf=buf@entry=0x7f8ce6300028, len=, is_write=) at ../softmmu/physmem.c:2924 +21 0x0000558f52dced09 in kvm_cpu_exec (cpu=cpu@entry=0x558f55c2da60) at ../accel/kvm/kvm-all.c:2903 +22 0x0000558f52dcfabd in kvm_vcpu_thread_fn (arg=arg@entry=0x558f55c2da60) at ../accel/kvm/kvm-accel-ops.c:49 +23 0x0000558f52f9f04a in qemu_thread_start (args=) at ../util/qemu-thread-posix.c:556 +24 0x00007f8ce4392ea5 in start_thread () at /lib64/libpthread.so.0 +25 0x00007f8ce40bb9fd in clone () at /lib64/libc.so.6 + +The cause for the assert failure is due to that the vhost_dev index +for the ctrl vq was not aligned with actual one in use by the guest. +Upon multiqueue feature negotiation in virtio_net_set_multiqueue(), +if guest doesn't support multiqueue, the guest vq layout would shrink +to a single queue pair, consisting of 3 vqs in total (rx, tx and ctrl). +This results in ctrl_vq taking a different vhost_dev group index than +the default. We can map vq to the correct vhost_dev group by checking +if MQ is supported by guest and successfully negotiated. Since the +MQ feature is only present along with CTRL_VQ, we ensure the index +2 is only meant for the control vq while MQ is not supported by guest. + +Fixes: 22288fe ("virtio-net: vhost control virtqueue support") +Suggested-by: Jason Wang +Signed-off-by: Si-Wei Liu +Acked-by: Jason Wang +Message-Id: <1651890498-24478-3-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 68b0a6395f36a8f48f56f46d05f30be2067598b0) +Signed-off-by: Jason Wang +--- + hw/net/virtio-net.c | 33 +++++++++++++++++++++++++++++++-- + 1 file changed, 31 insertions(+), 2 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index ec045c3f41..f118379bb4 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -14,6 +14,7 @@ + #include "qemu/osdep.h" + #include "qemu/atomic.h" + #include "qemu/iov.h" ++#include "qemu/log.h" + #include "qemu/main-loop.h" + #include "qemu/module.h" + #include "hw/virtio/virtio.h" +@@ -3163,8 +3164,22 @@ static NetClientInfo net_virtio_info = { + static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) + { + VirtIONet *n = VIRTIO_NET(vdev); +- NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx)); ++ NetClientState *nc; + assert(n->vhost_started); ++ if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { ++ /* Must guard against invalid features and bogus queue index ++ * from being set by malicious guest, or penetrated through ++ * buggy migration stream. ++ */ ++ if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { ++ qemu_log_mask(LOG_GUEST_ERROR, ++ "%s: bogus vq index ignored\n", __func__); ++ return false; ++ } ++ nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); ++ } else { ++ nc = qemu_get_subqueue(n->nic, vq2q(idx)); ++ } + return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); + } + +@@ -3172,8 +3187,22 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, + bool mask) + { + VirtIONet *n = VIRTIO_NET(vdev); +- NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx)); ++ NetClientState *nc; + assert(n->vhost_started); ++ if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { ++ /* Must guard against invalid features and bogus queue index ++ * from being set by malicious guest, or penetrated through ++ * buggy migration stream. ++ */ ++ if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { ++ qemu_log_mask(LOG_GUEST_ERROR, ++ "%s: bogus vq index ignored\n", __func__); ++ return; ++ } ++ nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); ++ } else { ++ nc = qemu_get_subqueue(n->nic, vq2q(idx)); ++ } + vhost_net_virtqueue_mask(get_vhost_net(nc->peer), + vdev, idx, mask); + } +-- +2.35.3 + diff --git a/SOURCES/kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch b/SOURCES/kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch new file mode 100644 index 0000000..25c1aa9 --- /dev/null +++ b/SOURCES/kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch @@ -0,0 +1,109 @@ +From c9b51d54530c526f14ca0f3b9fc0bfa0b60d45ee Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:18 -0700 +Subject: [PATCH 20/24] virtio-net: don't handle mq request in userspace + handler for vhost-vdpa +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jason Wang +RH-MergeRequest: 187: Multiqueue fixes for vhost-vDPA +RH-Commit: [7/7] 0e6684d12e42752deae8f5ebc56456fed174e0ed +RH-Bugzilla: 2069946 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +virtio_queue_host_notifier_read() tends to read pending event +left behind on ioeventfd in the vhost_net_stop() path, and +attempts to handle outstanding kicks from userspace vq handler. +However, in the ctrl_vq handler, virtio_net_handle_mq() has a +recursive call into virtio_net_set_status(), which may lead to +segmentation fault as shown in below stack trace: + +0 0x000055f800df1780 in qdev_get_parent_bus (dev=0x0) at ../hw/core/qdev.c:376 +1 0x000055f800c68ad8 in virtio_bus_device_iommu_enabled (vdev=vdev@entry=0x0) at ../hw/virtio/virtio-bus.c:331 +2 0x000055f800d70d7f in vhost_memory_unmap (dev=) at ../hw/virtio/vhost.c:318 +3 0x000055f800d70d7f in vhost_memory_unmap (dev=, buffer=0x7fc19bec5240, len=2052, is_write=1, access_len=2052) at ../hw/virtio/vhost.c:336 +4 0x000055f800d71867 in vhost_virtqueue_stop (dev=dev@entry=0x55f8037ccc30, vdev=vdev@entry=0x55f8044ec590, vq=0x55f8037cceb0, idx=0) at ../hw/virtio/vhost.c:1241 +5 0x000055f800d7406c in vhost_dev_stop (hdev=hdev@entry=0x55f8037ccc30, vdev=vdev@entry=0x55f8044ec590) at ../hw/virtio/vhost.c:1839 +6 0x000055f800bf00a7 in vhost_net_stop_one (net=0x55f8037ccc30, dev=0x55f8044ec590) at ../hw/net/vhost_net.c:315 +7 0x000055f800bf0678 in vhost_net_stop (dev=dev@entry=0x55f8044ec590, ncs=0x55f80452bae0, data_queue_pairs=data_queue_pairs@entry=7, cvq=cvq@entry=1) + at ../hw/net/vhost_net.c:423 +8 0x000055f800d4e628 in virtio_net_set_status (status=, n=0x55f8044ec590) at ../hw/net/virtio-net.c:296 +9 0x000055f800d4e628 in virtio_net_set_status (vdev=vdev@entry=0x55f8044ec590, status=15 '\017') at ../hw/net/virtio-net.c:370 +10 0x000055f800d534d8 in virtio_net_handle_ctrl (iov_cnt=, iov=, cmd=0 '\000', n=0x55f8044ec590) at ../hw/net/virtio-net.c:1408 +11 0x000055f800d534d8 in virtio_net_handle_ctrl (vdev=0x55f8044ec590, vq=0x7fc1a7e888d0) at ../hw/net/virtio-net.c:1452 +12 0x000055f800d69f37 in virtio_queue_host_notifier_read (vq=0x7fc1a7e888d0) at ../hw/virtio/virtio.c:2331 +13 0x000055f800d69f37 in virtio_queue_host_notifier_read (n=n@entry=0x7fc1a7e8894c) at ../hw/virtio/virtio.c:3575 +14 0x000055f800c688e6 in virtio_bus_cleanup_host_notifier (bus=, n=n@entry=14) at ../hw/virtio/virtio-bus.c:312 +15 0x000055f800d73106 in vhost_dev_disable_notifiers (hdev=hdev@entry=0x55f8035b51b0, vdev=vdev@entry=0x55f8044ec590) + at ../../../include/hw/virtio/virtio-bus.h:35 +16 0x000055f800bf00b2 in vhost_net_stop_one (net=0x55f8035b51b0, dev=0x55f8044ec590) at ../hw/net/vhost_net.c:316 +17 0x000055f800bf0678 in vhost_net_stop (dev=dev@entry=0x55f8044ec590, ncs=0x55f80452bae0, data_queue_pairs=data_queue_pairs@entry=7, cvq=cvq@entry=1) + at ../hw/net/vhost_net.c:423 +18 0x000055f800d4e628 in virtio_net_set_status (status=, n=0x55f8044ec590) at ../hw/net/virtio-net.c:296 +19 0x000055f800d4e628 in virtio_net_set_status (vdev=0x55f8044ec590, status=15 '\017') at ../hw/net/virtio-net.c:370 +20 0x000055f800d6c4b2 in virtio_set_status (vdev=0x55f8044ec590, val=) at ../hw/virtio/virtio.c:1945 +21 0x000055f800d11d9d in vm_state_notify (running=running@entry=false, state=state@entry=RUN_STATE_SHUTDOWN) at ../softmmu/runstate.c:333 +22 0x000055f800d04e7a in do_vm_stop (state=state@entry=RUN_STATE_SHUTDOWN, send_stop=send_stop@entry=false) at ../softmmu/cpus.c:262 +23 0x000055f800d04e99 in vm_shutdown () at ../softmmu/cpus.c:280 +24 0x000055f800d126af in qemu_cleanup () at ../softmmu/runstate.c:812 +25 0x000055f800ad5b13 in main (argc=, argv=, envp=) at ../softmmu/main.c:51 + +For now, temporarily disable handling MQ request from the ctrl_vq +userspace hanlder to avoid the recursive virtio_net_set_status() +call. Some rework is needed to allow changing the number of +queues without going through a full virtio_net_set_status cycle, +particularly for vhost-vdpa backend. + +This patch will need to be reverted as soon as future patches of +having the change of #queues handled in userspace is merged. + +Fixes: 402378407db ("vhost-vdpa: multiqueue support") +Signed-off-by: Si-Wei Liu +Acked-by: Jason Wang +Message-Id: <1651890498-24478-8-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 2a7888cc3aa31faee839fa5dddad354ff8941f4c) +Signed-off-by: Jason Wang +--- + hw/net/virtio-net.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index f118379bb4..7e172ef829 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -1373,6 +1373,7 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, + { + VirtIODevice *vdev = VIRTIO_DEVICE(n); + uint16_t queue_pairs; ++ NetClientState *nc = qemu_get_queue(n->nic); + + virtio_net_disable_rss(n); + if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) { +@@ -1404,6 +1405,18 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, + return VIRTIO_NET_ERR; + } + ++ /* Avoid changing the number of queue_pairs for vdpa device in ++ * userspace handler. A future fix is needed to handle the mq ++ * change in userspace handler with vhost-vdpa. Let's disable ++ * the mq handling from userspace for now and only allow get ++ * done through the kernel. Ripples may be seen when falling ++ * back to userspace, but without doing it qemu process would ++ * crash on a recursive entry to virtio_net_set_status(). ++ */ ++ if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { ++ return VIRTIO_NET_ERR; ++ } ++ + n->curr_queue_pairs = queue_pairs; + /* stop the backend before changing the number of queue_pairs to avoid handling a + * disabled queue */ +-- +2.35.3 + diff --git a/SOURCES/kvm-virtio-net-fix-map-leaking-on-error-during-receive.patch b/SOURCES/kvm-virtio-net-fix-map-leaking-on-error-during-receive.patch new file mode 100644 index 0000000..4855e59 --- /dev/null +++ b/SOURCES/kvm-virtio-net-fix-map-leaking-on-error-during-receive.patch @@ -0,0 +1,60 @@ +From 10b3a7b56dc9b4c88e503c36c1b13d80bcb7b066 Mon Sep 17 00:00:00 2001 +From: Jason Wang +Date: Tue, 8 Mar 2022 10:42:51 +0800 +Subject: [PATCH 2/6] virtio-net: fix map leaking on error during receive + +RH-Author: Jon Maloy +RH-MergeRequest: 154: virtio-net: fix map leaking on error during receive +RH-Commit: [1/1] 7178b0cd5ce7c89fe476f2e199c9212c8b89327a (jmaloy/qemu-kvm) +RH-Bugzilla: 2063206 +RH-Acked-by: Jason Wang +RH-Acked-by: Kevin Wolf +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2063206 +Upstream: Merged +CVE: CVE-2022-26353 + +commit abe300d9d894f7138e1af7c8e9c88c04bfe98b37 +Author: Jason Wang +Date: Tue Mar 8 10:42:51 2022 +0800 + + virtio-net: fix map leaking on error during receive + + Commit bedd7e93d0196 ("virtio-net: fix use after unmap/free for sg") + tries to fix the use after free of the sg by caching the virtqueue + elements in an array and unmap them at once after receiving the + packets, But it forgot to unmap the cached elements on error which + will lead to leaking of mapping and other unexpected results. + + Fixing this by detaching the cached elements on error. This addresses + CVE-2022-26353. + + Reported-by: Victor Tom + Cc: qemu-stable@nongnu.org + Fixes: CVE-2022-26353 + Fixes: bedd7e93d0196 ("virtio-net: fix use after unmap/free for sg") + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Jason Wang + +(cherry picked from commit abe300d9d894f7138e1af7c8e9c88c04bfe98b37) +Signed-off-by: Jon Maloy +--- + hw/net/virtio-net.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index f2014d5ea0..e1f4748831 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -1862,6 +1862,7 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, + + err: + for (j = 0; j < i; j++) { ++ virtqueue_detach_element(q->rx_vq, elems[j], lens[j]); + g_free(elems[j]); + } + +-- +2.27.0 + diff --git a/SOURCES/kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch b/SOURCES/kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch new file mode 100644 index 0000000..2e46cff --- /dev/null +++ b/SOURCES/kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch @@ -0,0 +1,52 @@ +From bc307149fe4e3fe2a3e0ac52534383c955051e7e Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:12 -0700 +Subject: [PATCH 14/24] virtio-net: setup vhost_dev and notifiers for cvq only + when feature is negotiated +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jason Wang +RH-MergeRequest: 187: Multiqueue fixes for vhost-vDPA +RH-Commit: [1/7] 38bcfaa661f437b3dfa6b6f152dffd60073dc054 +RH-Bugzilla: 2069946 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +When the control virtqueue feature is absent or not negotiated, +vhost_net_start() still tries to set up vhost_dev and install +vhost notifiers for the control virtqueue, which results in +erroneous ioctl calls with incorrect queue index sending down +to driver. Do that only when needed. + +Fixes: 22288fe ("virtio-net: vhost control virtqueue support") +Signed-off-by: Si-Wei Liu +Acked-by: Jason Wang +Message-Id: <1651890498-24478-2-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit aa8581945a13712ff3eed0ad3ba7a9664fc1604b) +Signed-off-by: Jason Wang +--- + hw/net/virtio-net.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index e1f4748831..ec045c3f41 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -244,7 +244,8 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) + VirtIODevice *vdev = VIRTIO_DEVICE(n); + NetClientState *nc = qemu_get_queue(n->nic); + int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; +- int cvq = n->max_ncs - n->max_queue_pairs; ++ int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ? ++ n->max_ncs - n->max_queue_pairs : 0; + + if (!get_vhost_net(nc->peer)) { + return; +-- +2.35.3 + diff --git a/SOURCES/kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch b/SOURCES/kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch new file mode 100644 index 0000000..9325d69 --- /dev/null +++ b/SOURCES/kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch @@ -0,0 +1,337 @@ +From 31e9e3691789469b93a75d0221387bab3e526094 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 21 Feb 2023 16:22:18 -0500 +Subject: [PATCH 13/13] virtio-scsi: reset SCSI devices from main loop thread + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 264: scsi: protect req->aiocb with AioContext lock +RH-Bugzilla: 2090990 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Commit: [3/3] 30d7c2bd868efa6694992e75ace22fb48aef161b + +When an IOThread is configured, the ctrl virtqueue is processed in the +IOThread. TMFs that reset SCSI devices are currently called directly +from the IOThread and trigger an assertion failure in blk_drain() from +the following call stack: + +virtio_scsi_handle_ctrl_req -> virtio_scsi_do_tmf -> device_code_reset +-> scsi_disk_reset -> scsi_device_purge_requests -> blk_drain + + ../block/block-backend.c:1780: void blk_drain(BlockBackend *): Assertion `qemu_in_main_thread()' failed. + +The blk_drain() function is not designed to be called from an IOThread +because it needs the Big QEMU Lock (BQL). + +This patch defers TMFs that reset SCSI devices to a Bottom Half (BH) +that runs in the main loop thread under the BQL. This way it's safe to +call blk_drain() and the assertion failure is avoided. + +Introduce s->tmf_bh_list for tracking TMF requests that have been +deferred to the BH. When the BH runs it will grab the entire list and +process all requests. Care must be taken to clear the list when the +virtio-scsi device is reset or unrealized. Otherwise deferred TMF +requests could execute later and lead to use-after-free or other +undefined behavior. + +The s->resetting counter that's used by TMFs that reset SCSI devices is +accessed from multiple threads. This patch makes that explicit by using +atomic accessor functions. With this patch applied the counter is only +modified by the main loop thread under the BQL but can be read by any +thread. + +Reported-by: Qing Wang +Cc: Paolo Bonzini +Reviewed-by: Eric Blake +Signed-off-by: Stefan Hajnoczi +Message-Id: <20230221212218.1378734-4-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit be2c42b97c3a3a395b2f05bad1b6c7de20ecf2a5) +Signed-off-by: Stefan Hajnoczi + +Conflicts: +- hw/scsi/virtio-scsi.c + - VirtIOSCSIReq is defined in include/hw/virtio/virtio-scsi.h + downstream instead of hw/scsi/virtio-scsi.c because commit + 3dc584abeef0 ("virtio-scsi: move request-related items from .h to + .c") is missing. Update the struct fields in virtio-scsi.h + downstream. + + - Use qbus_reset_all() downstream instead of bus_cold_reset() because + commit 4a5fc890b1d3 ("scsi: Use device_cold_reset() and + bus_cold_reset()") is missing. + + - Drop GLOBAL_STATE_CODE() because these macros don't exist + downstream. They are assertions/documentation and can be removed + without affecting the code. +--- + hw/scsi/virtio-scsi.c | 155 +++++++++++++++++++++++++------- + include/hw/virtio/virtio-scsi.h | 21 +++-- + 2 files changed, 139 insertions(+), 37 deletions(-) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index a35257c35a..ef19a9bcd0 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -256,6 +256,118 @@ static inline void virtio_scsi_ctx_check(VirtIOSCSI *s, SCSIDevice *d) + } + } + ++static void virtio_scsi_do_one_tmf_bh(VirtIOSCSIReq *req) ++{ ++ VirtIOSCSI *s = req->dev; ++ SCSIDevice *d = virtio_scsi_device_get(s, req->req.tmf.lun); ++ BusChild *kid; ++ int target; ++ ++ switch (req->req.tmf.subtype) { ++ case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET: ++ if (!d) { ++ req->resp.tmf.response = VIRTIO_SCSI_S_BAD_TARGET; ++ goto out; ++ } ++ if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) { ++ req->resp.tmf.response = VIRTIO_SCSI_S_INCORRECT_LUN; ++ goto out; ++ } ++ qatomic_inc(&s->resetting); ++ qdev_reset_all(&d->qdev); ++ qatomic_dec(&s->resetting); ++ break; ++ ++ case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET: ++ target = req->req.tmf.lun[1]; ++ qatomic_inc(&s->resetting); ++ ++ rcu_read_lock(); ++ QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, sibling) { ++ SCSIDevice *d1 = SCSI_DEVICE(kid->child); ++ if (d1->channel == 0 && d1->id == target) { ++ qdev_reset_all(&d1->qdev); ++ } ++ } ++ rcu_read_unlock(); ++ ++ qatomic_dec(&s->resetting); ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ break; ++ } ++ ++out: ++ object_unref(OBJECT(d)); ++ ++ virtio_scsi_acquire(s); ++ virtio_scsi_complete_req(req); ++ virtio_scsi_release(s); ++} ++ ++/* Some TMFs must be processed from the main loop thread */ ++static void virtio_scsi_do_tmf_bh(void *opaque) ++{ ++ VirtIOSCSI *s = opaque; ++ QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs); ++ VirtIOSCSIReq *req; ++ VirtIOSCSIReq *tmp; ++ ++ virtio_scsi_acquire(s); ++ ++ QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) { ++ QTAILQ_REMOVE(&s->tmf_bh_list, req, next); ++ QTAILQ_INSERT_TAIL(&reqs, req, next); ++ } ++ ++ qemu_bh_delete(s->tmf_bh); ++ s->tmf_bh = NULL; ++ ++ virtio_scsi_release(s); ++ ++ QTAILQ_FOREACH_SAFE(req, &reqs, next, tmp) { ++ QTAILQ_REMOVE(&reqs, req, next); ++ virtio_scsi_do_one_tmf_bh(req); ++ } ++} ++ ++static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s) ++{ ++ VirtIOSCSIReq *req; ++ VirtIOSCSIReq *tmp; ++ ++ virtio_scsi_acquire(s); ++ ++ if (s->tmf_bh) { ++ qemu_bh_delete(s->tmf_bh); ++ s->tmf_bh = NULL; ++ } ++ ++ QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) { ++ QTAILQ_REMOVE(&s->tmf_bh_list, req, next); ++ ++ /* SAM-6 6.3.2 Hard reset */ ++ req->resp.tmf.response = VIRTIO_SCSI_S_TARGET_FAILURE; ++ virtio_scsi_complete_req(req); ++ } ++ ++ virtio_scsi_release(s); ++} ++ ++static void virtio_scsi_defer_tmf_to_bh(VirtIOSCSIReq *req) ++{ ++ VirtIOSCSI *s = req->dev; ++ ++ QTAILQ_INSERT_TAIL(&s->tmf_bh_list, req, next); ++ ++ if (!s->tmf_bh) { ++ s->tmf_bh = qemu_bh_new(virtio_scsi_do_tmf_bh, s); ++ qemu_bh_schedule(s->tmf_bh); ++ } ++} ++ + /* Return 0 if the request is ready to be completed and return to guest; + * -EINPROGRESS if the request is submitted and will be completed later, in the + * case of async cancellation. */ +@@ -263,8 +375,6 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) + { + SCSIDevice *d = virtio_scsi_device_get(s, req->req.tmf.lun); + SCSIRequest *r, *next; +- BusChild *kid; +- int target; + int ret = 0; + + virtio_scsi_ctx_check(s, d); +@@ -321,15 +431,9 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) + break; + + case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET: +- if (!d) { +- goto fail; +- } +- if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) { +- goto incorrect_lun; +- } +- s->resetting++; +- qdev_reset_all(&d->qdev); +- s->resetting--; ++ case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET: ++ virtio_scsi_defer_tmf_to_bh(req); ++ ret = -EINPROGRESS; + break; + + case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET: +@@ -372,22 +476,6 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) + } + break; + +- case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET: +- target = req->req.tmf.lun[1]; +- s->resetting++; +- +- rcu_read_lock(); +- QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, sibling) { +- SCSIDevice *d1 = SCSI_DEVICE(kid->child); +- if (d1->channel == 0 && d1->id == target) { +- qdev_reset_all(&d1->qdev); +- } +- } +- rcu_read_unlock(); +- +- s->resetting--; +- break; +- + case VIRTIO_SCSI_T_TMF_CLEAR_ACA: + default: + req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_REJECTED; +@@ -603,7 +691,7 @@ static void virtio_scsi_request_cancelled(SCSIRequest *r) + if (!req) { + return; + } +- if (req->dev->resetting) { ++ if (qatomic_read(&req->dev->resetting)) { + req->resp.cmd.response = VIRTIO_SCSI_S_RESET; + } else { + req->resp.cmd.response = VIRTIO_SCSI_S_ABORTED; +@@ -784,9 +872,12 @@ static void virtio_scsi_reset(VirtIODevice *vdev) + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(vdev); + + assert(!s->dataplane_started); +- s->resetting++; ++ ++ virtio_scsi_reset_tmf_bh(s); ++ ++ qatomic_inc(&s->resetting); + qbus_reset_all(BUS(&s->bus)); +- s->resetting--; ++ qatomic_dec(&s->resetting); + + vs->sense_size = VIRTIO_SCSI_SENSE_DEFAULT_SIZE; + vs->cdb_size = VIRTIO_SCSI_CDB_DEFAULT_SIZE; +@@ -1018,6 +1109,8 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp) + VirtIOSCSI *s = VIRTIO_SCSI(dev); + Error *err = NULL; + ++ QTAILQ_INIT(&s->tmf_bh_list); ++ + virtio_scsi_common_realize(dev, + virtio_scsi_handle_ctrl, + virtio_scsi_handle_event, +@@ -1055,6 +1148,8 @@ static void virtio_scsi_device_unrealize(DeviceState *dev) + { + VirtIOSCSI *s = VIRTIO_SCSI(dev); + ++ virtio_scsi_reset_tmf_bh(s); ++ + qbus_set_hotplug_handler(BUS(&s->bus), NULL); + virtio_scsi_common_unrealize(dev); + } +diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h +index 543681bc18..b0e36f25aa 100644 +--- a/include/hw/virtio/virtio-scsi.h ++++ b/include/hw/virtio/virtio-scsi.h +@@ -77,13 +77,22 @@ struct VirtIOSCSICommon { + VirtQueue **cmd_vqs; + }; + ++struct VirtIOSCSIReq; ++ + struct VirtIOSCSI { + VirtIOSCSICommon parent_obj; + + SCSIBus bus; +- int resetting; ++ int resetting; /* written from main loop thread, read from any thread */ + bool events_dropped; + ++ /* ++ * TMFs deferred to main loop BH. These fields are protected by ++ * virtio_scsi_acquire(). ++ */ ++ QEMUBH *tmf_bh; ++ QTAILQ_HEAD(, VirtIOSCSIReq) tmf_bh_list; ++ + /* Fields for dataplane below */ + AioContext *ctx; /* one iothread per virtio-scsi-pci for now */ + +@@ -106,13 +115,11 @@ typedef struct VirtIOSCSIReq { + QEMUSGList qsgl; + QEMUIOVector resp_iov; + +- union { +- /* Used for two-stage request submission */ +- QTAILQ_ENTRY(VirtIOSCSIReq) next; ++ /* Used for two-stage request submission and TMFs deferred to BH */ ++ QTAILQ_ENTRY(VirtIOSCSIReq) next; + +- /* Used for cancellation of request during TMFs */ +- int remaining; +- }; ++ /* Used for cancellation of request during TMFs */ ++ int remaining; + + SCSIRequest *sreq; + size_t resp_size; +-- +2.37.3 + diff --git a/SOURCES/kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch b/SOURCES/kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch new file mode 100644 index 0000000..face8e6 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch @@ -0,0 +1,110 @@ +From 2754dc2c7def01d7dd1bb39f3e86ef444652d397 Mon Sep 17 00:00:00 2001 +From: Vivek Goyal +Date: Tue, 25 Jan 2022 13:51:14 -0500 +Subject: [PATCH 1/6] virtiofsd: Drop membership of all supplementary groups + (CVE-2022-0358) + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 102: virtiofsd: Drop membership of all supplementary groups (CVE-2022-0358) +RH-Commit: [1/1] 93e56c88277fec8e42559a899d32b80fac4a923f +RH-Bugzilla: 2046198 +RH-Acked-by: Greg Kurz +RH-Acked-by: Sergio Lopez +RH-Acked-by: Laszlo Ersek + +At the start, drop membership of all supplementary groups. This is +not required. + +If we have membership of "root" supplementary group and when we switch +uid/gid using setresuid/setsgid, we still retain membership of existing +supplemntary groups. And that can allow some operations which are not +normally allowed. + +For example, if root in guest creates a dir as follows. + +$ mkdir -m 03777 test_dir + +This sets SGID on dir as well as allows unprivileged users to write into +this dir. + +And now as unprivileged user open file as follows. + +$ su test +$ fd = open("test_dir/priviledge_id", O_RDWR|O_CREAT|O_EXCL, 02755); + +This will create SGID set executable in test_dir/. + +And that's a problem because now an unpriviliged user can execute it, +get egid=0 and get access to resources owned by "root" group. This is +privilege escalation. + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2044863 +Fixes: CVE-2022-0358 +Reported-by: JIETAO XIAO +Suggested-by: Miklos Szeredi +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Vivek Goyal +Message-Id: +Signed-off-by: Dr. David Alan Gilbert + dgilbert: Fixed missing {}'s style nit +(cherry picked from commit 449e8171f96a6a944d1f3b7d3627ae059eae21ca) +--- + tools/virtiofsd/passthrough_ll.c | 27 +++++++++++++++++++++++++++ + 1 file changed, 27 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 64b5b4fbb1..b3d0674f6d 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -54,6 +54,7 @@ + #include + #include + #include ++#include + + #include "qemu/cutils.h" + #include "passthrough_helpers.h" +@@ -1161,6 +1162,30 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) + #define OURSYS_setresuid SYS_setresuid + #endif + ++static void drop_supplementary_groups(void) ++{ ++ int ret; ++ ++ ret = getgroups(0, NULL); ++ if (ret == -1) { ++ fuse_log(FUSE_LOG_ERR, "getgroups() failed with error=%d:%s\n", ++ errno, strerror(errno)); ++ exit(1); ++ } ++ ++ if (!ret) { ++ return; ++ } ++ ++ /* Drop all supplementary groups. We should not need it */ ++ ret = setgroups(0, NULL); ++ if (ret == -1) { ++ fuse_log(FUSE_LOG_ERR, "setgroups() failed with error=%d:%s\n", ++ errno, strerror(errno)); ++ exit(1); ++ } ++} ++ + /* + * Change to uid/gid of caller so that file is created with + * ownership of caller. +@@ -3926,6 +3951,8 @@ int main(int argc, char *argv[]) + + qemu_init_exec_dir(argv[0]); + ++ drop_supplementary_groups(); ++ + pthread_mutex_init(&lo.mutex, NULL); + lo.inodes = g_hash_table_new(lo_key_hash, lo_key_equal); + lo.root.fd = -1; +-- +2.27.0 + diff --git a/SOURCES/kvm-virtiofsd-Fix-breakage-due-to-fuse_init_in-size-chan.patch b/SOURCES/kvm-virtiofsd-Fix-breakage-due-to-fuse_init_in-size-chan.patch new file mode 100644 index 0000000..7ee71ae --- /dev/null +++ b/SOURCES/kvm-virtiofsd-Fix-breakage-due-to-fuse_init_in-size-chan.patch @@ -0,0 +1,63 @@ +From 1da951c4c3b4e403a6c1668a54e6264381c0003d Mon Sep 17 00:00:00 2001 +From: Vivek Goyal +Date: Tue, 8 Feb 2022 15:48:04 -0500 +Subject: [PATCH 1/3] virtiofsd: Fix breakage due to fuse_init_in size change + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 193: virtiofsd: Fix breakage due to fuse_init_in size change +RH-Commit: [1/1] 5809db034f9361fb462181d71e7cdde1324f8e54 +RH-Bugzilla: 2097209 +RH-Acked-by: German Maglione +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Vivek Goyal +RH-Acked-by: Stefan Hajnoczi + +Kernel version 5.17 has increased the size of "struct fuse_init_in" struct. +Previously this struct was 16 bytes and now it has been extended to +64 bytes in size. + +Once qemu headers are updated to latest, it will expect to receive 64 byte +size struct (for protocol version major 7 and minor > 6). But if guest is +booting older kernel (older than 5.17), then it still sends older +fuse_init_in of size 16 bytes. And do_init() fails. It is expecting +64 byte struct. And this results in mount of virtiofs failing. + +Fix this by parsing 16 bytes only for now. Separate patches will be +posted which will parse rest of the bytes and enable new functionality. +Right now we don't support any of the new functionality, so we don't +lose anything by not parsing bytes beyond 16. + +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Vivek Goyal +Message-Id: <20220208204813.682906-2-vgoyal@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit a086d54c6ffa38f7e71f182b63a25315304a3392) +--- + tools/virtiofsd/fuse_lowlevel.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index e4679c73ab..5d431a7038 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -1880,6 +1880,8 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, + struct fuse_mbuf_iter *iter) + { + size_t compat_size = offsetof(struct fuse_init_in, max_readahead); ++ size_t compat2_size = offsetof(struct fuse_init_in, flags) + ++ sizeof(uint32_t); + struct fuse_init_in *arg; + struct fuse_init_out outarg; + struct fuse_session *se = req->se; +@@ -1897,7 +1899,7 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, + + /* ...and now consume the new fields. */ + if (arg->major == 7 && arg->minor >= 6) { +- if (!fuse_mbuf_iter_advance(iter, sizeof(*arg) - compat_size)) { ++ if (!fuse_mbuf_iter_advance(iter, compat2_size - compat_size)) { + fuse_reply_err(req, EINVAL); + return; + } +-- +2.35.3 + diff --git a/SOURCES/kvm-virtiofsd-use-g_date_time_get_microsecond-to-get-sub.patch b/SOURCES/kvm-virtiofsd-use-g_date_time_get_microsecond-to-get-sub.patch new file mode 100644 index 0000000..e6ffec1 --- /dev/null +++ b/SOURCES/kvm-virtiofsd-use-g_date_time_get_microsecond-to-get-sub.patch @@ -0,0 +1,65 @@ +From ebf6be5ba316ffda354af5eb1f1241ad6543b3cd Mon Sep 17 00:00:00 2001 +From: Yusuke Okada +Date: Thu, 18 Aug 2022 14:46:19 -0400 +Subject: [PATCH 3/3] virtiofsd: use g_date_time_get_microsecond to get + subsecond +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 222: virtiofsd: use g_date_time_get_microsecond to get subsecond +RH-Bugzilla: 2018885 +RH-Acked-by: Vivek Goyal +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Sergio Lopez +RH-Commit: [1/1] da8795576acc7029044a801ef42676d66471a577 + +The "%f" specifier in g_date_time_format() is only available in glib +2.65.2 or later. If combined with older glib, the function returns null +and the timestamp displayed as "(null)". + +For backward compatibility, g_date_time_get_microsecond should be used +to retrieve subsecond. + +In this patch the g_date_time_format() leaves subsecond field as "%06d" +and let next snprintf to format with g_date_time_get_microsecond. + +Signed-off-by: Yusuke Okada +Reviewed-by: Dr. David Alan Gilbert +Message-id: 20220818184618.2205172-1-yokada.996@gmail.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit f16d15c9276bd8f501f861c39cbd4adc812d0c1d) +--- + tools/virtiofsd/passthrough_ll.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index b3d0674f6d..523d8fbe1e 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -3791,6 +3791,7 @@ static void setup_nofile_rlimit(unsigned long rlimit_nofile) + static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) + { + g_autofree char *localfmt = NULL; ++ char buf[64]; + + if (current_log_level < level) { + return; +@@ -3803,9 +3804,11 @@ static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) + fmt); + } else { + g_autoptr(GDateTime) now = g_date_time_new_now_utc(); +- g_autofree char *nowstr = g_date_time_format(now, "%Y-%m-%d %H:%M:%S.%f%z"); ++ g_autofree char *nowstr = g_date_time_format(now, ++ "%Y-%m-%d %H:%M:%S.%%06d%z"); ++ snprintf(buf, 64, nowstr, g_date_time_get_microsecond(now)); + localfmt = g_strdup_printf("[%s] [ID: %08ld] %s", +- nowstr, syscall(__NR_gettid), fmt); ++ buf, syscall(__NR_gettid), fmt); + } + fmt = localfmt; + } +-- +2.35.3 + diff --git a/SOURCES/kvm-vmxcap-Add-5-level-EPT-bit.patch b/SOURCES/kvm-vmxcap-Add-5-level-EPT-bit.patch new file mode 100644 index 0000000..8cdb980 --- /dev/null +++ b/SOURCES/kvm-vmxcap-Add-5-level-EPT-bit.patch @@ -0,0 +1,48 @@ +From f0f87dcea3fe14b20b8599cda9b1151ca2490d0c Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 30 Mar 2022 14:52:34 -0400 +Subject: [PATCH 07/18] vmxcap: Add 5-level EPT bit + +RH-Author: Jon Maloy +RH-MergeRequest: 139: vmxcap: Add 5-level EPT bit +RH-Commit: [1/2] 4c098f551f1ed8e2a5582f466afda35b28d97055 (jmaloy/qemu-kvm) +RH-Bugzilla: 2065207 +RH-Acked-by: Paolo Bonzini + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2065207 +UPSTREAM: Merged + +commit d312378e59658473aa91aa15c67ec6200d92e5ff +Author: Vitaly Kuznetsov +Date: Mon Feb 21 15:53:16 2022 +0100 + + vmxcap: Add 5-level EPT bit + + 5-level EPT is present in Icelake Server CPUs and is supported by QEMU + ('vmx-page-walk-5'). + + Signed-off-by: Vitaly Kuznetsov + Message-Id: <20220221145316.576138-2-vkuznets@redhat.com> + Signed-off-by: Paolo Bonzini + +(cherry picked from commit d312378e59658473aa91aa15c67ec6200d92e5ff) +Signed-off-by: Jon Maloy +--- + scripts/kvm/vmxcap | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/scripts/kvm/vmxcap b/scripts/kvm/vmxcap +index 6fe66d5f57..f140040104 100755 +--- a/scripts/kvm/vmxcap ++++ b/scripts/kvm/vmxcap +@@ -249,6 +249,7 @@ controls = [ + bits = { + 0: 'Execute-only EPT translations', + 6: 'Page-walk length 4', ++ 7: 'Page-walk length 5', + 8: 'Paging-structure memory type UC', + 14: 'Paging-structure memory type WB', + 16: '2MB EPT pages', +-- +2.27.0 + diff --git a/SOURCES/kvm-x86-Add-AMX-CPUIDs-enumeration.patch b/SOURCES/kvm-x86-Add-AMX-CPUIDs-enumeration.patch new file mode 100644 index 0000000..d61e4cf --- /dev/null +++ b/SOURCES/kvm-x86-Add-AMX-CPUIDs-enumeration.patch @@ -0,0 +1,135 @@ +From d0826a8c2c3c389eeeed1014d7e316f39f083971 Mon Sep 17 00:00:00 2001 +From: Jing Liu +Date: Wed, 16 Feb 2022 22:04:31 -0800 +Subject: [PATCH 09/24] x86: Add AMX CPUIDs enumeration + +RH-Author: Paul Lai +RH-MergeRequest: 176: Enable KVM AMX support +RH-Commit: [9/13] fab147992ad927c9538529f018f06e2f48546c5b +RH-Bugzilla: 1916415 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Igor Mammedov +RH-Acked-by: Paolo Bonzini + +Add AMX primary feature bits XFD and AMX_TILE to +enumerate the CPU's AMX capability. Meanwhile, add +AMX TILE and TMUL CPUID leaf and subleaves which +exist when AMX TILE is present to provide the maximum +capability of TILE and TMUL. + +Signed-off-by: Jing Liu +Signed-off-by: Yang Zhong +Message-Id: <20220217060434.52460-6-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit f21a48171cf3fa39532fc8553fd82e81b88b6474) +Signed-off-by: Paul Lai +--- + target/i386/cpu.c | 55 ++++++++++++++++++++++++++++++++++++++++--- + target/i386/kvm/kvm.c | 4 +++- + 2 files changed, 55 insertions(+), 4 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index cd27c0eb81..09e08f7f38 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -574,6 +574,18 @@ static CPUCacheInfo legacy_l3_cache = { + #define INTEL_PT_CYCLE_BITMAP 0x1fff /* Support 0,2^(0~11) */ + #define INTEL_PT_PSB_BITMAP (0x003f << 16) /* Support 2K,4K,8K,16K,32K,64K */ + ++/* CPUID Leaf 0x1D constants: */ ++#define INTEL_AMX_TILE_MAX_SUBLEAF 0x1 ++#define INTEL_AMX_TOTAL_TILE_BYTES 0x2000 ++#define INTEL_AMX_BYTES_PER_TILE 0x400 ++#define INTEL_AMX_BYTES_PER_ROW 0x40 ++#define INTEL_AMX_TILE_MAX_NAMES 0x8 ++#define INTEL_AMX_TILE_MAX_ROWS 0x10 ++ ++/* CPUID Leaf 0x1E constants: */ ++#define INTEL_AMX_TMUL_MAX_K 0x10 ++#define INTEL_AMX_TMUL_MAX_N 0x40 ++ + void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, + uint32_t vendor2, uint32_t vendor3) + { +@@ -843,8 +855,8 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + "avx512-vp2intersect", NULL, "md-clear", NULL, + NULL, NULL, "serialize", NULL, + "tsx-ldtrk", NULL, NULL /* pconfig */, NULL, +- NULL, NULL, NULL, "avx512-fp16", +- NULL, NULL, "spec-ctrl", "stibp", ++ NULL, NULL, "amx-bf16", "avx512-fp16", ++ "amx-tile", "amx-int8", "spec-ctrl", "stibp", + NULL, "arch-capabilities", "core-capability", "ssbd", + }, + .cpuid = { +@@ -909,7 +921,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { + "xsaveopt", "xsavec", "xgetbv1", "xsaves", +- NULL, NULL, NULL, NULL, ++ "xfd", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, +@@ -5593,6 +5605,43 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + } + break; + } ++ case 0x1D: { ++ /* AMX TILE */ ++ *eax = 0; ++ *ebx = 0; ++ *ecx = 0; ++ *edx = 0; ++ if (!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE)) { ++ break; ++ } ++ ++ if (count == 0) { ++ /* Highest numbered palette subleaf */ ++ *eax = INTEL_AMX_TILE_MAX_SUBLEAF; ++ } else if (count == 1) { ++ *eax = INTEL_AMX_TOTAL_TILE_BYTES | ++ (INTEL_AMX_BYTES_PER_TILE << 16); ++ *ebx = INTEL_AMX_BYTES_PER_ROW | (INTEL_AMX_TILE_MAX_NAMES << 16); ++ *ecx = INTEL_AMX_TILE_MAX_ROWS; ++ } ++ break; ++ } ++ case 0x1E: { ++ /* AMX TMUL */ ++ *eax = 0; ++ *ebx = 0; ++ *ecx = 0; ++ *edx = 0; ++ if (!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE)) { ++ break; ++ } ++ ++ if (count == 0) { ++ /* Highest numbered palette subleaf */ ++ *ebx = INTEL_AMX_TMUL_MAX_K | (INTEL_AMX_TMUL_MAX_N << 8); ++ } ++ break; ++ } + case 0x40000000: + /* + * CPUID code in kvm_arch_init_vcpu() ignores stuff +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index b5d98c4361..a64a79d870 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -1779,7 +1779,9 @@ int kvm_arch_init_vcpu(CPUState *cs) + c = &cpuid_data.entries[cpuid_i++]; + } + break; +- case 0x14: { ++ case 0x14: ++ case 0x1d: ++ case 0x1e: { + uint32_t times; + + c->function = i; +-- +2.35.3 + diff --git a/SOURCES/kvm-x86-Add-AMX-XTILECFG-and-XTILEDATA-components.patch b/SOURCES/kvm-x86-Add-AMX-XTILECFG-and-XTILEDATA-components.patch new file mode 100644 index 0000000..064b124 --- /dev/null +++ b/SOURCES/kvm-x86-Add-AMX-XTILECFG-and-XTILEDATA-components.patch @@ -0,0 +1,112 @@ +From 3ba6092159b6e3b25505af2a49c0f6ac99043db9 Mon Sep 17 00:00:00 2001 +From: Jing Liu +Date: Wed, 16 Feb 2022 22:04:28 -0800 +Subject: [PATCH 06/24] x86: Add AMX XTILECFG and XTILEDATA components + +RH-Author: Paul Lai +RH-MergeRequest: 176: Enable KVM AMX support +RH-Commit: [6/13] 95229f87b4494631d57232f374a174f7bc95843a +RH-Bugzilla: 1916415 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Igor Mammedov +RH-Acked-by: Paolo Bonzini + +The AMX TILECFG register and the TMMx tile data registers are +saved/restored via XSAVE, respectively in state component 17 +(64 bytes) and state component 18 (8192 bytes). + +Add AMX feature bits to x86_ext_save_areas array to set +up AMX components. Add structs that define the layout of +AMX XSAVE areas and use QEMU_BUILD_BUG_ON to validate the +structs sizes. + +Signed-off-by: Jing Liu +Signed-off-by: Yang Zhong +Message-Id: <20220217060434.52460-3-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 1f16764f7d4515bfd5e4ae0aae814fa280a7d0c8) +Signed-off-by: Paul Lai +--- + target/i386/cpu.c | 8 ++++++++ + target/i386/cpu.h | 18 +++++++++++++++++- + 2 files changed, 25 insertions(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index f44fad3a2a..0453c27c9d 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1401,6 +1401,14 @@ ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT] = { + [XSTATE_PKRU_BIT] = + { .feature = FEAT_7_0_ECX, .bits = CPUID_7_0_ECX_PKU, + .size = sizeof(XSavePKRU) }, ++ [XSTATE_XTILE_CFG_BIT] = { ++ .feature = FEAT_7_0_EDX, .bits = CPUID_7_0_EDX_AMX_TILE, ++ .size = sizeof(XSaveXTILECFG), ++ }, ++ [XSTATE_XTILE_DATA_BIT] = { ++ .feature = FEAT_7_0_EDX, .bits = CPUID_7_0_EDX_AMX_TILE, ++ .size = sizeof(XSaveXTILEDATA) ++ }, + }; + + static uint32_t xsave_area_size(uint64_t mask) +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 5d9702a991..e1dd8b9555 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -537,6 +537,8 @@ typedef enum X86Seg { + #define XSTATE_ZMM_Hi256_BIT 6 + #define XSTATE_Hi16_ZMM_BIT 7 + #define XSTATE_PKRU_BIT 9 ++#define XSTATE_XTILE_CFG_BIT 17 ++#define XSTATE_XTILE_DATA_BIT 18 + + #define XSTATE_FP_MASK (1ULL << XSTATE_FP_BIT) + #define XSTATE_SSE_MASK (1ULL << XSTATE_SSE_BIT) +@@ -845,6 +847,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; + #define CPUID_7_0_EDX_TSX_LDTRK (1U << 16) + /* AVX512_FP16 instruction */ + #define CPUID_7_0_EDX_AVX512_FP16 (1U << 23) ++/* AMX tile (two-dimensional register) */ ++#define CPUID_7_0_EDX_AMX_TILE (1U << 24) + /* Speculation Control */ + #define CPUID_7_0_EDX_SPEC_CTRL (1U << 26) + /* Single Thread Indirect Branch Predictors */ +@@ -1348,6 +1352,16 @@ typedef struct XSavePKRU { + uint32_t padding; + } XSavePKRU; + ++/* Ext. save area 17: AMX XTILECFG state */ ++typedef struct XSaveXTILECFG { ++ uint8_t xtilecfg[64]; ++} XSaveXTILECFG; ++ ++/* Ext. save area 18: AMX XTILEDATA state */ ++typedef struct XSaveXTILEDATA { ++ uint8_t xtiledata[8][1024]; ++} XSaveXTILEDATA; ++ + QEMU_BUILD_BUG_ON(sizeof(XSaveAVX) != 0x100); + QEMU_BUILD_BUG_ON(sizeof(XSaveBNDREG) != 0x40); + QEMU_BUILD_BUG_ON(sizeof(XSaveBNDCSR) != 0x40); +@@ -1355,6 +1369,8 @@ QEMU_BUILD_BUG_ON(sizeof(XSaveOpmask) != 0x40); + QEMU_BUILD_BUG_ON(sizeof(XSaveZMM_Hi256) != 0x200); + QEMU_BUILD_BUG_ON(sizeof(XSaveHi16_ZMM) != 0x400); + QEMU_BUILD_BUG_ON(sizeof(XSavePKRU) != 0x8); ++QEMU_BUILD_BUG_ON(sizeof(XSaveXTILECFG) != 0x40); ++QEMU_BUILD_BUG_ON(sizeof(XSaveXTILEDATA) != 0x2000); + + typedef struct ExtSaveArea { + uint32_t feature, bits; +@@ -1362,7 +1378,7 @@ typedef struct ExtSaveArea { + uint32_t ecx; + } ExtSaveArea; + +-#define XSAVE_STATE_AREA_COUNT (XSTATE_PKRU_BIT + 1) ++#define XSAVE_STATE_AREA_COUNT (XSTATE_XTILE_DATA_BIT + 1) + + extern ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT]; + +-- +2.35.3 + diff --git a/SOURCES/kvm-x86-Add-XFD-faulting-bit-for-state-components.patch b/SOURCES/kvm-x86-Add-XFD-faulting-bit-for-state-components.patch new file mode 100644 index 0000000..5c0fd0a --- /dev/null +++ b/SOURCES/kvm-x86-Add-XFD-faulting-bit-for-state-components.patch @@ -0,0 +1,62 @@ +From 098d6a965ada02f5897b73f0489413a050a176bb Mon Sep 17 00:00:00 2001 +From: Jing Liu +Date: Wed, 16 Feb 2022 22:04:30 -0800 +Subject: [PATCH 08/24] x86: Add XFD faulting bit for state components + +RH-Author: Paul Lai +RH-MergeRequest: 176: Enable KVM AMX support +RH-Commit: [8/13] 0b1b46c5d075655ab94bc79e042b187c5dc55551 +RH-Bugzilla: 1916415 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Igor Mammedov +RH-Acked-by: Paolo Bonzini + +Intel introduces XFD faulting mechanism for extended +XSAVE features to dynamically enable the features in +runtime. If CPUID (EAX=0Dh, ECX=n, n>1).ECX[2] is set +as 1, it indicates support for XFD faulting of this +state component. + +Signed-off-by: Jing Liu +Signed-off-by: Yang Zhong +Message-Id: <20220217060434.52460-5-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 0f17f6b30f3b051f0f96ccc98c9f7f395713699f) +Signed-off-by: Paul Lai +--- + target/i386/cpu.c | 3 ++- + target/i386/cpu.h | 2 ++ + 2 files changed, 4 insertions(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index c19b51ea32..cd27c0eb81 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -5503,7 +5503,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + const ExtSaveArea *esa = &x86_ext_save_areas[count]; + *eax = esa->size; + *ebx = esa->offset; +- *ecx = esa->ecx & ESA_FEATURE_ALIGN64_MASK; ++ *ecx = esa->ecx & ++ (ESA_FEATURE_ALIGN64_MASK | ESA_FEATURE_XFD_MASK); + } + } + break; +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 58676390e6..f2bdef9c26 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -555,8 +555,10 @@ typedef enum X86Seg { + #define XSTATE_DYNAMIC_MASK (XSTATE_XTILE_DATA_MASK) + + #define ESA_FEATURE_ALIGN64_BIT 1 ++#define ESA_FEATURE_XFD_BIT 2 + + #define ESA_FEATURE_ALIGN64_MASK (1U << ESA_FEATURE_ALIGN64_BIT) ++#define ESA_FEATURE_XFD_MASK (1U << ESA_FEATURE_XFD_BIT) + + + /* CPUID feature words */ +-- +2.35.3 + diff --git a/SOURCES/kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch b/SOURCES/kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch new file mode 100644 index 0000000..56ecea7 --- /dev/null +++ b/SOURCES/kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch @@ -0,0 +1,68 @@ +From 31530bf621dc28689142ffa83d025ec4a4f110c1 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 11 Jan 2022 18:29:31 +0000 +Subject: [PATCH 2/2] x86: Add q35 RHEL 8.6.0 machine type +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 99: x86: Add q35 RHEL 8.6.0 machine type +RH-Commit: [1/1] a694724b6fa972e312bb76b5569bc979d6c596ef +RH-Bugzilla: 2031035 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Cornelia Huck + +Add the new 8.6.0 machine type; note that while the -AV +notation has gone in the product naming, just keep the smbios +definitions the same for consistency. + +Signed-off-by: Dr. David Alan Gilbert +--- + hw/i386/pc_q35.c | 21 ++++++++++++++++++++- + 1 file changed, 20 insertions(+), 1 deletion(-) + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index f6e77bca0e..5559261d9e 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -646,6 +646,24 @@ static void pc_q35_machine_rhel_options(MachineClass *m) + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); + } + ++static void pc_q35_init_rhel860(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel860_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel_options(m); ++ m->desc = "RHEL-8.6.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.6.0"; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel860, "pc-q35-rhel8.6.0", pc_q35_init_rhel860, ++ pc_q35_machine_rhel860_options); ++ ++ + static void pc_q35_init_rhel850(MachineState *machine) + { + pc_q35_init(machine); +@@ -654,8 +672,9 @@ static void pc_q35_init_rhel850(MachineState *machine) + static void pc_q35_machine_rhel850_options(MachineClass *m) + { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); +- pc_q35_machine_rhel_options(m); ++ pc_q35_machine_rhel860_options(m); + m->desc = "RHEL-8.5.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; + pcmc->smbios_stream_product = "RHEL-AV"; + pcmc->smbios_stream_version = "8.5.0"; + compat_props_add(m->compat_props, hw_compat_rhel_8_5, +-- +2.27.0 + diff --git a/SOURCES/kvm-x86-Fix-the-64-byte-boundary-enumeration-for-extende.patch b/SOURCES/kvm-x86-Fix-the-64-byte-boundary-enumeration-for-extende.patch new file mode 100644 index 0000000..2db4c60 --- /dev/null +++ b/SOURCES/kvm-x86-Fix-the-64-byte-boundary-enumeration-for-extende.patch @@ -0,0 +1,88 @@ +From 6eae12166341c236da023e5117b64b842ae72083 Mon Sep 17 00:00:00 2001 +From: Jing Liu +Date: Wed, 16 Feb 2022 22:04:27 -0800 +Subject: [PATCH 05/24] x86: Fix the 64-byte boundary enumeration for extended + state + +RH-Author: Paul Lai +RH-MergeRequest: 176: Enable KVM AMX support +RH-Commit: [5/13] 64fc93e3b0ad0fc56da9d71b33d9eefd3cbba1d7 +RH-Bugzilla: 1916415 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Igor Mammedov +RH-Acked-by: Paolo Bonzini + +The extended state subleaves (EAX=0Dh, ECX=n, n>1).ECX[1] +indicate whether the extended state component locates +on the next 64-byte boundary following the preceding state +component when the compacted format of an XSAVE area is +used. + +Right now, they are all zero because no supported component +needed the bit to be set, but the upcoming AMX feature will +use it. Fix the subleaves value according to KVM's supported +cpuid. + +Signed-off-by: Jing Liu +Signed-off-by: Yang Zhong +Message-Id: <20220217060434.52460-2-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 131266b7565bd437127bd231563572696bb27235) +Signed-off-by: Paul Lai +--- + target/i386/cpu.c | 1 + + target/i386/cpu.h | 6 ++++++ + target/i386/kvm/kvm-cpu.c | 1 + + 3 files changed, 8 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index dd6935b1dd..f44fad3a2a 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -5495,6 +5495,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + const ExtSaveArea *esa = &x86_ext_save_areas[count]; + *eax = esa->size; + *ebx = esa->offset; ++ *ecx = esa->ecx & ESA_FEATURE_ALIGN64_MASK; + } + } + break; +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index c6a6c871f1..5d9702a991 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -548,6 +548,11 @@ typedef enum X86Seg { + #define XSTATE_Hi16_ZMM_MASK (1ULL << XSTATE_Hi16_ZMM_BIT) + #define XSTATE_PKRU_MASK (1ULL << XSTATE_PKRU_BIT) + ++#define ESA_FEATURE_ALIGN64_BIT 1 ++ ++#define ESA_FEATURE_ALIGN64_MASK (1U << ESA_FEATURE_ALIGN64_BIT) ++ ++ + /* CPUID feature words */ + typedef enum FeatureWord { + FEAT_1_EDX, /* CPUID[1].EDX */ +@@ -1354,6 +1359,7 @@ QEMU_BUILD_BUG_ON(sizeof(XSavePKRU) != 0x8); + typedef struct ExtSaveArea { + uint32_t feature, bits; + uint32_t offset, size; ++ uint32_t ecx; + } ExtSaveArea; + + #define XSAVE_STATE_AREA_COUNT (XSTATE_PKRU_BIT + 1) +diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c +index 7b004065ae..86ef7b2712 100644 +--- a/target/i386/kvm/kvm-cpu.c ++++ b/target/i386/kvm/kvm-cpu.c +@@ -104,6 +104,7 @@ static void kvm_cpu_xsave_init(void) + if (sz != 0) { + assert(esa->size == sz); + esa->offset = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EBX); ++ esa->ecx = kvm_arch_get_supported_cpuid(s, 0xd, i, R_ECX); + } + } + } +-- +2.35.3 + diff --git a/SOURCES/kvm-x86-Grant-AMX-permission-for-guest.patch b/SOURCES/kvm-x86-Grant-AMX-permission-for-guest.patch new file mode 100644 index 0000000..c2ab95d --- /dev/null +++ b/SOURCES/kvm-x86-Grant-AMX-permission-for-guest.patch @@ -0,0 +1,215 @@ +From 50840e01d05a466a1dfbc219e49233834e5d7ed0 Mon Sep 17 00:00:00 2001 +From: Yang Zhong +Date: Wed, 16 Feb 2022 22:04:29 -0800 +Subject: [PATCH 07/24] x86: Grant AMX permission for guest + +RH-Author: Paul Lai +RH-MergeRequest: 176: Enable KVM AMX support +RH-Commit: [7/13] 437578191f61139ca710cc7045ab38eb0d05eae2 +RH-Bugzilla: 1916415 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Igor Mammedov +RH-Acked-by: Paolo Bonzini + +Kernel allocates 4K xstate buffer by default. For XSAVE features +which require large state component (e.g. AMX), Linux kernel +dynamically expands the xstate buffer only after the process has +acquired the necessary permissions. Those are called dynamically- +enabled XSAVE features (or dynamic xfeatures). + +There are separate permissions for native tasks and guests. + +Qemu should request the guest permissions for dynamic xfeatures +which will be exposed to the guest. This only needs to be done +once before the first vcpu is created. + +KVM implemented one new ARCH_GET_XCOMP_SUPP system attribute API to +get host side supported_xcr0 and Qemu can decide if it can request +dynamically enabled XSAVE features permission. +https://lore.kernel.org/all/20220126152210.3044876-1-pbonzini@redhat.com/ + +Suggested-by: Paolo Bonzini +Signed-off-by: Yang Zhong +Signed-off-by: Jing Liu +Message-Id: <20220217060434.52460-4-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 19db68ca68a78fa033a21d419036b6e416554564) +Signed-off-by: Paul Lai +--- + target/i386/cpu.c | 7 +++++ + target/i386/cpu.h | 4 +++ + target/i386/kvm/kvm-cpu.c | 12 ++++---- + target/i386/kvm/kvm.c | 57 ++++++++++++++++++++++++++++++++++++++ + target/i386/kvm/kvm_i386.h | 1 + + 5 files changed, 75 insertions(+), 6 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 0453c27c9d..c19b51ea32 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6027,6 +6027,7 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu) + CPUX86State *env = &cpu->env; + int i; + uint64_t mask; ++ static bool request_perm; + + if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) { + env->features[FEAT_XSAVE_COMP_LO] = 0; +@@ -6042,6 +6043,12 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu) + } + } + ++ /* Only request permission for first vcpu */ ++ if (kvm_enabled() && !request_perm) { ++ kvm_request_xsave_components(cpu, mask); ++ request_perm = true; ++ } ++ + env->features[FEAT_XSAVE_COMP_LO] = mask; + env->features[FEAT_XSAVE_COMP_HI] = mask >> 32; + } +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index e1dd8b9555..58676390e6 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -549,6 +549,10 @@ typedef enum X86Seg { + #define XSTATE_ZMM_Hi256_MASK (1ULL << XSTATE_ZMM_Hi256_BIT) + #define XSTATE_Hi16_ZMM_MASK (1ULL << XSTATE_Hi16_ZMM_BIT) + #define XSTATE_PKRU_MASK (1ULL << XSTATE_PKRU_BIT) ++#define XSTATE_XTILE_CFG_MASK (1ULL << XSTATE_XTILE_CFG_BIT) ++#define XSTATE_XTILE_DATA_MASK (1ULL << XSTATE_XTILE_DATA_BIT) ++ ++#define XSTATE_DYNAMIC_MASK (XSTATE_XTILE_DATA_MASK) + + #define ESA_FEATURE_ALIGN64_BIT 1 + +diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c +index 86ef7b2712..bdc967c484 100644 +--- a/target/i386/kvm/kvm-cpu.c ++++ b/target/i386/kvm/kvm-cpu.c +@@ -84,7 +84,7 @@ static void kvm_cpu_max_instance_init(X86CPU *cpu) + static void kvm_cpu_xsave_init(void) + { + static bool first = true; +- KVMState *s = kvm_state; ++ uint32_t eax, ebx, ecx, edx; + int i; + + if (!first) { +@@ -100,11 +100,11 @@ static void kvm_cpu_xsave_init(void) + ExtSaveArea *esa = &x86_ext_save_areas[i]; + + if (esa->size) { +- int sz = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EAX); +- if (sz != 0) { +- assert(esa->size == sz); +- esa->offset = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EBX); +- esa->ecx = kvm_arch_get_supported_cpuid(s, 0xd, i, R_ECX); ++ host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx); ++ if (eax != 0) { ++ assert(esa->size == eax); ++ esa->offset = ebx; ++ esa->ecx = ecx; + } + } + } +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index a668f521ac..b5d98c4361 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -17,6 +17,7 @@ + #include "qapi/error.h" + #include + #include ++#include + + #include + #include "standard-headers/asm-x86/kvm_para.h" +@@ -347,6 +348,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, + struct kvm_cpuid2 *cpuid; + uint32_t ret = 0; + uint32_t cpuid_1_edx; ++ uint64_t bitmask; + + cpuid = get_supported_cpuid(s); + +@@ -404,6 +406,25 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, + if (!has_msr_arch_capabs) { + ret &= ~CPUID_7_0_EDX_ARCH_CAPABILITIES; + } ++ } else if (function == 0xd && index == 0 && ++ (reg == R_EAX || reg == R_EDX)) { ++ struct kvm_device_attr attr = { ++ .group = 0, ++ .attr = KVM_X86_XCOMP_GUEST_SUPP, ++ .addr = (unsigned long) &bitmask ++ }; ++ ++ bool sys_attr = kvm_check_extension(s, KVM_CAP_SYS_ATTRIBUTES); ++ if (!sys_attr) { ++ warn_report("cannot get sys attribute capabilities %d", sys_attr); ++ } ++ ++ int rc = kvm_ioctl(s, KVM_GET_DEVICE_ATTR, &attr); ++ if (rc == -1 && (errno == ENXIO || errno == EINVAL)) { ++ warn_report("KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) " ++ "error: %d", rc); ++ } ++ ret = (reg == R_EAX) ? bitmask : bitmask >> 32; + } else if (function == 0x80000001 && reg == R_ECX) { + /* + * It's safe to enable TOPOEXT even if it's not returned by +@@ -5054,3 +5075,39 @@ bool kvm_arch_cpu_check_are_resettable(void) + { + return !sev_es_enabled(); + } ++ ++#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025 ++ ++void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask) ++{ ++ KVMState *s = kvm_state; ++ uint64_t supported; ++ ++ mask &= XSTATE_DYNAMIC_MASK; ++ if (!mask) { ++ return; ++ } ++ /* ++ * Just ignore bits that are not in CPUID[EAX=0xD,ECX=0]. ++ * ARCH_REQ_XCOMP_GUEST_PERM would fail, and QEMU has warned ++ * about them already because they are not supported features. ++ */ ++ supported = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX); ++ supported |= (uint64_t)kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EDX) << 32; ++ mask &= supported; ++ ++ while (mask) { ++ int bit = ctz64(mask); ++ int rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit); ++ if (rc) { ++ /* ++ * Older kernel version (<5.17) do not support ++ * ARCH_REQ_XCOMP_GUEST_PERM, but also do not return ++ * any dynamic feature from kvm_arch_get_supported_cpuid. ++ */ ++ warn_report("prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure " ++ "for feature bit %d", bit); ++ } ++ mask &= ~BIT_ULL(bit); ++ } ++} +diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h +index a978509d50..4124912c20 100644 +--- a/target/i386/kvm/kvm_i386.h ++++ b/target/i386/kvm/kvm_i386.h +@@ -52,5 +52,6 @@ bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp); + uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address); + + bool kvm_enable_sgx_provisioning(KVMState *s); ++void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask); + + #endif +-- +2.35.3 + diff --git a/SOURCES/kvm-x86-Support-XFD-and-AMX-xsave-data-migration.patch b/SOURCES/kvm-x86-Support-XFD-and-AMX-xsave-data-migration.patch new file mode 100644 index 0000000..e4846b3 --- /dev/null +++ b/SOURCES/kvm-x86-Support-XFD-and-AMX-xsave-data-migration.patch @@ -0,0 +1,178 @@ +From 90a276ed72deab84f3fdd4b57e9ccfc6514934fb Mon Sep 17 00:00:00 2001 +From: Zeng Guang +Date: Wed, 16 Feb 2022 22:04:33 -0800 +Subject: [PATCH 11/24] x86: Support XFD and AMX xsave data migration + +RH-Author: Paul Lai +RH-MergeRequest: 176: Enable KVM AMX support +RH-Commit: [11/13] 4ff6e5544ffdac4e6d2f568f7f63b937502ca6c5 +RH-Bugzilla: 1916415 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Igor Mammedov +RH-Acked-by: Paolo Bonzini + +XFD(eXtended Feature Disable) allows to enable a +feature on xsave state while preventing specific +user threads from using the feature. + +Support save and restore XFD MSRs if CPUID.D.1.EAX[4] +enumerate to be valid. Likewise migrate the MSRs and +related xsave state necessarily. + +Signed-off-by: Zeng Guang +Signed-off-by: Wei Wang +Signed-off-by: Yang Zhong +Message-Id: <20220217060434.52460-8-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit cdec2b753b487d9e8aab028231c35d87789ea083) +Signed-off-by: Paul Lai +--- + target/i386/cpu.h | 9 +++++++++ + target/i386/kvm/kvm.c | 18 +++++++++++++++++ + target/i386/machine.c | 46 +++++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 73 insertions(+) + +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 14a3501b87..8ab2a4042a 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -505,6 +505,9 @@ typedef enum X86Seg { + + #define MSR_VM_HSAVE_PA 0xc0010117 + ++#define MSR_IA32_XFD 0x000001c4 ++#define MSR_IA32_XFD_ERR 0x000001c5 ++ + #define MSR_IA32_BNDCFGS 0x00000d90 + #define MSR_IA32_XSS 0x00000da0 + #define MSR_IA32_UMWAIT_CONTROL 0xe1 +@@ -870,6 +873,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; + #define CPUID_7_1_EAX_AVX_VNNI (1U << 4) + /* AVX512 BFloat16 Instruction */ + #define CPUID_7_1_EAX_AVX512_BF16 (1U << 5) ++/* XFD Extend Feature Disabled */ ++#define CPUID_D_1_EAX_XFD (1U << 4) + + /* Packets which contain IP payload have LIP values */ + #define CPUID_14_0_ECX_LIP (1U << 31) +@@ -1610,6 +1615,10 @@ typedef struct CPUX86State { + uint64_t msr_rtit_cr3_match; + uint64_t msr_rtit_addrs[MAX_RTIT_ADDRS]; + ++ /* Per-VCPU XFD MSRs */ ++ uint64_t msr_xfd; ++ uint64_t msr_xfd_err; ++ + /* exception/interrupt handling */ + int error_code; + int exception_is_int; +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index d3d476df27..b1128b0e07 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -3219,6 +3219,13 @@ static int kvm_put_msrs(X86CPU *cpu, int level) + env->msr_ia32_sgxlepubkeyhash[3]); + } + ++ if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) { ++ kvm_msr_entry_add(cpu, MSR_IA32_XFD, ++ env->msr_xfd); ++ kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR, ++ env->msr_xfd_err); ++ } ++ + /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see + * kvm_put_msr_feature_control. */ + } +@@ -3571,6 +3578,11 @@ static int kvm_get_msrs(X86CPU *cpu) + kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH3, 0); + } + ++ if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) { ++ kvm_msr_entry_add(cpu, MSR_IA32_XFD, 0); ++ kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR, 0); ++ } ++ + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, cpu->kvm_msr_buf); + if (ret < 0) { + return ret; +@@ -3870,6 +3882,12 @@ static int kvm_get_msrs(X86CPU *cpu) + env->msr_ia32_sgxlepubkeyhash[index - MSR_IA32_SGXLEPUBKEYHASH0] = + msrs[i].data; + break; ++ case MSR_IA32_XFD: ++ env->msr_xfd = msrs[i].data; ++ break; ++ case MSR_IA32_XFD_ERR: ++ env->msr_xfd_err = msrs[i].data; ++ break; + } + } + +diff --git a/target/i386/machine.c b/target/i386/machine.c +index 83c2b91529..3977e9d8f8 100644 +--- a/target/i386/machine.c ++++ b/target/i386/machine.c +@@ -1455,6 +1455,48 @@ static const VMStateDescription vmstate_msr_intel_sgx = { + } + }; + ++static bool xfd_msrs_needed(void *opaque) ++{ ++ X86CPU *cpu = opaque; ++ CPUX86State *env = &cpu->env; ++ ++ return !!(env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD); ++} ++ ++static const VMStateDescription vmstate_msr_xfd = { ++ .name = "cpu/msr_xfd", ++ .version_id = 1, ++ .minimum_version_id = 1, ++ .needed = xfd_msrs_needed, ++ .fields = (VMStateField[]) { ++ VMSTATE_UINT64(env.msr_xfd, X86CPU), ++ VMSTATE_UINT64(env.msr_xfd_err, X86CPU), ++ VMSTATE_END_OF_LIST() ++ } ++}; ++ ++#ifdef TARGET_X86_64 ++static bool amx_xtile_needed(void *opaque) ++{ ++ X86CPU *cpu = opaque; ++ CPUX86State *env = &cpu->env; ++ ++ return !!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE); ++} ++ ++static const VMStateDescription vmstate_amx_xtile = { ++ .name = "cpu/intel_amx_xtile", ++ .version_id = 1, ++ .minimum_version_id = 1, ++ .needed = amx_xtile_needed, ++ .fields = (VMStateField[]) { ++ VMSTATE_UINT8_ARRAY(env.xtilecfg, X86CPU, 64), ++ VMSTATE_UINT8_ARRAY(env.xtiledata, X86CPU, 8192), ++ VMSTATE_END_OF_LIST() ++ } ++}; ++#endif ++ + const VMStateDescription vmstate_x86_cpu = { + .name = "cpu", + .version_id = 12, +@@ -1593,6 +1635,10 @@ const VMStateDescription vmstate_x86_cpu = { + #endif + &vmstate_msr_tsx_ctrl, + &vmstate_msr_intel_sgx, ++ &vmstate_msr_xfd, ++#ifdef TARGET_X86_64 ++ &vmstate_amx_xtile, ++#endif + NULL + } + }; +-- +2.35.3 + diff --git a/SOURCES/kvm-x86-add-support-for-KVM_CAP_XSAVE2-and-AMX-state-mig.patch b/SOURCES/kvm-x86-add-support-for-KVM_CAP_XSAVE2-and-AMX-state-mig.patch new file mode 100644 index 0000000..13566b1 --- /dev/null +++ b/SOURCES/kvm-x86-add-support-for-KVM_CAP_XSAVE2-and-AMX-state-mig.patch @@ -0,0 +1,182 @@ +From 28cf1b55f346a9f56e84fa57921f5a28a99cd59b Mon Sep 17 00:00:00 2001 +From: Jing Liu +Date: Wed, 16 Feb 2022 22:04:32 -0800 +Subject: [PATCH 10/24] x86: add support for KVM_CAP_XSAVE2 and AMX state + migration + +RH-Author: Paul Lai +RH-MergeRequest: 176: Enable KVM AMX support +RH-Commit: [10/13] d584f455ba1ecd8a4a87f3470e6aac24ba9a1f5a +RH-Bugzilla: 1916415 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Igor Mammedov +RH-Acked-by: Paolo Bonzini + +When dynamic xfeatures (e.g. AMX) are used by the guest, the xsave +area would be larger than 4KB. KVM_GET_XSAVE2 and KVM_SET_XSAVE +under KVM_CAP_XSAVE2 works with a xsave buffer larger than 4KB. +Always use the new ioctls under KVM_CAP_XSAVE2 when KVM supports it. + +Signed-off-by: Jing Liu +Signed-off-by: Zeng Guang +Signed-off-by: Wei Wang +Signed-off-by: Yang Zhong +Message-Id: <20220217060434.52460-7-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit e56dd3c70abb31893c61ac834109fa7a38841330) +Signed-off-by: Paul Lai +--- + target/i386/cpu.h | 4 ++++ + target/i386/kvm/kvm.c | 42 ++++++++++++++++++++++++-------------- + target/i386/xsave_helper.c | 28 +++++++++++++++++++++++++ + 3 files changed, 59 insertions(+), 15 deletions(-) + +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index f2bdef9c26..14a3501b87 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -1522,6 +1522,10 @@ typedef struct CPUX86State { + uint64_t opmask_regs[NB_OPMASK_REGS]; + YMMReg zmmh_regs[CPU_NB_REGS]; + ZMMReg hi16_zmm_regs[CPU_NB_REGS]; ++#ifdef TARGET_X86_64 ++ uint8_t xtilecfg[64]; ++ uint8_t xtiledata[8192]; ++#endif + + /* sysenter registers */ + uint32_t sysenter_cs; +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index a64a79d870..d3d476df27 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -123,6 +123,7 @@ static uint32_t num_architectural_pmu_gp_counters; + static uint32_t num_architectural_pmu_fixed_counters; + + static int has_xsave; ++static int has_xsave2; + static int has_xcrs; + static int has_pit_state2; + static int has_exception_payload; +@@ -1585,6 +1586,26 @@ static Error *invtsc_mig_blocker; + + #define KVM_MAX_CPUID_ENTRIES 100 + ++static void kvm_init_xsave(CPUX86State *env) ++{ ++ if (has_xsave2) { ++ env->xsave_buf_len = QEMU_ALIGN_UP(has_xsave2, 4096); ++ } else if (has_xsave) { ++ env->xsave_buf_len = sizeof(struct kvm_xsave); ++ } else { ++ return; ++ } ++ ++ env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len); ++ memset(env->xsave_buf, 0, env->xsave_buf_len); ++ /* ++ * The allocated storage must be large enough for all of the ++ * possible XSAVE state components. ++ */ ++ assert(kvm_arch_get_supported_cpuid(kvm_state, 0xd, 0, R_ECX) <= ++ env->xsave_buf_len); ++} ++ + int kvm_arch_init_vcpu(CPUState *cs) + { + struct { +@@ -1614,6 +1635,8 @@ int kvm_arch_init_vcpu(CPUState *cs) + + cpuid_i = 0; + ++ has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2); ++ + r = kvm_arch_set_tsc_khz(cs); + if (r < 0) { + return r; +@@ -2003,19 +2026,7 @@ int kvm_arch_init_vcpu(CPUState *cs) + if (r) { + goto fail; + } +- +- if (has_xsave) { +- env->xsave_buf_len = sizeof(struct kvm_xsave); +- env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len); +- memset(env->xsave_buf, 0, env->xsave_buf_len); +- +- /* +- * The allocated storage must be large enough for all of the +- * possible XSAVE state components. +- */ +- assert(kvm_arch_get_supported_cpuid(kvm_state, 0xd, 0, R_ECX) +- <= env->xsave_buf_len); +- } ++ kvm_init_xsave(env); + + max_nested_state_len = kvm_max_nested_state_length(); + if (max_nested_state_len > 0) { +@@ -3263,13 +3274,14 @@ static int kvm_get_xsave(X86CPU *cpu) + { + CPUX86State *env = &cpu->env; + void *xsave = env->xsave_buf; +- int ret; ++ int type, ret; + + if (!has_xsave) { + return kvm_get_fpu(cpu); + } + +- ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XSAVE, xsave); ++ type = has_xsave2 ? KVM_GET_XSAVE2 : KVM_GET_XSAVE; ++ ret = kvm_vcpu_ioctl(CPU(cpu), type, xsave); + if (ret < 0) { + return ret; + } +diff --git a/target/i386/xsave_helper.c b/target/i386/xsave_helper.c +index ac61a96344..996e9f3bfe 100644 +--- a/target/i386/xsave_helper.c ++++ b/target/i386/xsave_helper.c +@@ -126,6 +126,20 @@ void x86_cpu_xsave_all_areas(X86CPU *cpu, void *buf, uint32_t buflen) + + memcpy(pkru, &env->pkru, sizeof(env->pkru)); + } ++ ++ e = &x86_ext_save_areas[XSTATE_XTILE_CFG_BIT]; ++ if (e->size && e->offset) { ++ XSaveXTILECFG *tilecfg = buf + e->offset; ++ ++ memcpy(tilecfg, &env->xtilecfg, sizeof(env->xtilecfg)); ++ } ++ ++ e = &x86_ext_save_areas[XSTATE_XTILE_DATA_BIT]; ++ if (e->size && e->offset && buflen >= e->size + e->offset) { ++ XSaveXTILEDATA *tiledata = buf + e->offset; ++ ++ memcpy(tiledata, &env->xtiledata, sizeof(env->xtiledata)); ++ } + #endif + } + +@@ -247,5 +261,19 @@ void x86_cpu_xrstor_all_areas(X86CPU *cpu, const void *buf, uint32_t buflen) + pkru = buf + e->offset; + memcpy(&env->pkru, pkru, sizeof(env->pkru)); + } ++ ++ e = &x86_ext_save_areas[XSTATE_XTILE_CFG_BIT]; ++ if (e->size && e->offset) { ++ const XSaveXTILECFG *tilecfg = buf + e->offset; ++ ++ memcpy(&env->xtilecfg, tilecfg, sizeof(env->xtilecfg)); ++ } ++ ++ e = &x86_ext_save_areas[XSTATE_XTILE_DATA_BIT]; ++ if (e->size && e->offset && buflen >= e->size + e->offset) { ++ const XSaveXTILEDATA *tiledata = buf + e->offset; ++ ++ memcpy(&env->xtiledata, tiledata, sizeof(env->xtiledata)); ++ } + #endif + } +-- +2.35.3 + diff --git a/SOURCES/kvm-x86.conf b/SOURCES/kvm-x86.conf new file mode 100644 index 0000000..3f7842a --- /dev/null +++ b/SOURCES/kvm-x86.conf @@ -0,0 +1,12 @@ +# Setting modprobe kvm_intel/kvm_amd nested = 1 +# only enables Nested Virtualization until the next reboot or +# module reload. Uncomment the option applicable +# to your system below to enable the feature permanently. +# +# User changes in this file are preserved across upgrades. +# +# For Intel +#options kvm_intel nested=1 +# +# For AMD +#options kvm_amd nested=1 diff --git a/SOURCES/kvm.conf b/SOURCES/kvm.conf new file mode 100644 index 0000000..24e60e9 --- /dev/null +++ b/SOURCES/kvm.conf @@ -0,0 +1,3 @@ +# +# User changes in this file are preserved across upgrades. +# diff --git a/SOURCES/qemu-ga.sysconfig b/SOURCES/qemu-ga.sysconfig new file mode 100644 index 0000000..67bad0c --- /dev/null +++ b/SOURCES/qemu-ga.sysconfig @@ -0,0 +1,19 @@ +# This is a systemd environment file, not a shell script. +# It provides settings for "/lib/systemd/system/qemu-guest-agent.service". + +# Comma-separated blacklist of RPCs to disable, or empty list to enable all. +# +# You can get the list of RPC commands using "qemu-ga --blacklist='?'". +# There should be no spaces between commas and commands in the blacklist. +BLACKLIST_RPC=guest-file-open,guest-file-close,guest-file-read,guest-file-write,guest-file-seek,guest-file-flush,guest-exec,guest-exec-status + +# Fsfreeze hook script specification. +# +# FSFREEZE_HOOK_PATHNAME=/dev/null : disables the feature. +# +# FSFREEZE_HOOK_PATHNAME=/path/to/executable : enables the feature with the +# specified binary or shell script. +# +# FSFREEZE_HOOK_PATHNAME= : enables the feature with the +# default value (invoke "qemu-ga --help" to interrogate). +FSFREEZE_HOOK_PATHNAME=/etc/qemu-ga/fsfreeze-hook diff --git a/SOURCES/qemu-guest-agent.service b/SOURCES/qemu-guest-agent.service new file mode 100644 index 0000000..b33e951 --- /dev/null +++ b/SOURCES/qemu-guest-agent.service @@ -0,0 +1,20 @@ +[Unit] +Description=QEMU Guest Agent +BindsTo=dev-virtio\x2dports-org.qemu.guest_agent.0.device +After=dev-virtio\x2dports-org.qemu.guest_agent.0.device +IgnoreOnIsolate=True + +[Service] +UMask=0077 +EnvironmentFile=/etc/sysconfig/qemu-ga +ExecStart=/usr/bin/qemu-ga \ + --method=virtio-serial \ + --path=/dev/virtio-ports/org.qemu.guest_agent.0 \ + --blacklist=${BLACKLIST_RPC} \ + -F${FSFREEZE_HOOK_PATHNAME} +StandardError=syslog +Restart=always +RestartSec=0 + +[Install] +WantedBy=dev-virtio\x2dports-org.qemu.guest_agent.0.device diff --git a/SOURCES/qemu-pr-helper.service b/SOURCES/qemu-pr-helper.service new file mode 100644 index 0000000..a1d27b0 --- /dev/null +++ b/SOURCES/qemu-pr-helper.service @@ -0,0 +1,15 @@ +[Unit] +Description=Persistent Reservation Daemon for QEMU + +[Service] +WorkingDirectory=/tmp +Type=simple +ExecStart=/usr/bin/qemu-pr-helper +PrivateTmp=yes +ProtectSystem=strict +ReadWritePaths=/var/run +RestrictAddressFamilies=AF_UNIX +Restart=always +RestartSec=0 + +[Install] diff --git a/SOURCES/qemu-pr-helper.socket b/SOURCES/qemu-pr-helper.socket new file mode 100644 index 0000000..9d7c3e5 --- /dev/null +++ b/SOURCES/qemu-pr-helper.socket @@ -0,0 +1,9 @@ +[Unit] +Description=Persistent Reservation Daemon for QEMU + +[Socket] +ListenStream=/run/qemu-pr-helper.sock +SocketMode=0600 + +[Install] +WantedBy=multi-user.target diff --git a/SOURCES/tests_data_acpi_pc_SSDT.dimmpxm b/SOURCES/tests_data_acpi_pc_SSDT.dimmpxm new file mode 100644 index 0000000000000000000000000000000000000000..ac55387d57e48adb99eb738a102308688a262fb8 GIT binary patch literal 734 zcmZWnJ&4m_7=F`~Ynryuv=x7-N9J4Xbr8Wp(xkD0CMii;gd8+KN84*Ve0b*|BDB5X z>Ip(G+#(|Bkx_6L++3ZUT*TqP-9>Ov^X72yT;BJ~`@DI+=Xv-{Q?kCK0H|I=7h5K; z^&~}A<8J_HJm`qni~811-)!{lg}fEx2GegTQb7{lkd5ln$_Wwsc7E8beYKIfi&D%}l!?7VZ{Ys}%!${^h_4uRje2a1x9h2(!T$B95k#LXjydBYP-~j8R)y zeGVX>jc_E`Y1Y^9W0&*q2N-9bv?yz3~JX!?c7r^o$`ZR+iCa z(*iQWMsNo+x*P#Tl{P5cNK$f+9f&Eq1PhXd`I8`-M6pnE1p4e`o#xC%V!B literal 0 HcmV?d00001 diff --git a/SOURCES/tests_data_acpi_q35_FACP.slic b/SOURCES/tests_data_acpi_q35_FACP.slic new file mode 100644 index 0000000000000000000000000000000000000000..15986e095cf2db7ee92f7ce113c1d46d54018c62 GIT binary patch literal 244 zcmZ>BbPo8!z`($K$vMa|*h9hB703YsMj&>I2yh0`Fu=&b$*>8?VPXanY!FcvC=KFq z0NDyKaV7>90if7_{{Q?Cl?)6Ip(G+#(|Bkx_6L++3ZUT$~PE#YJ#X^X72yT;BJ~`@DI+=Xv-{Q?|aO04Sy2mYOE7 z_hePm;%@+`Kj=uDhS&(sAyMoNo*B}*3iU(J<~gC zo622d*>rq1L=6TxDBANJ&S~9JRNkIf5ulrLtHB$Mp2;g+UE&qdxIg=uvFD`(HChdw zZ_A=J`;`fA1XCbfnJq?Ni?>#<&$(xxN_T@FJMY}}op3Xl=23F8@cq^E-Sx+kbo^QU z@V55ech^^wx$q_{afNekwPJwJzZ|&X^{2rAPJ)pOVKx{;#L-kzC^AK5WDlj0F=`8| zPl6W{HXR&I;%k-@0G}NCebPtGWPq*?P%oSI!rP}F%MkDxtKLf4-}hbLUR3p-ATW+7 n8`Ncol)c!D(d`6fQx)#t0NB(8Uc12Q|8SbYhnR@A=!SHy6V5FNJ%-&%O8V`fj)#OzB2uGw|c}i-+q^1^MKw z_VHcygYT}d#t$oR)3T7e5LPP=#O$kq8(w=B4B#{vxe%s8(#DyVKR7Unu8X2Ru zu>3T5IpLGR@r15fP5^vzWXMNT3lV0eDx%=gIA z5)JvVL=50yh<*6jkA~!l$Ftx$MH~RHMeidlrF>7Z^kjq~kM_qC;7rSwprofvRkPDv zOSNnuBWwg`Fr&*6Fic^K!HGB}_rwKC(IHrnEXmT^8?<~qjw^HGH nl43HK9Wu^ho1!~0$|p*~zX9-x4PM>g?0-1R;a8+qKg)jr(Nxg^ literal 0 HcmV?d00001 diff --git a/SOURCES/udev-kvm-check.c b/SOURCES/udev-kvm-check.c new file mode 100644 index 0000000..928b9de --- /dev/null +++ b/SOURCES/udev-kvm-check.c @@ -0,0 +1,155 @@ +/* + * udev-kvm-check.c + * + * Copyright 2018 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + */ + +#include +#include +#include +#include +#include + +#define DEFAULT 0 +#define FACILITY "kvm" +#define SYSCONFIG_KVM "/etc/sysconfig/kvm" + +#define COUNT_MSG \ + "%d %s now active" + +int get_threshold_from_file(FILE *fp) +{ + static const char key[] = "THRESHOLD="; + int pos = 0; + int thres; + int ch; + +start: + /* State START - at beginning of line, search for beginning of "THRESHOLD=" + * string. + */ + ch = getc(fp); + if (ch == EOF) { + return DEFAULT; + } + if (isspace(ch)) { + goto start; + } + if (ch == 'T') { + pos = 1; + goto key; + } + goto eol; + +eol: + /* State EOL - loop until end of line */ + ch = getc(fp); + if (ch == EOF) { + return DEFAULT; + } + if (ch == '\n') { + goto start; + } + goto eol; + +key: + /* State KEY - match "THRESHOLD=" string, go to THRESHOLD if found */ + ch = getc(fp); + if (ch == EOF) { + return DEFAULT; + } + if (ch == key[pos]) { + pos++; + if (key[pos] == 0) { + goto threshold; + } else { + goto key; + } + } + goto eol; + +threshold: + /* State THRESHOLD - parse number using fscanf, expect comment or space + * or EOL. + */ + ch = getc(fp); + if (ch == EOF) { + return DEFAULT; + } + if (!isdigit(ch)) { + goto eol; + } + ungetc(ch, fp); + if (fscanf(fp, "%d", &thres) != 1) { + return DEFAULT; + } + ch = getc(fp); + if (ch == '#' || ch == EOF || ch == '\n' || isspace(ch)) { + return thres; + } + goto eol; +} + +int get_threshold() +{ + FILE *fp = fopen(SYSCONFIG_KVM, "r"); + int val; + + if (!fp) { + return DEFAULT; + } + + val = get_threshold_from_file(fp); + fclose (fp); + return val; +} + +const char *guest(int count) +{ + return (count == 1 ? "guest" : "guests"); +} + +void emit_count_message(int count) +{ + openlog(FACILITY, LOG_CONS, LOG_USER); + syslog(LOG_INFO, COUNT_MSG, count, guest(count)); + closelog(); +} + +int main(int argc, char **argv) +{ + int count, threshold; + + if (argc < 3) + exit(1); + + count = atoi(argv[1]); + threshold = get_threshold(); + + if (!strcmp(argv[2], "create")) { + if (threshold == 0 || count > threshold) { + emit_count_message(count); + } + } else { + if (count >= threshold) { + emit_count_message(count); + } + } + + return 0; +} diff --git a/SOURCES/vhost.conf b/SOURCES/vhost.conf new file mode 100644 index 0000000..68d6d7f --- /dev/null +++ b/SOURCES/vhost.conf @@ -0,0 +1,3 @@ +# Increase default vhost memory map limit to match +# KVM's memory slot limit +options vhost max_mem_regions=509 diff --git a/SPECS/qemu-kvm.spec b/SPECS/qemu-kvm.spec new file mode 100644 index 0000000..b2f9cff --- /dev/null +++ b/SPECS/qemu-kvm.spec @@ -0,0 +1,4868 @@ +%global SLOF_gittagdate 20191022 + +%global SLOF_gittagcommit 899d9883 + +%global have_usbredir 1 +%global have_spice 1 +%global have_opengl 1 +%global have_fdt 1 +%global have_gluster 1 +%global have_kvm_setup 0 +%global have_memlock_limits 0 + + + +# Release candidate version tracking +# global rcver rc4 +%if 0%{?rcver:1} +%global rcrel .%{rcver} +%global rcstr -%{rcver} +%endif + +%ifnarch %{ix86} x86_64 + %global have_usbredir 0 +%endif + +%ifnarch s390x + %global have_librdma 1 +%else + %global have_librdma 0 +%endif + +%ifarch %{ix86} + %global kvm_target i386 +%endif +%ifarch x86_64 + %global kvm_target x86_64 +%else + %global have_spice 0 + %global have_opengl 0 + %global have_gluster 0 +%endif +%ifarch %{power64} + %global kvm_target ppc64 + %global have_kvm_setup 1 + %global have_memlock_limits 1 +%endif +%ifarch s390x + %global kvm_target s390x + %global have_kvm_setup 1 +%endif +%ifarch ppc + %global kvm_target ppc +%endif +%ifarch aarch64 + %global kvm_target aarch64 +%endif + +#Versions of various parts: + +%global requires_all_modules \ +%if %{have_spice} \ +Requires: %{name}-ui-spice = %{epoch}:%{version}-%{release} \ +%endif \ +%if %{have_opengl} \ +Requires: %{name}-ui-opengl = %{epoch}:%{version}-%{release} \ +%endif \ +Requires: %{name}-block-curl = %{epoch}:%{version}-%{release} \ +%if %{have_gluster} \ +Requires: %{name}-block-gluster = %{epoch}:%{version}-%{release} \ +%endif \ +%if %{have_usbredir} \ +Requires: %{name}-hw-usbredir = %{epoch}:%{version}-%{release} \ +%endif \ +Requires: %{name}-block-iscsi = %{epoch}:%{version}-%{release} \ +Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release} \ +Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} + +# Macro to properly setup RHEL/RHEV conflict handling +%define rhev_ma_conflicts() \ +Obsoletes: %1-ma <= %{epoch}:%{version}-%{release} \ +Obsoletes: %1-rhev <= %{epoch}:%{version}-%{release} + +Summary: QEMU is a machine emulator and virtualizer +Name: qemu-kvm +Version: 6.2.0 +Release: 32%{?rcrel}%{?dist} +# Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped +Epoch: 15 +License: GPLv2 and GPLv2+ and CC-BY +Group: Development/Tools +URL: http://www.qemu.org/ +ExclusiveArch: x86_64 %{power64} aarch64 s390x + + +Source0: http://wiki.qemu.org/download/qemu-6.2.0.tar.xz + +# KSM control scripts +Source4: ksm.service +Source5: ksm.sysconfig +Source6: ksmctl.c +Source7: ksmtuned.service +Source8: ksmtuned +Source9: ksmtuned.conf +Source10: qemu-guest-agent.service +Source11: 99-qemu-guest-agent.rules +Source12: bridge.conf +Source13: qemu-ga.sysconfig +Source21: kvm-setup +Source22: kvm-setup.service +Source23: 85-kvm.preset +Source26: vhost.conf +Source27: kvm.conf +Source28: 95-kvm-memlock.conf +Source30: kvm-s390x.conf +Source31: kvm-x86.conf +Source32: qemu-pr-helper.service +Source33: qemu-pr-helper.socket +Source34: 81-kvm-rhel.rules +Source35: udev-kvm-check.c +Source36: README.tests +Source37: tests_data_acpi_pc_SSDT.dimmpxm +Source38: tests_data_acpi_q35_FACP.slic +Source39: tests_data_acpi_q35_SSDT.dimmpxm +Source40: tests_data_acpi_virt_SSDT.memhp + +Patch0001: 0001-redhat-Adding-slirp-to-the-exploded-tree.patch +Patch0005: 0005-Initial-redhat-build.patch +Patch0006: 0006-Enable-disable-devices-for-RHEL.patch +Patch0007: 0007-Machine-type-related-general-changes.patch +Patch0008: 0008-Add-aarch64-machine-types.patch +Patch0009: 0009-Add-ppc64-machine-types.patch +Patch0010: 0010-Add-s390x-machine-types.patch +Patch0011: 0011-Add-x86_64-machine-types.patch +Patch0012: 0012-Enable-make-check.patch +Patch0013: 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch +Patch0014: 0014-Add-support-statement-to-help-output.patch +Patch0015: 0015-globally-limit-the-maximum-number-of-CPUs.patch +Patch0016: 0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +Patch0017: 0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +Patch0018: 0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +Patch0019: 0019-compat-Update-hw_compat_rhel_8_5.patch +Patch0020: 0020-redhat-Update-pseries-rhel8.5.0-machine-type.patch +Patch0021: 0021-redhat-virt-rhel8.5.0-Update-machine-type-compatibil.patch +Patch0022: 0022-Fix-virtio-net-pci-vectors-compat.patch +Patch0023: 0023-x86-rhel-machine-types-Add-pc_rhel_8_5_compat.patch +Patch0024: 0024-x86-rhel-machine-types-Wire-compat-into-q35-and-i440.patch +Patch0025: 0025-redhat-Add-s390x-machine-type-compatibility-handling.patch +# For bz#2005325 - Fix CPU Model for new IBM Z Hardware - qemu part +Patch26: kvm-redhat-Add-rhel8.6.0-machine-type-for-s390x.patch +# For bz#2031041 - Add rhel-8.6.0 machine types for RHEL 8.6 [ppc64le] +Patch27: kvm-redhat-Define-pseries-rhel8.6.0-machine-type.patch +# For bz#2031039 - Add rhel-8.6.0 machine types for RHEL 8.6 [aarch64] +Patch28: kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch +# For bz#2031039 - Add rhel-8.6.0 machine types for RHEL 8.6 [aarch64] +Patch29: kvm-hw-arm-virt-Register-its-as-a-class-property.patch +# For bz#2031039 - Add rhel-8.6.0 machine types for RHEL 8.6 [aarch64] +Patch30: kvm-hw-arm-virt-Rename-default_bus_bypass_iommu.patch +# For bz#2031039 - Add rhel-8.6.0 machine types for RHEL 8.6 [aarch64] +Patch31: kvm-hw-arm-virt-Add-8.6-machine-type.patch +# For bz#2031039 - Add rhel-8.6.0 machine types for RHEL 8.6 [aarch64] +Patch32: kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch +# For bz#2029582 - [8.6] machine types: 6.2: Fix prefer_sockets +Patch33: kvm-rhel-machine-types-x86-set-prefer_sockets.patch +# For bz#2036580 - CVE-2021-4158 virt:rhel/qemu-kvm: QEMU: NULL pointer dereference in pci_write() in hw/acpi/pcihp.c [rhel-8] +Patch34: kvm-acpi-validate-hotplug-selector-on-access.patch +# For bz#2031035 - Add rhel-8.6.0 machine types for RHEL 8.6 [x86] +Patch35: kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch +# For bz#2046198 - CVE-2022-0358 virt:av/qemu-kvm: QEMU: virtiofsd: potential privilege escalation via CVE-2018-13405 [rhel-8.6] +Patch36: kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch +# For bz#2033279 - [wrb][qemu-kvm 6.2] The hot-unplugged device can not be hot-plugged back +Patch37: kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch +# For bz#2021778 - Qemu core dump when do full backup during system reset +# For bz#2036178 - Qemu core dumped when do block-stream to a snapshot node on non-enough space storage +Patch38: kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch +# For bz#2021778 - Qemu core dump when do full backup during system reset +# For bz#2036178 - Qemu core dumped when do block-stream to a snapshot node on non-enough space storage +Patch39: kvm-iotests-stream-error-on-reset-New-test.patch +# For bz#2037135 - Booting from Local Snapshot Core Dumped Whose Backing File Is Based on RBD +Patch40: kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch +# For bz#2037135 - Booting from Local Snapshot Core Dumped Whose Backing File Is Based on RBD +Patch41: kvm-block-rbd-workaround-for-ceph-issue-53784.patch +# For bz#1518984 - [Intel 8.6 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support +Patch42: kvm-numa-Enable-numa-for-SGX-EPC-sections.patch +# For bz#1518984 - [Intel 8.6 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support +Patch43: kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch +# For bz#1518984 - [Intel 8.6 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support +Patch44: kvm-doc-Add-the-SGX-numa-description.patch +# For bz#1518984 - [Intel 8.6 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support +Patch45: kvm-Enable-SGX-RH-Only.patch +# For bz#1518984 - [Intel 8.6 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support +Patch46: kvm-qapi-Cleanup-SGX-related-comments-and-restore-sectio.patch +# For bz#2041480 - [incremental_backup] Inconsistent block status reply in qemu-nbd +Patch47: kvm-block-io-Update-BSC-only-if-want_zero-is-true.patch +# For bz#2041480 - [incremental_backup] Inconsistent block status reply in qemu-nbd +Patch48: kvm-iotests-block-status-cache-New-test.patch +# For bz#2035185 - Qemu core dump when start guest with nbd node or do block jobs to nbd node +Patch49: kvm-block-nbd-Delete-reconnect-delay-timer-when-done.patch +# For bz#2035185 - Qemu core dump when start guest with nbd node or do block jobs to nbd node +Patch50: kvm-block-nbd-Assert-there-are-no-timers-when-closed.patch +# For bz#2035185 - Qemu core dump when start guest with nbd node or do block jobs to nbd node +Patch51: kvm-iotests.py-Add-QemuStorageDaemon-class.patch +# For bz#2035185 - Qemu core dump when start guest with nbd node or do block jobs to nbd node +Patch52: kvm-iotests-281-Test-lingering-timers.patch +# For bz#2035185 - Qemu core dump when start guest with nbd node or do block jobs to nbd node +Patch53: kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch +# For bz#2035185 - Qemu core dump when start guest with nbd node or do block jobs to nbd node +Patch54: kvm-iotests-281-Let-NBD-connection-yield-in-iothread.patch +# For bz#2062613 - Revert IBM-specific Ubuntu-compatibility machine type for 8.6-AV GA [rhel-8.7.0] +Patch55: kvm-Revert-redhat-Add-hw_compat_4_2_extra-and-apply-to-u.patch +# For bz#2062613 - Revert IBM-specific Ubuntu-compatibility machine type for 8.6-AV GA [rhel-8.7.0] +Patch56: kvm-Revert-redhat-Enable-FDC-device-for-upstream-machine.patch +# For bz#2062613 - Revert IBM-specific Ubuntu-compatibility machine type for 8.6-AV GA [rhel-8.7.0] +Patch57: kvm-Revert-redhat-Expose-upstream-machines-pc-4.2-and-pc.patch +# For bz#2060843 - [virtual network][vDPA] qemu crash after hot unplug vdpa device [rhel-8.7.0] +Patch58: kvm-hw-virtio-vdpa-Fix-leak-of-host-notifier-memory-regi.patch +# For bz#2062610 - Do operation to disk will hang in the guest of target host after hotplugging and migrating [rhel-8.7.0] +Patch59: kvm-pci-expose-TYPE_XIO3130_DOWNSTREAM-name.patch +# For bz#2062610 - Do operation to disk will hang in the guest of target host after hotplugging and migrating [rhel-8.7.0] +Patch60: kvm-acpi-pcihp-pcie-set-power-on-cap-on-parent-slot.patch +# For bz#2065207 - Win11 (q35+edk2) guest broke after install wsl2 through 'wsl --install -d Ubuntu-20.04' [rhel-8.7.0] +Patch61: kvm-vmxcap-Add-5-level-EPT-bit.patch +# For bz#2065207 - Win11 (q35+edk2) guest broke after install wsl2 through 'wsl --install -d Ubuntu-20.04' [rhel-8.7.0] +Patch62: kvm-i386-Add-Icelake-Server-v6-CPU-model-with-5-level-EP.patch +# For bz#2062611 - Guest can not start with SLIC acpi table [rhel-8.7.0] +Patch63: kvm-acpi-fix-QEMU-crash-when-started-with-SLIC-table.patch +# For bz#2062611 - Guest can not start with SLIC acpi table [rhel-8.7.0] +Patch64: kvm-tests-acpi-whitelist-expected-blobs-before-changing-.patch +# For bz#2062611 - Guest can not start with SLIC acpi table [rhel-8.7.0] +Patch65: kvm-tests-acpi-add-SLIC-table-test.patch +# For bz#2062611 - Guest can not start with SLIC acpi table [rhel-8.7.0] +Patch66: kvm-tests-acpi-SLIC-update-expected-blobs.patch +# For bz#2062611 - Guest can not start with SLIC acpi table [rhel-8.7.0] +Patch67: kvm-tests-acpi-manually-pad-OEM_ID-OEM_TABLE_ID-for-test.patch +# For bz#2062611 - Guest can not start with SLIC acpi table [rhel-8.7.0] +Patch68: kvm-tests-acpi-whitelist-nvdimm-s-SSDT-and-FACP.slic-exp.patch +# For bz#2062611 - Guest can not start with SLIC acpi table [rhel-8.7.0] +Patch69: kvm-acpi-fix-OEM-ID-OEM-Table-ID-padding.patch +# For bz#2062611 - Guest can not start with SLIC acpi table [rhel-8.7.0] +Patch70: kvm-tests-acpi-update-expected-blobs.patch +# For bz#2062611 - Guest can not start with SLIC acpi table [rhel-8.7.0] +Patch71: kvm-tests-acpi-test-short-OEM_ID-OEM_TABLE_ID-values-in-.patch +# For bz#2068202 - RHEL 9.0 guest with vsock device migration failed from RHEL 9.0 > RHEL 8.6 [rhel-8.7.0] +Patch72: kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch +# For bz#2067118 - qemu crash after execute blockdev-reopen with iothread +Patch73: kvm-block-Lock-AioContext-for-drain_end-in-blockdev-reop.patch +# For bz#2067118 - qemu crash after execute blockdev-reopen with iothread +Patch74: kvm-iotests-Test-blockdev-reopen-with-iothreads-and-thro.patch +# For bz#2071070 - s390x/css: fix PMCW invalid mask +Patch75: kvm-s390x-css-fix-PMCW-invalid-mask.patch +# For bz#1999236 - CVE-2021-3750 virt:rhel/qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue leads to use-after-free [rhel-8] +Patch76: kvm-hw-intc-arm_gicv3-Check-for-MEMTX_OK-instead-of-MEMT.patch +# For bz#1999236 - CVE-2021-3750 virt:rhel/qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue leads to use-after-free [rhel-8] +Patch77: kvm-softmmu-physmem-Simplify-flatview_write-and-address_.patch +# For bz#1999236 - CVE-2021-3750 virt:rhel/qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue leads to use-after-free [rhel-8] +Patch78: kvm-softmmu-physmem-Introduce-MemTxAttrs-memory-field-an.patch +# For bz#2040738 - CVE-2021-4207 virt:rhel/qemu-kvm: QEMU: QXL: double fetch in qxl_cursor() can lead to heap buffer overflow [rhel-8] +Patch79: kvm-display-qxl-render-fix-race-condition-in-qxl_cursor-.patch +# For bz#2063262 - CVE-2022-26354 virt:rhel/qemu-kvm: QEMU: vhost-vsock: missing virtqueue detach on error can lead to memory leak [rhel-8] +Patch80: kvm-vhost-vsock-detach-the-virqueue-element-in-case-of-e.patch +# For bz#2043830 - [IBM 8.7 FEAT] KVM: Allow long kernel command lines for QEMU +Patch81: kvm-s390x-ipl-support-extended-kernel-command-line-size.patch +# For bz#2063206 - CVE-2022-26353 virt:rhel/qemu-kvm: QEMU: virtio-net: map leaking on error during receive [rhel-8] +Patch82: kvm-virtio-net-fix-map-leaking-on-error-during-receive.patch +# For bz#1519071 - Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs) +Patch83: kvm-qcow2-Improve-refcount-structure-rebuilding.patch +# For bz#1519071 - Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs) +Patch84: kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch +# For bz#1519071 - Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs) +Patch85: kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch +# For bz#1519071 - Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs) +Patch86: kvm-iotests-108-Fix-when-missing-user_allow_other.patch +# For bz#2065043 - Remove upstream-only devices from the qemu-kvm binary +Patch87: kvm-Revert-redhat-Add-some-devices-for-exporting-upstrea.patch +# For bz#2070417 - Windows guest hangs after updating and restarting from the guest OS [rhel-8.7.0] +Patch88: kvm-target-i386-properly-reset-TSC-on-reset.patch +# For bz#2040734 - CVE-2021-4206 virt:rhel/qemu-kvm: QEMU: QXL: integer overflow in cursor_alloc() can lead to heap buffer overflow [rhel-8.7] +Patch89: kvm-ui-cursor-fix-integer-overflow-in-cursor_alloc-CVE-2.patch +# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions +Patch90: kvm-virtio-gpu-do-not-byteswap-padding.patch +# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions +Patch91: kvm-linux-headers-update-to-5.16-rc1.patch +# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions +Patch92: kvm-linux-headers-Update-headers-to-v5.17-rc1.patch +# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions +Patch93: kvm-linux-headers-include-missing-changes-from-5.17.patch +# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions +Patch94: kvm-x86-Fix-the-64-byte-boundary-enumeration-for-extende.patch +# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions +Patch95: kvm-x86-Add-AMX-XTILECFG-and-XTILEDATA-components.patch +# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions +Patch96: kvm-x86-Grant-AMX-permission-for-guest.patch +# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions +Patch97: kvm-x86-Add-XFD-faulting-bit-for-state-components.patch +# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions +Patch98: kvm-x86-Add-AMX-CPUIDs-enumeration.patch +# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions +Patch99: kvm-x86-add-support-for-KVM_CAP_XSAVE2-and-AMX-state-mig.patch +# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions +Patch100: kvm-x86-Support-XFD-and-AMX-xsave-data-migration.patch +# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions +Patch101: kvm-target-i386-kvm-do-not-access-uninitialized-variable.patch +# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions +Patch102: kvm-KVM-x86-workaround-invalid-CPUID-0xD-9-info-on-some-.patch +# For bz#2069946 - PXE boot crash qemu when using multiqueue vDPA +Patch103: kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch +# For bz#2069946 - PXE boot crash qemu when using multiqueue vDPA +Patch104: kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch +# For bz#2069946 - PXE boot crash qemu when using multiqueue vDPA +Patch105: kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch +# For bz#2069946 - PXE boot crash qemu when using multiqueue vDPA +Patch106: kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch +# For bz#2069946 - PXE boot crash qemu when using multiqueue vDPA +Patch107: kvm-vhost-vdpa-backend-feature-should-set-only-once.patch +# For bz#2069946 - PXE boot crash qemu when using multiqueue vDPA +Patch108: kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch +# For bz#2069946 - PXE boot crash qemu when using multiqueue vDPA +Patch109: kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch +# For bz#2029980 - Failed assertion in IDE emulation with Ceph backend +Patch110: kvm-ide-Increment-BB-in-flight-counter-for-TRIM-BH.patch +# For bz#2072932 - Qemu coredump when refreshing block limits on an actively used iothread block device [rhel.8.7] +Patch111: kvm-block-Make-bdrv_refresh_limits-non-recursive.patch +# For bz#2072932 - Qemu coredump when refreshing block limits on an actively used iothread block device [rhel.8.7] +Patch112: kvm-iotests-Allow-using-QMP-with-the-QSD.patch +# For bz#2072932 - Qemu coredump when refreshing block limits on an actively used iothread block device [rhel.8.7] +Patch113: kvm-iotests-graph-changes-while-io-New-test.patch +# For bz#2097209 - [virtiofs] mount virtiofs failed: SELinux: (dev virtiofs, type virtiofs) getxattr errno 111 +Patch114: kvm-virtiofsd-Fix-breakage-due-to-fuse_init_in-size-chan.patch +# For bz#1951521 - CVE-2021-3507 virt:rhel/qemu-kvm: QEMU: fdc: heap buffer overflow in DMA read data transfers [rhel-8] +Patch115: kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch +# For bz#1951521 - CVE-2021-3507 virt:rhel/qemu-kvm: QEMU: fdc: heap buffer overflow in DMA read data transfers [rhel-8] +Patch116: kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch117: kvm-migration-Never-call-twice-qemu_target_page_size.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch118: kvm-multifd-Rename-used-field-to-num.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch119: kvm-multifd-Add-missing-documentation.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch120: kvm-multifd-The-variable-is-only-used-inside-the-loop.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch121: kvm-multifd-remove-used-parameter-from-send_prepare-meth.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch122: kvm-multifd-remove-used-parameter-from-send_recv_pages-m.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch123: kvm-multifd-Fill-offset-and-block-for-reception.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch124: kvm-multifd-Make-zstd-compression-method-not-use-iovs.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch125: kvm-multifd-Make-zlib-compression-method-not-use-iovs.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch126: kvm-migration-All-this-fields-are-unsigned.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch127: kvm-multifd-Move-iov-from-pages-to-params.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch128: kvm-multifd-Make-zlib-use-iov-s.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch129: kvm-multifd-Make-zstd-use-iov-s.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch130: kvm-multifd-Remove-send_write-method.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch131: kvm-multifd-Use-a-single-writev-on-the-send-side.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch132: kvm-multifd-Use-normal-pages-array-on-the-send-side.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch133: kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch134: kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch135: kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch136: kvm-migration-Add-migrate_use_tls-helper.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch137: kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch138: kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch139: kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch140: kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch141: kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch142: kvm-migration-Change-zero_copy_send-from-migration-param.patch +# For bz#2097652 - The migration port is not released if use it again for recovering postcopy migration +Patch143: kvm-migration-Add-migration_incoming_transport_cleanup.patch +# For bz#2097652 - The migration port is not released if use it again for recovering postcopy migration +Patch144: kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch +# For bz#2098076 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions +Patch145: kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch +# For bz#2098076 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions +Patch146: kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch +# For bz#2098076 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions +Patch147: kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch +# For bz#2098076 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions +Patch148: kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch +# For bz#2098076 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions +Patch149: kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch +# For bz#2098076 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions +Patch150: kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch +# For bz#2098076 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions +Patch151: kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch +# For bz#2098076 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions +Patch152: kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch +# For bz#2098076 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions +Patch153: kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch +# For bz#2105410 - Stalled IO Operations in VM +Patch154: kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch +# For bz#2105410 - Stalled IO Operations in VM +Patch155: kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch +# For bz#2110203 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together +Patch156: kvm-migration-Introduce-ram_transferred_add.patch +# For bz#2110203 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together +Patch157: kvm-migration-Tally-pre-copy-downtime-and-post-copy-byte.patch +# For bz#2110203 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together +Patch158: kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch +# For bz#2110203 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together +Patch159: kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch +# For bz#2110203 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together +Patch160: kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch +# For bz#2110203 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together +Patch161: kvm-migration-Avoid-false-positive-on-non-supported-scen.patch +# For bz#2110203 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together +Patch162: kvm-migration-add-remaining-params-has_-true-in-migratio.patch +# For bz#2110203 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together +Patch163: kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch +# For bz#2112296 - virtio-blk: Can't boot fresh installation from used 512 cluster_size image under certain conditions +Patch164: kvm-pc-bios-s390-ccw-Fix-booting-with-logical-block-size.patch +# For bz#2120279 - Wrong max_sectors_kb and Maximum transfer length on the pass-through device [rhel-8.7] +Patch165: kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch +# For bz#2117149 - Can't run when memory backing with hugepages and backend type memfd +Patch166: kvm-backends-hostmem-Fix-support-of-memory-backend-memfd.patch +# For bz#2125271 - [RHEL8.7] Guests in VMX root operation fail to reboot with QEMU's 'system_reset' command [rhel-8.8.0] +Patch167: kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch +# For bz#2125271 - [RHEL8.7] Guests in VMX root operation fail to reboot with QEMU's 'system_reset' command [rhel-8.8.0] +Patch168: kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch +# For bz#2124757 - RHEL8: skey test in kvm_unit_test got failed +Patch169: kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch +# For bz#2124757 - RHEL8: skey test in kvm_unit_test got failed +Patch170: kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch +# For bz#2018885 - [virtiofs] virtiofsd debug log's timestamp is NULL +Patch171: kvm-virtiofsd-use-g_date_time_get_microsecond-to-get-sub.patch +# For bz#2116302 - RHEL8.6 - virtiofs will not mount fs on secure execution guest +Patch172: kvm-virtio-fix-the-condition-for-iommu_platform-not-supp.patch +# For bz#2116302 - RHEL8.6 - virtiofs will not mount fs on secure execution guest +Patch173: kvm-virtio-fix-feature-negotiation-for-ACCESS_PLATFORM.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch174: kvm-s390x-pci-use-a-reserved-ID-for-the-default-PCI-grou.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch175: kvm-s390x-pci-don-t-use-hard-coded-dma-range-in-reg_ioat.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch176: kvm-s390x-pci-use-the-passthrough-measurement-update-int.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch177: kvm-s390x-pci-add-supported-DT-information-to-clp-respon.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch178: kvm-Update-linux-headers-to-v6.0-rc4.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch179: kvm-s390x-pci-add-routine-to-get-host-function-handle-fr.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch180: kvm-s390x-pci-enable-for-load-store-interpretation.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch181: kvm-s390x-pci-don-t-fence-interpreted-devices-without-MS.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch182: kvm-s390x-pci-enable-adapter-event-notification-for-inte.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch183: kvm-s390x-pci-let-intercept-devices-have-separate-PCI-gr.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch184: kvm-s390x-pci-reflect-proper-maxstbl-for-groups-of-inter.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch185: kvm-s390x-s390-virtio-ccw-Switch-off-zPCI-enhancements-o.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch186: kvm-dump-Use-ERRP_GUARD.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch187: kvm-dump-Remove-the-sh_info-variable.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch188: kvm-dump-Introduce-shdr_num-to-decrease-complexity.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch189: kvm-dump-Remove-the-section-if-when-calculating-the-memo.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch190: kvm-dump-Add-more-offset-variables.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch191: kvm-dump-Introduce-dump_is_64bit-helper-function.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch192: kvm-dump-Consolidate-phdr-note-writes.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch193: kvm-dump-Cleanup-dump_begin-write-functions.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch194: kvm-dump-Consolidate-elf-note-function.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch195: kvm-dump-Replace-opaque-DumpState-pointer-with-a-typed-o.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch196: kvm-dump-Rename-write_elf_loads-to-write_elf_phdr_loads.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch197: kvm-dump-Refactor-dump_iterate-and-introduce-dump_filter.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch198: kvm-dump-Rework-get_start_block.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch199: kvm-dump-Rework-filter-area-variables.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch200: kvm-dump-Rework-dump_calculate_size-function.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch201: kvm-dump-Split-elf-header-functions-into-prepare-and-wri.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch202: kvm-dump-Rename-write_elf-_phdr_note-to-prepare_elf-_phd.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch203: kvm-dump-simplify-a-bit-kdump-get_next_page.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch204: kvm-dump-fix-kdump-to-work-over-non-aligned-blocks.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch205: kvm-dump-Use-a-buffer-for-ELF-section-data-and-headers.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch206: kvm-dump-Write-ELF-section-headers-right-after-ELF-heade.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch207: kvm-dump-Reorder-struct-DumpState.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch208: kvm-dump-Reintroduce-memory_offset-and-section_offset.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch209: kvm-dump-Add-architecture-section-and-section-string-tab.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch210: kvm-s390x-Add-protected-dump-cap.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch211: kvm-s390x-Introduce-PV-query-interface.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch212: kvm-include-elf.h-add-s390x-note-types.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch213: kvm-s390x-Add-KVM-PV-dump-interface.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch214: kvm-s390x-pv-Add-dump-support.patch +# For bz#2129760 - CVE-2022-3165 virt:rhel/qemu-kvm: QEMU: VNC: integer underflow in vnc_client_cut_text_ext leads to CPU exhaustion [rhel-8] +Patch215: kvm-ui-vnc-clipboard-fix-integer-underflow-in-vnc_client.patch +# For bz#2132609 - qemu-kvm: backport some aarch64 fixes +Patch216: kvm-hw-acpi-Add-ospm_status-hook-implementation-for-acpi.patch +# For bz#2132609 - qemu-kvm: backport some aarch64 fixes +Patch217: kvm-target-arm-kvm-Retry-KVM_CREATE_VM-call-if-it-fails-.patch +# For bz#2128225 - [s390x] [RHEL8][s390x-ccw bios] lacking document about parameter loadparm in qemu +Patch218: kvm-docs-system-s390x-Document-the-loadparm-machine-prop.patch +# For bz#2128225 - [s390x] [RHEL8][s390x-ccw bios] lacking document about parameter loadparm in qemu +Patch219: kvm-s390x-Register-TYPE_S390_CCW_MACHINE-properties-as-c.patch +# For bz#2141896 - VMs hung on vnc_clipboard_send +Patch220: kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch +# For bz#2148545 - CVE-2022-4144 virt:rhel/qemu-kvm: QEMU: QXL: qxl_phys2virt unsafe address translation can lead to out-of-bounds read [rhel-8] +Patch221: kvm-hw-display-qxl-Have-qxl_log_command-Return-early-if-.patch +# For bz#2148545 - CVE-2022-4144 virt:rhel/qemu-kvm: QEMU: QXL: qxl_phys2virt unsafe address translation can lead to out-of-bounds read [rhel-8] +Patch222: kvm-hw-display-qxl-Document-qxl_phys2virt.patch +# For bz#2148545 - CVE-2022-4144 virt:rhel/qemu-kvm: QEMU: QXL: qxl_phys2virt unsafe address translation can lead to out-of-bounds read [rhel-8] +Patch223: kvm-hw-display-qxl-Pass-requested-buffer-size-to-qxl_phy.patch +# For bz#2148545 - CVE-2022-4144 virt:rhel/qemu-kvm: QEMU: QXL: qxl_phys2virt unsafe address translation can lead to out-of-bounds read [rhel-8] +Patch224: kvm-hw-display-qxl-Avoid-buffer-overrun-in-qxl_phys2virt.patch +# For bz#2148545 - CVE-2022-4144 virt:rhel/qemu-kvm: QEMU: QXL: qxl_phys2virt unsafe address translation can lead to out-of-bounds read [rhel-8] +Patch225: kvm-hw-display-qxl-Assert-memory-slot-fits-in-preallocat.patch +# For bz#2155448 - RHEL8.8 - KVM: s390: pv: don't allow userspace to set the clock under PV - QEMU part +Patch226: kvm-s390x-tod-kvm-don-t-save-restore-the-TOD-in-PV-guest.patch +# For bz#2125119 - Mirror job with "copy-mode":"write-blocking" that used for storage migration can't converge under heavy I/O +Patch227: kvm-block-mirror-Do-not-wait-for-active-writes.patch +# For bz#2125119 - Mirror job with "copy-mode":"write-blocking" that used for storage migration can't converge under heavy I/O +Patch228: kvm-block-mirror-Drop-mirror_wait_for_any_operation.patch +# For bz#2125119 - Mirror job with "copy-mode":"write-blocking" that used for storage migration can't converge under heavy I/O +Patch229: kvm-block-mirror-Fix-NULL-s-job-in-active-writes.patch +# For bz#2161188 - SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on +Patch230: kvm-accel-introduce-accelerator-blocker-API.patch +# For bz#2161188 - SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on +Patch231: kvm-KVM-keep-track-of-running-ioctls.patch +# For bz#2161188 - SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on +Patch232: kvm-kvm-Atomic-memslot-updates.patch +# For bz#2074205 - while live-migrating many instances concurrently, libvirt sometimes return internal error: migration was active, but no RAM info was set +Patch233: kvm-migration-Read-state-once.patch +# For bz#2163713 - [s390x] VM fails to start with ISM passed through +Patch234: kvm-s390x-pci-RPCIT-second-pass-when-mappings-exhausted.patch +# For bz#2163713 - [s390x] VM fails to start with ISM passed through +Patch235: kvm-s390x-pci-coalesce-unmap-operations.patch +# For bz#2163713 - [s390x] VM fails to start with ISM passed through +Patch236: kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch +# For bz#2163713 - [s390x] VM fails to start with ISM passed through +Patch237: kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch +# For bz#2147617 - qemu-img finishes successfully while having errors in commit or bitmaps operations +Patch238: kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch +# For bz#2147617 - qemu-img finishes successfully while having errors in commit or bitmaps operations +Patch239: kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch +# For bz#2147617 - qemu-img finishes successfully while having errors in commit or bitmaps operations +Patch240: kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch +# For bz#2147617 - qemu-img finishes successfully while having errors in commit or bitmaps operations +Patch241: kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch +# For bz#2137740 - Multifd migration fails under a weak network/socket ordering race +Patch242: kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch +# For bz#2137740 - Multifd migration fails under a weak network/socket ordering race +Patch243: kvm-migration-check-magic-value-for-deciding-the-mapping.patch +# For bz#2168187 - [s390x] qemu-kvm coredumps when SE crashes +Patch244: kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch +# For bz#2168472 - Guest hangs when starting or rebooting +Patch245: kvm-aio_wait_kick-add-missing-memory-barrier.patch +# For bz#2168472 - Guest hangs when starting or rebooting +Patch246: kvm-qatomic-add-smp_mb__before-after_rmw.patch +# For bz#2168472 - Guest hangs when starting or rebooting +Patch247: kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch +# For bz#2168472 - Guest hangs when starting or rebooting +Patch248: kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch +# For bz#2168472 - Guest hangs when starting or rebooting +Patch249: kvm-edu-add-smp_mb__after_rmw.patch +# For bz#2168472 - Guest hangs when starting or rebooting +Patch250: kvm-aio-wait-switch-to-smp_mb__after_rmw.patch +# For bz#2168472 - Guest hangs when starting or rebooting +Patch251: kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch +# For bz#2168472 - Guest hangs when starting or rebooting +Patch252: kvm-physmem-add-missing-memory-barrier.patch +# For bz#2168472 - Guest hangs when starting or rebooting +Patch253: kvm-async-update-documentation-of-the-memory-barriers.patch +# For bz#2168472 - Guest hangs when starting or rebooting +Patch254: kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch +# For bz#2090990 - qemu crash with error scsi_req_unref(SCSIRequest *): Assertion `req->refcount > 0' failed or scsi_dma_complete(void *, int): Assertion `r->req.aiocb != NULL' failed [8.7.0] +Patch255: kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch +# For bz#2090990 - qemu crash with error scsi_req_unref(SCSIRequest *): Assertion `req->refcount > 0' failed or scsi_dma_complete(void *, int): Assertion `r->req.aiocb != NULL' failed [8.7.0] +Patch256: kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch +# For bz#2090990 - qemu crash with error scsi_req_unref(SCSIRequest *): Assertion `req->refcount > 0' failed or scsi_dma_complete(void *, int): Assertion `r->req.aiocb != NULL' failed [8.7.0] +Patch257: kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch + +BuildRequires: wget +BuildRequires: rpm-build +BuildRequires: ninja-build +#BuildRequires: meson >= 0.58.2 +BuildRequires: zlib-devel +BuildRequires: glib2-devel +BuildRequires: which +BuildRequires: gnutls-devel +BuildRequires: cyrus-sasl-devel +BuildRequires: libtool +BuildRequires: libaio-devel +BuildRequires: rsync +BuildRequires: python3-devel +BuildRequires: pciutils-devel +BuildRequires: libiscsi-devel +BuildRequires: ncurses-devel +BuildRequires: libattr-devel +BuildRequires: libusbx-devel >= 1.0.23 +%if %{have_usbredir} +BuildRequires: usbredir-devel >= 0.7.1 +%endif +BuildRequires: texinfo +BuildRequires: python3-sphinx +%if %{have_spice} +BuildRequires: spice-protocol >= 0.12.12 +BuildRequires: spice-server-devel >= 0.12.8 +BuildRequires: libcacard-devel +# For smartcard NSS support +BuildRequires: nss-devel +%endif +BuildRequires: libseccomp-devel >= 2.4.0 +# For network block driver +BuildRequires: libcurl-devel +BuildRequires: libssh-devel +BuildRequires: librados-devel +BuildRequires: librbd-devel +%if %{have_gluster} +# For gluster block driver +BuildRequires: glusterfs-api-devel +BuildRequires: glusterfs-devel +%endif +# We need both because the 'stap' binary is probed for by configure +BuildRequires: systemtap +BuildRequires: systemtap-sdt-devel +# For VNC PNG support +BuildRequires: libpng-devel +# For uuid generation +BuildRequires: libuuid-devel +# For Braille device support +BuildRequires: brlapi-devel +# For test suite +BuildRequires: check-devel +# For virtiofs +BuildRequires: libcap-ng-devel +# Hard requirement for version >= 1.3 +BuildRequires: pixman-devel +# Documentation requirement +BuildRequires: perl-podlators +BuildRequires: texinfo +BuildRequires: python3-sphinx +# For rdma +%if 0%{?have_librdma} +BuildRequires: rdma-core-devel +%endif +%if %{have_fdt} +BuildRequires: libfdt-devel >= 1.6.0 +%endif +# iasl and cpp for acpi generation (not a hard requirement as we can use +# pre-compiled files, but it's better to use this) +%ifarch %{ix86} x86_64 +BuildRequires: iasl +BuildRequires: cpp +%endif +# For compressed guest memory dumps +BuildRequires: lzo-devel snappy-devel +# For NUMA memory binding +%ifnarch s390x +BuildRequires: numactl-devel +%endif +BuildRequires: libgcrypt-devel +# qemu-pr-helper multipath support (requires libudev too) +BuildRequires: device-mapper-multipath-devel +BuildRequires: systemd-devel +# used by qemu-bridge-helper and qemu-pr-helper +BuildRequires: libcap-ng-devel + +BuildRequires: diffutils +%ifarch x86_64 +BuildRequires: libpmem-devel +Requires: libpmem +%endif + +# qemu-keymap +BuildRequires: pkgconfig(xkbcommon) + +# For s390-pgste flag +%ifarch s390x +BuildRequires: binutils >= 2.27-16 +%endif + +%if %{have_opengl} +BuildRequires: pkgconfig(epoxy) +BuildRequires: pkgconfig(libdrm) +BuildRequires: pkgconfig(gbm) +%endif + +BuildRequires: perl-Test-Harness + +Requires: qemu-kvm-core = %{epoch}:%{version}-%{release} +Requires: qemu-kvm-docs = %{epoch}:%{version}-%{release} +%rhev_ma_conflicts qemu-kvm + +%{requires_all_modules} + +%define qemudocdir %{_docdir}/%{name} + +%description +qemu-kvm is an open source virtualizer that provides hardware +emulation for the KVM hypervisor. qemu-kvm acts as a virtual +machine monitor together with the KVM kernel modules, and emulates the +hardware for a full system such as a PC and its associated peripherals. + + +%package -n qemu-kvm-core +Summary: qemu-kvm core components +Requires: %{name}-common = %{epoch}:%{version}-%{release} +Requires: qemu-img = %{epoch}:%{version}-%{release} +%ifarch %{ix86} x86_64 +Requires: edk2-ovmf +%endif +%ifarch aarch64 +Requires: edk2-aarch64 +%endif + +%ifarch %{power64} +Requires: SLOF >= %{SLOF_gittagdate}-1.git%{SLOF_gittagcommit} +%endif +Requires: libseccomp >= 2.4.0 +# For compressed guest memory dumps +Requires: lzo snappy +%if %{have_kvm_setup} +Requires(post): systemd-units +Requires(preun): systemd-units + %ifarch %{power64} +Requires: powerpc-utils + %endif +%endif +Requires: libusbx >= 1.0.23 +%if %{have_fdt} +Requires: libfdt >= 1.6.0 +%endif + +%rhev_ma_conflicts qemu-kvm + +%description -n qemu-kvm-core +qemu-kvm is an open source virtualizer that provides hardware +emulation for the KVM hypervisor. qemu-kvm acts as a virtual +machine monitor together with the KVM kernel modules, and emulates the +hardware for a full system such as a PC and its associated peripherals. + +%package -n qemu-kvm-docs +Summary: qemu-kvm documentation + +%description -n qemu-kvm-docs +qemu-kvm-docs provides documentation files regarding qemu-kvm. + +%package -n qemu-img +Summary: QEMU command line tool for manipulating disk images +Group: Development/Tools + +%rhev_ma_conflicts qemu-img + +%description -n qemu-img +This package provides a command line tool for manipulating disk images. + +%package -n qemu-kvm-common +Summary: QEMU common files needed by all QEMU targets +Group: Development/Tools +Requires(post): /usr/bin/getent +Requires(post): /usr/sbin/groupadd +Requires(post): /usr/sbin/useradd +Requires(post): systemd-units +Requires(preun): systemd-units +Requires(postun): systemd-units +%ifarch %{ix86} x86_64 +Requires: seabios-bin >= 1.10.2-1 +Requires: sgabios-bin +%endif +%ifnarch aarch64 s390x +Requires: seavgabios-bin >= 1.12.0-3 +Requires: ipxe-roms-qemu >= 20170123-1 +%endif + +%rhev_ma_conflicts qemu-kvm-common + +%description -n qemu-kvm-common +qemu-kvm is an open source virtualizer that provides hardware emulation for +the KVM hypervisor. + +This package provides documentation and auxiliary programs used with qemu-kvm. + + +%package -n qemu-guest-agent +Summary: QEMU guest agent +Requires(post): systemd-units +Requires(preun): systemd-units +Requires(postun): systemd-units + +%description -n qemu-guest-agent +qemu-kvm is an open source virtualizer that provides hardware emulation for +the KVM hypervisor. + +This package provides an agent to run inside guests, which communicates +with the host over a virtio-serial channel named "org.qemu.guest_agent.0" + +This package does not need to be installed on the host OS. + +%package tests +Summary: tests for the qemu-kvm package +Requires: %{name} = %{epoch}:%{version}-%{release} + +%define testsdir %{_libdir}/%{name}/tests-src + +%description tests +The qemu-kvm-tests rpm contains tests that can be used to verify +the functionality of the installed qemu-kvm package + +Install this package if you want access to the avocado_qemu +tests, or qemu-iotests. + +%package block-curl +Summary: QEMU CURL block driver +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} + +%description block-curl +This package provides the additional CURL block driver for QEMU. + +Install this package if you want to access remote disks over +http, https, ftp and other transports provided by the CURL library. + + +%if %{have_gluster} +%package block-gluster +Summary: QEMU Gluster block driver +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +%description block-gluster +This package provides the additional Gluster block driver for QEMU. + +Install this package if you want to access remote Gluster storage. +%endif + + +%package block-iscsi +Summary: QEMU iSCSI block driver +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} + +%description block-iscsi +This package provides the additional iSCSI block driver for QEMU. + +Install this package if you want to access iSCSI volumes. + + +%package block-rbd +Summary: QEMU Ceph/RBD block driver +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} + +%description block-rbd +This package provides the additional Ceph/RBD block driver for QEMU. + +Install this package if you want to access remote Ceph volumes +using the rbd protocol. + + +%package block-ssh +Summary: QEMU SSH block driver +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} + +%description block-ssh +This package provides the additional SSH block driver for QEMU. + +Install this package if you want to access remote disks using +the Secure Shell (SSH) protocol. + + +%if %{have_spice} +%package ui-spice +Summary: QEMU spice support +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +%if %{have_opengl} +Requires: %{name}-ui-opengl%{?_isa} = %{epoch}:%{version}-%{release} +%endif + +%description ui-spice +This package provides spice support. +%endif + + +%if %{have_opengl} +%package ui-opengl +Summary: QEMU opengl support +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +Requires: mesa-libGL +Requires: mesa-libEGL +Requires: mesa-dri-drivers + +%description ui-opengl +This package provides opengl support. +%endif + +%if %{have_usbredir} +%package hw-usbredir +Summary: QEMU usbredir support +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +Requires: usbredir >= 0.7.1 + +%description hw-usbredir +This package provides usbredir support. +%endif + + +%prep +%setup -q -n qemu-%{version}%{?rcstr} +# Remove slirp content in scratchbuilds because it's being applyed as a patch +rm -fr slirp +mkdir slirp +%autopatch -p1 + +%global qemu_kvm_build qemu_kvm_build +mkdir -p %{qemu_kvm_build} + +cp -f %{SOURCE37} tests/data/acpi/pc/SSDT.dimmpxm +cp -f %{SOURCE38} tests/data/acpi/q35/FACP.slic +cp -f %{SOURCE39} tests/data/acpi/q35/SSDT.dimmpxm +cp -f %{SOURCE40} tests/data/acpi/virt/SSDT.memhp + +%build +%global buildarch %{kvm_target}-softmmu + +# --build-id option is used for giving info to the debug packages. +buildldflags="VL_LDFLAGS=-Wl,--build-id" + +%global block_drivers_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle + +%if 0%{have_gluster} + %global block_drivers_list %{block_drivers_list},gluster +%endif + + +%define disable_everything \\\ + --disable-alsa \\\ + --disable-attr \\\ + --disable-auth-pam \\\ + --disable-avx2 \\\ + --disable-avx512f \\\ + --disable-bochs \\\ + --disable-bpf \\\ + --disable-brlapi \\\ + --disable-bsd-user \\\ + --disable-bzip2 \\\ + --disable-cap-ng \\\ + --disable-capstone \\\ + --disable-cfi \\\ + --disable-cfi-debug \\\ + --disable-cloop \\\ + --disable-cocoa \\\ + --disable-coreaudio \\\ + --disable-coroutine-pool \\\ + --disable-crypto-afalg \\\ + --disable-curl \\\ + --disable-curses \\\ + --disable-debug-info \\\ + --disable-debug-mutex \\\ + --disable-debug-tcg \\\ + --disable-dmg \\\ + --disable-docs \\\ + --disable-dsound \\\ + --disable-fdt \\\ + --disable-fuse \\\ + --disable-fuse-lseek \\\ + --disable-gcrypt \\\ + --disable-gettext \\\ + --disable-gio \\\ + --disable-glusterfs \\\ + --disable-gnutls \\\ + --disable-gtk \\\ + --disable-guest-agent \\\ + --disable-guest-agent-msi \\\ + --disable-hax \\\ + --disable-hvf \\\ + --disable-iconv \\\ + --disable-jack \\\ + --disable-kvm \\\ + --disable-l2tpv3 \\\ + --disable-libdaxctl \\\ + --disable-libiscsi \\\ + --disable-libnfs \\\ + --disable-libpmem \\\ + --disable-libssh \\\ + --disable-libudev \\\ + --disable-libusb \\\ + --disable-libxml2 \\\ + --disable-linux-aio \\\ + --disable-linux-io-uring \\\ + --disable-linux-user \\\ + --disable-live-block-migration \\\ + --disable-lto \\\ + --disable-lzfse \\\ + --disable-lzo \\\ + --disable-malloc-trim \\\ + --disable-membarrier \\\ + --disable-modules \\\ + --disable-module-upgrades \\\ + --disable-mpath \\\ + --disable-multiprocess \\\ + --disable-netmap \\\ + --disable-nettle \\\ + --disable-numa \\\ + --disable-nvmm \\\ + --disable-opengl \\\ + --disable-oss \\\ + --disable-pa \\\ + --disable-parallels \\\ + --disable-pie \\\ + --disable-pvrdma \\\ + --disable-qcow1 \\\ + --disable-qed \\\ + --disable-qom-cast-debug \\\ + --disable-rbd \\\ + --disable-rdma \\\ + --disable-replication \\\ + --disable-rng-none \\\ + --disable-safe-stack \\\ + --disable-sanitizers \\\ + --disable-sdl \\\ + --disable-sdl-image \\\ + --disable-seccomp \\\ + --disable-selinux \\\ + --disable-slirp-smbd \\\ + --disable-smartcard \\\ + --disable-snappy \\\ + --disable-sparse \\\ + --disable-spice \\\ + --disable-spice-protocol \\\ + --disable-strip \\\ + --disable-system \\\ + --disable-tcg \\\ + --disable-tools \\\ + --disable-tpm \\\ + --disable-u2f \\\ + --disable-usb-redir \\\ + --disable-user \\\ + --disable-vde \\\ + --disable-vdi \\\ + --disable-vhost-crypto \\\ + --disable-vhost-kernel \\\ + --disable-vhost-net \\\ + --disable-vhost-scsi \\\ + --disable-vhost-user \\\ + --disable-vhost-user-blk-server \\\ + --disable-vhost-vdpa \\\ + --disable-vhost-vsock \\\ + --disable-virglrenderer \\\ + --disable-virtfs \\\ + --disable-virtiofsd \\\ + --disable-vnc \\\ + --disable-vnc-jpeg \\\ + --disable-vnc-png \\\ + --disable-vnc-sasl \\\ + --disable-vte \\\ + --disable-vvfat \\\ + --disable-werror \\\ + --disable-whpx \\\ + --disable-xen \\\ + --disable-xen-pci-passthrough \\\ + --disable-xfsctl \\\ + --disable-xkbcommon \\\ + --disable-zstd \\\ + --with-git-submodules=ignore + +pushd %{qemu_kvm_build} +../configure \ + --prefix="%{_prefix}" \ + --libdir="%{_libdir}" \ + --datadir="%{_datadir}" \ + --sysconfdir="%{_sysconfdir}" \ + --interp-prefix=%{_prefix}/qemu-%M \ + --localstatedir="%{_localstatedir}" \ + --docdir="%{_docdir}" \ + --libexecdir="%{_libexecdir}" \ + --extra-ldflags="-Wl,--build-id -Wl,-z,relro -Wl,-z,now" \ + --extra-cflags="%{optflags}" \ + --with-pkgversion="%{name}-%{version}-%{release}" \ + --with-suffix="%{name}" \ + --firmwarepath=%{_prefix}/share/qemu-firmware \ + --meson="git" \ + --target-list="%{buildarch}" \ + --block-drv-rw-whitelist=%{block_drivers_list} \ + --audio-drv-list= \ + --block-drv-ro-whitelist=vmdk,vhdx,vpc,https,ssh \ + --with-coroutine=ucontext \ + --with-git=git \ + --tls-priority=@QEMU,SYSTEM \ + %{disable_everything} \ + --enable-attr \ +%ifarch %{ix86} x86_64 + --enable-avx2 \ +%endif + --enable-cap-ng \ + --enable-capstone=internal \ + --enable-coroutine-pool \ + --enable-curl \ + --enable-debug-info \ + --enable-docs \ +%if 0%{have_fdt} + --enable-fdt=system \ +%endif + --enable-gcrypt \ +%if 0%{have_gluster} + --enable-glusterfs \ +%endif + --enable-gnutls \ + --enable-guest-agent \ + --enable-iconv \ + --enable-kvm \ + --enable-libiscsi \ +%ifarch x86_64 + --enable-libpmem \ +%endif + --enable-libssh \ + --enable-libusb \ + --enable-libudev \ + --enable-linux-aio \ + --enable-lzo \ + --enable-malloc-trim \ + --enable-modules \ + --enable-mpath \ +%ifnarch s390x + --enable-numa \ +%endif +%if 0%{have_opengl} + --enable-opengl \ +%endif + --enable-pie \ + --enable-rbd \ +%if 0%{have_librdma} + --enable-rdma \ +%endif + --enable-seccomp \ + --enable-snappy \ +%if 0%{have_spice} + --enable-smartcard \ + --enable-spice \ + --enable-spice-protocol \ +%endif + --enable-system \ + --enable-tcg \ + --enable-tools \ + --enable-tpm \ + --enable-trace-backend=dtrace \ +%if 0%{have_usbredir} + --enable-usb-redir \ +%endif + --enable-virtiofsd \ + --enable-vhost-kernel \ + --enable-vhost-net \ + --enable-vhost-user \ + --enable-vhost-user-blk-server \ + --enable-vhost-vdpa \ + --enable-vhost-vsock \ + --enable-vnc \ + --enable-vnc-png \ + --enable-vnc-sasl \ + --enable-werror \ + --enable-xkbcommon \ + --without-default-devices \ + --with-devices-%{kvm_target}=%{kvm_target}-rh-devices + + +echo "qemu-kvm config-host.mak contents:" +echo "===" +cat config-host.mak +echo "===" + +make V=1 %{?_smp_mflags} $buildldflags + +# Setup back compat qemu-kvm binary +%{__python3} scripts/tracetool.py --backend dtrace --format stap \ + --group=all --binary %{_libexecdir}/qemu-kvm --probe-prefix qemu.kvm \ + trace/trace-events-all qemu-kvm.stp + +%{__python3} scripts/tracetool.py --backends=dtrace --format=log-stap \ + --group=all --binary %{_libexecdir}/qemu-kvm --probe-prefix qemu.kvm \ + trace/trace-events-all qemu-kvm-log.stp + +%{__python3} scripts/tracetool.py --backend dtrace --format simpletrace-stap \ + --group=all --binary %{_libexecdir}/qemu-kvm --probe-prefix qemu.kvm \ + trace/trace-events-all qemu-kvm-simpletrace.stp + +cp -a %{kvm_target}-softmmu/qemu-system-%{kvm_target} qemu-kvm + +gcc %{SOURCE6} $RPM_OPT_FLAGS $RPM_LD_FLAGS -o ksmctl +gcc %{SOURCE35} $RPM_OPT_FLAGS $RPM_LD_FLAGS -o udev-kvm-check + +%ifarch s390x + # Copy the built new images into place for "make check": + cp pc-bios/s390-ccw/s390-ccw.img pc-bios/s390-ccw/s390-netboot.img pc-bios/ +%endif + +popd + +%install +pushd %{qemu_kvm_build} +%define _udevdir %(pkg-config --variable=udevdir udev) +%define _udevrulesdir %{_udevdir}/rules.d + +install -D -p -m 0644 %{SOURCE4} $RPM_BUILD_ROOT%{_unitdir}/ksm.service +install -D -p -m 0644 %{SOURCE5} $RPM_BUILD_ROOT%{_sysconfdir}/sysconfig/ksm +install -D -p -m 0755 ksmctl $RPM_BUILD_ROOT%{_libexecdir}/ksmctl + +install -D -p -m 0644 %{SOURCE7} $RPM_BUILD_ROOT%{_unitdir}/ksmtuned.service +install -D -p -m 0755 %{SOURCE8} $RPM_BUILD_ROOT%{_sbindir}/ksmtuned +install -D -p -m 0644 %{SOURCE9} $RPM_BUILD_ROOT%{_sysconfdir}/ksmtuned.conf +install -D -p -m 0644 %{SOURCE26} $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/vhost.conf +%ifarch s390x + install -D -p -m 0644 %{SOURCE30} $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/kvm.conf +%else +%ifarch %{ix86} x86_64 + install -D -p -m 0644 %{SOURCE31} $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/kvm.conf +%else + install -D -p -m 0644 %{SOURCE27} $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/kvm.conf +%endif +%endif + +mkdir -p $RPM_BUILD_ROOT%{_bindir}/ +mkdir -p $RPM_BUILD_ROOT%{_udevrulesdir}/ +mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name} + +# Create new directories and put them all under tests-src +mkdir -p $RPM_BUILD_ROOT%{testsdir}/python +mkdir -p $RPM_BUILD_ROOT%{testsdir}/tests +mkdir -p $RPM_BUILD_ROOT%{testsdir}/tests/avocado +mkdir -p $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests +mkdir -p $RPM_BUILD_ROOT%{testsdir}/scripts/qmp + +install -p -m 0755 udev-kvm-check $RPM_BUILD_ROOT%{_udevdir} +install -p -m 0644 %{SOURCE34} $RPM_BUILD_ROOT%{_udevrulesdir} + +install -m 0644 scripts/dump-guest-memory.py \ + $RPM_BUILD_ROOT%{_datadir}/%{name} + +# Install avocado_qemu tests +cp -R tests/avocado/* $RPM_BUILD_ROOT%{testsdir}/tests/avocado/ + +# Install qemu.py and qmp/ scripts required to run avocado_qemu tests +cp -R python/qemu $RPM_BUILD_ROOT%{testsdir}/python +cp -R scripts/qmp/* $RPM_BUILD_ROOT%{testsdir}/scripts/qmp +install -p -m 0644 ../tests/Makefile.include $RPM_BUILD_ROOT%{testsdir}/tests/ + +# Install qemu-iotests +cp -R ../tests/qemu-iotests/* $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/ +cp -ur tests/qemu-iotests/* $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/ +# Avoid ambiguous 'python' interpreter name +find $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/env \(python\|python3\)+%{__python3}+' {} \; +find $RPM_BUILD_ROOT%{testsdir}/scripts/qmp/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/env \(python\|python3\)+%{__python3}+' {} \; +find $RPM_BUILD_ROOT%{testsdir}/scripts/qmp/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/\(python\|python3\)+%{__python3}+' {} \; + +install -p -m 0644 %{SOURCE36} $RPM_BUILD_ROOT%{testsdir}/README + +make DESTDIR=$RPM_BUILD_ROOT \ + sharedir="%{_datadir}/%{name}" \ + datadir="%{_datadir}/%{name}" \ + install + +mkdir -p $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset + +# Move vhost-user JSON files to the standard "qemu" directory +mkdir -p $RPM_BUILD_ROOT%{_datadir}/qemu +mv $RPM_BUILD_ROOT%{_datadir}/%{name}/vhost-user $RPM_BUILD_ROOT%{_datadir}/qemu/ + +# Install qemu-guest-agent service and udev rules +install -m 0644 %{_sourcedir}/qemu-guest-agent.service %{buildroot}%{_unitdir} +install -m 0644 %{_sourcedir}/qemu-ga.sysconfig %{buildroot}%{_sysconfdir}/sysconfig/qemu-ga +install -m 0644 %{_sourcedir}/99-qemu-guest-agent.rules %{buildroot}%{_udevrulesdir} + +# - the fsfreeze hook script: +install -D --preserve-timestamps \ + scripts/qemu-guest-agent/fsfreeze-hook \ + $RPM_BUILD_ROOT%{_sysconfdir}/qemu-ga/fsfreeze-hook +# Workaround for the missing /etc/qemu-kvm/fsfreeze-hook +# Please, do not carry this over to RHEL-9 +mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/qemu-kvm/ +ln -s %{_sysconfdir}/qemu-ga/fsfreeze-hook \ + $RPM_BUILD_ROOT%{_sysconfdir}/qemu-kvm/fsfreeze-hook + +# - the directory for user scripts: +mkdir $RPM_BUILD_ROOT%{_sysconfdir}/qemu-ga/fsfreeze-hook.d + +# - and the fsfreeze script samples: +mkdir --parents $RPM_BUILD_ROOT%{_datadir}/%{name}/qemu-ga/fsfreeze-hook.d/ +install --preserve-timestamps --mode=0644 \ + scripts/qemu-guest-agent/fsfreeze-hook.d/*.sample \ + $RPM_BUILD_ROOT%{_datadir}/%{name}/qemu-ga/fsfreeze-hook.d/ + +# - Install dedicated log directory: +mkdir -p -v $RPM_BUILD_ROOT%{_localstatedir}/log/qemu-ga/ + +mkdir -p $RPM_BUILD_ROOT%{_bindir} +install -c -m 0755 qga/qemu-ga ${RPM_BUILD_ROOT}%{_bindir}/qemu-ga + +mkdir -p $RPM_BUILD_ROOT%{_mandir}/man8 + +install -m 0755 %{kvm_target}-softmmu/qemu-system-%{kvm_target} $RPM_BUILD_ROOT%{_libexecdir}/qemu-kvm +install -m 0644 qemu-kvm.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ +install -m 0644 qemu-kvm-log.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ +install -m 0644 qemu-kvm-simpletrace.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ +install -d -m 0755 "$RPM_BUILD_ROOT%{_datadir}/%{name}/systemtap/script.d" +install -c -m 0644 scripts/systemtap/script.d/qemu_kvm.stp "$RPM_BUILD_ROOT%{_datadir}/%{name}/systemtap/script.d/" +install -d -m 0755 "$RPM_BUILD_ROOT%{_datadir}/%{name}/systemtap/conf.d" +install -c -m 0644 scripts/systemtap/conf.d/qemu_kvm.conf "$RPM_BUILD_ROOT%{_datadir}/%{name}/systemtap/conf.d/" + + +rm $RPM_BUILD_ROOT/%{_datadir}/applications/qemu.desktop +rm $RPM_BUILD_ROOT%{_bindir}/qemu-system-%{kvm_target} +rm $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}.stp +rm $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}-simpletrace.stp +rm $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}-log.stp +rm $RPM_BUILD_ROOT%{_bindir}/elf2dmp + +# Install simpletrace +install -m 0755 scripts/simpletrace.py $RPM_BUILD_ROOT%{_datadir}/%{name}/simpletrace.py +# Avoid ambiguous 'python' interpreter name +mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool +install -m 0644 -t $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool scripts/tracetool/*.py +mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/backend +install -m 0644 -t $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/backend scripts/tracetool/backend/*.py +mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/format +install -m 0644 -t $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/format scripts/tracetool/format/*.py + +mkdir -p $RPM_BUILD_ROOT%{qemudocdir} +install -p -m 0644 -t ${RPM_BUILD_ROOT}%{qemudocdir} ../README.rst ../README.systemtap ../COPYING ../COPYING.LIB ../LICENSE ../docs/interop/qmp-spec.txt + +# Rename man page +pushd ${RPM_BUILD_ROOT}%{_mandir}/man1/ +for fn in qemu.1*; do + mv $fn "qemu-kvm${fn#qemu}" +done +popd +chmod -x ${RPM_BUILD_ROOT}%{_mandir}/man1/* +chmod -x ${RPM_BUILD_ROOT}%{_mandir}/man8/* + +install -D -p -m 0644 ../qemu.sasl $RPM_BUILD_ROOT%{_sysconfdir}/sasl2/%{name}.conf + +# Install keymaps +pushd pc-bios/keymaps +for kmp in *; do + install -m 0644 $kmp ${RPM_BUILD_ROOT}%{_datadir}/%{name}/keymaps/ +done +rm -f ${RPM_BUILD_ROOT}%{_datadir}/%{name}/keymaps/*.stamp +popd + +# Provided by package openbios +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/openbios-ppc +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/openbios-sparc32 +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/openbios-sparc64 +# Provided by package SLOF +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/slof.bin + +# Remove unpackaged files. +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/palcode-clipper +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/petalogix*.dtb +rm -f ${RPM_BUILD_ROOT}%{_datadir}/%{name}/bamboo.dtb +rm -f ${RPM_BUILD_ROOT}%{_datadir}/%{name}/ppc_rom.bin +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/s390-zipl.rom +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/u-boot.e500 +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/qemu_vga.ndrv +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/skiboot.lid +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/qboot.rom + +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/s390-ccw.img +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/s390-netboot.img +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/hppa-firmware.img +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/canyonlands.dtb +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/u-boot-sam460-20100605.bin + +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/firmware +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/edk2-*.fd +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/edk2-licenses.txt + +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv32-sifive_u-fw_jump.bin +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv32-virt-fw_jump.bin +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv32-generic-fw_dynamic.* +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv64-sifive_u-fw_jump.bin +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv64-virt-fw_jump.bin +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv64-generic-fw_dynamic.* +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/qemu-nsis.bmp +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/npcm7xx_bootrom.bin + +rm -rf ${RPM_BUILD_ROOT}%{_libdir}/qemu-kvm/ui-spice-app.so + +# Remove virtfs-proxy-helper files +rm -rf ${RPM_BUILD_ROOT}%{_libexecdir}/virtfs-proxy-helper +rm -rf ${RPM_BUILD_ROOT}%{_mandir}/man1/virtfs-proxy-helper* + +%ifarch s390x + # Use the s390-*.imgs that we've just built, not the pre-built ones + install -m 0644 pc-bios/s390-ccw/s390-ccw.img $RPM_BUILD_ROOT%{_datadir}/%{name}/ + install -m 0644 pc-bios/s390-ccw/s390-netboot.img $RPM_BUILD_ROOT%{_datadir}/%{name}/ +%else + rm -rf ${RPM_BUILD_ROOT}%{_libdir}/qemu-kvm/hw-s390x-virtio-gpu-ccw.so +%endif + +%ifnarch x86_64 + rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/kvmvapic.bin + rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/linuxboot.bin + rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/multiboot.bin + rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/multiboot_dma.bin + rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/pvh.bin +%endif + +# Remove sparc files +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/QEMU,tcx.bin +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/QEMU,cgthree.bin + +# Remove ivshmem example programs +rm -rf ${RPM_BUILD_ROOT}%{_bindir}/ivshmem-client +rm -rf ${RPM_BUILD_ROOT}%{_bindir}/ivshmem-server + +# Remove efi roms +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/efi*.rom + +# Provided by package ipxe +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/pxe*rom +# Provided by package vgabios +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/vgabios*bin +# Provided by package seabios +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/bios*.bin +# Provided by package sgabios +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/sgabios.bin + +# the pxe gpxe images will be symlinks to the images on +# /usr/share/ipxe, as QEMU doesn't know how to look +# for other paths, yet. +pxe_link() { + ln -s ../ipxe.efi/$2.rom %{buildroot}%{_datadir}/%{name}/efi-$1.rom +} + +%ifnarch aarch64 s390x +pxe_link e1000 8086100e +pxe_link ne2k_pci 10ec8029 +pxe_link pcnet 10222000 +pxe_link rtl8139 10ec8139 +pxe_link virtio 1af41000 +pxe_link e1000e 808610d3 +%endif + +rom_link() { + ln -s $1 %{buildroot}%{_datadir}/%{name}/$2 +} + +%ifnarch aarch64 s390x + rom_link ../seavgabios/vgabios-isavga.bin vgabios.bin + rom_link ../seavgabios/vgabios-cirrus.bin vgabios-cirrus.bin + rom_link ../seavgabios/vgabios-qxl.bin vgabios-qxl.bin + rom_link ../seavgabios/vgabios-stdvga.bin vgabios-stdvga.bin + rom_link ../seavgabios/vgabios-vmware.bin vgabios-vmware.bin + rom_link ../seavgabios/vgabios-virtio.bin vgabios-virtio.bin + rom_link ../seavgabios/vgabios-ramfb.bin vgabios-ramfb.bin + rom_link ../seavgabios/vgabios-bochs-display.bin vgabios-bochs-display.bin +%endif +%ifarch x86_64 + rom_link ../seabios/bios.bin bios.bin + rom_link ../seabios/bios-256k.bin bios-256k.bin + rom_link ../sgabios/sgabios.bin sgabios.bin +%endif + +%if 0%{have_kvm_setup} + install -D -p -m 755 %{SOURCE21} $RPM_BUILD_ROOT%{_prefix}/lib/systemd/kvm-setup + install -D -p -m 644 %{SOURCE22} $RPM_BUILD_ROOT%{_unitdir}/kvm-setup.service + install -D -p -m 644 %{SOURCE23} $RPM_BUILD_ROOT%{_presetdir}/85-kvm.preset +%endif + +%if 0%{have_memlock_limits} + install -D -p -m 644 %{SOURCE28} $RPM_BUILD_ROOT%{_sysconfdir}/security/limits.d/95-kvm-memlock.conf +%endif + +# Install rules to use the bridge helper with libvirt's virbr0 +install -D -m 0644 %{SOURCE12} $RPM_BUILD_ROOT%{_sysconfdir}/%{name}/bridge.conf + +# Install qemu-pr-helper service +install -m 0644 %{_sourcedir}/qemu-pr-helper.service %{buildroot}%{_unitdir} +install -m 0644 %{_sourcedir}/qemu-pr-helper.socket %{buildroot}%{_unitdir} + +find $RPM_BUILD_ROOT -name '*.la' -or -name '*.a' | xargs rm -f + +# We need to make the block device modules and other qemu SO files executable +# otherwise RPM won't pick up their dependencies. +chmod +x $RPM_BUILD_ROOT%{_libdir}/qemu-kvm/*.so + +# Remove buildinfo +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/interop/.buildinfo +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/system/.buildinfo +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/tools/.buildinfo +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/user/.buildinfo +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/devel/.buildinfo +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/.buildinfo + +# Remove spec +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/specs + +popd + +%check +pushd %{qemu_kvm_build} +echo "Testing qemu-kvm-build" +export DIFF=diff; make check V=1 +popd + +%post -n qemu-kvm-common +%systemd_post ksm.service +%systemd_post ksmtuned.service + +getent group kvm >/dev/null || groupadd -g 36 -r kvm +getent group qemu >/dev/null || groupadd -g 107 -r qemu +getent passwd qemu >/dev/null || \ +useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ + -c "qemu user" qemu + +# load kvm modules now, so we can make sure no reboot is needed. +# If there's already a kvm module installed, we don't mess with it +%udev_rules_update +sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : + udevadm trigger --subsystem-match=misc --sysname-match=kvm --action=add || : +%if %{have_kvm_setup} + systemctl daemon-reload # Make sure it sees the new presets and unitfile + %systemd_post kvm-setup.service + if systemctl is-enabled kvm-setup.service > /dev/null; then + systemctl start kvm-setup.service + fi +%endif + +%preun -n qemu-kvm-common +%systemd_preun ksm.service +%systemd_preun ksmtuned.service +%if %{have_kvm_setup} +%systemd_preun kvm-setup.service +%endif + +%postun -n qemu-kvm-common +%systemd_postun_with_restart ksm.service +%systemd_postun_with_restart ksmtuned.service + +%post -n qemu-guest-agent +%systemd_post qemu-guest-agent.service +%preun -n qemu-guest-agent +%systemd_preun qemu-guest-agent.service +%postun -n qemu-guest-agent +%systemd_postun_with_restart qemu-guest-agent.service + +%files +# Deliberately empty + +%files -n qemu-kvm-docs +%defattr(-,root,root) +%dir %{qemudocdir} +%doc %{qemudocdir}/genindex.html +%doc %{qemudocdir}/search.html +%doc %{qemudocdir}/objects.inv +%doc %{qemudocdir}/searchindex.js +%doc %{qemudocdir}/README.rst +%doc %{qemudocdir}/COPYING +%doc %{qemudocdir}/COPYING.LIB +%doc %{qemudocdir}/LICENSE +%doc %{qemudocdir}/README.systemtap +%doc %{qemudocdir}/qmp-spec.txt +%doc %{qemudocdir}/interop/* +%doc %{qemudocdir}/index.html +%doc %{qemudocdir}/about/* +%doc %{qemudocdir}/system/* +%doc %{qemudocdir}/tools/* +%doc %{qemudocdir}/user/* +%doc %{qemudocdir}/devel/* +%doc %{qemudocdir}/_static/* + +%files -n qemu-kvm-common +%defattr(-,root,root) +%{_mandir}/man7/qemu-qmp-ref.7* +%{_mandir}/man7/qemu-cpu-models.7* +%{_bindir}/qemu-keymap +%{_bindir}/qemu-pr-helper +%{_bindir}/qemu-edid +%{_bindir}/qemu-trace-stap +%{_unitdir}/qemu-pr-helper.service +%{_unitdir}/qemu-pr-helper.socket +%{_mandir}/man7/qemu-ga-ref.7* +%{_mandir}/man8/qemu-pr-helper.8* +%{_mandir}/man1/virtiofsd.1* + +%dir %{_datadir}/%{name}/ +%{_datadir}/%{name}/keymaps/ +%{_mandir}/man1/%{name}.1* +%{_mandir}/man1/qemu-trace-stap.1* +%{_mandir}/man7/qemu-block-drivers.7* +%attr(4755, -, -) %{_libexecdir}/qemu-bridge-helper +%config(noreplace) %{_sysconfdir}/sasl2/%{name}.conf +%{_unitdir}/ksm.service +%{_libexecdir}/ksmctl +%config(noreplace) %{_sysconfdir}/sysconfig/ksm +%{_unitdir}/ksmtuned.service +%{_sbindir}/ksmtuned +%{_udevdir}/udev-kvm-check +%{_udevrulesdir}/81-kvm-rhel.rules +%ghost %{_sysconfdir}/kvm +%config(noreplace) %{_sysconfdir}/ksmtuned.conf +%dir %{_sysconfdir}/%{name} +%config(noreplace) %{_sysconfdir}/%{name}/bridge.conf +%config(noreplace) %{_sysconfdir}/modprobe.d/vhost.conf +%config(noreplace) %{_sysconfdir}/modprobe.d/kvm.conf +%{_datadir}/%{name}/simpletrace.py* +%{_datadir}/%{name}/tracetool/*.py* +%{_datadir}/%{name}/tracetool/backend/*.py* +%{_datadir}/%{name}/tracetool/format/*.py* + +%ifarch x86_64 + %{_datadir}/%{name}/bios.bin + %{_datadir}/%{name}/bios-256k.bin + %{_datadir}/%{name}/linuxboot.bin + %{_datadir}/%{name}/multiboot.bin + %{_datadir}/%{name}/multiboot_dma.bin + %{_datadir}/%{name}/kvmvapic.bin + %{_datadir}/%{name}/sgabios.bin + %{_datadir}/%{name}/pvh.bin +%endif +%ifarch s390x + %{_datadir}/%{name}/s390-ccw.img + %{_datadir}/%{name}/s390-netboot.img +%endif +%ifnarch aarch64 s390x + %{_datadir}/%{name}/vgabios.bin + %{_datadir}/%{name}/vgabios-cirrus.bin + %{_datadir}/%{name}/vgabios-qxl.bin + %{_datadir}/%{name}/vgabios-stdvga.bin + %{_datadir}/%{name}/vgabios-vmware.bin + %{_datadir}/%{name}/vgabios-virtio.bin + %{_datadir}/%{name}/vgabios-ramfb.bin + %{_datadir}/%{name}/vgabios-bochs-display.bin + %{_datadir}/%{name}/efi-e1000.rom + %{_datadir}/%{name}/efi-e1000e.rom + %{_datadir}/%{name}/efi-virtio.rom + %{_datadir}/%{name}/efi-pcnet.rom + %{_datadir}/%{name}/efi-rtl8139.rom + %{_datadir}/%{name}/efi-ne2k_pci.rom + %{_libdir}/qemu-kvm/hw-display-virtio-vga.so +%endif + %{_libdir}/%{name}/hw-display-virtio-gpu-gl.so +%ifnarch s390x + %{_libdir}/%{name}/hw-display-virtio-gpu-pci-gl.so +%endif +%ifarch x86_64 %{power64} + %{_libdir}/%{name}/hw-display-virtio-vga-gl.so +%endif + %{_libdir}/%{name}/accel-qtest-%{kvm_target}.so +%ifarch x86_64 + %{_libdir}/%{name}/accel-tcg-%{kvm_target}.so +%endif +%{_libdir}/%{name}/hw-usb-host.so +%{_datadir}/icons/* +%{_datadir}/%{name}/linuxboot_dma.bin +%{_datadir}/%{name}/dump-guest-memory.py* +%{_datadir}/%{name}/trace-events-all +%if 0%{have_kvm_setup} + %{_prefix}/lib/systemd/kvm-setup + %{_unitdir}/kvm-setup.service + %{_presetdir}/85-kvm.preset +%endif +%if 0%{have_memlock_limits} + %{_sysconfdir}/security/limits.d/95-kvm-memlock.conf +%endif +%{_libexecdir}/virtiofsd + +# This is the standard location for vhost-user JSON files defined in the +# vhost-user specification for interoperability with other software. Unlike +# most other paths we use it's "qemu" instead of "qemu-kvm". +%{_datadir}/qemu/vhost-user/50-qemu-virtiofsd.json + +%files -n qemu-kvm-core +%defattr(-,root,root) +%{_libexecdir}/qemu-kvm +%{_datadir}/systemtap/tapset/qemu-kvm.stp +%{_datadir}/systemtap/tapset/qemu-kvm-log.stp +%{_datadir}/systemtap/tapset/qemu-kvm-simpletrace.stp +%{_datadir}/%{name}/systemtap/script.d/qemu_kvm.stp +%{_datadir}/%{name}/systemtap/conf.d/qemu_kvm.conf + +%{_libdir}/qemu-kvm/hw-display-virtio-gpu.so +%ifarch s390x + %{_libdir}/qemu-kvm/hw-s390x-virtio-gpu-ccw.so +%else + %{_libdir}/qemu-kvm/hw-display-virtio-gpu-pci.so +%endif + +%files -n qemu-img +%defattr(-,root,root) +%{_bindir}/qemu-img +%{_bindir}/qemu-io +%{_bindir}/qemu-nbd +%{_bindir}/qemu-storage-daemon +%{_mandir}/man1/qemu-img.1* +%{_mandir}/man8/qemu-nbd.8* +%{_mandir}/man1/qemu-storage-daemon.1* +%{_mandir}/man7/qemu-storage-daemon-qmp-ref.7* + +%files -n qemu-guest-agent +%defattr(-,root,root,-) +%doc COPYING README.rst +%{_bindir}/qemu-ga +%{_mandir}/man8/qemu-ga.8* +%{_unitdir}/qemu-guest-agent.service +%{_udevrulesdir}/99-qemu-guest-agent.rules +%config(noreplace) %{_sysconfdir}/sysconfig/qemu-ga +%{_sysconfdir}/qemu-ga +%{_sysconfdir}/qemu-kvm/fsfreeze-hook +%{_datadir}/%{name}/qemu-ga +%dir %{_localstatedir}/log/qemu-ga + +%files tests +%{testsdir} + +%files block-curl +%{_libdir}/qemu-kvm/block-curl.so + +%if %{have_gluster} +%files block-gluster +%{_libdir}/qemu-kvm/block-gluster.so +%endif + +%files block-iscsi +%{_libdir}/qemu-kvm/block-iscsi.so + +%files block-rbd +%{_libdir}/qemu-kvm/block-rbd.so + +%files block-ssh +%{_libdir}/qemu-kvm/block-ssh.so + +%if 0%{have_spice} +%files ui-spice + %{_libdir}/qemu-kvm/hw-usb-smartcard.so + %{_libdir}/qemu-kvm/audio-spice.so + %{_libdir}/qemu-kvm/ui-spice-core.so + %{_libdir}/qemu-kvm/chardev-spice.so +%ifarch x86_64 + %{_libdir}/qemu-kvm/hw-display-qxl.so +%endif +%endif + +%if 0%{have_opengl} +%files ui-opengl + %{_libdir}/qemu-kvm/ui-egl-headless.so + %{_libdir}/qemu-kvm/ui-opengl.so +%endif + +%if %{have_usbredir} +%files hw-usbredir + %{_libdir}/qemu-kvm/hw-usb-redirect.so +%endif + + +%changelog +* Mon Mar 13 2023 Jon Maloy - 6.2.0-32.el8_8 +- kvm-aio_wait_kick-add-missing-memory-barrier.patch [bz#2168472] +- kvm-qatomic-add-smp_mb__before-after_rmw.patch [bz#2168472] +- kvm-qemu-thread-posix-cleanup-fix-document-QemuEvent.patch [bz#2168472] +- kvm-qemu-thread-win32-cleanup-fix-document-QemuEvent.patch [bz#2168472] +- kvm-edu-add-smp_mb__after_rmw.patch [bz#2168472] +- kvm-aio-wait-switch-to-smp_mb__after_rmw.patch [bz#2168472] +- kvm-qemu-coroutine-lock-add-smp_mb__after_rmw.patch [bz#2168472] +- kvm-physmem-add-missing-memory-barrier.patch [bz#2168472] +- kvm-async-update-documentation-of-the-memory-barriers.patch [bz#2168472] +- kvm-async-clarify-usage-of-barriers-in-the-polling-case.patch [bz#2168472] +- kvm-scsi-protect-req-aiocb-with-AioContext-lock.patch [bz#2090990] +- kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch [bz#2090990] +- kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch [bz#2090990] +- Resolves: bz#2168472 + (Guest hangs when starting or rebooting) +- Resolves: bz#2090990 + (qemu crash with error scsi_req_unref(SCSIRequest *): Assertion `req->refcount > 0' failed or scsi_dma_complete(void *, int): Assertion `r->req.aiocb != NULL' failed [8.7.0]) + +* Wed Feb 15 2023 Jon Maloy - 6.2.0-31 +- kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch [bz#2137740] +- kvm-migration-check-magic-value-for-deciding-the-mapping.patch [bz#2137740] +- kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch [bz#2168187] +- Resolves: bz#2137740 + (Multifd migration fails under a weak network/socket ordering race) +- Resolves: bz#2168187 + ([s390x] qemu-kvm coredumps when SE crashes) + +* Mon Feb 13 2023 Jon Maloy - 6.2.0-30 +- kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch [bz#2147617] +- kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch [bz#2147617] +- kvm-qemu-img-bitmap-Report-errors-while-closing-the-imag.patch [bz#2147617] +- kvm-qemu-iotests-Test-qemu-img-bitmap-commit-exit-code-o.patch [bz#2147617] +- Resolves: bz#2147617 + (qemu-img finishes successfully while having errors in commit or bitmaps operations) + +* Fri Jan 27 2023 Jon Maloy - 6.2.0-29 +- kvm-block-mirror-Do-not-wait-for-active-writes.patch [bz#2125119] +- kvm-block-mirror-Drop-mirror_wait_for_any_operation.patch [bz#2125119] +- kvm-block-mirror-Fix-NULL-s-job-in-active-writes.patch [bz#2125119] +- kvm-accel-introduce-accelerator-blocker-API.patch [bz#2161188] +- kvm-KVM-keep-track-of-running-ioctls.patch [bz#2161188] +- kvm-kvm-Atomic-memslot-updates.patch [bz#2161188] +- kvm-migration-Read-state-once.patch [bz#2074205] +- kvm-s390x-pci-RPCIT-second-pass-when-mappings-exhausted.patch [bz#2163713] +- kvm-s390x-pci-coalesce-unmap-operations.patch [bz#2163713] +- kvm-s390x-pci-shrink-DMA-aperture-to-be-bound-by-vfio-DM.patch [bz#2163713] +- kvm-s390x-pci-reset-ISM-passthrough-devices-on-shutdown-.patch [bz#2163713] +- Resolves: bz#2125119 + (Mirror job with "copy-mode":"write-blocking" that used for storage migration can't converge under heavy I/O) +- Resolves: bz#2161188 + (SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on) +- Resolves: bz#2074205 + (while live-migrating many instances concurrently, libvirt sometimes return internal error: migration was active, but no RAM info was set) +- Resolves: bz#2163713 + ([s390x] VM fails to start with ISM passed through) + +* Wed Jan 04 2023 Jon Maloy - 6.2.0-28 +- kvm-s390x-tod-kvm-don-t-save-restore-the-TOD-in-PV-guest.patch [bz#2155448] +- Resolves: bz#2155448 + (RHEL8.8 - KVM: s390: pv: don't allow userspace to set the clock under PV - QEMU part) + +* Thu Dec 08 2022 Jon Maloy - 6.2.0-27 +- kvm-hw-display-qxl-Have-qxl_log_command-Return-early-if-.patch [bz#2148545] +- kvm-hw-display-qxl-Document-qxl_phys2virt.patch [bz#2148545] +- kvm-hw-display-qxl-Pass-requested-buffer-size-to-qxl_phy.patch [bz#2148545] +- kvm-hw-display-qxl-Avoid-buffer-overrun-in-qxl_phys2virt.patch [bz#2148545] +- kvm-hw-display-qxl-Assert-memory-slot-fits-in-preallocat.patch [bz#2148545] +- Resolves: bz#2148545 + (CVE-2022-4144 virt:rhel/qemu-kvm: QEMU: QXL: qxl_phys2virt unsafe address translation can lead to out-of-bounds read [rhel-8]) + +* Wed Nov 23 2022 Jon Maloy - 6.2.0-26 +- kvm-docs-system-s390x-Document-the-loadparm-machine-prop.patch [bz#2128225] +- kvm-s390x-Register-TYPE_S390_CCW_MACHINE-properties-as-c.patch [bz#2128225] +- kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch [bz#2141896] +- Resolves: bz#2128225 + ([s390x] [RHEL8][s390x-ccw bios] lacking document about parameter loadparm in qemu) +- Resolves: bz#2141896 + (VMs hung on vnc_clipboard_send) + +* Wed Nov 16 2022 Jon Maloy - 6.2.0-25 +- kvm-hw-acpi-Add-ospm_status-hook-implementation-for-acpi.patch [bz#2132609] +- kvm-target-arm-kvm-Retry-KVM_CREATE_VM-call-if-it-fails-.patch [bz#2132609] +- Resolves: bz#2132609 + (qemu-kvm: backport some aarch64 fixes) + +* Thu Nov 10 2022 Jon Maloy - 6.2.0-24 +- kvm-s390x-pci-use-a-reserved-ID-for-the-default-PCI-grou.patch [bz#1664378 bz#2043909] +- kvm-s390x-pci-don-t-use-hard-coded-dma-range-in-reg_ioat.patch [bz#1664378 bz#2043909] +- kvm-s390x-pci-use-the-passthrough-measurement-update-int.patch [bz#1664378 bz#2043909] +- kvm-s390x-pci-add-supported-DT-information-to-clp-respon.patch [bz#1664378 bz#2043909] +- kvm-Update-linux-headers-to-v6.0-rc4.patch [bz#1664378 bz#2043909] +- kvm-s390x-pci-add-routine-to-get-host-function-handle-fr.patch [bz#1664378 bz#2043909] +- kvm-s390x-pci-enable-for-load-store-interpretation.patch [bz#1664378 bz#2043909] +- kvm-s390x-pci-don-t-fence-interpreted-devices-without-MS.patch [bz#1664378 bz#2043909] +- kvm-s390x-pci-enable-adapter-event-notification-for-inte.patch [bz#1664378 bz#2043909] +- kvm-s390x-pci-let-intercept-devices-have-separate-PCI-gr.patch [bz#1664378 bz#2043909] +- kvm-s390x-pci-reflect-proper-maxstbl-for-groups-of-inter.patch [bz#1664378 bz#2043909] +- kvm-s390x-s390-virtio-ccw-Switch-off-zPCI-enhancements-o.patch [bz#1664378 bz#2043909] +- kvm-dump-Use-ERRP_GUARD.patch [bz#1664378 bz#2043909] +- kvm-dump-Remove-the-sh_info-variable.patch [bz#1664378 bz#2043909] +- kvm-dump-Introduce-shdr_num-to-decrease-complexity.patch [bz#1664378 bz#2043909] +- kvm-dump-Remove-the-section-if-when-calculating-the-memo.patch [bz#1664378 bz#2043909] +- kvm-dump-Add-more-offset-variables.patch [bz#1664378 bz#2043909] +- kvm-dump-Introduce-dump_is_64bit-helper-function.patch [bz#1664378 bz#2043909] +- kvm-dump-Consolidate-phdr-note-writes.patch [bz#1664378 bz#2043909] +- kvm-dump-Cleanup-dump_begin-write-functions.patch [bz#1664378 bz#2043909] +- kvm-dump-Consolidate-elf-note-function.patch [bz#1664378 bz#2043909] +- kvm-dump-Replace-opaque-DumpState-pointer-with-a-typed-o.patch [bz#1664378 bz#2043909] +- kvm-dump-Rename-write_elf_loads-to-write_elf_phdr_loads.patch [bz#1664378 bz#2043909] +- kvm-dump-Refactor-dump_iterate-and-introduce-dump_filter.patch [bz#1664378 bz#2043909] +- kvm-dump-Rework-get_start_block.patch [bz#1664378 bz#2043909] +- kvm-dump-Rework-filter-area-variables.patch [bz#1664378 bz#2043909] +- kvm-dump-Rework-dump_calculate_size-function.patch [bz#1664378 bz#2043909] +- kvm-dump-Split-elf-header-functions-into-prepare-and-wri.patch [bz#1664378 bz#2043909] +- kvm-dump-Rename-write_elf-_phdr_note-to-prepare_elf-_phd.patch [bz#1664378 bz#2043909] +- kvm-dump-simplify-a-bit-kdump-get_next_page.patch [bz#1664378 bz#2043909] +- kvm-dump-fix-kdump-to-work-over-non-aligned-blocks.patch [bz#1664378 bz#2043909] +- kvm-dump-Use-a-buffer-for-ELF-section-data-and-headers.patch [bz#1664378 bz#2043909] +- kvm-dump-Write-ELF-section-headers-right-after-ELF-heade.patch [bz#1664378 bz#2043909] +- kvm-dump-Reorder-struct-DumpState.patch [bz#1664378 bz#2043909] +- kvm-dump-Reintroduce-memory_offset-and-section_offset.patch [bz#1664378 bz#2043909] +- kvm-dump-Add-architecture-section-and-section-string-tab.patch [bz#1664378 bz#2043909] +- kvm-s390x-Add-protected-dump-cap.patch [bz#1664378 bz#2043909] +- kvm-s390x-Introduce-PV-query-interface.patch [bz#1664378 bz#2043909] +- kvm-include-elf.h-add-s390x-note-types.patch [bz#1664378 bz#2043909] +- kvm-s390x-Add-KVM-PV-dump-interface.patch [bz#1664378 bz#2043909] +- kvm-s390x-pv-Add-dump-support.patch [bz#1664378 bz#2043909] +- kvm-ui-vnc-clipboard-fix-integer-underflow-in-vnc_client.patch [bz#2129760] +- Resolves: bz#1664378 + ([IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part) +- Resolves: bz#2043909 + ([IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part) +- Resolves: bz#2129760 + (CVE-2022-3165 virt:rhel/qemu-kvm: QEMU: VNC: integer underflow in vnc_client_cut_text_ext leads to CPU exhaustion [rhel-8]) + +* Wed Oct 26 2022 Jon Maloy - 6.2.0-23 +- kvm-virtio-fix-the-condition-for-iommu_platform-not-supp.patch [bz#2116302] +- kvm-virtio-fix-feature-negotiation-for-ACCESS_PLATFORM.patch [bz#2116302] +- Resolves: bz#2116302 + (RHEL8.6 - virtiofs will not mount fs on secure execution guest) + +* Wed Oct 05 2022 Jon Maloy - 6.2.0-22 +- kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch [bz#2124757] +- kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch [bz#2124757] +- kvm-virtiofsd-use-g_date_time_get_microsecond-to-get-sub.patch [bz#2018885] +- Resolves: bz#2124757 + (RHEL8: skey test in kvm_unit_test got failed) +- Resolves: bz#2018885 + ([virtiofs] virtiofsd debug log's timestamp is NULL) + +* Thu Sep 29 2022 Jon Maloy - 6.2.0-21 +- kvm-backends-hostmem-Fix-support-of-memory-backend-memfd.patch [bz#2117149] +- kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch [bz#2125271] +- kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch [bz#2125271] +- Resolves: bz#2117149 + (Can't run when memory backing with hugepages and backend type memfd) +- Resolves: bz#2125271 + ([RHEL8.7] Guests in VMX root operation fail to reboot with QEMU's 'system_reset' command [rhel-8.8.0]) + +* Fri Aug 26 2022 Jon Maloy - 6.2.0-20 +- kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch [bz#2120279] +- Resolves: bz#2120279 + (Wrong max_sectors_kb and Maximum transfer length on the pass-through device [rhel-8.7]) + +* Tue Aug 16 2022 Miroslav Rezanina - 6.2.0-19 +- kvm-migration-Introduce-ram_transferred_add.patch [bz#2110203] +- kvm-migration-Tally-pre-copy-downtime-and-post-copy-byte.patch [bz#2110203] +- kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch [bz#2110203] +- kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch [bz#2110203] +- kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch [bz#2110203] +- kvm-migration-Avoid-false-positive-on-non-supported-scen.patch [bz#2110203] +- kvm-migration-add-remaining-params-has_-true-in-migratio.patch [bz#2110203] +- kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch [bz#2110203] +- kvm-pc-bios-s390-ccw-Fix-booting-with-logical-block-size.patch [bz#2112296] +- Resolves: bz#2110203 + (zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together) +- Resolves: bz#2112296 + (virtio-blk: Can't boot fresh installation from used 512 cluster_size image under certain conditions) + +* Tue Jul 19 2022 Camilla Conte - 6.2.0-18 +- kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch [bz#2105410] +- kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch [bz#2105410] +- Resolves: bz#2105410 + (Stalled IO Operations in VM) + +* Tue Jul 12 2022 Camilla Conte - 6.2.0-17 +- kvm-migration-Never-call-twice-qemu_target_page_size.patch [bz#2072049] +- kvm-multifd-Rename-used-field-to-num.patch [bz#2072049] +- kvm-multifd-Add-missing-documentation.patch [bz#2072049] +- kvm-multifd-The-variable-is-only-used-inside-the-loop.patch [bz#2072049] +- kvm-multifd-remove-used-parameter-from-send_prepare-meth.patch [bz#2072049] +- kvm-multifd-remove-used-parameter-from-send_recv_pages-m.patch [bz#2072049] +- kvm-multifd-Fill-offset-and-block-for-reception.patch [bz#2072049] +- kvm-multifd-Make-zstd-compression-method-not-use-iovs.patch [bz#2072049] +- kvm-multifd-Make-zlib-compression-method-not-use-iovs.patch [bz#2072049] +- kvm-migration-All-this-fields-are-unsigned.patch [bz#2072049] +- kvm-multifd-Move-iov-from-pages-to-params.patch [bz#2072049] +- kvm-multifd-Make-zlib-use-iov-s.patch [bz#2072049] +- kvm-multifd-Make-zstd-use-iov-s.patch [bz#2072049] +- kvm-multifd-Remove-send_write-method.patch [bz#2072049] +- kvm-multifd-Use-a-single-writev-on-the-send-side.patch [bz#2072049] +- kvm-multifd-Use-normal-pages-array-on-the-send-side.patch [bz#2072049] +- kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch [bz#2072049] +- kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch [bz#2072049] +- kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch [bz#2072049] +- kvm-migration-Add-migrate_use_tls-helper.patch [bz#2072049] +- kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch [bz#2072049] +- kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch [bz#2072049] +- kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch [bz#2072049] +- kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch [bz#2072049] +- kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch [bz#2072049] +- kvm-migration-Change-zero_copy_send-from-migration-param.patch [bz#2072049] +- kvm-migration-Add-migration_incoming_transport_cleanup.patch [bz#2097652] +- kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch [bz#2097652] +- kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch [bz#2098076] +- kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch [bz#2098076] +- kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch [bz#2098076] +- kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch [bz#2098076] +- kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch [bz#2098076] +- kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch [bz#2098076] +- kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch [bz#2098076] +- kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch [bz#2098076] +- kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch [bz#2098076] +- Resolves: bz#2072049 + (Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8) +- Resolves: bz#2097652 + (The migration port is not released if use it again for recovering postcopy migration) +- Resolves: bz#2098076 + (virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions) + +* Thu Jun 23 2022 Jon Maloy - 6.2.0-16 +- kvm-virtiofsd-Fix-breakage-due-to-fuse_init_in-size-chan.patch [bz#2097209] +- kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch [bz#1951521] +- kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch [bz#1951521] +- Resolves: bz#2097209 + ([virtiofs] mount virtiofs failed: SELinux: (dev virtiofs, type virtiofs) getxattr errno 111) +- Resolves: bz#1951521 + (CVE-2021-3507 virt:rhel/qemu-kvm: QEMU: fdc: heap buffer overflow in DMA read data transfers [rhel-8]) + +* Tue Jun 14 2022 Jon Maloy - 6.2.0-15 +- kvm-virtio-gpu-do-not-byteswap-padding.patch [bz#1916415] +- kvm-linux-headers-update-to-5.16-rc1.patch [bz#1916415] +- kvm-linux-headers-Update-headers-to-v5.17-rc1.patch [bz#1916415] +- kvm-linux-headers-include-missing-changes-from-5.17.patch [bz#1916415] +- kvm-x86-Fix-the-64-byte-boundary-enumeration-for-extende.patch [bz#1916415] +- kvm-x86-Add-AMX-XTILECFG-and-XTILEDATA-components.patch [bz#1916415] +- kvm-x86-Grant-AMX-permission-for-guest.patch [bz#1916415] +- kvm-x86-Add-XFD-faulting-bit-for-state-components.patch [bz#1916415] +- kvm-x86-Add-AMX-CPUIDs-enumeration.patch [bz#1916415] +- kvm-x86-add-support-for-KVM_CAP_XSAVE2-and-AMX-state-mig.patch [bz#1916415] +- kvm-x86-Support-XFD-and-AMX-xsave-data-migration.patch [bz#1916415] +- kvm-target-i386-kvm-do-not-access-uninitialized-variable.patch [bz#1916415] +- kvm-KVM-x86-workaround-invalid-CPUID-0xD-9-info-on-some-.patch [bz#1916415] +- kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch [bz#2069946] +- kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch [bz#2069946] +- kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch [bz#2069946] +- kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch [bz#2069946] +- kvm-vhost-vdpa-backend-feature-should-set-only-once.patch [bz#2069946] +- kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch [bz#2069946] +- kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch [bz#2069946] +- kvm-ide-Increment-BB-in-flight-counter-for-TRIM-BH.patch [bz#2029980] +- kvm-block-Make-bdrv_refresh_limits-non-recursive.patch [bz#2072932] +- kvm-iotests-Allow-using-QMP-with-the-QSD.patch [bz#2072932] +- kvm-iotests-graph-changes-while-io-New-test.patch [bz#2072932] +- Resolves: bz#1916415 + ([Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions) +- Resolves: bz#2069946 + (PXE boot crash qemu when using multiqueue vDPA) +- Resolves: bz#2029980 + (Failed assertion in IDE emulation with Ceph backend) +- Resolves: bz#2072932 + (Qemu coredump when refreshing block limits on an actively used iothread block device [rhel.8.7]) + +* Thu May 19 2022 Jon Maloy - 6.2.0-14 +- kvm-Revert-redhat-Add-some-devices-for-exporting-upstrea.patch [bz#2065043] +- kvm-target-i386-properly-reset-TSC-on-reset.patch [bz#2070417] +- kvm-ui-cursor-fix-integer-overflow-in-cursor_alloc-CVE-2.patch [bz#2040734] +- Resolves: bz#2065043 + (Remove upstream-only devices from the qemu-kvm binary) +- Resolves: bz#2070417 + (Windows guest hangs after updating and restarting from the guest OS [rhel-8.7.0]) +- Resolves: bz#2040734 + (CVE-2021-4206 virt:rhel/qemu-kvm: QEMU: QXL: integer overflow in cursor_alloc() can lead to heap buffer overflow [rhel-8.7]) + +* Tue May 03 2022 Jon Maloy - 6.2.0-13 +- kvm-s390x-ipl-support-extended-kernel-command-line-size.patch [bz#2043830] +- kvm-virtio-net-fix-map-leaking-on-error-during-receive.patch [bz#2063206] +- kvm-qcow2-Improve-refcount-structure-rebuilding.patch [bz#1519071] +- kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch [bz#1519071] +- kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch [bz#1519071] +- kvm-iotests-108-Fix-when-missing-user_allow_other.patch [bz#1519071] +- Resolves: bz#2043830 + ([IBM 8.7 FEAT] KVM: Allow long kernel command lines for QEMU) +- Resolves: bz#2063206 + (CVE-2022-26353 virt:rhel/qemu-kvm: QEMU: virtio-net: map leaking on error during receive [rhel-8]) +- Resolves: bz#1519071 + (Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs)) + +* Thu Apr 21 2022 Jon Maloy - 6.2.0-12 +- kvm-display-qxl-render-fix-race-condition-in-qxl_cursor-.patch [bz#2040738] +- kvm-vhost-vsock-detach-the-virqueue-element-in-case-of-e.patch [bz#2063262] +- Resolves: bz#2040738 + (CVE-2021-4207 virt:rhel/qemu-kvm: QEMU: QXL: double fetch in qxl_cursor() can lead to heap buffer overflow [rhel-8]) +- Resolves: bz#2063262 + (CVE-2022-26354 virt:rhel/qemu-kvm: QEMU: vhost-vsock: missing virtqueue detach on error can lead to memory leak [rhel-8]) + +* Thu Apr 21 2022 Jon Maloy - 6.2.0-11 +- kvm-hw-intc-arm_gicv3-Check-for-MEMTX_OK-instead-of-MEMT.patch [bz#1999236] +- kvm-softmmu-physmem-Simplify-flatview_write-and-address_.patch [bz#1999236] +- kvm-softmmu-physmem-Introduce-MemTxAttrs-memory-field-an.patch [bz#1999236] +- Resolves: bz#1999236 + (CVE-2021-3750 virt:rhel/qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue leads to use-after-free [rhel-8]) + +* Thu Apr 21 2022 Jon Maloy - 6.2.0-10 +- kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch [bz#2068202] +- kvm-block-Lock-AioContext-for-drain_end-in-blockdev-reop.patch [bz#2067118] +- kvm-iotests-Test-blockdev-reopen-with-iothreads-and-thro.patch [bz#2067118] +- kvm-s390x-css-fix-PMCW-invalid-mask.patch [bz#2071070] +- kvm-Set-permission-on-installing-files.patch [bz#2072377] +- Resolves: bz#2068202 + (RHEL 9.0 guest with vsock device migration failed from RHEL 9.0 > RHEL 8.6 [rhel-8.7.0]) +- Resolves: bz#2067118 + (qemu crash after execute blockdev-reopen with iothread) +- Resolves: bz#2071070 + (s390x/css: fix PMCW invalid mask) +- Resolves: bz#2072377 + (Fix build warnings that occur when installing the keymap files) + +* Wed Apr 06 2022 Jon Maloy - 6.2.0-9 +- kvm-Revert-redhat-Add-hw_compat_4_2_extra-and-apply-to-u.patch [bz#2062613] +- kvm-Revert-redhat-Enable-FDC-device-for-upstream-machine.patch [bz#2062613] +- kvm-Revert-redhat-Expose-upstream-machines-pc-4.2-and-pc.patch [bz#2062613] +- kvm-hw-virtio-vdpa-Fix-leak-of-host-notifier-memory-regi.patch [bz#2060843] +- kvm-pci-expose-TYPE_XIO3130_DOWNSTREAM-name.patch [bz#2062610] +- kvm-acpi-pcihp-pcie-set-power-on-cap-on-parent-slot.patch [bz#2062610] +- kvm-vmxcap-Add-5-level-EPT-bit.patch [bz#2065207] +- kvm-i386-Add-Icelake-Server-v6-CPU-model-with-5-level-EP.patch [bz#2065207] +- kvm-acpi-fix-QEMU-crash-when-started-with-SLIC-table.patch [bz#2062611] +- kvm-tests-acpi-whitelist-expected-blobs-before-changing-.patch [bz#2062611] +- kvm-tests-acpi-add-SLIC-table-test.patch [bz#2062611] +- kvm-tests-acpi-SLIC-update-expected-blobs.patch [bz#2062611] +- kvm-tests-acpi-manually-pad-OEM_ID-OEM_TABLE_ID-for-test.patch [bz#2062611] +- kvm-tests-acpi-whitelist-nvdimm-s-SSDT-and-FACP.slic-exp.patch [bz#2062611] +- kvm-acpi-fix-OEM-ID-OEM-Table-ID-padding.patch [bz#2062611] +- kvm-tests-acpi-update-expected-blobs.patch [bz#2062611] +- kvm-tests-acpi-test-short-OEM_ID-OEM_TABLE_ID-values-in-.patch [bz#2062611] +- kvm-rhel-workaround-for-lack-of-binary-patches-in-SRPM.patch [bz#2062611] +- Resolves: bz#2062613 + (Revert IBM-specific Ubuntu-compatibility machine type for 8.6-AV GA [rhel-8.7.0]) +- Resolves: bz#2060843 + ([virtual network][vDPA] qemu crash after hot unplug vdpa device [rhel-8.7.0]) +- Resolves: bz#2062610 + (Do operation to disk will hang in the guest of target host after hotplugging and migrating [rhel-8.7.0]) +- Resolves: bz#2065207 + (Win11 (q35+edk2) guest broke after install wsl2 through 'wsl --install -d Ubuntu-20.04' [rhel-8.7.0]) +- Resolves: bz#2062611 + (Guest can not start with SLIC acpi table [rhel-8.7.0]) + +* Tue Feb 22 2022 Jon Maloy - 6.2.0-8 +- kvm-block-nbd-Delete-reconnect-delay-timer-when-done.patch [bz#2035185] +- kvm-block-nbd-Assert-there-are-no-timers-when-closed.patch [bz#2035185] +- kvm-iotests.py-Add-QemuStorageDaemon-class.patch [bz#2035185] +- kvm-iotests-281-Test-lingering-timers.patch [bz#2035185] +- kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch [bz#2035185] +- kvm-iotests-281-Let-NBD-connection-yield-in-iothread.patch [bz#2035185] +- Resolves: bz#2035185 + (Qemu core dump when start guest with nbd node or do block jobs to nbd node) + +* Tue Feb 15 2022 Jon Maloy - 6.2.0-7 +- kvm-numa-Enable-numa-for-SGX-EPC-sections.patch [bz#1518984] +- kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch [bz#1518984] +- kvm-doc-Add-the-SGX-numa-description.patch [bz#1518984] +- kvm-Enable-SGX-RH-Only.patch [bz#1518984] +- kvm-qapi-Cleanup-SGX-related-comments-and-restore-sectio.patch [bz#1518984] +- kvm-block-io-Update-BSC-only-if-want_zero-is-true.patch [bz#2041480] +- kvm-iotests-block-status-cache-New-test.patch [bz#2041480] +- Resolves: bz#1518984 + ([Intel 8.6 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support) +- Resolves: bz#2041480 + ([incremental_backup] Inconsistent block status reply in qemu-nbd) + +* Tue Feb 08 2022 Jon Maloy - 6.2.0-6 +- kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch [bz#2046198] +- kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch [bz#2033279] +- kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch [bz#2021778 bz#2036178] +- kvm-iotests-stream-error-on-reset-New-test.patch [bz#2021778 bz#2036178] +- kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch [bz#2037135] +- kvm-block-rbd-workaround-for-ceph-issue-53784.patch [bz#2037135] +- Resolves: bz#2046198 + (CVE-2022-0358 virt:av/qemu-kvm: QEMU: virtiofsd: potential privilege escalation via CVE-2018-13405 [rhel-8.6]) +- Resolves: bz#2033279 + ([wrb][qemu-kvm 6.2] The hot-unplugged device can not be hot-plugged back) +- Resolves: bz#2021778 + (Qemu core dump when do full backup during system reset) +- Resolves: bz#2036178 + (Qemu core dumped when do block-stream to a snapshot node on non-enough space storage) +- Resolves: bz#2037135 + (Booting from Local Snapshot Core Dumped Whose Backing File Is Based on RBD) + +* Tue Jan 25 2022 Jon Maloy - 6.2.0-5 +- kvm-acpi-validate-hotplug-selector-on-access.patch [bz#2036580] +- kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch [bz#2031035] +- Resolves: bz#2036580 + (CVE-2021-4158 virt:rhel/qemu-kvm: QEMU: NULL pointer dereference in pci_write() in hw/acpi/pcihp.c [rhel-8]) +- Resolves: bz#2031035 + (Add rhel-8.6.0 machine types for RHEL 8.6 [x86]) + +* Mon Jan 17 2022 Jon Maloy - 6.2.0-4 +- kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch [bz#2031039] +- kvm-hw-arm-virt-Register-its-as-a-class-property.patch [bz#2031039] +- kvm-hw-arm-virt-Rename-default_bus_bypass_iommu.patch [bz#2031039] +- kvm-hw-arm-virt-Add-8.6-machine-type.patch [bz#2031039] +- kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch [bz#2031039] +- kvm-rhel-machine-types-x86-set-prefer_sockets.patch [bz#2029582] +- Resolves: bz#2031039 + (Add rhel-8.6.0 machine types for RHEL 8.6 [aarch64]) +- Resolves: bz#2029582 + ([8.6] machine types: 6.2: Fix prefer_sockets) + +* Mon Jan 03 2022 Jon Maloy - 6.2.0-2 +- kvm-redhat-Add-rhel8.6.0-machine-type-for-s390x.patch [bz#2005325] +- kvm-redhat-Define-pseries-rhel8.6.0-machine-type.patch [bz#2031041] +- Resolves: bz#2005325 + (Fix CPU Model for new IBM Z Hardware - qemu part) +- Resolves: bz#2031041 + (Add rhel-8.6.0 machine types for RHEL 8.6 [ppc64le]) + +* Thu Dec 16 2021 Jon Maloy - 6.2.0-1.el8 +- Rebase to qemu-kvm 6.2.0 +- Resolves bz#2027716 + +* Mon Nov 22 2021 Jon Maloy - 6.1.0-5 +- kvm-e1000-fix-tx-re-entrancy-problem.patch [bz#1930092] +- kvm-hw-scsi-scsi-disk-MODE_PAGE_ALLS-not-allowed-in-MODE.patch [bz#2020720] +- Resolves: bz#1930092 + (CVE-2021-20257 virt:rhel/qemu-kvm: QEMU: net: e1000: infinite loop while processing transmit descriptors [rhel-8.5.0]) +- Resolves: bz#2020720 + (CVE-2021-3930 virt:rhel/qemu-kvm: QEMU: off-by-one error in mode_sense_page() in hw/scsi/scsi-disk.c [rhel-8]) + +* Thu Oct 21 2021 Jon Maloy - 6.1.0-4 +- kvm-spec-Remove-qemu-kiwi-build.patch [bz#2002694] +- kvm-hw-arm-virt-Add-hw_compat_rhel_8_5-to-8.5-machine-ty.patch [bz#1998947] +- Resolves: bz#2002694 + (remove qemu-kiwi rpm from qemu-kvm sources in rhel-8.6) +- Resolves: bz#1998947 + (Add machine type compatibility update for 6.1 rebase [aarch64]) + +* Tue Oct 12 2021 Jon Maloy - 6.1.0-3 +- kvm-virtio-net-fix-use-after-unmap-free-for-sg.patch [bz#1999221] +- Resolves: bz#1999221 + (CVE-2021-3748 virt:rhel/qemu-kvm: QEMU: virtio-net: heap use-after-free in virtio_net_receive_rcu [rhel-8]) + +* Fri Oct 01 2021 Jon Maloy - 6.1.0-2 +- kvm-qxl-fix-pre-save-logic.patch [bz#2002907] +- kvm-redhat-Define-hw_compat_rhel_8_5.patch [bz#1998949] +- kvm-redhat-Update-pseries-rhel8.5.0.patch [bz#1998949] +- kvm-redhat-Add-s390x-machine-type-compatibility-update-f.patch [bz#1998950] +- Resolves: bz#2002907 + (Unexpectedly failed when managedsave the guest which has qxl video device) +- Resolves: bz#1998949 + (Add machine type compatibility update for 6.1 rebase [ppc64le]) +- Resolves: bz#1998950 + (Add machine type compatibility update for 6.1 rebase [s390x]) + +* Wed Aug 25 2021 Danilo Cesar Lemes de Paula - 6.0.0-29.el8 +- kvm-file-posix-Cap-max_iov-at-IOV_MAX.patch [bz#1994494] +- kvm-migration-Move-yank-outside-qemu_start_incoming_migr.patch [bz#1974366] +- Resolves: bz#1994494 + (VM remains in paused state when trying to write on a resized disk resides on iscsi) +- Resolves: bz#1974366 + (Fail to set migrate incoming for 2nd time after the first time failed) + +* Wed Aug 18 2021 Danilo Cesar Lemes de Paula - 6.0.0-28.el8 +- kvm-iotests-Improve-and-rename-test-291-to-qemu-img-bitm.patch [bz#1946084] +- kvm-qemu-img-Fail-fast-on-convert-bitmaps-with-inconsist.patch [bz#1946084] +- kvm-qemu-img-Add-skip-broken-bitmaps-for-convert-bitmaps.patch [bz#1946084] +- kvm-audio-Never-send-migration-section.patch [bz#1991671] +- Resolves: bz#1946084 + (qemu-img convert --bitmaps fail if a bitmap is inconsistent) +- Resolves: bz#1991671 + (vmstate differs between -audiodev and QEMU_AUDIO_DRV when no sound frontends devs present.) + +* Wed Aug 04 2021 Miroslav Rezanina - 6.0.0-27 +- kvm-migration-move-wait-unplug-loop-to-its-own-function.patch [bz#1976852] +- kvm-migration-failover-continue-to-wait-card-unplug-on-e.patch [bz#1976852] +- kvm-aarch64-Add-USB-storage-devices.patch [bz#1974579] +- Resolves: bz#1976852 + ([failover vf migration] The failover vf will be unregistered if canceling the migration whose status is "wait-unplug") +- Resolves: bz#1974579 + (It's not possible to start installation from a virtual USB device on aarch64) + +* Thu Jul 29 2021 Miroslav Rezanina - 6.0.0-26 +- kvm-acpi-pc-revert-back-to-v5.2-PCI-slot-enumeration.patch [bz#1977798] +- kvm-migration-failover-reset-partially_hotplugged.patch [bz#1787194] +- kvm-hmp-Fix-loadvm-to-resume-the-VM-on-success-instead-o.patch [bz#1959676] +- kvm-migration-Move-bitmap_mutex-out-of-migration_bitmap_.patch [bz#1959729] +- kvm-i386-cpu-Expose-AVX_VNNI-instruction-to-guest.patch [bz#1924822] +- kvm-ratelimit-protect-with-a-mutex.patch [bz#1838221] +- kvm-Update-Linux-headers-to-5.13-rc4.patch [bz#1838221] +- kvm-i386-Add-ratelimit-for-bus-locks-acquired-in-guest.patch [bz#1838221] +- kvm-iothread-generalize-iothread_set_param-iothread_get_.patch [bz#1930286] +- kvm-iothread-add-aio-max-batch-parameter.patch [bz#1930286] +- kvm-linux-aio-limit-the-batch-size-using-aio-max-batch-p.patch [bz#1930286] +- kvm-block-nvme-Fix-VFIO_MAP_DMA-failed-No-space-left-on-.patch [bz#1848881] +- Resolves: bz#1977798 + (RHEL8.5 guest network interface name changed after upgrade to qemu-6.0) +- Resolves: bz#1787194 + (After canceling the migration of a vm with VF which enables failover, using "migrate -d tcp:invalid uri" to re-migrating the vm will cause the VF in vm to be hot-unplug.) +- Resolves: bz#1959676 + (guest status is paused after loadvm on rhel8.5.0) +- Resolves: bz#1959729 + (SAP/3TB VM migration slowness [idle db]) +- Resolves: bz#1924822 + ([Intel 8.5 FEAT] qemu-kvm AVX2 VNNI - Fast Train) +- Resolves: bz#1838221 + ([Intel 8.5 FEAT] qemu-kvm Bus Lock VM Exit - Fast Train) +- Resolves: bz#1930286 + (randread and randrw regression with virtio-blk multi-queue) +- Resolves: bz#1848881 + (nvme:// block driver can exhaust IOMMU DMAs, hanging the VM, possible data loss) + +* Tue Jul 20 2021 Danilo Cesar Lemes de Paula - 6.0.0-25.el8 +- kvm-s390x-cpumodel-add-3931-and-3932.patch [bz#1976171] +- kvm-file-posix-fix-max_iov-for-dev-sg-devices.patch [bz#1943653] +- kvm-scsi-generic-pass-max_segments-via-max_iov-field-in-.patch [bz#1943653] +- kvm-osdep-provide-ROUND_DOWN-macro.patch [bz#1943653] +- kvm-block-backend-align-max_transfer-to-request-alignmen.patch [bz#1943653] +- kvm-block-add-max_hw_transfer-to-BlockLimits.patch [bz#1943653] +- kvm-file-posix-try-BLKSECTGET-on-block-devices-too-do-no.patch [bz#1943653] +- Resolves: bz#1976171 + ([IBM 8.5 FEAT] CPU Model for new IBM Z Hardware - qemu part) +- Resolves: bz#1943653 + (RHV VM pauses due to 'qemu-kvm' getting EINVAL on i/o to a direct lun with scsi passthrough enabled) + +* Fri Jul 16 2021 Danilo Cesar Lemes de Paula - 6.0.0-24.el8 +- kvm-s390x-css-Introduce-an-ESW-struct.patch [bz#1968326] +- kvm-s390x-css-Split-out-the-IRB-sense-data.patch [bz#1968326] +- kvm-s390x-css-Refactor-IRB-construction.patch [bz#1968326] +- kvm-s390x-css-Add-passthrough-IRB.patch [bz#1968326] +- kvm-vhost-user-blk-Fail-gracefully-on-too-large-queue-si.patch [bz#1935014 bz#1935019 bz#1935020 bz#1935031] +- kvm-vhost-user-blk-Make-sure-to-set-Error-on-realize-fai.patch [bz#1935014 bz#1935019 bz#1935020 bz#1935031] +- kvm-vhost-user-blk-Don-t-reconnect-during-initialisation.patch [bz#1935014 bz#1935019 bz#1935020 bz#1935031] +- kvm-vhost-user-blk-Improve-error-reporting-in-realize.patch [bz#1935014 bz#1935019 bz#1935020 bz#1935031] +- kvm-vhost-user-blk-Get-more-feature-flags-from-vhost-dev.patch [bz#1935014 bz#1935019 bz#1935020 bz#1935031] +- kvm-virtio-Fail-if-iommu_platform-is-requested-but-unsup.patch [bz#1935014 bz#1935019 bz#1935020 bz#1935031] +- kvm-vhost-user-blk-Check-that-num-queues-is-supported-by.patch [bz#1935014 bz#1935019 bz#1935020 bz#1935031] +- kvm-vhost-user-Fix-backends-without-multiqueue-support.patch [bz#1935014 bz#1935019 bz#1935020 bz#1935031] +- Resolves: bz#1968326 + ([vfio_ccw] I/O error when checking format - dasdfmt requires --force in quick mode when passed through) +- Resolves: bz#1935014 + (qemu crash when attach vhost-user-blk-pci with option queue-size=4096) +- Resolves: bz#1935019 + (qemu guest failed boot when attach vhost-user-blk-pci with option iommu_platform=on) +- Resolves: bz#1935020 + (qemu guest failed boot when attach vhost-user-blk-pci with option packed=on) +- Resolves: bz#1935031 + (qemu guest failed boot when attach vhost-user-blk-pci with unmatched num-queues with qsd) + +* Thu Jul 08 2021 Danilo Cesar Lemes de Paula - 6.0.0-23.el8 +- kvm-Add-mtod_check.patch [bz#1970823 bz#1970842 bz#1970850 bz#1970858] +- kvm-bootp-limit-vendor-specific-area-to-input-packet-mem.patch [bz#1970823 bz#1970842 bz#1970850 bz#1970858] +- kvm-bootp-check-bootp_input-buffer-size.patch [bz#1970823] +- kvm-upd6-check-udp6_input-buffer-size.patch [bz#1970842] +- kvm-tftp-check-tftp_input-buffer-size.patch [bz#1970850] +- kvm-tftp-introduce-a-header-structure.patch [bz#1970823 bz#1970842 bz#1970850 bz#1970858] +- kvm-udp-check-upd_input-buffer-size.patch [bz#1970858] +- kvm-Fix-DHCP-broken-in-libslirp-v4.6.0.patch [bz#1970823 bz#1970842 bz#1970850 bz#1970858] +- kvm-redhat-use-the-standard-vhost-user-JSON-path.patch [bz#1804196] +- Resolves: bz#1970823 + (CVE-2021-3592 virt:av/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (bootp) [rhel-av-8]) +- Resolves: bz#1970842 + (CVE-2021-3593 virt:av/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (udp6) [rhel-av-8]) +- Resolves: bz#1970850 + (CVE-2021-3595 virt:av/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (tftp) [rhel-av-8]) +- Resolves: bz#1970858 + (CVE-2021-3594 virt:av/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (udp) [rhel-av-8]) +- Resolves: bz#1804196 + (inconsistent paths for interop json files) + +* Fri Jul 02 2021 Danilo Cesar Lemes de Paula - 6.0.0-22.el8 +- kvm-redhat-Expose-upstream-machines-pc-4.2-and-pc-2.11.patch [bz#1897923] +- kvm-redhat-Enable-FDC-device-for-upstream-machines-too.patch [bz#1897923] +- kvm-redhat-Add-hw_compat_4_2_extra-and-apply-to-upstream.patch [bz#1897923] +- kvm-ppc-pef.c-initialize-cgs-ready-in-kvmppc_svm_init.patch [bz#1789757] +- kvm-virtio-gpu-handle-partial-maps-properly.patch [bz#1932279] +- kvm-redhat-Fix-unversioned-Obsoletes-warning.patch [bz#1950405 bz#1967330] +- kvm-redhat-Move-qemu-kvm-docs-dependency-to-qemu-kvm.patch [bz#1950405 bz#1967330] +- kvm-redhat-introducting-qemu-kvm-hw-usbredir.patch [bz#1950405 bz#1967330] +- kvm-spapr-Fix-EEH-capability-issue-on-KVM-guest-for-PCI-.patch [bz#1976015] +- Resolves: bz#1897923 + (support Live Migration from Ubuntu 18.04 i440fx to RHEL) +- Resolves: bz#1789757 + ([IBM 8.5 FEAT] Add machine option to enable secure VM support) +- Resolves: bz#1932279 + ([aarch64] qemu core dumped when using smmuv3 and iommu_platform enabling at virtio-gpu-pci) +- Resolves: bz#1950405 + (review qemu-kvm-core dependencies) +- Resolves: bz#1967330 + (Make qemu-kvm use versioned obsoletes for qemu-kvm-ma and qemu-kvm-rhev) +- Resolves: bz#1976015 + (spapr: Fix EEH capability issue on KVM guest for PCI passthru) + +* Wed Jun 23 2021 Danilo Cesar Lemes de Paula - 6.0.0-21.el8 +- kvm-block-backend-add-drained_poll.patch [bz#1960137] +- kvm-nbd-server-Use-drained-block-ops-to-quiesce-the-serv.patch [bz#1960137] +- kvm-disable-CONFIG_USB_STORAGE_BOT.patch [bz#1866133] +- kvm-doc-Fix-some-mistakes-in-the-SEV-documentation.patch [bz#1954750] +- kvm-docs-Add-SEV-ES-documentation-to-amd-memory-encrypti.patch [bz#1954750] +- kvm-docs-interop-firmware.json-Add-SEV-ES-support.patch [bz#1954750] +- Resolves: bz#1960137 + ([incremental backup] qemu-kvm hangs when Rebooting the VM during full backup) +- Resolves: bz#1866133 + (Disable usb-bot device in QEMU (unsupported)) +- Resolves: bz#1954750 + (firmware scheme for sev-es) + +* Mon Jun 21 2021 Danilo Cesar Lemes de Paula - 6.0.0-20.el8 +- kvm-x86-Add-x86-rhel8.5-machine-types.patch [bz#1957838] +- kvm-redhat-x86-Enable-kvm-asyncpf-int-by-default.patch [bz#1967603] +- kvm-yank-Unregister-function-when-using-TLS-migration.patch [bz#1964326] +- Resolves: bz#1957838 + (8.5 machine types for x86) +- Resolves: bz#1967603 + (Enable interrupt based asynchronous page fault mechanism by default) +- Resolves: bz#1964326 + (Qemu core dump when do tls migration via tcp protocol) + +* Fri Jun 11 2021 Danilo Cesar Lemes de Paula - 6.0.0-19.el8 +- kvm-pc-bios-s390-ccw-don-t-try-to-read-the-next-block-if.patch [bz#1965626] +- kvm-redhat-Install-the-s390-netboot.img-that-we-ve-built.patch [bz#1966463] +- kvm-sockets-update-SOCKET_ADDRESS_TYPE_FD-listen-2-backl.patch [bz#1967177] +- kvm-target-i386-sev-add-support-to-query-the-attestation.patch [bz#1957022] +- kvm-spapr-Don-t-hijack-current_machine-boot_order.patch [bz#1960119] +- kvm-target-i386-Add-CPU-model-versions-supporting-xsaves.patch [bz#1942914] +- kvm-spapr-Remove-stale-comment-about-power-saving-LPCR-b.patch [bz#1940731] +- kvm-spapr-Set-LPCR-to-current-AIL-mode-when-starting-a-n.patch [bz#1940731] +- Resolves: bz#1965626 + (RHEL8.2 - QEMU BIOS fails to read stage2 loader (kvm)) +- Resolves: bz#1966463 + (Rebuild the s390-netboot.img for downstream instead of shipping the upstream image) +- Resolves: bz#1967177 + (QEMU 6.0.0 socket_get_fd() fails with the error "socket_get_fd: too many connections") +- Resolves: bz#1957022 + (SEV: Add support to query the attestation report) +- Resolves: bz#1960119 + ([regression]Failed to reset guest) +- Resolves: bz#1942914 + ([Hyper-V][RHEL8.4]Nested Hyper-V on KVM: On Intel CPU L1 2016 can not start with cpu model Skylake-Server-noTSX-IBRS or Skylake-Client-noTSX-IBRS) +- Resolves: bz#1940731 + ([ppc64le] Hotplug vcpu device hit call trace:[qemu output] KVM: unknown exit, hardware reason 7fff9ce87ed8) + +* Tue Jun 01 2021 Danilo Cesar Lemes de Paula - 6.0.0-18.el8 +- kvm-virtio-net-failover-add-missing-remove_migration_sta.patch [bz#1953045] +- kvm-hw-arm-virt-Add-8.5-machine-type.patch [bz#1957667] +- kvm-hw-arm-virt-Disable-PL011-clock-migration-through-hw.patch [bz#1957667] +- kvm-arm-virt-Register-highmem-and-gic-version-as-class-p.patch [bz#1957667] +- kvm-virtio-blk-Fix-rollback-path-in-virtio_blk_data_plan.patch [bz#1927108] +- kvm-virtio-blk-Configure-all-host-notifiers-in-a-single-.patch [bz#1927108] +- kvm-virtio-scsi-Set-host-notifiers-and-callbacks-separat.patch [bz#1927108] +- kvm-virtio-scsi-Configure-all-host-notifiers-in-a-single.patch [bz#1927108] +- kvm-hw-arm-smmuv3-Another-range-invalidation-fix.patch [bz#1929720] +- Resolves: bz#1953045 + (qemu-kvm NULL pointer de-reference during migration at migrate_fd_connect ->...-> notifier_list_notify) +- Resolves: bz#1957667 + ([aarch64] Add 8.5 machine type) +- Resolves: bz#1927108 + (It's too slow to load scsi disk when use 384 vcpus) +- Resolves: bz#1929720 + ([aarch64] Handle vsmmuv3 IOTLB invalidation with non power of 2 size) + +* Tue May 25 2021 Danilo Cesar Lemes de Paula - 6.0.0-17.el8 +- kvm-redhat-s390x-add-rhel-8.5.0-compat-machine.patch [bz#1951476] +- kvm-redhat-add-missing-entries-in-hw_compat_rhel_8_4.patch [bz#1957834] +- kvm-redhat-Define-pseries-rhel8.5.0-machine-type.patch [bz#1957834] +- Resolves: bz#1951476 + ([s390x] RHEL AV 8.5 new machine type for s390x) +- Resolves: bz#1957834 + ([ppc64le] RHEL AV 8.5 new machine type for ppc64le) + +* Mon May 03 2021 Danilo Cesar Lemes de Paula - 6.0.0-16.el8 +- Rebase to qemu-kvm 6.0.0 + +* Wed Apr 28 2021 Danilo Cesar Lemes de Paula - 5.2.0-16.el8 +- kvm-virtio-pci-compat-page-aligned-ATS.patch [bz#1942362] +- Resolves: bz#1942362 + (Live migration with iommu from rhel8.3.1 to rhel8.4 fails: qemu-kvm: get_pci_config_device: Bad config data) + +* Mon Apr 12 2021 Danilo Cesar Lemes de Paula - 5.2.0-15.el8_4 +- kvm-block-Simplify-qmp_block_resize-error-paths.patch [bz#1903511] +- kvm-block-Fix-locking-in-qmp_block_resize.patch [bz#1903511] +- kvm-block-Fix-deadlock-in-bdrv_co_yield_to_drain.patch [bz#1903511] +- Resolves: bz#1903511 + (no response on QMP command 'block_resize') + +* Sat Mar 20 2021 Danilo Cesar Lemes de Paula - 5.2.0-14.el8 +- kvm-vhost-user-blk-fix-blkcfg-num_queues-endianness.patch [bz#1937004] +- kvm-block-export-fix-blk_size-double-byteswap.patch [bz#1937004] +- kvm-block-export-use-VIRTIO_BLK_SECTOR_BITS.patch [bz#1937004] +- kvm-block-export-fix-vhost-user-blk-export-sector-number.patch [bz#1937004] +- kvm-block-export-port-virtio-blk-discard-write-zeroes-in.patch [bz#1937004] +- kvm-block-export-port-virtio-blk-read-write-range-check.patch [bz#1937004] +- kvm-spec-ui-spice-sub-package.patch [bz#1936373] +- kvm-spec-ui-opengl-sub-package.patch [bz#1936373] +- Resolves: bz#1937004 + (vhost-user-blk server endianness and input validation fixes) +- Resolves: bz#1936373 + (move spice & opengl modules to rpm subpackages) + +* Tue Mar 16 2021 Danilo Cesar Lemes de Paula - 5.2.0-13.el8 +- kvm-i386-acpi-restore-device-paths-for-pre-5.1-vms.patch [bz#1934158] +- Resolves: bz#1934158 + (Windows guest looses network connectivity when NIC was configured with static IP) + +* Mon Mar 15 2021 Danilo Cesar Lemes de Paula - 5.2.0-12.el8 +- kvm-scsi-disk-move-scsi_handle_rw_error-earlier.patch [bz#1927530] +- kvm-scsi-disk-do-not-complete-requests-early-for-rerror-.patch [bz#1927530] +- kvm-scsi-introduce-scsi_sense_from_errno.patch [bz#1927530] +- kvm-scsi-disk-pass-SCSI-status-to-scsi_handle_rw_error.patch [bz#1927530] +- kvm-scsi-disk-pass-guest-recoverable-errors-through-even.patch [bz#1927530] +- kvm-hw-intc-arm_gic-Fix-interrupt-ID-in-GICD_SGIR-regist.patch [bz#1936948] +- Resolves: bz#1927530 + (RHEL8 Hypervisor - OVIRT - Issues seen on a virtualization guest with direct passthrough LUNS pausing when a host gets a Thin threshold warning) +- Resolves: bz#1936948 + (CVE-2021-20221 virt:av/qemu-kvm: qemu: out-of-bound heap buffer access via an interrupt ID field [rhel-av-8.4.0]) + +* Mon Mar 08 2021 Danilo Cesar Lemes de Paula - 5.2.0-11.el8 +- kvm-qxl-set-qxl.ssd.dcl.con-on-secondary-devices.patch [bz#1932190] +- kvm-qxl-also-notify-the-rendering-is-done-when-skipping-.patch [bz#1932190] +- kvm-virtiofsd-Save-error-code-early-at-the-failure-calls.patch [bz#1935071] +- kvm-virtiofs-drop-remapped-security.capability-xattr-as-.patch [bz#1935071] +- Resolves: bz#1932190 + (Timeout when dump the screen from 2nd VGA) +- Resolves: bz#1935071 + (CVE-2021-20263 virt:8.4/qemu-kvm: QEMU: virtiofsd: 'security.capabilities' is not dropped with xattrmap option [rhel-av-8]) + +* Wed Mar 03 2021 Danilo Cesar Lemes de Paula - 5.2.0-10.el8 +- kvm-migration-dirty-bitmap-Use-struct-for-alias-map-inne.patch [bz#1930757] +- kvm-migration-dirty-bitmap-Allow-control-of-bitmap-persi.patch [bz#1930757] +- kvm-qemu-iotests-300-Add-test-case-for-modifying-persist.patch [bz#1930757] +- kvm-failover-fix-indentantion.patch [bz#1819991] +- kvm-failover-Use-always-atomics-for-primary_should_be_hi.patch [bz#1819991] +- kvm-failover-primary-bus-is-only-used-once-and-where-it-.patch [bz#1819991] +- kvm-failover-Remove-unused-parameter.patch [bz#1819991] +- kvm-failover-Remove-external-partially_hotplugged-proper.patch [bz#1819991] +- kvm-failover-qdev_device_add-returns-err-or-dev-set.patch [bz#1819991] +- kvm-failover-Rename-bool-to-failover_primary_hidden.patch [bz#1819991] +- kvm-failover-g_strcmp0-knows-how-to-handle-NULL.patch [bz#1819991] +- kvm-failover-Remove-primary_device_opts.patch [bz#1819991] +- kvm-failover-remove-standby_id-variable.patch [bz#1819991] +- kvm-failover-Remove-primary_device_dict.patch [bz#1819991] +- kvm-failover-Remove-memory-leak.patch [bz#1819991] +- kvm-failover-simplify-virtio_net_find_primary.patch [bz#1819991] +- kvm-failover-should_be_hidden-should-take-a-bool.patch [bz#1819991] +- kvm-failover-Rename-function-to-hide_device.patch [bz#1819991] +- kvm-failover-virtio_net_connect_failover_devices-does-no.patch [bz#1819991] +- kvm-failover-Rename-to-failover_find_primary_device.patch [bz#1819991] +- kvm-failover-simplify-qdev_device_add-failover-case.patch [bz#1819991] +- kvm-failover-simplify-qdev_device_add.patch [bz#1819991] +- kvm-failover-make-sure-that-id-always-exist.patch [bz#1819991] +- kvm-failover-remove-failover_find_primary_device-error-p.patch [bz#1819991] +- kvm-failover-split-failover_find_primary_device_id.patch [bz#1819991] +- kvm-failover-We-don-t-need-to-cache-primary_device_id-an.patch [bz#1819991] +- kvm-failover-Caller-of-this-two-functions-already-have-p.patch [bz#1819991] +- kvm-failover-simplify-failover_unplug_primary.patch [bz#1819991] +- kvm-failover-Remove-primary_dev-member.patch [bz#1819991] +- kvm-virtio-net-add-missing-object_unref.patch [bz#1819991] +- kvm-x86-cpu-Populate-SVM-CPUID-feature-bits.patch [bz#1926785] +- kvm-i386-Add-the-support-for-AMD-EPYC-3rd-generation-pro.patch [bz#1926785] +- Resolves: bz#1930757 + (Allow control of block-dirty-bitmap persistence via 'block-bitmap-mapping') +- Resolves: bz#1819991 + (Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug) +- Resolves: bz#1926785 + ([RFE] AMD Milan - Add KVM/support for EPYC-Milan CPU Model - Fast Train) + +* Mon Mar 01 2021 Danilo Cesar Lemes de Paula - 5.2.0-9.el8 +- kvm-docs-generate-qemu-storage-daemon-qmp-ref-7-man-page.patch [bz#1901323] +- kvm-docs-add-qemu-storage-daemon-1-man-page.patch [bz#1901323] +- kvm-docs-Add-qemu-storage-daemon-1-manpage-to-meson.buil.patch [bz#1901323] +- kvm-qemu-storage-daemon-Enable-object-add.patch [bz#1901323] +- kvm-spec-Package-qemu-storage-daemon.patch [bz#1901323] +- kvm-default-configs-Enable-vhost-user-blk.patch [bz#1930033] +- kvm-qemu-nbd-Use-SOMAXCONN-for-socket-listen-backlog.patch [bz#1925345] +- kvm-pcie-don-t-set-link-state-active-if-the-slot-is-empt.patch [bz#1917654] +- Resolves: bz#1901323 + (QSD (QEMU Storage Daemon): basic support - TechPreview) +- Resolves: bz#1930033 + (enable vhost-user-blk device) +- Resolves: bz#1925345 + (qemu-nbd needs larger backlog for Unix socket listen()) +- Resolves: bz#1917654 + ([failover vf migration][RHEL84 vm] After start a vm with a failover vf + a failover virtio net device, the failvoer vf do not exist in the vm) + +* Fri Feb 19 2021 Eduardo Lima (Etrunko) - 5.2.0-8.el8 +- kvm-block-nbd-only-detach-existing-iochannel-from-aio_co.patch [bz#1887883] +- kvm-block-nbd-only-enter-connection-coroutine-if-it-s-pr.patch [bz#1887883] +- kvm-nbd-make-nbd_read-return-EIO-on-error.patch [bz#1887883] +- kvm-virtio-move-use-disabled-flag-property-to-hw_compat_.patch [bz#1907255] +- kvm-virtiofsd-extract-lo_do_open-from-lo_open.patch [bz#1920740] +- kvm-virtiofsd-optionally-return-inode-pointer-from-lo_do.patch [bz#1920740] +- kvm-virtiofsd-prevent-opening-of-special-files-CVE-2020-.patch [bz#1920740] +- kvm-spapr-Adjust-firmware-path-of-PCI-devices.patch [bz#1920941] +- kvm-pci-reject-too-large-ROMs.patch [bz#1917830] +- kvm-pci-add-romsize-property.patch [bz#1917830] +- kvm-redhat-Add-some-devices-for-exporting-upstream-machi.patch [bz#1917826] +- kvm-vhost-Check-for-valid-vdev-in-vhost_backend_handle_i.patch [bz#1880299] +- Resolves: bz#1887883 + (qemu blocks client progress with various NBD actions) +- Resolves: bz#1907255 + (Migrate failed with vhost-vsock-pci from RHEL-AV 8.3.1 to RHEL-AV 8.2.1) +- Resolves: bz#1920740 + (CVE-2020-35517 virt:8.4/qemu-kvm: QEMU: virtiofsd: potential privileged host device access from guest [rhel-av-8.4.0]) +- Resolves: bz#1920941 + ([ppc64le] [AV]--disk cdimage.iso,bus=usb fails to boot) +- Resolves: bz#1917830 + (Add romsize property to qemu-kvm) +- Resolves: bz#1917826 + (Add extra device support to qemu-kvm, but not to rhel machine types) +- Resolves: bz#1880299 + (vhost-user mq connection fails to restart after kill host testpmd which acts as vhost-user client) + +* Fri Feb 12 2021 Eduardo Lima (Etrunko) - 5.2.0-7.el8 +- kvm-virtio-Add-corresponding-memory_listener_unregister-.patch [bz#1903521] +- kvm-block-Honor-blk_set_aio_context-context-requirements.patch [bz#1918966 bz#1918968] +- kvm-nbd-server-Quiesce-coroutines-on-context-switch.patch [bz#1918966 bz#1918968] +- kvm-block-Avoid-processing-BDS-twice-in-bdrv_set_aio_con.patch [bz#1918966 bz#1918968] +- kvm-storage-daemon-Call-bdrv_close_all-on-exit.patch [bz#1918966 bz#1918968] +- kvm-block-move-blk_exp_close_all-to-qemu_cleanup.patch [bz#1918966 bz#1918968] +- Resolves: bz#1903521 + (hot unplug vhost-user cause qemu crash: qemu-kvm: ../softmmu/memory.c:2818: do_address_space_destroy: Assertion `QTAILQ_EMPTY(&as->listeners)' failed.) +- Resolves: bz#1918966 + ([incremental_backup] qemu aborts if guest reboot during backup when using virtio-blk: "aio_co_schedule: Co-routine was already scheduled in 'aio_co_schedule'") +- Resolves: bz#1918968 + ([incremental_backup] qemu deadlock after poweroff in guest during backup in nbd_export_close_all()) + +* Tue Feb 09 2021 Eduardo Lima (Etrunko) - 5.2.0-6.el8 +- kvm-scsi-fix-device-removal-race-vs-IO-restart-callback-.patch [bz#1854811] +- kvm-tracetool-also-strip-l-and-ll-from-systemtap-format-.patch [bz#1907264] +- kvm-redhat-moving-all-documentation-files-to-qemu-kvm-do.patch [bz#1881170 bz#1924766] +- kvm-hw-arm-smmuv3-Fix-addr_mask-for-range-based-invalida.patch [bz#1834152] +- kvm-redhat-makes-qemu-respect-system-s-crypto-profile.patch [bz#1902219] +- kvm-vhost-Unbreak-SMMU-and-virtio-iommu-on-dev-iotlb-sup.patch [bz#1925028] +- kvm-docs-set-CONFDIR-when-running-sphinx.patch [bz#1902537] +- Resolves: bz#1854811 + (scsi-bus.c: use-after-free due to race between device unplug and I/O operation causes guest crash) +- Resolves: bz#1907264 + (systemtap: invalid or missing conversion specifier at the trace event vhost_vdpa_set_log_base) +- Resolves: bz#1881170 + (split documentation from the qemu-kvm-core package to its own subpackage) +- Resolves: bz#1924766 + (split documentation from the qemu-kvm-core package to its own subpackage [av-8.4.0]) +- Resolves: bz#1834152 + ([aarch64] QEMU SMMUv3 device: Support range invalidation) +- Resolves: bz#1902219 + (QEMU doesn't honour system crypto policies) +- Resolves: bz#1925028 + (vsmmuv3/vhost and virtio-iommu/vhost regression) +- Resolves: bz#1902537 + (The default fsfreeze-hook path from man page and qemu-ga --help command are different) + +* Tue Feb 02 2021 Eduardo Lima (Etrunko) - 5.2.0-5.el8 +- kvm-spapr-Allow-memory-unplug-to-always-succeed.patch [bz#1914069] +- kvm-spapr-Improve-handling-of-memory-unplug-with-old-gue.patch [bz#1914069] +- kvm-x86-cpu-Add-AVX512_FP16-cpu-feature.patch [bz#1838738] +- kvm-q35-Increase-max_cpus-to-710-on-pc-q35-rhel8-machine.patch [bz#1904268] +- kvm-config-enable-VFIO_CCW.patch [bz#1922170] +- Resolves: bz#1914069 + ([ppc64le] have this fix for rhel8.4 av (spapr: Allow memory unplug to always succeed)) +- Resolves: bz#1838738 + ([Intel 8.4 FEAT] qemu-kvm Sapphire Rapids (SPR) New Instructions (NIs) - Fast Train) +- Resolves: bz#1904268 + ([RFE] [HPEMC] qemu-kvm: support up to 710 VCPUs) +- Resolves: bz#1922170 + (Enable vfio-ccw in AV) + +* Wed Jan 27 2021 Danilo Cesar Lemes de Paula - 5.2.0-4.el8 +- kvm-Drop-bogus-IPv6-messages.patch [bz#1918061] +- Resolves: bz#1918061 + (CVE-2020-10756 virt:rhel/qemu-kvm: QEMU: slirp: networking out-of-bounds read information disclosure vulnerability [rhel-av-8]) + +* Mon Jan 18 2021 Danilo Cesar Lemes de Paula - 5.2.0-3.el8 +- kvm-block-nvme-Implement-fake-truncate-coroutine.patch [bz#1848834] +- kvm-spec-find-system-python-via-meson.patch [bz#1899619] +- kvm-build-system-use-b_staticpic-false.patch [bz#1899619] +- kvm-spapr-Fix-buffer-overflow-in-spapr_numa_associativit.patch [bz#1908693] +- kvm-usb-hcd-xhci-pci-Fixup-capabilities-ordering-again.patch [bz#1912846] +- kvm-qga-commands-posix-Send-CCW-address-on-s390x-with-th.patch [bz#1755075] +- kvm-AArch64-machine-types-cleanup.patch [bz#1895276] +- kvm-hw-arm-virt-Add-8.4-Machine-type.patch [bz#1895276] +- kvm-udev-kvm-check-remove-the-exceeded-subscription-limi.patch [bz#1914463] +- kvm-memory-Rename-memory_region_notify_one-to-memory_reg.patch [bz#1845758] +- kvm-memory-Add-IOMMUTLBEvent.patch [bz#1845758] +- kvm-memory-Add-IOMMU_NOTIFIER_DEVIOTLB_UNMAP-IOMMUTLBNot.patch [bz#1845758] +- kvm-intel_iommu-Skip-page-walking-on-device-iotlb-invali.patch [bz#1845758] +- kvm-memory-Skip-bad-range-assertion-if-notifier-is-DEVIO.patch [bz#1845758] +- kvm-RHEL-Switch-pvpanic-test-to-q35.patch [bz#1885555] +- kvm-8.4-x86-machine-type.patch [bz#1885555] +- kvm-memory-clamp-cached-translation-in-case-it-points-to.patch [bz#1904392] +- Resolves: bz#1848834 + (Failed to create luks format image on NVMe device) +- Resolves: bz#1899619 + (QEMU 5.2 is built with PIC objects instead of PIE) +- Resolves: bz#1908693 + ([ppc64le]boot up a guest with 128 numa nodes ,qemu got coredump) +- Resolves: bz#1912846 + (qemu-kvm: Failed to load xhci:parent_obj during migration) +- Resolves: bz#1755075 + ([qemu-guest-agent] fsinfo doesn't return disk info on s390x) +- Resolves: bz#1895276 + (Machine types update for aarch64 for QEMU 5.2.0) +- Resolves: bz#1914463 + (Remove KVM guest count and limit info message) +- Resolves: bz#1845758 + (qemu core dumped: qemu-kvm: /builddir/build/BUILD/qemu-4.2.0/memory.c:1928: memory_region_notify_one: Assertion `entry->iova >= notifier->start && entry_end <= notifier->end' failed.) +- Resolves: bz#1885555 + (8.4 machine types for x86) +- Resolves: bz#1904392 + (CVE-2020-27821 virt:8.4/qemu-kvm: QEMU: heap buffer overflow in msix_table_mmio_write() in hw/pci/msix.c [rhel-av-8]) + +* Tue Dec 15 2020 Danilo Cesar Lemes de Paula - 5.2.0-2.el8 +- kvm-redhat-Define-hw_compat_8_3.patch [bz#1893935] +- kvm-redhat-Add-spapr_machine_rhel_default_class_options.patch [bz#1893935] +- kvm-redhat-Define-pseries-rhel8.4.0-machine-type.patch [bz#1893935] +- kvm-redhat-s390x-add-rhel-8.4.0-compat-machine.patch [bz#1836282] +- Resolves: bz#1836282 + (New machine type for qemu-kvm on s390x in RHEL-AV) +- Resolves: bz#1893935 + (New machine type on RHEL-AV 8.4 for ppc64le) + +* Wed Dec 09 2020 Miroslav Rezanina - 5.2.0-1.el8 +- Rebase to QEMU 5.2.0 [bz#1905933] +- Resolves: bz#1905933 + (Rebase qemu-kvm to version 5.2.0) + +* Tue Dec 01 2020 Danilo Cesar Lemes de Paula - 5.1.0-16.el8 +- kvm-redhat-introduces-disable_everything-macro-into-the-.patch [bz#1884611] +- kvm-redhat-scripts-extract_build_cmd.py-Avoid-listing-em.patch [bz#1884611] +- kvm-redhat-Removing-unecessary-configurations.patch [bz#1884611] +- kvm-redhat-Fixing-rh-local-build.patch [bz#1884611] +- kvm-redhat-allow-Makefile-rh-prep-builddep-to-fail.patch [bz#1884611] +- kvm-redhat-adding-rh-rpm-target.patch [bz#1884611] +- kvm-redhat-move-shareable-files-from-qemu-kvm-core-to-qe.patch [bz#1884611] +- kvm-redhat-Add-qemu-kiwi-subpackage.patch [bz#1884611] +- Resolves: bz#1884611 + (Build kata-specific version of qemu) + +* Mon Nov 16 2020 Danilo Cesar Lemes de Paula - 5.1.0-15.el8 +- kvm-redhat-add-un-pre-install-systemd-hooks-for-qemu-ga.patch [bz#1882719] +- kvm-rcu-Implement-drain_call_rcu.patch [bz#1812399 bz#1866707] +- kvm-libqtest-Rename-qmp_assert_error_class-to-qmp_expect.patch [bz#1812399 bz#1866707] +- kvm-qtest-rename-qtest_qmp_receive-to-qtest_qmp_receive_.patch [bz#1812399 bz#1866707] +- kvm-qtest-Reintroduce-qtest_qmp_receive-with-QMP-event-b.patch [bz#1812399 bz#1866707] +- kvm-qtest-remove-qtest_qmp_receive_success.patch [bz#1812399 bz#1866707] +- kvm-device-plug-test-use-qtest_qmp-to-send-the-device_de.patch [bz#1812399 bz#1866707] +- kvm-qtest-switch-users-back-to-qtest_qmp_receive.patch [bz#1812399 bz#1866707] +- kvm-qtest-check-that-drives-are-really-appearing-and-dis.patch [bz#1812399 bz#1866707] +- kvm-qemu-iotests-qtest-rewrite-test-067-as-a-qtest.patch [bz#1812399 bz#1866707] +- kvm-qdev-add-check-if-address-free-callback-for-buses.patch [bz#1812399 bz#1866707] +- kvm-scsi-scsi_bus-switch-search-direction-in-scsi_device.patch [bz#1812399 bz#1866707] +- kvm-device_core-use-drain_call_rcu-in-in-qmp_device_add.patch [bz#1812399 bz#1866707] +- kvm-device-core-use-RCU-for-list-of-children-of-a-bus.patch [bz#1812399 bz#1866707] +- kvm-scsi-switch-to-bus-check_address.patch [bz#1812399 bz#1866707] +- kvm-device-core-use-atomic_set-on-.realized-property.patch [bz#1812399 bz#1866707] +- kvm-scsi-scsi-bus-scsi_device_find-don-t-return-unrealiz.patch [bz#1812399] +- kvm-scsi-scsi_bus-Add-scsi_device_get.patch [bz#1812399 bz#1866707] +- kvm-virtio-scsi-use-scsi_device_get.patch [bz#1812399 bz#1866707] +- kvm-scsi-scsi_bus-fix-races-in-REPORT-LUNS.patch [bz#1812399 bz#1866707] +- kvm-tests-migration-fix-memleak-in-wait_command-wait_com.patch [bz#1812399 bz#1866707] +- kvm-libqtest-fix-the-order-of-buffered-events.patch [bz#1812399 bz#1866707] +- kvm-libqtest-fix-memory-leak-in-the-qtest_qmp_event_ref.patch [bz#1812399 bz#1866707] +- kvm-iotests-add-filter_qmp_virtio_scsi-function.patch [bz#1812399 bz#1866707] +- kvm-iotests-rewrite-iotest-240-in-python.patch [bz#1812399 bz#1866707] +- Resolves: bz#1812399 + (Qemu crash when detach disk with cache="none" discard="ignore" io="native") +- Resolves: bz#1866707 + (qemu-kvm is crashing with error "scsi_target_emulate_report_luns: Assertion `i == n + 8' failed") +- Resolves: bz#1882719 + (qemu-ga service still active and can work after qemu-guest-agent been removed) + +* Tue Oct 13 2020 Danilo Cesar Lemes de Paula - 5.1.0-14.el8_3 +- kvm-virtiofsd-avoid-proc-self-fd-tempdir.patch [bz#1884276] +- Resolves: bz#1884276 + (Pod with kata-runtime won't start, QEMU: "vhost_user_dev init failed, Operation not permitted" [mkdtemp failing in sandboxing]) + +* Thu Oct 08 2020 Danilo Cesar Lemes de Paula - 5.1.0-13.el8_3 +- kvm-x86-lpc9-let-firmware-negotiate-CPU-hotplug-with-SMI.patch [bz#1846886] +- kvm-x86-cpuhp-prevent-guest-crash-on-CPU-hotplug-when-br.patch [bz#1846886] +- kvm-x86-cpuhp-refuse-cpu-hot-unplug-request-earlier-if-n.patch [bz#1846886] +- Resolves: bz#1846886 + (Guest hit soft lockup or reboots if hotplug vcpu under ovmf) + +* Mon Oct 05 2020 Danilo Cesar Lemes de Paula - 5.1.0-12.el8_3 +- kvm-virtio-skip-legacy-support-check-on-machine-types-le.patch [bz#1868449] +- kvm-vhost-vsock-pci-force-virtio-version-1.patch [bz#1868449] +- kvm-vhost-user-vsock-pci-force-virtio-version-1.patch [bz#1868449] +- kvm-vhost-vsock-ccw-force-virtio-version-1.patch [bz#1868449] +- Resolves: bz#1868449 + (vhost_vsock error: device is modern-only, use disable-legacy=on) + +* Mon Oct 05 2020 Danilo Cesar Lemes de Paula - 5.1.0-11.el8_3 +- kvm-migration-increase-max-bandwidth-to-128-MiB-s-1-Gib-.patch [bz#1874004] +- kvm-redhat-Make-all-generated-so-files-executable-not-on.patch [bz#1876635] +- Resolves: bz#1874004 + (Live migration performance is poor during guest installation process on power host) +- Resolves: bz#1876635 + (VM fails to start with a passthrough smartcard) + +* Mon Sep 28 2020 Danilo Cesar Lemes de Paula - 5.1.0-10.el8 +- kvm-qemu-img-Support-bitmap-merge-into-backing-image.patch [bz#1877209] +- Resolves: bz#1877209 + ('qemu-img bitmaps --merge' failed when trying to merge top volume bitmap to base volume bitmap) + +* Mon Sep 21 2020 Danilo Cesar Lemes de Paula - 5.1.0-9.el8 +- kvm-hw-nvram-fw_cfg-fix-FWCfgDataGeneratorClass-get_data.patch [bz#1688978] +- Resolves: bz#1688978 + (RFE: forward host preferences for cipher suites and CA certs to guest firmware) + +* Thu Sep 17 2020 Danilo Cesar Lemes de Paula - 5.1.0-8.el8 +- kvm-redhat-link-etc-qemu-ga-fsfreeze-hook-to-etc-qemu-kv.patch [bz#1738820] +- kvm-seccomp-fix-killing-of-whole-process-instead-of-thre.patch [bz#1752376] +- kvm-Revert-Drop-bogus-IPv6-messages.patch [bz#1867075] +- kvm-block-rbd-add-namespace-to-qemu_rbd_strong_runtime_o.patch [bz#1821528] +- Resolves: bz#1738820 + ('-F' option of qemu-ga command cause the guest-fsfreeze-freeze command doesn't work) +- Resolves: bz#1752376 + (qemu use SCMP_ACT_TRAP even SCMP_ACT_KILL_PROCESS is available) +- Resolves: bz#1821528 + (missing namespace attribute when access the rbd image with namespace) +- Resolves: bz#1867075 + (CVE-2020-10756 virt:8.3/qemu-kvm: QEMU: slirp: networking out-of-bounds read information disclosure vulnerability [rhel-av-8]) + +* Tue Sep 15 2020 Danilo Cesar Lemes de Paula - 5.1.0-7.el8 +- kvm-target-ppc-Add-experimental-option-for-enabling-secu.patch [bz#1789757 bz#1870384] +- kvm-target-arm-Move-start-powered-off-property-to-generi.patch [bz#1849483] +- kvm-target-arm-Move-setting-of-CPU-halted-state-to-gener.patch [bz#1849483] +- kvm-ppc-spapr-Use-start-powered-off-CPUState-property.patch [bz#1849483] +- Resolves: bz#1789757 + ([IBM 8.4 FEAT] Add machine option to enable secure VM support) +- Resolves: bz#1849483 + (Failed to boot up guest when hotplugging vcpus on bios stage) +- Resolves: bz#1870384 + ([IBM 8.3 FEAT] Add interim/unsupported machine option to enable secure VM support for testing purposes) + +* Thu Sep 10 2020 Danilo Cesar Lemes de Paula - 5.1.0-6.el8 +- kvm-spec-Move-qemu-pr-helper-back-to-usr-bin.patch [bz#1869635] +- kvm-Bump-required-libusbx-version.patch [bz#1856591] +- Resolves: bz#1856591 + (libusbx isn't updated with qemu-kvm) +- Resolves: bz#1869635 + ('/usr/bin/qemu-pr-helper' is not a suitable pr helper: No such file or directory) + +* Tue Sep 08 2020 Danilo Cesar Lemes de Paula - 5.1.0-5.el8 +- kvm-Revert-i386-Fix-pkg_id-offset-for-EPYC-cpu-models.patch [bz#1873417] +- kvm-Revert-target-i386-Enable-new-apic-id-encoding-for-E.patch [bz#1873417] +- kvm-Revert-hw-i386-Move-arch_id-decode-inside-x86_cpus_i.patch [bz#1873417] +- kvm-Revert-i386-Introduce-use_epyc_apic_id_encoding-in-X.patch [bz#1873417] +- kvm-Revert-hw-i386-Introduce-apicid-functions-inside-X86.patch [bz#1873417] +- kvm-Revert-target-i386-Cleanup-and-use-the-EPYC-mode-top.patch [bz#1873417] +- kvm-Revert-hw-386-Add-EPYC-mode-topology-decoding-functi.patch [bz#1873417] +- kvm-nvram-Exit-QEMU-if-NVRAM-cannot-contain-all-prom-env.patch [bz#1867739] +- kvm-usb-fix-setup_len-init-CVE-2020-14364.patch [bz#1869715] +- kvm-Remove-explicit-glusterfs-api-dependency.patch [bz#1872853] +- kvm-disable-virgl.patch [bz#1831271] +- Resolves: bz#1831271 + (Drop virgil acceleration support and remove virglrenderer dependency) +- Resolves: bz#1867739 + (-prom-env does not validate input) +- Resolves: bz#1869715 + (CVE-2020-14364 qemu-kvm: QEMU: usb: out-of-bounds r/w access issue while processing usb packets [rhel-av-8.3.0]) +- Resolves: bz#1872853 + (move the glusterfs dependency out of qemu-kvm-core to the glusterfs module) +- Resolves: bz#1873417 + (AMD/NUMA topology - revert 5.1 changes) + +* Thu Aug 27 2020 Danilo Cesar Lemes de Paula - 5.1.0-4.el8 +- kvm-Drop-bogus-IPv6-messages.patch [bz#1867075] +- kvm-machine-types-numa-set-numa_mem_supported-on-old-mac.patch [bz#1849707] +- kvm-machine_types-numa-compatibility-for-auto_enable_num.patch [bz#1849707] +- kvm-migration-Add-block-bitmap-mapping-parameter.patch [bz#1790492] +- kvm-iotests.py-Let-wait_migration-return-on-failure.patch [bz#1790492] +- kvm-iotests-Test-node-bitmap-aliases-during-migration.patch [bz#1790492] +- Resolves: bz#1790492 + ('dirty-bitmaps' migration capability should allow configuring target nodenames) +- Resolves: bz#1849707 + (8.3 machine types for x86 - 5.1 update) +- Resolves: bz#1867075 + (CVE-2020-10756 virt:8.3/qemu-kvm: QEMU: slirp: networking out-of-bounds read information disclosure vulnerability [rhel-av-8]) + +* Wed Aug 19 2020 Danilo Cesar Lemes de Paula - 5.1.0-3.el8 +- kvm-redhat-Update-hw_compat_8_2.patch [bz#1843348] +- kvm-redhat-update-pseries-rhel8.2.0-machine-type.patch [bz#1843348] +- kvm-Disable-TPM-passthrough-backend-on-ARM.patch [bz#1801242] +- kvm-Require-libfdt-1.6.0.patch [bz#1867847] +- Resolves: bz#1801242 + ([aarch64] vTPM support in machvirt) +- Resolves: bz#1843348 + (8.3 machine types for POWER) +- Resolves: bz#1867847 + ([ppc] virt module 7629: /usr/libexec/qemu-kvm: undefined symbol: fdt_check_full, version LIBFDT_1.2) + +* Wed Aug 12 2020 Danilo Cesar Lemes de Paula - 5.1.0-2.el8 +- kvm-redhat-define-hw_compat_8_2.patch [bz#1853265] +- Resolves: bz#1853265 + (Forward and backward migration from rhel-av-8.3.0(qemu-kvm-5.0.0) to rhel-av-8.2.1(qemu-kvm-4.2.0) failed with "qemu-kvm: error while loading state for instance 0x0 of device 'spapr'") + +* Wed Aug 12 2020 Danilo Cesar Lemes de Paula - 5.1.0-1.el8 +- Quick changelog fix to reflect the current fixes: +- Resolve: bz#1781911 +- Resolve: bz#1841529 +- Resolve: bz#1842902 +- Resolve: bz#1818843 +- Resolve: bz#1819292 +- Resolve: bz#1801242 + +* Wed Aug 12 2020 Danilo Cesar Lemes de Paula - 5.1.0-0.el8 +- Rebase to 5.1.0 +- Resolves: bz#1809650 + +* Tue Jul 07 2020 Danilo Cesar Lemes de Paula - 4.2.0-29.el8 +- kvm-virtio-net-fix-removal-of-failover-device.patch [bz#1820120] +- Resolves: bz#1820120 + (After hotunplugging the vitrio device and netdev, hotunpluging the failover VF will cause qemu core dump) + +* Sun Jun 28 2020 Danilo Cesar Lemes de Paula - 4.2.0-28.el8 +- kvm-virtio-blk-Refactor-the-code-that-processes-queued-r.patch [bz#1812765] +- kvm-virtio-blk-On-restart-process-queued-requests-in-the.patch [bz#1812765] +- kvm-Fix-use-afte-free-in-ip_reass-CVE-2020-1983.patch [bz#1838082] +- Resolves: bz#1812765 + (qemu with iothreads enabled crashes on resume after enospc pause for disk extension) +- Resolves: bz#1838082 + (CVE-2020-1983 virt:8.2/qemu-kvm: QEMU: slirp: use-after-free in ip_reass() function in ip_input.c [rhel-av-8]) + +* Thu Jun 18 2020 Eduardo Lima (Etrunko) - 4.2.0-27.el8 +- kvm-hw-pci-pcie-Move-hot-plug-capability-check-to-pre_pl.patch [bz#1820531] +- kvm-spec-Fix-python-shenigans-for-tests.patch [bz#1845779] +- kvm-target-i386-Add-ARCH_CAPABILITIES-related-bits-into-.patch [bz#1840342] +- Resolves: bz#1820531 + (qmp command query-pci get wrong result after hotplug device under hotplug=off controller) +- Resolves: bz#1840342 + ([Intel 8.2.1 Bug] qemu-kvm Add ARCH_CAPABILITIES to Icelake-Server cpu model - Fast Train) +- Resolves: bz#1845779 + (Install 'qemu-kvm-tests' failed as nothing provides /usr/libexec/platform-python3 - virt module 6972) + +* Wed Jun 17 2020 Eduardo Lima (Etrunko) - 4.2.0-26.el8 +- kvm-nbd-server-Avoid-long-error-message-assertions-CVE-2.patch [bz#1845384] +- kvm-block-Call-attention-to-truncation-of-long-NBD-expor.patch [bz#1845384] +- Resolves: bz#1845384 + (CVE-2020-10761 virt:8.2/qemu-kvm: QEMU: nbd: reachable assertion failure in nbd_negotiate_send_rep_verr via remote client [rhel-av-8]) + +* Tue Jun 09 2020 Danilo Cesar Lemes de Paula - 4.2.0-25.el8 +- kvm-enable-ramfb.patch [bz#1841068] +- kvm-block-Add-flags-to-BlockDriver.bdrv_co_truncate.patch [bz#1780574] +- kvm-block-Add-flags-to-bdrv-_co-_truncate.patch [bz#1780574] +- kvm-block-backend-Add-flags-to-blk_truncate.patch [bz#1780574] +- kvm-qcow2-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch [bz#1780574] +- kvm-raw-format-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch [bz#1780574] +- kvm-file-posix-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch [bz#1780574] +- kvm-block-truncate-Don-t-make-backing-file-data-visible.patch [bz#1780574] +- kvm-iotests-Add-qemu_io_log.patch [bz#1780574] +- kvm-iotests-Filter-testfiles-out-in-filter_img_info.patch [bz#1780574] +- kvm-iotests-Test-committing-to-short-backing-file.patch [bz#1780574] +- kvm-qcow2-Forward-ZERO_WRITE-flag-for-full-preallocation.patch [bz#1780574] +- kvm-i386-Add-MSR-feature-bit-for-MDS-NO.patch [bz#1769912] +- kvm-i386-Add-macro-for-stibp.patch [bz#1769912] +- kvm-target-i386-Add-new-bit-definitions-of-MSR_IA32_ARCH.patch [bz#1769912] +- kvm-i386-Add-new-CPU-model-Cooperlake.patch [bz#1769912] +- kvm-target-i386-Add-missed-features-to-Cooperlake-CPU-mo.patch [bz#1769912] +- Resolves: bz#1769912 + ([Intel 8.2.1 Feature] introduce Cooper Lake cpu model - qemu-kvm Fast Train) +- Resolves: bz#1780574 + (Data corruption with resizing short overlay over longer backing files) +- Resolves: bz#1841068 + (RFE: please support the "ramfb" display device model) + +* Mon Jun 08 2020 Danilo Cesar Lemes de Paula - 4.2.0-24.el8 +- kvm-target-i386-set-the-CPUID-level-to-0x14-on-old-machi.patch [bz#1513681] +- kvm-block-curl-HTTP-header-fields-allow-whitespace-aroun.patch [bz#1841038] +- kvm-block-curl-HTTP-header-field-names-are-case-insensit.patch [bz#1841038] +- kvm-MAINTAINERS-fix-qcow2-bitmap.c-under-Dirty-Bitmaps-h.patch [bz#1779893 bz#1779904] +- kvm-iotests-Let-_make_test_img-parse-its-parameters.patch [bz#1779893 bz#1779904] +- kvm-qemu_img-add-cvtnum_full-to-print-error-reports.patch [bz#1779893 bz#1779904] +- kvm-block-Make-it-easier-to-learn-which-BDS-support-bitm.patch [bz#1779893 bz#1779904] +- kvm-blockdev-Promote-several-bitmap-functions-to-non-sta.patch [bz#1779893 bz#1779904] +- kvm-blockdev-Split-off-basic-bitmap-operations-for-qemu-.patch [bz#1779893 bz#1779904] +- kvm-qemu-img-Add-bitmap-sub-command.patch [bz#1779893 bz#1779904] +- kvm-iotests-Fix-test-178.patch [bz#1779893 bz#1779904] +- kvm-qcow2-Expose-bitmaps-size-during-measure.patch [bz#1779893 bz#1779904] +- kvm-qemu-img-Factor-out-code-for-merging-bitmaps.patch [bz#1779893 bz#1779904] +- kvm-qemu-img-Add-convert-bitmaps-option.patch [bz#1779893 bz#1779904] +- kvm-iotests-Add-test-291-to-for-qemu-img-bitmap-coverage.patch [bz#1779893 bz#1779904] +- kvm-iotests-Add-more-skip_if_unsupported-statements-to-t.patch [bz#1778593] +- kvm-iotests-don-t-use-format-for-drive_add.patch [bz#1778593] +- kvm-iotests-055-refactor-compressed-backup-to-vmdk.patch [bz#1778593] +- kvm-iotests-055-skip-vmdk-target-tests-if-vmdk-is-not-wh.patch [bz#1778593] +- kvm-backup-Improve-error-for-bdrv_getlength-failure.patch [bz#1778593] +- kvm-backup-Make-sure-that-source-and-target-size-match.patch [bz#1778593] +- kvm-iotests-Backup-with-different-source-target-size.patch [bz#1778593] +- kvm-iotests-109-Don-t-mirror-with-mismatched-size.patch [bz#1778593] +- kvm-iotests-229-Use-blkdebug-to-inject-an-error.patch [bz#1778593] +- kvm-mirror-Make-sure-that-source-and-target-size-match.patch [bz#1778593] +- kvm-iotests-Mirror-with-different-source-target-size.patch [bz#1778593] +- Resolves: bz#1513681 + ([Intel 8.2.1 Feat] qemu-kvm PT VMX -- Fast Train) +- Resolves: bz#1778593 + (Qemu coredump when backup to a existing small size image) +- Resolves: bz#1779893 + (RFE: Copy bitmaps with qemu-img convert) +- Resolves: bz#1779904 + (RFE: ability to estimate bitmap space utilization for qcow2) +- Resolves: bz#1841038 + (qemu-img: /var/tmp/v2vovl56bced.qcow2: CURL: Error opening file: Server does not support 'range' (byte ranges) with HTTP/2 server in VMware ESXi 7) + +* Thu Jun 04 2020 Danilo Cesar Lemes de Paula - 4.2.0-23.el8 +- kvm-target-arm-Fix-PAuth-sbox-functions.patch [bz#1813940] +- kvm-Don-t-leak-memory-when-reallocation-fails.patch [bz#1749737] +- kvm-Replace-remaining-malloc-free-user-with-glib.patch [bz#1749737] +- kvm-Revert-RHEL-disable-hostmem-memfd.patch [bz#1839030] +- kvm-block-introducing-bdrv_co_delete_file-interface.patch [bz#1827630] +- kvm-block.c-adding-bdrv_co_delete_file.patch [bz#1827630] +- kvm-crypto.c-cleanup-created-file-when-block_crypto_co_c.patch [bz#1827630] +- Resolves: bz#1749737 + (CVE-2019-15890 qemu-kvm: QEMU: Slirp: use-after-free during packet reassembly [rhel-av-8]) +- Resolves: bz#1813940 + (CVE-2020-10702 virt:8.1/qemu-kvm: qemu: weak signature generation in Pointer Authentication support for ARM [rhel-av-8]) +- Resolves: bz#1827630 + (volume creation leaving uncleaned stuff behind on error (vol-clone/libvirt/qemu-kvm)) +- Resolves: bz#1839030 + (RFE: enable the "memfd" memory backend) + +* Mon May 25 2020 Danilo Cesar Lemes de Paula - 4.2.0-22.el8 +- kvm-block-always-fill-entire-LUKS-header-space-with-zero.patch [bz#1775462] +- kvm-numa-remove-not-needed-check.patch [bz#1600217] +- kvm-numa-properly-check-if-numa-is-supported.patch [bz#1600217] +- kvm-numa-Extend-CLI-to-provide-initiator-information-for.patch [bz#1600217] +- kvm-numa-Extend-CLI-to-provide-memory-latency-and-bandwi.patch [bz#1600217] +- kvm-numa-Extend-CLI-to-provide-memory-side-cache-informa.patch [bz#1600217] +- kvm-hmat-acpi-Build-Memory-Proximity-Domain-Attributes-S.patch [bz#1600217] +- kvm-hmat-acpi-Build-System-Locality-Latency-and-Bandwidt.patch [bz#1600217] +- kvm-hmat-acpi-Build-Memory-Side-Cache-Information-Struct.patch [bz#1600217] +- kvm-tests-numa-Add-case-for-QMP-build-HMAT.patch [bz#1600217] +- kvm-tests-bios-tables-test-add-test-cases-for-ACPI-HMAT.patch [bz#1600217] +- kvm-ACPI-add-expected-files-for-HMAT-tests-acpihmat.patch [bz#1600217] +- Resolves: bz#1600217 + ([Intel 8.2.1 FEAT] KVM ACPI HMAT support - qemu-kvm Fast Train) +- Resolves: bz#1775462 + (Creating luks-inside-qcow2 images with cluster_size=2k/4k will get a corrupted image) + +* Mon May 11 2020 Danilo Cesar Lemes de Paula - 4.2.0-21.el8 +- kvm-hw-pci-pcie-Forbid-hot-plug-if-it-s-disabled-on-the-.patch [bz#1820531] +- kvm-hw-pci-pcie-Replace-PCI_DEVICE-casts-with-existing-v.patch [bz#1820531] +- kvm-tools-virtiofsd-passthrough_ll-Fix-double-close.patch [bz#1817445] +- kvm-virtiofsd-add-rlimit-nofile-NUM-option.patch [bz#1817445] +- kvm-virtiofsd-stay-below-fs.file-max-sysctl-value-CVE-20.patch [bz#1817445] +- kvm-virtiofsd-jail-lo-proc_self_fd.patch [bz#1817445] +- kvm-virtiofsd-Show-submounts.patch [bz#1817445] +- kvm-virtiofsd-only-retain-file-system-capabilities.patch [bz#1817445] +- kvm-virtiofsd-drop-all-capabilities-in-the-wait-parent-p.patch [bz#1817445] +- Resolves: bz#1817445 + (CVE-2020-10717 virt:8.2/qemu-kvm: QEMU: virtiofsd: guest may open maximum file descriptor to cause DoS [rhel-av-8]) +- Resolves: bz#1820531 + (qmp command query-pci get wrong result after hotplug device under hotplug=off controller) + +* Fri May 01 2020 Jon Maloy - 4.2.0-20.el8 +- kvm-pcie_root_port-Add-hotplug-disabling-option.patch [bz#1790899] +- kvm-compat-disable-edid-for-virtio-gpu-ccw.patch [bz#1816793] +- Resolves: bz#1790899 + ([RFE] QEMU devices should have the option to enable/disable hotplug/unplug) +- Resolves: bz#1816793 + ('edid' compat handling missing for virtio-gpu-ccw) + +* Tue Apr 14 2020 Danilo Cesar Lemes de Paula - 4.2.0-19.el8_2 +- kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch [bz#1822682] +- Resolves: bz#1822682 + (QEMU-4.2 fails to start a VM on Azure) + +* Thu Apr 09 2020 Danilo Cesar Lemes de Paula - 4.2.0-18.el8_2 +- kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch [bz#1817621] +- kvm-replication-assert-we-own-context-before-job_cancel_.patch [bz#1817621] +- kvm-backup-don-t-acquire-aio_context-in-backup_clean.patch [bz#1817621] +- kvm-block-backend-Reorder-flush-pdiscard-function-defini.patch [bz#1817621] +- kvm-block-Increase-BB.in_flight-for-coroutine-and-sync-i.patch [bz#1817621] +- kvm-block-Fix-blk-in_flight-during-blk_wait_while_draine.patch [bz#1817621] +- Resolves: bz#1817621 + (Crash and deadlock with block jobs when using io-threads) + +* Mon Mar 30 2020 Danilo Cesar Lemes de Paula - 4.2.0-17.el8 +- kvm-block-pass-BlockDriver-reference-to-the-.bdrv_co_cre.patch [bz#1816007] +- kvm-block-trickle-down-the-fallback-image-creation-funct.patch [bz#1816007] +- kvm-Revert-mirror-Don-t-let-an-operation-wait-for-itself.patch [bz#1794692] +- kvm-mirror-Wait-only-for-in-flight-operations.patch [bz#1794692] +- Resolves: bz#1794692 + (Mirror block job stops making progress) +- Resolves: bz#1816007 + (qemu-img convert failed to convert with block device as target) + +* Tue Mar 24 2020 Danilo Cesar Lemes de Paula - 4.2.0-16.el8 +- kvm-migration-Rate-limit-inside-host-pages.patch [bz#1814336] +- kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch [bz#1811670] +- Resolves: bz#1811670 + (Unneeded qemu-guest-agent dependency on pixman) +- Resolves: bz#1814336 + ([POWER9] QEMU migration-test triggers a kernel warning) + +* Tue Mar 17 2020 Danilo Cesar Lemes de Paula - 4.2.0-15.el8 +- kvm-block-nbd-Fix-hang-in-.bdrv_close.patch [bz#1640894] +- kvm-block-Generic-file-creation-fallback.patch [bz#1640894] +- kvm-file-posix-Drop-hdev_co_create_opts.patch [bz#1640894] +- kvm-iscsi-Drop-iscsi_co_create_opts.patch [bz#1640894] +- kvm-iotests-Add-test-for-image-creation-fallback.patch [bz#1640894] +- kvm-block-Fix-leak-in-bdrv_create_file_fallback.patch [bz#1640894] +- kvm-iotests-Use-complete_and_wait-in-155.patch [bz#1790482 bz#1805143] +- kvm-block-Introduce-bdrv_reopen_commit_post-step.patch [bz#1790482 bz#1805143] +- kvm-block-qcow2-Move-bitmap-reopen-into-bdrv_reopen_comm.patch [bz#1790482 bz#1805143] +- kvm-iotests-Refactor-blockdev-reopen-test-for-iothreads.patch [bz#1790482 bz#1805143] +- kvm-block-bdrv_reopen-with-backing-file-in-different-Aio.patch [bz#1790482 bz#1805143] +- kvm-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch [bz#1790482 bz#1805143] +- kvm-block-Make-bdrv_get_cumulative_perm-public.patch [bz#1790482 bz#1805143] +- kvm-block-Relax-restrictions-for-blockdev-snapshot.patch [bz#1790482 bz#1805143] +- kvm-iotests-Fix-run_job-with-use_log-False.patch [bz#1790482 bz#1805143] +- kvm-iotests-Test-mirror-with-temporarily-disabled-target.patch [bz#1790482 bz#1805143] +- kvm-block-Fix-cross-AioContext-blockdev-snapshot.patch [bz#1790482 bz#1805143] +- kvm-iotests-Add-iothread-cases-to-155.patch [bz#1790482 bz#1805143] +- kvm-qapi-Add-allow-write-only-overlay-feature-for-blockd.patch [bz#1790482 bz#1805143] +- kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch [bz#1809380] +- Resolves: bz#1640894 + (Fix generic file creation fallback for qemu-img nvme:// image creation support) +- Resolves: bz#1790482 + (bitmaps in backing images can't be modified) +- Resolves: bz#1805143 + (allow late/lazy opening of backing chain for shallow blockdev-mirror) +- Resolves: bz#1809380 + (guest hang during reboot process after migration from RHEl7.8 to RHEL8.2.0.) + +* Wed Mar 11 2020 Danilo Cesar Lemes de Paula - 4.2.0-14.el8 +- kvm-hw-smbios-set-new-default-SMBIOS-fields-for-Windows-.patch [bz#1782529] +- kvm-migration-multifd-clean-pages-after-filling-packet.patch [bz#1738451] +- kvm-migration-Make-sure-that-we-don-t-call-write-in-case.patch [bz#1738451] +- kvm-migration-multifd-fix-nullptr-access-in-terminating-.patch [bz#1738451] +- kvm-migration-multifd-fix-destroyed-mutex-access-in-term.patch [bz#1738451] +- kvm-multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch [bz#1738451] +- kvm-qemu-file-Don-t-do-IO-after-shutdown.patch [bz#1738451] +- kvm-migration-Don-t-send-data-if-we-have-stopped.patch [bz#1738451] +- kvm-migration-Create-migration_is_running.patch [bz#1738451] +- kvm-migration-multifd-fix-nullptr-access-in-multifd_send.patch [bz#1738451] +- kvm-migration-Maybe-VM-is-paused-when-migration-is-cance.patch [bz#1738451] +- kvm-virtiofsd-Remove-fuse_req_getgroups.patch [bz#1797064] +- kvm-virtiofsd-fv_create_listen_socket-error-path-socket-.patch [bz#1797064] +- kvm-virtiofsd-load_capng-missing-unlock.patch [bz#1797064] +- kvm-virtiofsd-do_read-missing-NULL-check.patch [bz#1797064] +- kvm-tools-virtiofsd-fuse_lowlevel-Fix-fuse_out_header-er.patch [bz#1797064] +- kvm-virtiofsd-passthrough_ll-cleanup-getxattr-listxattr.patch [bz#1797064] +- kvm-virtiofsd-Fix-xattr-operations.patch [bz#1797064] +- Resolves: bz#1738451 + (qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel)) +- Resolves: bz#1782529 + (Windows Update Enablement with default smbios strings in qemu) +- Resolves: bz#1797064 + (virtiofsd: Fixes) + +* Sat Feb 29 2020 Danilo Cesar Lemes de Paula - 4.2.0-13.el8 +- kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch [bz#1791648] +- kvm-target-i386-add-a-ucode-rev-property.patch [bz#1791648] +- kvm-target-i386-kvm-initialize-microcode-revision-from-K.patch [bz#1791648] +- kvm-target-i386-fix-TCG-UCODE_REV-access.patch [bz#1791648] +- kvm-target-i386-check-for-availability-of-MSR_IA32_UCODE.patch [bz#1791648] +- kvm-target-i386-enable-monitor-and-ucode-revision-with-c.patch [bz#1791648] +- kvm-qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch [bz#1703907] +- kvm-mirror-Store-MirrorOp.co-for-debuggability.patch [bz#1794692] +- kvm-mirror-Don-t-let-an-operation-wait-for-itself.patch [bz#1794692] +- Resolves: bz#1703907 + ([upstream]QEMU coredump when converting to qcow2: external data file images on block devices with copy_offloading) +- Resolves: bz#1791648 + ([RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough) +- Resolves: bz#1794692 + (Mirror block job stops making progress) + +* Mon Feb 24 2020 Danilo Cesar Lemes de Paula - 4.2.0-12.el8 +- kvm-vhost-user-gpu-Drop-trailing-json-comma.patch [bz#1805334] +- Resolves: bz#1805334 + (vhost-user/50-qemu-gpu.json is not valid JSON) + +* Sun Feb 23 2020 Danilo Cesar Lemes de Paula - 4.2.0-11.el8 +- kvm-spapr-Enable-DD2.3-accelerated-count-cache-flush-in-.patch [bz#1796240] +- kvm-util-add-slirp_fmt-helpers.patch [bz#1798994] +- kvm-tcp_emu-fix-unsafe-snprintf-usages.patch [bz#1798994] +- kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch [bz#1791590] +- kvm-virtio-make-virtio_delete_queue-idempotent.patch [bz#1791590] +- kvm-virtio-reset-region-cache-when-on-queue-deletion.patch [bz#1791590] +- kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch [bz#1791590] +- Resolves: bz#1791590 + ([Q35] No "DEVICE_DELETED" event in qmp after unplug virtio-net-pci device) +- Resolves: bz#1796240 + (Enable hw accelerated cache-count-flush by default for POWER9 DD2.3 cpus) +- Resolves: bz#1798994 + (CVE-2020-8608 qemu-kvm: QEMU: Slirp: potential OOB access due to unsafe snprintf() usages [rhel-av-8.2.0]) + +* Fri Feb 14 2020 Danilo Cesar Lemes de Paula - 4.2.0-10.el8 +- kvm-i386-Resolve-CPU-models-to-v1-by-default.patch [bz#1779078 bz#1787291 bz#1779078 bz#1779078] +- kvm-iotests-Support-job-complete-in-run_job.patch [bz#1781637] +- kvm-iotests-Create-VM.blockdev_create.patch [bz#1781637] +- kvm-block-Activate-recursively-even-for-already-active-n.patch [bz#1781637] +- kvm-hmp-Allow-using-qdev-ID-for-qemu-io-command.patch [bz#1781637] +- kvm-iotests-Test-external-snapshot-with-VM-state.patch [bz#1781637] +- kvm-iotests.py-Let-wait_migration-wait-even-more.patch [bz#1781637] +- kvm-blockdev-fix-coding-style-issues-in-drive_backup_pre.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-blockdev-honor-bdrv_try_set_aio_context-context-requ.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-backup-top-Begin-drain-earlier.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-block-backup-top-Don-t-acquire-context-while-droppin.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-blockdev-Acquire-AioContext-on-dirty-bitmap-function.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-blockdev-Return-bs-to-the-proper-context-on-snapshot.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-iotests-Test-handling-of-AioContexts-with-some-block.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-target-arm-monitor-query-cpu-model-expansion-crashed.patch [bz#1801320] +- kvm-docs-arm-cpu-features-Make-kvm-no-adjvtime-comment-c.patch [bz#1801320] +- Resolves: bz#1745606 + (Qemu hang when do incremental live backup in transaction mode without bitmap) +- Resolves: bz#1746217 + (Src qemu hang when do storage vm migration during guest installation) +- Resolves: bz#1773517 + (Src qemu hang when do storage vm migration with dataplane enable) +- Resolves: bz#1779036 + (Qemu coredump when do snapshot in transaction mode with one snapshot path not exist) +- Resolves: bz#1779078 + (RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm)) +- Resolves: bz#1781637 + (qemu crashed when do mem and disk snapshot) +- Resolves: bz#1782111 + (Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable)) +- Resolves: bz#1782175 + (Qemu core dump when add persistent bitmap(data plane enable)) +- Resolves: bz#1783965 + (Qemu core dump when do backup with sync: bitmap and no bitmap provided) +- Resolves: bz#1787291 + (RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm) [rhel-8.1.0.z]) +- Resolves: bz#1801320 + (aarch64: backport query-cpu-model-expansion and adjvtime document fixes) + +* Mon Feb 10 2020 Danilo Cesar Lemes de Paula - 4.2.0-9.el8 +- kvm-ppc-Deassert-the-external-interrupt-pin-in-KVM-on-re.patch [bz#1776638] +- kvm-xics-Don-t-deassert-outputs.patch [bz#1776638] +- kvm-ppc-Don-t-use-CPUPPCState-irq_input_state-with-moder.patch [bz#1776638] +- kvm-trace-update-qemu-trace-stap-to-Python-3.patch [bz#1787395] +- kvm-redhat-Remove-redundant-fix-for-qemu-trace-stap.patch [bz#1787395] +- kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch [bz#1794503] +- kvm-tpm-ppi-page-align-PPI-RAM.patch [bz#1787444] +- kvm-target-arm-kvm-trivial-Clean-up-header-documentation.patch [bz#1647366] +- kvm-target-arm-kvm64-kvm64-cpus-have-timer-registers.patch [bz#1647366] +- kvm-tests-arm-cpu-features-Check-feature-default-values.patch [bz#1647366] +- kvm-target-arm-kvm-Implement-virtual-time-adjustment.patch [bz#1647366] +- kvm-target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch [bz#1647366] +- kvm-migration-Define-VMSTATE_INSTANCE_ID_ANY.patch [bz#1529231] +- kvm-migration-Change-SaveStateEntry.instance_id-into-uin.patch [bz#1529231] +- kvm-apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch [bz#1529231] +- Resolves: bz#1529231 + ([q35] VM hangs after migration with 200 vCPUs) +- Resolves: bz#1647366 + (aarch64: Add support for the kvm-no-adjvtime ARM CPU feature) +- Resolves: bz#1776638 + (Guest failed to boot up after system_reset 20 times) +- Resolves: bz#1787395 + (qemu-trace-stap list : TypeError: startswith first arg must be bytes or a tuple of bytes, not str) +- Resolves: bz#1787444 + (Broken postcopy migration with vTPM device) +- Resolves: bz#1794503 + (CVE-2020-1711 qemu-kvm: QEMU: block: iscsi: OOB heap access via an unexpected response of iSCSI Server [rhel-av-8.2.0]) + +* Fri Jan 31 2020 Miroslav Rezanina - 4.2.0-8.el8 +- kvm-target-arm-arch_dump-Add-SVE-notes.patch [bz#1725084] +- kvm-vhost-Add-names-to-section-rounded-warning.patch [bz#1779041] +- kvm-vhost-Only-align-sections-for-vhost-user.patch [bz#1779041] +- kvm-vhost-coding-style-fix.patch [bz#1779041] +- kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch [bz#1694164] +- kvm-vhost-user-fs-remove-vhostfd-property.patch [bz#1694164] +- kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch [bz#1694164] +- kvm-virtiofsd-Pull-in-upstream-headers.patch [bz#1694164] +- kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch [bz#1694164] +- kvm-virtiofsd-Add-auxiliary-.c-s.patch [bz#1694164] +- kvm-virtiofsd-Add-fuse_lowlevel.c.patch [bz#1694164] +- kvm-virtiofsd-Add-passthrough_ll.patch [bz#1694164] +- kvm-virtiofsd-Trim-down-imported-files.patch [bz#1694164] +- kvm-virtiofsd-Format-imported-files-to-qemu-style.patch [bz#1694164] +- kvm-virtiofsd-remove-mountpoint-dummy-argument.patch [bz#1694164] +- kvm-virtiofsd-remove-unused-notify-reply-support.patch [bz#1694164] +- kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch [bz#1694164] +- kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch [bz#1694164] +- kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch [bz#1694164] +- kvm-virtiofsd-Trim-out-compatibility-code.patch [bz#1694164] +- kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch [bz#1694164] +- kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch [bz#1694164] +- kvm-virtiofsd-Add-options-for-virtio.patch [bz#1694164] +- kvm-virtiofsd-add-o-source-PATH-to-help-output.patch [bz#1694164] +- kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch [bz#1694164] +- kvm-virtiofsd-Start-wiring-up-vhost-user.patch [bz#1694164] +- kvm-virtiofsd-Add-main-virtio-loop.patch [bz#1694164] +- kvm-virtiofsd-get-set-features-callbacks.patch [bz#1694164] +- kvm-virtiofsd-Start-queue-threads.patch [bz#1694164] +- kvm-virtiofsd-Poll-kick_fd-for-queue.patch [bz#1694164] +- kvm-virtiofsd-Start-reading-commands-from-queue.patch [bz#1694164] +- kvm-virtiofsd-Send-replies-to-messages.patch [bz#1694164] +- kvm-virtiofsd-Keep-track-of-replies.patch [bz#1694164] +- kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch [bz#1694164] +- kvm-virtiofsd-Fast-path-for-virtio-read.patch [bz#1694164] +- kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch [bz#1694164] +- kvm-virtiofsd-make-f-foreground-the-default.patch [bz#1694164] +- kvm-virtiofsd-add-vhost-user.json-file.patch [bz#1694164] +- kvm-virtiofsd-add-print-capabilities-option.patch [bz#1694164] +- kvm-virtiofs-Add-maintainers-entry.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch [bz#1694164] +- kvm-virtiofsd-validate-path-components.patch [bz#1694164] +- kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch [bz#1694164] +- kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch [bz#1694164] +- kvm-virtiofsd-add-fuse_mbuf_iter-API.patch [bz#1694164] +- kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch [bz#1694164] +- kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch [bz#1694164] +- kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch [bz#1694164] +- kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch [bz#1694164] +- kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch [bz#1694164] +- kvm-virtiofsd-sandbox-mount-namespace.patch [bz#1694164] +- kvm-virtiofsd-move-to-an-empty-network-namespace.patch [bz#1694164] +- kvm-virtiofsd-move-to-a-new-pid-namespace.patch [bz#1694164] +- kvm-virtiofsd-add-seccomp-whitelist.patch [bz#1694164] +- kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch [bz#1694164] +- kvm-virtiofsd-cap-ng-helpers.patch [bz#1694164] +- kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch [bz#1694164] +- kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch [bz#1694164] +- kvm-virtiofsd-fix-libfuse-information-leaks.patch [bz#1694164] +- kvm-virtiofsd-add-syslog-command-line-option.patch [bz#1694164] +- kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch [bz#1694164] +- kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch [bz#1694164] +- kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch [bz#1694164] +- kvm-virtiofsd-Handle-reinit.patch [bz#1694164] +- kvm-virtiofsd-Handle-hard-reboot.patch [bz#1694164] +- kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch [bz#1694164] +- kvm-vhost-user-Print-unexpected-slave-message-types.patch [bz#1694164] +- kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-control-readdirplus.patch [bz#1694164] +- kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch [bz#1694164] +- kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch [bz#1694164] +- kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-use-hashtable.patch [bz#1694164] +- kvm-virtiofsd-Clean-up-inodes-on-destroy.patch [bz#1694164] +- kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch [bz#1694164] +- kvm-virtiofsd-fix-error-handling-in-main.patch [bz#1694164] +- kvm-virtiofsd-cleanup-allocated-resource-in-se.patch [bz#1694164] +- kvm-virtiofsd-fix-memory-leak-on-lo.source.patch [bz#1694164] +- kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch [bz#1694164] +- kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch [bz#1694164] +- kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch [bz#1694164] +- kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch [bz#1694164] +- kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch [bz#1694164] +- kvm-virtiofsd-Support-remote-posix-locks.patch [bz#1694164] +- kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch [bz#1694164] +- kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch [bz#1694164] +- kvm-virtiofsd-make-lo_release-atomic.patch [bz#1694164] +- kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch [bz#1694164] +- kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch [bz#1694164] +- kvm-libvhost-user-Fix-some-memtable-remap-cases.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch [bz#1694164] +- kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch [bz#1694164] +- kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch [bz#1694164] +- kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch [bz#1694164] +- kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch [bz#1694164] +- kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch [bz#1694164] +- kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch [bz#1694164] +- kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch [bz#1694164] +- kvm-virtiofsd-process-requests-in-a-thread-pool.patch [bz#1694164] +- kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch [bz#1694164] +- kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch [bz#1694164] +- kvm-virtiofsd-add-thread-pool-size-NUM-option.patch [bz#1694164] +- kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch [bz#1694164] +- kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch [bz#1694164] +- kvm-virtiofsd-add-some-options-to-the-help-message.patch [bz#1694164] +- kvm-redhat-ship-virtiofsd-vhost-user-device-backend.patch [bz#1694164] +- Resolves: bz#1694164 + (virtio-fs: host<->guest shared file system (qemu)) +- Resolves: bz#1725084 + (aarch64: support dumping SVE registers) +- Resolves: bz#1779041 + (netkvm: no connectivity Windows guest with q35 + hugepages + vhost + hv_synic) + +* Tue Jan 21 2020 Miroslav Rezanina - 4.2.0-7.el8 +- kvm-tcp_emu-Fix-oob-access.patch [bz#1791568] +- kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch [bz#1791568] +- kvm-slirp-use-correct-size-while-emulating-commands.patch [bz#1791568] +- kvm-RHEL-hw-i386-disable-nested-PERF_GLOBAL_CTRL-MSR-sup.patch [bz#1559846] +- Resolves: bz#1559846 + (Nested KVM: limit VMX features according to CPU models - Fast Train) +- Resolves: bz#1791568 + (CVE-2020-7039 qemu-kvm: QEMU: slirp: OOB buffer access while emulating tcp protocols in tcp_emu() [rhel-av-8.2.0]) + +* Wed Jan 15 2020 Danilo Cesar Lemes de Paula - 4.2.0-6.el8 +- kvm-spapr-Don-t-trigger-a-CAS-reboot-for-XICS-XIVE-mode-.patch [bz#1733893] +- kvm-vfio-pci-Don-t-remove-irqchip-notifier-if-not-regist.patch [bz#1782678] +- kvm-virtio-don-t-enable-notifications-during-polling.patch [bz#1789301] +- kvm-usbredir-Prevent-recursion-in-usbredir_write.patch [bz#1790844] +- kvm-xhci-recheck-slot-status.patch [bz#1790844] +- Resolves: bz#1733893 + (Boot a guest with "-prom-env 'auto-boot?=false'", SLOF failed to enter the boot entry after input "boot" followed by "0 > " on VNC) +- Resolves: bz#1782678 + (qemu core dump after hot-unplugging the XXV710/XL710 PF) +- Resolves: bz#1789301 + (virtio-blk/scsi: fix notification suppression during AioContext polling) +- Resolves: bz#1790844 + (USB related fixes) + +* Tue Jan 07 2020 Danilo Cesar Lemes de Paula - 4.2.0-5.el8 +- kvm-i386-Remove-cpu64-rhel6-CPU-model.patch [bz#1741345] +- kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch [bz#1772774] +- Resolves: bz#1741345 + (Remove the "cpu64-rhel6" CPU from qemu-kvm) +- Resolves: bz#1772774 + (qemu-kvm core dump during migration+reboot ( Assertion `mem->dirty_bmap' failed )) + +* Fri Dec 13 2019 Danilo Cesar Lemes de Paula - 4.2.0-4.el8 +- Rebase to qemu-4.2 +- Resolves: bz#1783250 + (rebase qemu-kvm to 4.2) + +* Tue Dec 10 2019 Danilo Cesar Lemes de Paula - 4.1.0-18.el8 +- kvm-LUKS-support-preallocation.patch [bz#1534951] +- kvm-nbd-add-empty-.bdrv_reopen_prepare.patch [bz#1718727] +- kvm-qdev-qbus-add-hidden-device-support.patch [bz#1757796] +- kvm-pci-add-option-for-net-failover.patch [bz#1757796] +- kvm-pci-mark-devices-partially-unplugged.patch [bz#1757796] +- kvm-pci-mark-device-having-guest-unplug-request-pending.patch [bz#1757796] +- kvm-qapi-add-unplug-primary-event.patch [bz#1757796] +- kvm-qapi-add-failover-negotiated-event.patch [bz#1757796] +- kvm-migration-allow-unplug-during-migration-for-failover.patch [bz#1757796] +- kvm-migration-add-new-migration-state-wait-unplug.patch [bz#1757796] +- kvm-libqos-tolerate-wait-unplug-migration-state.patch [bz#1757796] +- kvm-net-virtio-add-failover-support.patch [bz#1757796] +- kvm-vfio-unplug-failover-primary-device-before-migration.patch [bz#1757796] +- kvm-net-virtio-fix-dev_unplug_pending.patch [bz#1757796] +- kvm-net-virtio-return-early-when-failover-primary-alread.patch [bz#1757796] +- kvm-net-virtio-fix-re-plugging-of-primary-device.patch [bz#1757796] +- kvm-net-virtio-return-error-when-device_opts-arg-is-NULL.patch [bz#1757796] +- kvm-vfio-don-t-ignore-return-value-of-migrate_add_blocke.patch [bz#1757796] +- kvm-hw-vfio-pci-Fix-double-free-of-migration_blocker.patch [bz#1757796] +- Resolves: bz#1534951 + (RFE: Support preallocation mode for luks format) +- Resolves: bz#1718727 + (Committing changes to the backing file over NBD fails with reopening files not supported) +- Resolves: bz#1757796 + (RFE: support for net failover devices in qemu) + +* Mon Dec 02 2019 Danilo Cesar Lemes de Paula - 4.1.0-17.el8 +- kvm-qemu-pr-helper-fix-crash-in-mpath_reconstruct_sense.patch [bz#1772322] +- Resolves: bz#1772322 + (qemu-pr-helper: fix crash in mpath_reconstruct_sense) + +* Wed Nov 27 2019 Danilo Cesar Lemes de Paula - 4.1.0-16.el8 +- kvm-curl-Keep-pointer-to-the-CURLState-in-CURLSocket.patch [bz#1745209] +- kvm-curl-Keep-socket-until-the-end-of-curl_sock_cb.patch [bz#1745209] +- kvm-curl-Check-completion-in-curl_multi_do.patch [bz#1745209] +- kvm-curl-Pass-CURLSocket-to-curl_multi_do.patch [bz#1745209] +- kvm-curl-Report-only-ready-sockets.patch [bz#1745209] +- kvm-curl-Handle-success-in-multi_check_completion.patch [bz#1745209] +- kvm-curl-Check-curl_multi_add_handle-s-return-code.patch [bz#1745209] +- kvm-vhost-user-save-features-if-the-char-dev-is-closed.patch [bz#1738768] +- kvm-block-snapshot-Restrict-set-of-snapshot-nodes.patch [bz#1658981] +- kvm-iotests-Test-internal-snapshots-with-blockdev.patch [bz#1658981] +- kvm-qapi-Add-feature-flags-to-commands-in-qapi-introspec.patch [bz#1658981] +- kvm-qapi-Allow-introspecting-fix-for-savevm-s-cooperatio.patch [bz#1658981] +- kvm-block-Remove-backing-null-from-bs-explicit_-options.patch [bz#1773925] +- kvm-iotests-Test-multiple-blockdev-snapshot-calls.patch [bz#1773925] +- Resolves: bz#1658981 + (qemu failed to create internal snapshot via 'savevm' when using blockdev) +- Resolves: bz#1738768 + (Guest fails to recover receiving packets after vhost-user reconnect) +- Resolves: bz#1745209 + (qemu-img gets stuck when stream-converting from http) +- Resolves: bz#1773925 + (Fail to do blockcommit with more than one snapshots) + +* Thu Nov 14 2019 Danilo Cesar Lemes de Paula - 4.1.0-15.el8 +- kvm-virtio-blk-Add-blk_drain-to-virtio_blk_device_unreal.patch [bz#1706759] +- kvm-Revert-qcow2-skip-writing-zero-buffers-to-empty-COW-.patch [bz#1772473] +- kvm-coroutine-Add-qemu_co_mutex_assert_locked.patch [bz#1772473] +- kvm-qcow2-Fix-corruption-bug-in-qcow2_detect_metadata_pr.patch [bz#1772473] +- Resolves: bz#1706759 + (qemu core dump when unplug a 16T GPT type disk from win2019 guest) +- Resolves: bz#1772473 + (Import fixes from 8.1.0 into 8.1.1 branch) + +* Tue Oct 29 2019 Danilo Cesar Lemes de Paula - 4.1.0-14.el8 +- kvm-Revert-qcow2-skip-writing-zero-buffers-to-empty-COW-.patch [bz#1751934] +- kvm-coroutine-Add-qemu_co_mutex_assert_locked.patch [bz#1764721] +- kvm-qcow2-Fix-corruption-bug-in-qcow2_detect_metadata_pr.patch [bz#1764721] +- Resolves: bz#1751934 + (Fail to install guest when xfs is the host filesystem) +- Resolves: bz#1764721 + (qcow2 image corruption due to incorrect locking in preallocation detection) + +* Fri Sep 27 2019 Danilo Cesar Lemes de Paula - 4.1.0-13.el8 +- kvm-nbd-server-attach-client-channel-to-the-export-s-Aio.patch [bz#1748253] +- kvm-virtio-blk-schedule-virtio_notify_config-to-run-on-m.patch [bz#1744955] +- Resolves: bz#1744955 + (Qemu hang when block resize a qcow2 image) +- Resolves: bz#1748253 + (QEMU crashes (core dump) when using the integrated NDB server with data-plane) + +* Thu Sep 26 2019 Danilo Cesar Lemes de Paula - 4.1.0-12.el8 +- kvm-block-Use-QEMU_IS_ALIGNED.patch [bz#1745922] +- kvm-block-qcow2-Fix-corruption-introduced-by-commit-8ac0.patch [bz#1745922] +- kvm-block-qcow2-refactor-encryption-code.patch [bz#1745922] +- kvm-qemu-iotests-Add-test-for-bz-1745922.patch [bz#1745922] +- Resolves: bz#1745922 + (Luks-inside-qcow2 snapshot cannot boot after 'qemu-img rebase') + +* Mon Sep 23 2019 Danilo Cesar Lemes de Paula - 4.1.0-11.el8 +- kvm-blockjob-update-nodes-head-while-removing-all-bdrv.patch [bz#1746631] +- kvm-hostmem-file-fix-pmem-file-size-check.patch [bz#1724008 bz#1736788] +- kvm-memory-fetch-pmem-size-in-get_file_size.patch [bz#1724008 bz#1736788] +- kvm-pr-manager-Fix-invalid-g_free-crash-bug.patch [bz#1753992] +- Resolves: bz#1724008 + (QEMU core dumped "memory_region_get_ram_ptr: Assertion `mr->ram_block' failed") +- Resolves: bz#1736788 + (QEMU core dumped if boot guest with nvdimm backed by /dev/dax0.0 and option pmem=off) +- Resolves: bz#1746631 + (Qemu core dump when do block commit under stress) +- Resolves: bz#1753992 + (core dump when testing persistent reservation in guest) + +* Mon Sep 16 2019 Danilo Cesar Lemes de Paula - 4.1.0-10.el8 +- kvm-spapr-xive-Mask-the-EAS-when-allocating-an-IRQ.patch [bz#1748725] +- kvm-block-create-Do-not-abort-if-a-block-driver-is-not-a.patch [bz#1746267] +- kvm-virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch [bz#1717321] +- kvm-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch [bz#1749737] +- Resolves: bz#1717321 + (qemu-kvm core dumped when repeat "system_reset" multiple times during guest boot) +- Resolves: bz#1746267 + (qemu coredump: qemu-kvm: block/create.c:68: qmp_blockdev_create: Assertion `drv' failed) +- Resolves: bz#1748725 + ([ppc][migration][v6.3-rc1-p1ce8930]basic migration failed with "qemu-kvm: KVM_SET_DEVICE_ATTR failed: Group 3 attr 0x0000000000001309: Device or resource busy") +- Resolves: bz#1749737 + (CVE-2019-15890 qemu-kvm: QEMU: Slirp: use-after-free during packet reassembly [rhel-av-8]) + +* Tue Sep 10 2019 Danilo Cesar Lemes de Paula - 4.1.0-9.el8 +- kvm-migration-always-initialise-ram_counters-for-a-new-m.patch [bz#1734316] +- kvm-migration-add-qemu_file_update_transfer-interface.patch [bz#1734316] +- kvm-migration-add-speed-limit-for-multifd-migration.patch [bz#1734316] +- kvm-migration-update-ram_counters-for-multifd-sync-packe.patch [bz#1734316] +- kvm-spapr-pci-Consolidate-de-allocation-of-MSIs.patch [bz#1750200] +- kvm-spapr-pci-Free-MSIs-during-reset.patch [bz#1750200] +- Resolves: bz#1734316 + (multifd migration does not honour speed limits, consumes entire bandwidth of NIC) +- Resolves: bz#1750200 + ([RHEL8.1][QEMU4.1]boot up guest with vf device,then system_reset guest,error prompt(qemu-kvm: Can't allocate MSIs for device 2800: IRQ 4904 is not free)) + +* Mon Sep 09 2019 Danilo Cesar Lemes de Paula - 4.1.0-8.el8 +- kvm-migration-Do-not-re-read-the-clock-on-pre_save-in-ca.patch [bz#1747836] +- kvm-ehci-fix-queue-dev-null-ptr-dereference.patch [bz#1746790] +- kvm-spapr-Use-SHUTDOWN_CAUSE_SUBSYSTEM_RESET-for-CAS-reb.patch [bz#1743477] +- kvm-file-posix-Handle-undetectable-alignment.patch [bz#1749134] +- kvm-block-posix-Always-allocate-the-first-block.patch [bz#1749134] +- kvm-iotests-Test-allocate_first_block-with-O_DIRECT.patch [bz#1749134] +- Resolves: bz#1743477 + (Since bd94bc06479a "spapr: change default interrupt mode to 'dual'", QEMU resets the machine to select the appropriate interrupt controller. And -no-reboot prevents that.) +- Resolves: bz#1746790 + (qemu core dump while migrate from RHEL7.6 to RHEL8.1) +- Resolves: bz#1747836 + (Call traces after guest migration due to incorrect handling of the timebase) +- Resolves: bz#1749134 + (I/O error when virtio-blk disk is backed by a raw image on 4k disk) + +* Fri Sep 06 2019 Danilo Cesar Lemes de Paula - 4.1.0-7.el8 +- kvm-trace-Clarify-DTrace-SystemTap-help-message.patch [bz#1516220] +- kvm-socket-Add-backlog-parameter-to-socket_listen.patch [bz#1726898] +- kvm-socket-Add-num-connections-to-qio_channel_socket_syn.patch [bz#1726898] +- kvm-socket-Add-num-connections-to-qio_channel_socket_asy.patch [bz#1726898] +- kvm-socket-Add-num-connections-to-qio_net_listener_open_.patch [bz#1726898] +- kvm-multifd-Use-number-of-channels-as-listen-backlog.patch [bz#1726898] +- kvm-pseries-Fix-compat_pvr-on-reset.patch [bz#1744107] +- kvm-spapr-Set-compat-mode-in-spapr_core_plug.patch [bz#1744107] +- Resolves: bz#1516220 + (-trace help prints an incomplete list of trace events) +- Resolves: bz#1726898 + (Parallel migration fails with error "Unable to write to socket: Connection reset by peer" now and then) +- Resolves: bz#1744107 + (Migration from P8(qemu4.1) to P9(qemu4.1), after migration, qemu crash on destination with error message "qemu-kvm: error while loading state for instance 0x1 of device 'cpu'") + +* Wed Sep 04 2019 Danilo Cesar Lemes de Paula - 4.1.0-6.el8 +- kvm-memory-Refactor-memory_region_clear_coalescing.patch [bz#1743142] +- kvm-memory-Split-zones-when-do-coalesced_io_del.patch [bz#1743142] +- kvm-memory-Remove-has_coalesced_range-counter.patch [bz#1743142] +- kvm-memory-Fix-up-memory_region_-add-del-_coalescing.patch [bz#1743142] +- kvm-enable-virgl-for-real-this-time.patch [bz#1559740] +- Resolves: bz#1559740 + ([RFE] Enable virgl as TechPreview (qemu)) +- Resolves: bz#1743142 + (Boot guest with multiple e1000 devices, qemu will crash after several guest reboots: kvm_mem_ioeventfd_add: error adding ioeventfd: No space left on device (28)) + +* Tue Aug 27 2019 Danilo Cesar Lemes de Paula - 4.1.0-5.el8 +- kvm-redhat-s390x-Rename-s390-ccw-virtio-rhel8.0.0-to-s39.patch [bz#1693772] +- kvm-redhat-s390x-Add-proper-compatibility-options-for-th.patch [bz#1693772] +- kvm-enable-virgl.patch [bz#1559740] +- kvm-redhat-update-pseries-rhel8.1.0-machine-type.patch [bz#1744170] +- kvm-Do-not-run-iotests-on-brew-build.patch [bz#1742197 bz#1742819] +- Resolves: bz#1559740 + ([RFE] Enable virgl as TechPreview (qemu)) +- Resolves: bz#1693772 + ([IBM zKVM] RHEL AV 8.1.0 machine type update for s390x) +- Resolves: bz#1742197 + (Remove iotests from qemu-kvm builds [RHEL AV 8.1.0]) +- Resolves: bz#1742819 + (Remove iotests from qemu-kvm builds [RHEL 8.1.0]) +- Resolves: bz#1744170 + ([IBM Power] New 8.1.0 machine type for pseries) + +* Tue Aug 20 2019 Danilo Cesar Lemes de Paula - 4.1.0-4.el8 +- kvm-RHEL-disable-hostmem-memfd.patch [bz#1738626 bz#1740797] +- Resolves: bz#1738626 + (Disable memfd in QEMU) +- Resolves: bz#1740797 + (Disable memfd in QEMU) + +* Mon Aug 19 2019 Danilo Cesar Lemes de Paula - 4.1.0-3.el8 +- kvm-x86-machine-types-pc_rhel_8_0_compat.patch [bz#1719649] +- kvm-x86-machine-types-q35-Fixup-units_per_default_bus.patch [bz#1719649] +- kvm-x86-machine-types-Fixup-dynamic-sysbus-entries.patch [bz#1719649] +- kvm-x86-machine-types-add-pc-q35-rhel8.1.0.patch [bz#1719649] +- kvm-machine-types-Update-hw_compat_rhel_8_0-from-hw_comp.patch [bz#1719649] +- kvm-virtio-Make-disable-legacy-disable-modern-compat-pro.patch [bz#1719649] +- Resolves: bz#1719649 + (8.1 machine type for x86) + +* Mon Aug 19 2019 Danilo Cesar Lemes de Paula - 4.1.0-2.el8 +- kvm-spec-Update-seavgabios-dependency.patch [bz#1725664] +- kvm-pc-Don-t-make-die-id-mandatory-unless-necessary.patch [bz#1741451] +- kvm-display-bochs-fix-pcie-support.patch [bz#1733977 bz#1740692] +- kvm-spapr-Reset-CAS-IRQ-subsystem-after-devices.patch [bz#1733977] +- kvm-spapr-xive-Fix-migration-of-hot-plugged-CPUs.patch [bz#1733977] +- kvm-riscv-roms-Fix-make-rules-for-building-sifive_u-bios.patch [bz#1733977 bz#1740692] +- kvm-Update-version-for-v4.1.0-release.patch [bz#1733977 bz#1740692] +- Resolves: bz#1725664 + (Update seabios dependency) +- Resolves: bz#1733977 + (Qemu core dumped: /home/ngu/qemu/hw/intc/xics_kvm.c:321: ics_kvm_set_irq: Assertion `kernel_xics_fd != -1' failed) +- Resolves: bz#1740692 + (Backport QEMU 4.1.0 rc5 & ga patches) +- Resolves: bz#1741451 + (Failed to hot-plug vcpus) + +* Wed Aug 14 2019 Miroslav Rezanina - 4.1.0-1.el8 +- Rebase to qemu 4.1.0 rc4 [bz#1705235] +- Resolves: bz#1705235 + (Rebase qemu-kvm for RHEL-AV 8.1.0) + +* Tue Jul 23 2019 Danilo Cesar Lemes de Paula - 4.0.0-6.el8 +- kvm-x86_64-rh-devices-add-missing-TPM-passthrough.patch [bz#1519013] +- kvm-x86_64-rh-devices-enable-TPM-emulation.patch [bz#1519013] +- kvm-vfio-increase-the-cap-on-number-of-assigned-devices-.patch [bz#1719823] +- Resolves: bz#1519013 + ([RFE] QEMU Software TPM support (vTPM, or TPM emulation)) +- Resolves: bz#1719823 + ([RHEL 8.1] [RFE] increase the maximum of vfio devices to more than 32 in qemu-kvm) + +* Mon Jul 08 2019 Miroslav Rezanina - 4.0.0-5.el8 +- kvm-qemu-kvm.spec-bump-libseccomp-2.4.0.patch [bz#1720306] +- kvm-qxl-check-release-info-object.patch [bz#1712717] +- kvm-target-i386-add-MDS-NO-feature.patch [bz#1722839] +- kvm-block-file-posix-Unaligned-O_DIRECT-block-status.patch [bz#1588356] +- kvm-iotests-Test-unaligned-raw-images-with-O_DIRECT.patch [bz#1588356] +- kvm-rh-set-CONFIG_BOCHS_DISPLAY-y-for-x86.patch [bz#1707118] +- Resolves: bz#1588356 + (qemu crashed on the source host when do storage migration with source qcow2 disk created by 'qemu-img') +- Resolves: bz#1707118 + (enable device: bochs-display (QEMU)) +- Resolves: bz#1712717 + (CVE-2019-12155 qemu-kvm: QEMU: qxl: null pointer dereference while releasing spice resources [rhel-av-8]) +- Resolves: bz#1720306 + (VM failed to start with error "failed to install seccomp syscall filter in the kernel") +- Resolves: bz#1722839 + ([Intel 8.1 FEAT] MDS_NO exposure to guest - Fast Train) + +* Tue Jun 11 2019 Danilo Cesar Lemes de Paula - 4.0.0-4.el8 +- kvm-Disable-VXHS-support.patch [bz#1714937] +- kvm-aarch64-Add-virt-rhel8.1.0-machine-type-for-ARM.patch [bz#1713735] +- kvm-aarch64-Allow-ARM-VIRT-iommu-option-in-RHEL8.1-machi.patch [bz#1713735] +- kvm-usb-call-reset-handler-before-updating-state.patch [bz#1713679] +- kvm-usb-host-skip-reset-for-untouched-devices.patch [bz#1713679] +- kvm-usb-host-avoid-libusb_set_configuration-calls.patch [bz#1713679] +- kvm-aarch64-Compile-out-IOH3420.patch [bz#1627283] +- kvm-vl-Fix-drive-blockdev-persistent-reservation-managem.patch [bz#1714891] +- kvm-vl-Document-why-objects-are-delayed.patch [bz#1714891] +- Resolves: bz#1627283 + (Compile out IOH3420 on aarch64) +- Resolves: bz#1713679 + (Detached device when trying to upgrade USB device firmware when in doing USB Passthrough via QEMU) +- Resolves: bz#1713735 + (Allow ARM VIRT iommu option in RHEL8.1 machine) +- Resolves: bz#1714891 + (Guest with persistent reservation manager for a disk fails to start) +- Resolves: bz#1714937 + (Disable VXHS support) + +* Tue May 28 2019 Danilo Cesar Lemes de Paula - 4.0.0-3.el8 +- kvm-redhat-fix-cut-n-paste-garbage-in-hw_compat-comments.patch [bz#1709726] +- kvm-compat-Generic-hw_compat_rhel_8_0.patch [bz#1709726] +- kvm-redhat-sync-pseries-rhel7.6.0-with-rhel-av-8.0.1.patch [bz#1709726] +- kvm-redhat-define-pseries-rhel8.1.0-machine-type.patch [bz#1709726] +- Resolves: bz#1709726 + (Forward and backward migration failed with "qemu-kvm: error while loading state for instance 0x0 of device 'spapr'") + +* Sat May 25 2019 Danilo Cesar Lemes de Paula - 4.0.0-2.el8 +- kvm-target-i386-define-md-clear-bit.patch [bz#1703297 bz#1703304 bz#1703310 bz#1707274] +- Resolves: bz#1703297 + (CVE-2018-12126 virt:8.0.0/qemu-kvm: hardware: Microarchitectural Store Buffer Data Sampling (MSBDS) [rhel-av-8]) +- Resolves: bz#1703304 + (CVE-2018-12130 virt:8.0.0/qemu-kvm: hardware: Microarchitectural Fill Buffer Data Sampling (MFBDS) [rhel-av-8]) +- Resolves: bz#1703310 + (CVE-2018-12127 virt:8.0.0/qemu-kvm: hardware: Micro-architectural Load Port Data Sampling - Information Leak (MLPDS) [rhel-av-8]) +- Resolves: bz#1707274 + (CVE-2019-11091 virt:8.0.0/qemu-kvm: hardware: Microarchitectural Data Sampling Uncacheable Memory (MDSUM) [rhel-av-8.1.0]) + +* Wed May 15 2019 Danilo Cesar Lemes de Paula - 3.1.0-26.el8 +- kvm-target-ppc-spapr-Add-SPAPR_CAP_LARGE_DECREMENTER.patch [bz#1698711] +- kvm-target-ppc-spapr-Add-workaround-option-to-SPAPR_CAP_.patch [bz#1698711] +- kvm-target-ppc-spapr-Add-SPAPR_CAP_CCF_ASSIST.patch [bz#1698711] +- kvm-target-ppc-tcg-make-spapr_caps-apply-cap-cfpc-sbbc-i.patch [bz#1698711] +- kvm-target-ppc-spapr-Enable-mitigations-by-default-for-p.patch [bz#1698711] +- kvm-slirp-ensure-there-is-enough-space-in-mbuf-to-null-t.patch [bz#1693076] +- kvm-slirp-don-t-manipulate-so_rcv-in-tcp_emu.patch [bz#1693076] +- Resolves: bz#1693076 + (CVE-2019-6778 qemu-kvm: QEMU: slirp: heap buffer overflow in tcp_emu() [rhel-av-8]) +- Resolves: bz#1698711 + (Enable Spectre / Meltdown mitigations by default in pseries-rhel8.0.0 machine type) + +* Mon May 06 2019 Danilo Cesar Lemes de Paula - 3.1.0-25.el8 +- kvm-redhat-enable-tpmdev-passthrough.patch [bz#1688312] +- kvm-exec-Only-count-mapped-memory-backends-for-qemu_getr.patch [bz#1680492] +- kvm-Enable-libpmem-to-support-nvdimm.patch [bz#1705149] +- Resolves: bz#1680492 + (Qemu quits suddenly while system_reset after hot-plugging unsupported memory by compatible guest on P9 with 1G huge page set) +- Resolves: bz#1688312 + ([RFE] enable TPM passthrough at compile time (qemu-kvm)) +- Resolves: bz#1705149 + (libpmem support is not enabled in qemu-kvm) + +* Fri Apr 26 2019 Danilo Cesar Lemes de Paula - 3.1.0-24.el8 +- kvm-x86-host-phys-bits-limit-option.patch [bz#1688915] +- kvm-rhel-Set-host-phys-bits-limit-48-on-rhel-machine-typ.patch [bz#1688915] +- Resolves: bz#1688915 + ([Intel 8.0 Alpha] physical bits should <= 48 when host with 5level paging &EPT5 and qemu command with "-cpu qemu64" parameters.) + +* Tue Apr 23 2019 Danilo Cesar Lemes de Paula - 3.1.0-23.el8 +- kvm-device_tree-Fix-integer-overflowing-in-load_device_t.patch [bz#1693173] +- Resolves: bz#1693173 + (CVE-2018-20815 qemu-kvm: QEMU: device_tree: heap buffer overflow while loading device tree blob [rhel-av-8]) + +* Mon Apr 15 2019 Danilo Cesar Lemes de Paula - 3.1.0-22.el8 +- kvm-i386-kvm-Disable-arch_capabilities-if-MSR-can-t-be-s.patch [bz#1687578] +- kvm-i386-Make-arch_capabilities-migratable.patch [bz#1687578] +- Resolves: bz#1687578 + (Incorrect CVE vulnerabilities reported on Cascade Lake cpus) + +* Thu Apr 11 2019 Danilo Cesar Lemes de Paula - 3.1.0-21.el8 +- kvm-Remove-7-qcow2-and-luks-iotests-that-are-taking-25-s.patch [bz#1683473] +- kvm-spapr-fix-out-of-bounds-write-in-spapr_populate_drme.patch [bz#1674438] +- kvm-qcow2-include-LUKS-payload-overhead-in-qemu-img-meas.patch [bz#1655065] +- kvm-iotests-add-LUKS-payload-overhead-to-178-qemu-img-me.patch [bz#1655065] +- kvm-vnc-detect-and-optimize-pageflips.patch [bz#1666206] +- kvm-Load-kvm-module-during-boot.patch [bz#1676907 bz#1685995] +- kvm-hostmem-file-reject-invalid-pmem-file-sizes.patch [bz#1669053] +- kvm-iotests-Fix-test-200-on-s390x-without-virtio-pci.patch [bz#1687582] +- kvm-block-file-posix-do-not-fail-on-unlock-bytes.patch [bz#1652572] +- Resolves: bz#1652572 + (QEMU core dumped if stop nfs service during migration) +- Resolves: bz#1655065 + ([rhel.8.0][fast train]'qemu-img measure' size does not match the real allocated size for luks-inside-qcow2 image) +- Resolves: bz#1666206 + (vnc server should detect page-flips and avoid sending fullscreen updates then.) +- Resolves: bz#1669053 + (Guest call trace when boot with nvdimm device backed by /dev/dax) +- Resolves: bz#1674438 + (RHEL8.0 - Guest reboot fails after memory hotplug multiple times (kvm)) +- Resolves: bz#1676907 + (/dev/kvm device exists but kernel module is not loaded on boot up causing VM start to fail in libvirt) +- Resolves: bz#1683473 + (Remove 7 qcow2 & luks iotests from rhel8 fast train build %check phase) +- Resolves: bz#1685995 + (/dev/kvm device exists but kernel module is not loaded on boot up causing VM start to fail in libvirt) +- Resolves: bz#1687582 + (QEMU IOTEST 200 fails with 'virtio-scsi-pci is not a valid device model name') + +* Fri Mar 15 2019 Danilo Cesar Lemes de Paula - 3.1.0-20.el8 +- kvm-i386-Add-stibp-flag-name.patch [bz#1686260] +- Resolves: bz#1686260 + (stibp is missing on qemu 3.0 and qemu 3.1) + +* Fri Mar 15 2019 Danilo Cesar Lemes de Paula - 3.1.0-19.el8 +- kvm-migration-Fix-cancel-state.patch [bz#1608649] +- kvm-migration-rdma-Fix-qemu_rdma_cleanup-null-check.patch [bz#1608649] +- Resolves: bz#1608649 + (Query-migrate get "failed" status after migrate-cancel) + +* Tue Feb 26 2019 Danilo Cesar Lemes de Paula - 3.1.0-18.el8 +- kvm-target-i386-Disable-MPX-support-on-named-CPU-models.patch [bz#1661030] +- kvm-i386-remove-the-new-CPUID-PCONFIG-from-Icelake-Serve.patch [bz#1661515] +- kvm-i386-remove-the-INTEL_PT-CPUID-bit-from-named-CPU-mo.patch [bz#1661515] +- kvm-Revert-i386-Add-CPUID-bit-for-PCONFIG.patch [bz#1661515] +- Resolves: bz#1661030 + (Remove MPX support from 8.0 machine types) +- Resolves: bz#1661515 + (Remove PCONFIG and INTEL_PT from Icelake-* CPU models) + +* Tue Feb 26 2019 Danilo Cesar Lemes de Paula - 3.1.0-17.el8 +- kvm-block-Apply-auto-read-only-for-ro-whitelist-drivers.patch [bz#1678968] +- Resolves: bz#1678968 + (-blockdev: auto-read-only is ineffective for drivers on read-only whitelist) + +* Mon Feb 25 2019 Danilo Cesar Lemes de Paula - 3.1.0-16.el8 +- kvm-fdc-Revert-downstream-disablement-of-device-floppy.patch [bz#1664997] +- kvm-fdc-Restrict-floppy-controllers-to-RHEL-7-machine-ty.patch [bz#1664997] +- Resolves: bz#1664997 + (Restrict floppy device to RHEL-7 machine types) + +* Wed Feb 13 2019 Danilo Cesar Lemes de Paula - 3.1.0-15.el8 +- kvm-Add-raw-qcow2-nbd-and-luks-iotests-to-run-during-the.patch [bz#1664855] +- kvm-Introduce-the-qemu-kvm-tests-rpm.patch [bz#1669924] +- Resolves: bz#1664855 + (Run iotests in qemu-kvm build %check phase) +- Resolves: bz#1669924 + (qemu-kvm packaging: Package the avocado_qemu tests and qemu-iotests in a new rpm) + +* Tue Feb 12 2019 Danilo Cesar Lemes de Paula - 3.1.0-14.el8 +- kvm-doc-fix-the-configuration-path.patch [bz#1644985] +- Resolves: bz#1644985 + (The "fsfreeze-hook" script path shown by command "qemu-ga --help" or "man qemu-ga" is wrong - Fast Train) + +* Mon Feb 11 2019 Danilo Cesar Lemes de Paula - 3.1.0-13.el8 +- kvm-Acceptance-tests-add-Linux-initrd-checking-test.patch [bz#1669922] +- kvm-mmap-alloc-unfold-qemu_ram_mmap.patch [bz#1671519] +- kvm-mmap-alloc-fix-hugetlbfs-misaligned-length-in-ppc64.patch [bz#1671519] +- kvm-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch [bz#1653590] +- kvm-block-Fix-invalidate_cache-error-path-for-parent-act.patch [bz#1673014] +- kvm-virtio-scsi-Move-BlockBackend-back-to-the-main-AioCo.patch [bz#1656276 bz#1662508] +- kvm-scsi-disk-Acquire-the-AioContext-in-scsi_-_realize.patch [bz#1656276 bz#1662508] +- kvm-virtio-scsi-Forbid-devices-with-different-iothreads-.patch [bz#1656276 bz#1662508] +- Resolves: bz#1653590 + ([Fast train]had better stop qemu immediately while guest was making use of an improper page size) +- Resolves: bz#1656276 + (qemu-kvm core dumped after hotplug the deleted disk with iothread parameter) +- Resolves: bz#1662508 + (Qemu core dump when start guest with two disks using same drive) +- Resolves: bz#1669922 + (Backport avocado-qemu tests for QEMU 3.1) +- Resolves: bz#1671519 + (RHEL8.0 Snapshot3 - qemu doesn't free up hugepage memory when hotplug/hotunplug using memory-backend-file (qemu-kvm)) +- Resolves: bz#1673014 + (Local VM and migrated VM on the same host can run with same RAW file as visual disk source while without shareable configured or lock manager enabled) + +* Fri Feb 08 2019 Danilo Cesar Lemes de Paula - 3.1.0-12.el8 +- kvm-io-ensure-UNIX-client-doesn-t-unlink-server-socket.patch [bz#1665896] +- kvm-scsi-disk-Don-t-use-empty-string-as-device-id.patch [bz#1668248] +- kvm-scsi-disk-Add-device_id-property.patch [bz#1668248] +- Resolves: bz#1665896 + (VNC unix listener socket is deleted after first client quits) +- Resolves: bz#1668248 + ("An unknown error has occurred" when using cdrom to install the system with two blockdev disks.(when choose installation destination)) + +* Thu Jan 31 2019 Danilo Cesar Lemes de Paula - 3.1.0-11.el8 +- kvm-Fix-fsfreeze-hook-path-in-the-man-page.patch [bz#1644985] +- kvm-json-Fix-handling-when-not-interpolating.patch [bz#1668244] +- Resolves: bz#1644985 + (The "fsfreeze-hook" script path shown by command "qemu-ga --help" or "man qemu-ga" is wrong - Fast Train) +- Resolves: bz#1668244 + (qemu-img: /var/tmp/v2vovl9951f8.qcow2: CURL: Error opening file: The requested URL returned error: 404 Not Found) + +* Tue Jan 29 2019 Danilo Cesar Lemes de Paula - 3.1.0-10.el8 +- kvm-throttle-groups-fix-restart-coroutine-iothread-race.patch [bz#1655947] +- kvm-iotests-add-238-for-throttling-tgm-unregister-iothre.patch [bz#1655947] +- Resolves: bz#1655947 + (qemu-kvm core dumped after unplug the device which was set io throttling parameters) + +* Tue Jan 29 2019 Danilo Cesar Lemes de Paula - 3.1.0-9.el8 +- kvm-migration-rdma-unregister-fd-handler.patch [bz#1666601] +- kvm-s390x-tod-Properly-stop-the-KVM-TOD-while-the-guest-.patch [bz#1659127] +- kvm-hw-s390x-Fix-bad-mask-in-time2tod.patch [bz#1659127] +- Resolves: bz#1659127 + (Stress guest and stop it, then do live migration, guest hit call trace on destination end) +- Resolves: bz#1666601 + ([q35] dst qemu core dumped when do rdma migration with Mellanox IB QDR card) + +* Thu Jan 24 2019 Danilo Cesar Lemes de Paula - 3.1.0-7.el8 +- kvm-i386-kvm-expose-HV_CPUID_ENLIGHTMENT_INFO.EAX-and-HV.patch [bz#1653511] +- kvm-i386-kvm-add-a-comment-explaining-why-.feat_names-ar.patch [bz#1653511] +- Resolves: bz#1653511 + (qemu doesn't report all support cpu features which cause libvirt cannot get the support status of hv_tlbflush) + +* Wed Jan 23 2019 Danilo Cesar Lemes de Paula - 3.1.0-6.el8 +- kvm-spapr-Fix-ibm-max-associativity-domains-property-num.patch [bz#1653114] +- kvm-cpus-ignore-ESRCH-in-qemu_cpu_kick_thread.patch [bz#1668205] +- Resolves: bz#1653114 + (Incorrect NUMA nodes passed to qemu-kvm guest in ibm,max-associativity-domains property) +- Resolves: bz#1668205 + (Guest quit with error when hotunplug cpu) + +* Mon Jan 21 2019 Danilo Cesar Lemes de Paula - 3.1.0-5.el8 +- kvm-virtio-Helper-for-registering-virtio-device-types.patch [bz#1648023] +- kvm-virtio-Provide-version-specific-variants-of-virtio-P.patch [bz#1648023] +- kvm-globals-Allow-global-properties-to-be-optional.patch [bz#1648023] +- kvm-virtio-Make-disable-legacy-disable-modern-compat-pro.patch [bz#1648023] +- kvm-aarch64-Add-virt-rhel8.0.0-machine-type-for-ARM.patch [bz#1656504] +- kvm-aarch64-Set-virt-rhel8.0.0-max_cpus-to-512.patch [bz#1656504] +- kvm-aarch64-Use-256MB-ECAM-region-by-default.patch [bz#1656504] +- Resolves: bz#1648023 + (Provide separate device types for transitional virtio PCI devices - Fast Train) +- Resolves: bz#1656504 + (Machine types for qemu-kvm based on rebase to qemu-3.1 (aarch64)) + +* Fri Jan 11 2019 Danilo Cesar Lemes de Paula - 3.1.0-4.el8 +- kvm-hw-s390x-s390-virtio-ccw-Add-machine-types-for-RHEL8.patch [bz#1656510] +- kvm-spapr-Add-H-Call-H_HOME_NODE_ASSOCIATIVITY.patch [bz#1661967] +- kvm-redhat-Fixing-.gitpublish-to-include-AV-information.patch [] +- Resolves: bz#1656510 + (Machine types for qemu-kvm based on rebase to qemu-3.1 (s390x)) +- Resolves: bz#1661967 + (Kernel prints the message "VPHN is not supported. Disabling polling...") + +* Thu Jan 03 2019 Danilo Cesar Lemes de Paula - 3.1.0-3.el8 +- kvm-redhat-define-pseries-rhel8.0.0-machine-type.patch [bz#1656508] +- Resolves: bz#1656508 + (Machine types for qemu-kvm based on rebase to qemu-3.1 (ppc64le)) + +* Fri Dec 21 2018 Danilo Cesar Lemes de Paula - 3.1.0-2.el8 +- kvm-pc-7.5-compat-entries.patch [bz#1655820] +- kvm-compat-Generic-HW_COMPAT_RHEL7_6.patch [bz#1655820] +- kvm-pc-PC_RHEL7_6_COMPAT.patch [bz#1655820] +- kvm-pc-Add-compat-for-pc-i440fx-rhel7.6.0-machine-type.patch [bz#1655820] +- kvm-pc-Add-pc-q35-8.0.0-machine-type.patch [bz#1655820] +- kvm-pc-Add-x-migrate-smi-count-off-to-PC_RHEL7_6_COMPAT.patch [bz#1655820] +- kvm-clear-out-KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT-for.patch [bz#1659604] +- kvm-Add-edk2-Requires-to-qemu-kvm.patch [bz#1660208] +- Resolves: bz#1655820 + (Can't migarate between rhel8 and rhel7 when guest has device "video") +- Resolves: bz#1659604 + (8->7 migration failed: qemu-kvm: error: failed to set MSR 0x4b564d02 to 0x27fc13285) +- Resolves: bz#1660208 + (qemu-kvm: Should depend on the architecture-appropriate guest firmware) + +* Thu Dec 13 2018 Danilo Cesar Lemes de Paula - 3.1.0-1.el8 +- Rebase to qemu-kvm 3.1.0 + +* Tue Dec 11 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-47 +- kvm-Disable-CONFIG_IPMI-and-CONFIG_I2C-for-ppc64.patch [bz#1640044] +- kvm-Disable-CONFIG_CAN_BUS-and-CONFIG_CAN_SJA1000.patch [bz#1640042] +- Resolves: bz#1640042 + (Disable CONFIG_CAN_BUS and CONFIG_CAN_SJA1000 config switches) +- Resolves: bz#1640044 + (Disable CONFIG_I2C and CONFIG_IPMI in default-configs/ppc64-softmmu.mak) + +* Tue Dec 11 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-46 +- kvm-qcow2-Give-the-refcount-cache-the-minimum-possible-s.patch [bz#1656507] +- kvm-docs-Document-the-new-default-sizes-of-the-qcow2-cac.patch [bz#1656507] +- kvm-qcow2-Fix-Coverity-warning-when-calculating-the-refc.patch [bz#1656507] +- kvm-include-Add-IEC-binary-prefixes-in-qemu-units.h.patch [bz#1656507] +- kvm-qcow2-Options-documentation-fixes.patch [bz#1656507] +- kvm-include-Add-a-lookup-table-of-sizes.patch [bz#1656507] +- kvm-qcow2-Make-sizes-more-humanly-readable.patch [bz#1656507] +- kvm-qcow2-Avoid-duplication-in-setting-the-refcount-cach.patch [bz#1656507] +- kvm-qcow2-Assign-the-L2-cache-relatively-to-the-image-si.patch [bz#1656507] +- kvm-qcow2-Increase-the-default-upper-limit-on-the-L2-cac.patch [bz#1656507] +- kvm-qcow2-Resize-the-cache-upon-image-resizing.patch [bz#1656507] +- kvm-qcow2-Set-the-default-cache-clean-interval-to-10-min.patch [bz#1656507] +- kvm-qcow2-Explicit-number-replaced-by-a-constant.patch [bz#1656507] +- kvm-block-backend-Set-werror-rerror-defaults-in-blk_new.patch [bz#1657637] +- kvm-qcow2-Fix-cache-clean-interval-documentation.patch [bz#1656507] +- Resolves: bz#1656507 + ([RHEL.8] qcow2 cache is too small) +- Resolves: bz#1657637 + (Wrong werror default for -device drive=) + +* Thu Dec 06 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-45 +- kvm-target-ppc-add-basic-support-for-PTCR-on-POWER9.patch [bz#1639069] +- kvm-linux-headers-Update-for-nested-KVM-HV-downstream-on.patch [bz#1639069] +- kvm-target-ppc-Add-one-reg-id-for-ptcr.patch [bz#1639069] +- kvm-ppc-spapr_caps-Add-SPAPR_CAP_NESTED_KVM_HV.patch [bz#1639069] +- kvm-Re-enable-CONFIG_HYPERV_TESTDEV.patch [bz#1651195] +- kvm-qxl-use-guest_monitor_config-for-local-renderer.patch [bz#1610163] +- kvm-Declare-cirrus-vga-as-deprecated.patch [bz#1651994] +- kvm-Do-not-build-bluetooth-support.patch [bz#1654651] +- kvm-vfio-helpers-Fix-qemu_vfio_open_pci-crash.patch [bz#1645840] +- kvm-balloon-Allow-multiple-inhibit-users.patch [bz#1650272] +- kvm-Use-inhibit-to-prevent-ballooning-without-synchr.patch [bz#1650272] +- kvm-vfio-Inhibit-ballooning-based-on-group-attachment-to.patch [bz#1650272] +- kvm-vfio-ccw-pci-Allow-devices-to-opt-in-for-ballooning.patch [bz#1650272] +- kvm-vfio-pci-Handle-subsystem-realpath-returning-NULL.patch [bz#1650272] +- kvm-vfio-pci-Fix-failure-to-close-file-descriptor-on-err.patch [bz#1650272] +- kvm-postcopy-Synchronize-usage-of-the-balloon-inhibitor.patch [bz#1650272] +- Resolves: bz#1610163 + (guest shows border blurred screen with some resolutions when qemu boot with -device qxl-vga ,and guest on rhel7.6 has no such question) +- Resolves: bz#1639069 + ([IBM 8.0 FEAT] POWER9 - Nested virtualization in RHEL8.0 KVM for ppc64le - qemu-kvm side) +- Resolves: bz#1645840 + (Qemu core dump when hotplug nvme:// drive via -blockdev) +- Resolves: bz#1650272 + (Ballooning is incompatible with vfio assigned devices, but not prevented) +- Resolves: bz#1651195 + (Re-enable hyperv-testdev device) +- Resolves: bz#1651994 + (Declare the "Cirrus VGA" device emulation of QEMU as deprecated in RHEL8) +- Resolves: bz#1654651 + (Qemu: hw: bt: keep bt/* objects from building [rhel-8.0]) + +* Tue Nov 27 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-43 +- kvm-block-Make-more-block-drivers-compile-time-configura.patch [bz#1598842 bz#1598842] +- kvm-RHEL8-Add-disable-configure-options-to-qemu-spec-fil.patch [bz#1598842] +- Resolves: bz#1598842 + (Compile out unused block drivers) + +* Mon Nov 26 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-43 + +- kvm-configure-add-test-for-libudev.patch [bz#1636185] +- kvm-qga-linux-report-disk-serial-number.patch [bz#1636185] +- kvm-qga-linux-return-disk-device-in-guest-get-fsinfo.patch [bz#1636185] +- kvm-qemu-error-introduce-error-warn-_report_once.patch [bz#1625173] +- kvm-intel-iommu-start-to-use-error_report_once.patch [bz#1625173] +- kvm-intel-iommu-replace-more-vtd_err_-traces.patch [bz#1625173] +- kvm-intel_iommu-introduce-vtd_reset_caches.patch [bz#1625173] +- kvm-intel_iommu-better-handling-of-dmar-state-switch.patch [bz#1625173] +- kvm-intel_iommu-move-ce-fetching-out-when-sync-shadow.patch [bz#1625173 bz#1629616] +- kvm-intel_iommu-handle-invalid-ce-for-shadow-sync.patch [bz#1625173 bz#1629616] +- kvm-block-remove-bdrv_dirty_bitmap_make_anon.patch [bz#1518989] +- kvm-block-simplify-code-around-releasing-bitmaps.patch [bz#1518989] +- kvm-hbitmap-Add-advance-param-to-hbitmap_iter_next.patch [bz#1518989] +- kvm-test-hbitmap-Add-non-advancing-iter_next-tests.patch [bz#1518989] +- kvm-block-dirty-bitmap-Add-bdrv_dirty_iter_next_area.patch [bz#1518989] +- kvm-blockdev-backup-add-bitmap-argument.patch [bz#1518989] +- kvm-dirty-bitmap-switch-assert-fails-to-errors-in-bdrv_m.patch [bz#1518989] +- kvm-dirty-bitmap-rename-bdrv_undo_clear_dirty_bitmap.patch [bz#1518989] +- kvm-dirty-bitmap-make-it-possible-to-restore-bitmap-afte.patch [bz#1518989] +- kvm-blockdev-rename-block-dirty-bitmap-clear-transaction.patch [bz#1518989] +- kvm-qapi-add-transaction-support-for-x-block-dirty-bitma.patch [bz#1518989] +- kvm-block-dirty-bitmaps-add-user_locked-status-checker.patch [bz#1518989] +- kvm-block-dirty-bitmaps-fix-merge-permissions.patch [bz#1518989] +- kvm-block-dirty-bitmaps-allow-clear-on-disabled-bitmaps.patch [bz#1518989] +- kvm-block-dirty-bitmaps-prohibit-enable-disable-on-locke.patch [bz#1518989] +- kvm-block-backup-prohibit-backup-from-using-in-use-bitma.patch [bz#1518989] +- kvm-nbd-forbid-use-of-frozen-bitmaps.patch [bz#1518989] +- kvm-bitmap-Update-count-after-a-merge.patch [bz#1518989] +- kvm-iotests-169-drop-deprecated-autoload-parameter.patch [bz#1518989] +- kvm-block-qcow2-improve-error-message-in-qcow2_inactivat.patch [bz#1518989] +- kvm-bloc-qcow2-drop-dirty_bitmaps_loaded-state-variable.patch [bz#1518989] +- kvm-dirty-bitmaps-clean-up-bitmaps-loading-and-migration.patch [bz#1518989] +- kvm-iotests-improve-169.patch [bz#1518989] +- kvm-iotests-169-add-cases-for-source-vm-resuming.patch [bz#1518989] +- kvm-pc-dimm-turn-alignment-assert-into-check.patch [bz#1630116] +- Resolves: bz#1518989 + (RFE: QEMU Incremental live backup) +- Resolves: bz#1625173 + ([NVMe Device Assignment] Guest could not boot up with q35+iommu) +- Resolves: bz#1629616 + (boot guest with q35+vIOMMU+ device assignment, qemu terminal shows "qemu-kvm: VFIO_UNMAP_DMA: -22" when return assigned network devices from vfio driver to ixgbe in guest) +- Resolves: bz#1630116 + (pc_dimm_get_free_addr: assertion failed: (QEMU_ALIGN_UP(address_space_start, align) == address_space_start)) +- Resolves: bz#1636185 + ([RFE] Report disk device name and serial number (qemu-guest-agent on Linux)) + +* Mon Nov 05 2018 Danilo Cesar Lemes de Paula - 2.12.0-42.el8 +- kvm-luks-Allow-share-rw-on.patch [bz#1629701] +- kvm-redhat-reenable-gluster-support.patch [bz#1599340] +- kvm-redhat-bump-libusb-requirement.patch [bz#1627970] +- Resolves: bz#1599340 + (Reenable glusterfs in qemu-kvm once BZ#1567292 gets fixed) +- Resolves: bz#1627970 + (symbol lookup error: /usr/libexec/qemu-kvm: undefined symbol: libusb_set_option) +- Resolves: bz#1629701 + ("share-rw=on" does not work for luks format image - Fast Train) + +* Tue Oct 16 2018 Danilo Cesar Lemes de Paula - 2.12.0-41.el8 +- kvm-block-rbd-pull-out-qemu_rbd_convert_options.patch [bz#1635585] +- kvm-block-rbd-Attempt-to-parse-legacy-filenames.patch [bz#1635585] +- kvm-block-rbd-add-deprecation-documentation-for-filename.patch [bz#1635585] +- kvm-block-rbd-add-iotest-for-rbd-legacy-keyvalue-filenam.patch [bz#1635585] +- Resolves: bz#1635585 + (rbd json format of 7.6 is incompatible with 7.5) + +* Tue Oct 16 2018 Danilo Cesar Lemes de Paula - 2.12.0-40.el8 + +- kvm-vnc-call-sasl_server_init-only-when-required.patch [bz#1609327] +- kvm-nbd-server-fix-NBD_CMD_CACHE.patch [bz#1636142] +- kvm-nbd-fix-NBD_FLAG_SEND_CACHE-value.patch [bz#1636142] +- kvm-test-bdrv-drain-bdrv_drain-works-with-cross-AioConte.patch [bz#1637976] +- kvm-block-Use-bdrv_do_drain_begin-end-in-bdrv_drain_all.patch [bz#1637976] +- kvm-block-Remove-recursive-parameter-from-bdrv_drain_inv.patch [bz#1637976] +- kvm-block-Don-t-manually-poll-in-bdrv_drain_all.patch [bz#1637976] +- kvm-tests-test-bdrv-drain-bdrv_drain_all-works-in-corout.patch [bz#1637976] +- kvm-block-Avoid-unnecessary-aio_poll-in-AIO_WAIT_WHILE.patch [bz#1637976] +- kvm-block-Really-pause-block-jobs-on-drain.patch [bz#1637976] +- kvm-block-Remove-bdrv_drain_recurse.patch [bz#1637976] +- kvm-test-bdrv-drain-Add-test-for-node-deletion.patch [bz#1637976] +- kvm-block-Drain-recursively-with-a-single-BDRV_POLL_WHIL.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-node-deletion-in-subtree-recurs.patch [bz#1637976] +- kvm-block-Don-t-poll-in-parent-drain-callbacks.patch [bz#1637976] +- kvm-test-bdrv-drain-Graph-change-through-parent-callback.patch [bz#1637976] +- kvm-block-Defer-.bdrv_drain_begin-callback-to-polling-ph.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-that-bdrv_drain_invoke-doesn-t-.patch [bz#1637976] +- kvm-block-Allow-AIO_WAIT_WHILE-with-NULL-ctx.patch [bz#1637976] +- kvm-block-Move-bdrv_drain_all_begin-out-of-coroutine-con.patch [bz#1637976] +- kvm-block-ignore_bds_parents-parameter-for-drain-functio.patch [bz#1637976] +- kvm-block-Allow-graph-changes-in-bdrv_drain_all_begin-en.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-graph-changes-in-drain_all-sect.patch [bz#1637976] +- kvm-block-Poll-after-drain-on-attaching-a-node.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-bdrv_append-to-drained-node.patch [bz#1637976] +- kvm-block-linux-aio-acquire-AioContext-before-qemu_laio_.patch [bz#1637976] +- kvm-util-async-use-qemu_aio_coroutine_enter-in-co_schedu.patch [bz#1637976] +- kvm-job-Fix-nested-aio_poll-hanging-in-job_txn_apply.patch [bz#1637976] +- kvm-job-Fix-missing-locking-due-to-mismerge.patch [bz#1637976] +- kvm-blockjob-Wake-up-BDS-when-job-becomes-idle.patch [bz#1637976] +- kvm-aio-wait-Increase-num_waiters-even-in-home-thread.patch [bz#1637976] +- kvm-test-bdrv-drain-Drain-with-block-jobs-in-an-I-O-thre.patch [bz#1637976] +- kvm-test-blockjob-Acquire-AioContext-around-job_cancel_s.patch [bz#1637976] +- kvm-job-Use-AIO_WAIT_WHILE-in-job_finish_sync.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-AIO_WAIT_WHILE-in-completion-ca.patch [bz#1637976] +- kvm-block-Add-missing-locking-in-bdrv_co_drain_bh_cb.patch [bz#1637976] +- kvm-block-backend-Add-.drained_poll-callback.patch [bz#1637976] +- kvm-block-backend-Fix-potential-double-blk_delete.patch [bz#1637976] +- kvm-block-backend-Decrease-in_flight-only-after-callback.patch [bz#1637976] +- kvm-blockjob-Lie-better-in-child_job_drained_poll.patch [bz#1637976] +- kvm-block-Remove-aio_poll-in-bdrv_drain_poll-variants.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-nested-poll-in-bdrv_drain_poll_.patch [bz#1637976] +- kvm-job-Avoid-deadlocks-in-job_completed_txn_abort.patch [bz#1637976] +- kvm-test-bdrv-drain-AIO_WAIT_WHILE-in-job-.commit-.abort.patch [bz#1637976] +- kvm-test-bdrv-drain-Fix-outdated-comments.patch [bz#1637976] +- kvm-block-Use-a-single-global-AioWait.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-draining-job-source-child-and-p.patch [bz#1637976] +- kvm-qemu-img-Fix-assert-when-mapping-unaligned-raw-file.patch [bz#1639374] +- kvm-iotests-Add-test-221-to-catch-qemu-img-map-regressio.patch [bz#1639374] +- Resolves: bz#1609327 + (qemu-kvm[37046]: Could not find keytab file: /etc/qemu/krb5.tab: Unknown error 49408) +- Resolves: bz#1636142 + (qemu NBD_CMD_CACHE flaws impacting non-qemu NBD clients) +- Resolves: bz#1637976 + (Crashes and hangs with iothreads vs. block jobs) +- Resolves: bz#1639374 + (qemu-img map 'Aborted (core dumped)' when specifying a plain file) + +* Tue Oct 16 2018 Danilo Cesar Lemes de Paula - +- kvm-linux-headers-update.patch [bz#1508142] +- kvm-s390x-cpumodel-Set-up-CPU-model-for-AP-device-suppor.patch [bz#1508142] +- kvm-s390x-kvm-enable-AP-instruction-interpretation-for-g.patch [bz#1508142] +- kvm-s390x-ap-base-Adjunct-Processor-AP-object-model.patch [bz#1508142] +- kvm-s390x-vfio-ap-Introduce-VFIO-AP-device.patch [bz#1508142] +- kvm-s390-doc-detailed-specifications-for-AP-virtualizati.patch [bz#1508142] +- Resolves: bz#1508142 + ([IBM 8.0 FEAT] KVM: Guest-dedicated Crypto Adapters - qemu part) + +* Mon Oct 15 2018 Danilo Cesar Lemes de Paula - 2.12.0-38.el8 +- kvm-Revert-hw-acpi-build-build-SRAT-memory-affinity-stru.patch [bz#1609235] +- kvm-add-udev-kvm-check.patch [bz#1552663] +- kvm-aio-posix-Don-t-count-ctx-notifier-as-progress-when-.patch [bz#1623085] +- kvm-aio-Do-aio_notify_accept-only-during-blocking-aio_po.patch [bz#1623085] +- kvm-aio-posix-fix-concurrent-access-to-poll_disable_cnt.patch [bz#1632622] +- kvm-aio-posix-compute-timeout-before-polling.patch [bz#1632622] +- kvm-aio-posix-do-skip-system-call-if-ctx-notifier-pollin.patch [bz#1632622] +- kvm-intel-iommu-send-PSI-always-even-if-across-PDEs.patch [bz#1450712] +- kvm-intel-iommu-remove-IntelIOMMUNotifierNode.patch [bz#1450712] +- kvm-intel-iommu-add-iommu-lock.patch [bz#1450712] +- kvm-intel-iommu-only-do-page-walk-for-MAP-notifiers.patch [bz#1450712] +- kvm-intel-iommu-introduce-vtd_page_walk_info.patch [bz#1450712] +- kvm-intel-iommu-pass-in-address-space-when-page-walk.patch [bz#1450712] +- kvm-intel-iommu-trace-domain-id-during-page-walk.patch [bz#1450712] +- kvm-util-implement-simple-iova-tree.patch [bz#1450712] +- kvm-intel-iommu-rework-the-page-walk-logic.patch [bz#1450712] +- kvm-i386-define-the-ssbd-CPUID-feature-bit-CVE-2018-3639.patch [bz#1633928] +- Resolves: bz#1450712 + (Booting nested guest with vIOMMU, the assigned network devices can not receive packets (qemu)) +- Resolves: bz#1552663 + (81-kvm-rhel.rules is no longer part of initscripts) +- Resolves: bz#1609235 + (Win2016 guest can't recognize pc-dimm hotplugged to node 0) +- Resolves: bz#1623085 + (VM doesn't boot from HD) +- Resolves: bz#1632622 + (~40% virtio_blk disk performance drop for win2012r2 guest when comparing qemu-kvm-rhev-2.12.0-9 with qemu-kvm-rhev-2.12.0-12) +- Resolves: bz#1633928 + (CVE-2018-3639 qemu-kvm: hw: cpu: speculative store bypass [rhel-8.0]) + +* Fri Oct 12 2018 Danilo Cesar Lemes de Paula - 2.12.0-37.el8 +- kvm-block-for-jobs-do-not-clear-user_paused-until-after-.patch [bz#1635583] +- kvm-iotests-Add-failure-matching-to-common.qemu.patch [bz#1635583] +- kvm-block-iotest-to-catch-abort-on-forced-blockjob-cance.patch [bz#1635583] +- Resolves: bz#1635583 + (Quitting VM causes qemu core dump once the block mirror job paused for no enough target space) + +* Fri Oct 12 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-36 +- kvm-check-Only-test-ivshm-when-it-is-compiled-in.patch [bz#1621817] +- kvm-Disable-ivshmem.patch [bz#1621817] +- kvm-mirror-Fail-gracefully-for-source-target.patch [bz#1637963] +- kvm-commit-Add-top-node-base-node-options.patch [bz#1637970] +- kvm-qemu-iotests-Test-commit-with-top-node-base-node.patch [bz#1637970] +- Resolves: bz#1621817 + (Disable IVSHMEM in RHEL 8) +- Resolves: bz#1637963 + (Segfault on 'blockdev-mirror' with same node as source and target) +- Resolves: bz#1637970 + (allow using node-names with block-commit) + +* Thu Oct 11 2018 Danilo Cesar Lemes de Paula - 2.12.0-35.el8 +- kvm-redhat-make-the-plugins-executable.patch [bz#1638304] +- Resolves: bz#1638304 + (the driver packages lack all the library Requires) + +* Thu Oct 11 2018 Danilo Cesar Lemes de Paula - 2.12.0-34.el8 +- kvm-seccomp-allow-sched_setscheduler-with-SCHED_IDLE-pol.patch [bz#1618356] +- kvm-seccomp-use-SIGSYS-signal-instead-of-killing-the-thr.patch [bz#1618356] +- kvm-seccomp-prefer-SCMP_ACT_KILL_PROCESS-if-available.patch [bz#1618356] +- kvm-configure-require-libseccomp-2.2.0.patch [bz#1618356] +- kvm-seccomp-set-the-seccomp-filter-to-all-threads.patch [bz#1618356] +- kvm-memory-cleanup-side-effects-of-memory_region_init_fo.patch [bz#1600365] +- Resolves: bz#1600365 + (QEMU core dumped when hotplug memory exceeding host hugepages and with discard-data=yes) +- Resolves: bz#1618356 + (qemu-kvm: Qemu: seccomp: blacklist is not applied to all threads [rhel-8]) + +* Fri Oct 05 2018 Danilo Cesar Lemes de Paula - 2.12.0-33.el8 +- kvm-migration-postcopy-Clear-have_listen_thread.patch [bz#1608765] +- kvm-migration-cleanup-in-error-paths-in-loadvm.patch [bz#1608765] +- kvm-jobs-change-start-callback-to-run-callback.patch [bz#1632939] +- kvm-jobs-canonize-Error-object.patch [bz#1632939] +- kvm-jobs-add-exit-shim.patch [bz#1632939] +- kvm-block-commit-utilize-job_exit-shim.patch [bz#1632939] +- kvm-block-mirror-utilize-job_exit-shim.patch [bz#1632939] +- kvm-jobs-utilize-job_exit-shim.patch [bz#1632939] +- kvm-block-backup-make-function-variables-consistently-na.patch [bz#1632939] +- kvm-jobs-remove-ret-argument-to-job_completed-privatize-.patch [bz#1632939] +- kvm-jobs-remove-job_defer_to_main_loop.patch [bz#1632939] +- kvm-block-commit-add-block-job-creation-flags.patch [bz#1632939] +- kvm-block-mirror-add-block-job-creation-flags.patch [bz#1632939] +- kvm-block-stream-add-block-job-creation-flags.patch [bz#1632939] +- kvm-block-commit-refactor-commit-to-use-job-callbacks.patch [bz#1632939] +- kvm-block-mirror-don-t-install-backing-chain-on-abort.patch [bz#1632939] +- kvm-block-mirror-conservative-mirror_exit-refactor.patch [bz#1632939] +- kvm-block-stream-refactor-stream-to-use-job-callbacks.patch [bz#1632939] +- kvm-tests-blockjob-replace-Blockjob-with-Job.patch [bz#1632939] +- kvm-tests-test-blockjob-remove-exit-callback.patch [bz#1632939] +- kvm-tests-test-blockjob-txn-move-.exit-to-.clean.patch [bz#1632939] +- kvm-jobs-remove-.exit-callback.patch [bz#1632939] +- kvm-qapi-block-commit-expose-new-job-properties.patch [bz#1632939] +- kvm-qapi-block-mirror-expose-new-job-properties.patch [bz#1632939] +- kvm-qapi-block-stream-expose-new-job-properties.patch [bz#1632939] +- kvm-block-backup-qapi-documentation-fixup.patch [bz#1632939] +- kvm-blockdev-document-transactional-shortcomings.patch [bz#1632939] +- Resolves: bz#1608765 + (After postcopy migration, do savevm and loadvm, guest hang and call trace) +- Resolves: bz#1632939 + (qemu blockjobs other than backup do not support job-finalize or job-dismiss) + +* Fri Sep 28 2018 Danilo Cesar Lemes de Paula - 2.12.0-32.el8 +- kvm-Re-enable-disabled-Hyper-V-enlightenments.patch [bz#1625185] +- kvm-Fix-annocheck-issues.patch [bz#1624164] +- kvm-exec-check-that-alignment-is-a-power-of-two.patch [bz#1630746] +- kvm-curl-Make-sslverify-off-disable-host-as-well-as-peer.patch [bz#1575925] +- Resolves: bz#1575925 + ("SSL: no alternative certificate subject name matches target host name" error even though sslverify = off) +- Resolves: bz#1624164 + (Review annocheck distro flag failures in qemu-kvm) +- Resolves: bz#1625185 + (Re-enable disabled Hyper-V enlightenments) +- Resolves: bz#1630746 + (qemu_ram_mmap: Assertion `is_power_of_2(align)' failed) + +* Tue Sep 11 2018 Danilo Cesar Lemes de Paula - 2.12.0-31.el8 +- kvm-i386-Disable-TOPOEXT-by-default-on-cpu-host.patch [bz#1619804] +- kvm-redhat-enable-opengl-add-build-and-runtime-deps.patch [bz#1618412] +- Resolves: bz#1618412 + (Enable opengl (for intel vgpu display)) +- Resolves: bz#1619804 + (kernel panic in init_amd_cacheinfo) + +* Wed Sep 05 2018 Danilo Cesar Lemes de Paula - 2.12.0-30.el8 +- kvm-redhat-Disable-vhost-crypto.patch [bz#1625668] +- Resolves: bz#1625668 + (Decide if we should disable 'vhost-crypto' or not) + +* Wed Sep 05 2018 Danilo Cesar Lemes de Paula - 2.12.0-29.el8 +- kvm-target-i386-sev-fix-memory-leaks.patch [bz#1615717] +- kvm-i386-Fix-arch_query_cpu_model_expansion-leak.patch [bz#1615717] +- kvm-redhat-Update-build-configuration.patch [bz#1573156] +- Resolves: bz#1573156 + (Update build configure for QEMU 2.12.0) +- Resolves: bz#1615717 + (Memory leaks) + +* Wed Aug 29 2018 Danilo Cesar Lemes de Paula - 2.12.0-27.el8 +- kvm-Fix-libusb-1.0.22-deprecated-libusb_set_debug-with-l.patch [bz#1622656] +- Resolves: bz#1622656 + (qemu-kvm fails to build due to libusb_set_debug being deprecated) + +* Fri Aug 17 2018 Danilo Cesar Lemes de Paula - 2.12.0-26.el8 +- kvm-redhat-remove-extra-in-rhel_rhev_conflicts-macro.patch [bz#1618752] +- Resolves: bz#1618752 + (qemu-kvm can't be installed in RHEL-8 as it Conflicts with itself.) + +* Thu Aug 16 2018 Danilo Cesar Lemes de Paula - 2.12.0-25.el8 +- kvm-Migration-TLS-Fix-crash-due-to-double-cleanup.patch [bz#1594384] +- Resolves: bz#1594384 + (2.12 migration fixes) + +* Tue Aug 14 2018 Danilo Cesar Lemes de Paula - 2.12.0-24.el8 +- kvm-Add-qemu-keymap-to-qemu-kvm-common.patch [bz#1593117] +- Resolves: bz#1593117 + (add qemu-keymap utility) + +* Fri Aug 10 2018 Danilo Cesar Lemes de Paula - 2.12.0-23.el8 +- Fixing an issue with some old command in the spec file + +* Fri Aug 10 2018 Danilo Cesar Lemes de Paula - 2.12.0-22.el8 +- Fix an issue with the build_configure script. +- Resolves: bz#1425820 + (Improve QEMU packaging layout with modularization of the block layer) + + +* Fri Aug 10 2018 Danilo Cesar Lemes de Paula - 2.12.0-20.el8 +- kvm-migration-stop-compressing-page-in-migration-thread.patch [bz#1594384] +- kvm-migration-stop-compression-to-allocate-and-free-memo.patch [bz#1594384] +- kvm-migration-stop-decompression-to-allocate-and-free-me.patch [bz#1594384] +- kvm-migration-detect-compression-and-decompression-error.patch [bz#1594384] +- kvm-migration-introduce-control_save_page.patch [bz#1594384] +- kvm-migration-move-some-code-to-ram_save_host_page.patch [bz#1594384] +- kvm-migration-move-calling-control_save_page-to-the-comm.patch [bz#1594384] +- kvm-migration-move-calling-save_zero_page-to-the-common-.patch [bz#1594384] +- kvm-migration-introduce-save_normal_page.patch [bz#1594384] +- kvm-migration-remove-ram_save_compressed_page.patch [bz#1594384] +- kvm-migration-block-dirty-bitmap-fix-memory-leak-in-dirt.patch [bz#1594384] +- kvm-migration-fix-saving-normal-page-even-if-it-s-been-c.patch [bz#1594384] +- kvm-migration-update-index-field-when-delete-or-qsort-RD.patch [bz#1594384] +- kvm-migration-introduce-decompress-error-check.patch [bz#1594384] +- kvm-migration-Don-t-activate-block-devices-if-using-S.patch [bz#1594384] +- kvm-migration-not-wait-RDMA_CM_EVENT_DISCONNECTED-event-.patch [bz#1594384] +- kvm-migration-block-dirty-bitmap-fix-dirty_bitmap_load.patch [bz#1594384] +- kvm-s390x-add-RHEL-7.6-machine-type-for-ccw.patch [bz#1595718] +- kvm-s390x-cpumodel-default-enable-bpb-and-ppa15-for-z196.patch [bz#1595718] +- kvm-linux-headers-asm-s390-kvm.h-header-sync.patch [bz#1612938] +- kvm-s390x-kvm-add-etoken-facility.patch [bz#1612938] +- Resolves: bz#1594384 + (2.12 migration fixes) +- Resolves: bz#1595718 + (Add ppa15/bpb to the default cpu model for z196 and higher in the 7.6 s390-ccw-virtio machine) +- Resolves: bz#1612938 + (Add etoken support to qemu-kvm for s390x KVM guests) + +* Fri Aug 10 2018 Danilo Cesar Lemes de Paula - 2.12.0-18.el8 + Mass import from RHEL 7.6 qemu-kvm-rhev, including fixes to the following BZs: + +- kvm-AArch64-Add-virt-rhel7.6-machine-type.patch [bz#1558723] +- kvm-cpus-Fix-event-order-on-resume-of-stopped-guest.patch [bz#1566153] +- kvm-qemu-img-Check-post-truncation-size.patch [bz#1523065] +- kvm-vga-catch-depth-0.patch [bz#1575541] +- kvm-Fix-x-hv-max-vps-compat-value-for-7.4-machine-type.patch [bz#1583959] +- kvm-ccid-card-passthru-fix-regression-in-realize.patch [bz#1584984] +- kvm-Use-4-MB-vram-for-cirrus.patch [bz#1542080] +- kvm-spapr_pci-Remove-unhelpful-pagesize-warning.patch [bz#1505664] +- kvm-rpm-Add-nvme-VFIO-driver-to-rw-whitelist.patch [bz#1416180] +- kvm-qobject-Use-qobject_to-instead-of-type-cast.patch [bz#1557995] +- kvm-qobject-Ensure-base-is-at-offset-0.patch [bz#1557995] +- kvm-qobject-use-a-QObjectBase_-struct.patch [bz#1557995] +- kvm-qobject-Replace-qobject_incref-QINCREF-qobject_decre.patch [bz#1557995] +- kvm-qobject-Modify-qobject_ref-to-return-obj.patch [bz#1557995] +- kvm-rbd-Drop-deprecated-drive-parameter-filename.patch [bz#1557995] +- kvm-iscsi-Drop-deprecated-drive-parameter-filename.patch [bz#1557995] +- kvm-block-Add-block-specific-QDict-header.patch [bz#1557995] +- kvm-qobject-Move-block-specific-qdict-code-to-block-qdic.patch [bz#1557995] +- kvm-block-Fix-blockdev-for-certain-non-string-scalars.patch [bz#1557995] +- kvm-block-Fix-drive-for-certain-non-string-scalars.patch [bz#1557995] +- kvm-block-Clean-up-a-misuse-of-qobject_to-in-.bdrv_co_cr.patch [bz#1557995] +- kvm-block-Factor-out-qobject_input_visitor_new_flat_conf.patch [bz#1557995] +- kvm-block-Make-remaining-uses-of-qobject-input-visitor-m.patch [bz#1557995] +- kvm-block-qdict-Simplify-qdict_flatten_qdict.patch [bz#1557995] +- kvm-block-qdict-Tweak-qdict_flatten_qdict-qdict_flatten_.patch [bz#1557995] +- kvm-block-qdict-Clean-up-qdict_crumple-a-bit.patch [bz#1557995] +- kvm-block-qdict-Simplify-qdict_is_list-some.patch [bz#1557995] +- kvm-check-block-qdict-Rename-qdict_flatten-s-variables-f.patch [bz#1557995] +- kvm-check-block-qdict-Cover-flattening-of-empty-lists-an.patch [bz#1557995] +- kvm-block-Fix-blockdev-blockdev-add-for-empty-objects-an.patch [bz#1557995] +- kvm-rbd-New-parameter-auth-client-required.patch [bz#1557995] +- kvm-rbd-New-parameter-key-secret.patch [bz#1557995] +- kvm-block-mirror-honor-ratelimit-again.patch [bz#1572856] +- kvm-block-mirror-Make-cancel-always-cancel-pre-READY.patch [bz#1572856] +- kvm-iotests-Add-test-for-cancelling-a-mirror-job.patch [bz#1572856] +- kvm-iotests-Split-214-off-of-122.patch [bz#1518738] +- kvm-block-Add-COR-filter-driver.patch [bz#1518738] +- kvm-block-BLK_PERM_WRITE-includes-._UNCHANGED.patch [bz#1518738] +- kvm-block-Add-BDRV_REQ_WRITE_UNCHANGED-flag.patch [bz#1518738] +- kvm-block-Set-BDRV_REQ_WRITE_UNCHANGED-for-COR-writes.patch [bz#1518738] +- kvm-block-quorum-Support-BDRV_REQ_WRITE_UNCHANGED.patch [bz#1518738] +- kvm-block-Support-BDRV_REQ_WRITE_UNCHANGED-in-filters.patch [bz#1518738] +- kvm-iotests-Clean-up-wrap-image-in-197.patch [bz#1518738] +- kvm-iotests-Copy-197-for-COR-filter-driver.patch [bz#1518738] +- kvm-iotests-Add-test-for-COR-across-nodes.patch [bz#1518738] +- kvm-qemu-io-Use-purely-string-blockdev-options.patch [bz#1576598] +- kvm-qemu-img-Use-only-string-options-in-img_open_opts.patch [bz#1576598] +- kvm-iotests-Add-test-for-U-force-share-conflicts.patch [bz#1576598] +- kvm-qemu-io-Drop-command-functions-return-values.patch [bz#1519617] +- kvm-qemu-io-Let-command-functions-return-error-code.patch [bz#1519617] +- kvm-qemu-io-Exit-with-error-when-a-command-failed.patch [bz#1519617] +- kvm-iotests.py-Add-qemu_io_silent.patch [bz#1519617] +- kvm-iotests-Let-216-make-use-of-qemu-io-s-exit-code.patch [bz#1519617] +- kvm-qcow2-Repair-OFLAG_COPIED-when-fixing-leaks.patch [bz#1527085] +- kvm-iotests-Repairing-error-during-snapshot-deletion.patch [bz#1527085] +- kvm-block-Make-bdrv_is_writable-public.patch [bz#1588039] +- kvm-qcow2-Do-not-mark-inactive-images-corrupt.patch [bz#1588039] +- kvm-iotests-Add-case-for-a-corrupted-inactive-image.patch [bz#1588039] +- kvm-main-loop-drop-spin_counter.patch [bz#1168213] +- kvm-target-ppc-Factor-out-the-parsing-in-kvmppc_get_cpu_.patch [bz#1560847] +- kvm-target-ppc-Don-t-require-private-l1d-cache-on-POWER8.patch [bz#1560847] +- kvm-ppc-spapr_caps-Don-t-disable-cap_cfpc-on-POWER8-by-d.patch [bz#1560847] +- kvm-qxl-fix-local-renderer-crash.patch [bz#1567733] +- kvm-qemu-img-Amendment-support-implies-create_opts.patch [bz#1537956] +- kvm-block-Add-Error-parameter-to-bdrv_amend_options.patch [bz#1537956] +- kvm-qemu-option-Pull-out-Supported-options-print.patch [bz#1537956] +- kvm-qemu-img-Add-print_amend_option_help.patch [bz#1537956] +- kvm-qemu-img-Recognize-no-creation-support-in-o-help.patch [bz#1537956] +- kvm-iotests-Test-help-option-for-unsupporting-formats.patch [bz#1537956] +- kvm-iotests-Rework-113.patch [bz#1537956] +- kvm-qemu-img-Resolve-relative-backing-paths-in-rebase.patch [bz#1569835] +- kvm-iotests-Add-test-for-rebasing-with-relative-paths.patch [bz#1569835] +- kvm-qemu-img-Special-post-backing-convert-handling.patch [bz#1527898] +- kvm-iotests-Test-post-backing-convert-target-behavior.patch [bz#1527898] +- kvm-migration-calculate-expected_downtime-with-ram_bytes.patch [bz#1564576] +- kvm-sheepdog-Fix-sd_co_create_opts-memory-leaks.patch [bz#1513543] +- kvm-qemu-iotests-reduce-chance-of-races-in-185.patch [bz#1513543] +- kvm-blockjob-do-not-cancel-timer-in-resume.patch [bz#1513543] +- kvm-nfs-Fix-error-path-in-nfs_options_qdict_to_qapi.patch [bz#1513543] +- kvm-nfs-Remove-processed-options-from-QDict.patch [bz#1513543] +- kvm-blockjob-drop-block_job_pause-resume_all.patch [bz#1513543] +- kvm-blockjob-expose-error-string-via-query.patch [bz#1513543] +- kvm-blockjob-Fix-assertion-in-block_job_finalize.patch [bz#1513543] +- kvm-blockjob-Wrappers-for-progress-counter-access.patch [bz#1513543] +- kvm-blockjob-Move-RateLimit-to-BlockJob.patch [bz#1513543] +- kvm-blockjob-Implement-block_job_set_speed-centrally.patch [bz#1513543] +- kvm-blockjob-Introduce-block_job_ratelimit_get_delay.patch [bz#1513543] +- kvm-blockjob-Add-block_job_driver.patch [bz#1513543] +- kvm-blockjob-Update-block-job-pause-resume-documentation.patch [bz#1513543] +- kvm-blockjob-Improve-BlockJobInfo.offset-len-documentati.patch [bz#1513543] +- kvm-job-Create-Job-JobDriver-and-job_create.patch [bz#1513543] +- kvm-job-Rename-BlockJobType-into-JobType.patch [bz#1513543] +- kvm-job-Add-JobDriver.job_type.patch [bz#1513543] +- kvm-job-Add-job_delete.patch [bz#1513543] +- kvm-job-Maintain-a-list-of-all-jobs.patch [bz#1513543] +- kvm-job-Move-state-transitions-to-Job.patch [bz#1513543] +- kvm-job-Add-reference-counting.patch [bz#1513543] +- kvm-job-Move-cancelled-to-Job.patch [bz#1513543] +- kvm-job-Add-Job.aio_context.patch [bz#1513543] +- kvm-job-Move-defer_to_main_loop-to-Job.patch [bz#1513543] +- kvm-job-Move-coroutine-and-related-code-to-Job.patch [bz#1513543] +- kvm-job-Add-job_sleep_ns.patch [bz#1513543] +- kvm-job-Move-pause-resume-functions-to-Job.patch [bz#1513543] +- kvm-job-Replace-BlockJob.completed-with-job_is_completed.patch [bz#1513543] +- kvm-job-Move-BlockJobCreateFlags-to-Job.patch [bz#1513543] +- kvm-blockjob-Split-block_job_event_pending.patch [bz#1513543] +- kvm-job-Add-job_event_.patch [bz#1513543] +- kvm-job-Move-single-job-finalisation-to-Job.patch [bz#1513543] +- kvm-job-Convert-block_job_cancel_async-to-Job.patch [bz#1513543] +- kvm-job-Add-job_drain.patch [bz#1513543] +- kvm-job-Move-.complete-callback-to-Job.patch [bz#1513543] +- kvm-job-Move-job_finish_sync-to-Job.patch [bz#1513543] +- kvm-job-Switch-transactions-to-JobTxn.patch [bz#1513543] +- kvm-job-Move-transactions-to-Job.patch [bz#1513543] +- kvm-job-Move-completion-and-cancellation-to-Job.patch [bz#1513543] +- kvm-block-Cancel-job-in-bdrv_close_all-callers.patch [bz#1513543] +- kvm-job-Add-job_yield.patch [bz#1513543] +- kvm-job-Add-job_dismiss.patch [bz#1513543] +- kvm-job-Add-job_is_ready.patch [bz#1513543] +- kvm-job-Add-job_transition_to_ready.patch [bz#1513543] +- kvm-job-Move-progress-fields-to-Job.patch [bz#1513543] +- kvm-job-Introduce-qapi-job.json.patch [bz#1513543] +- kvm-job-Add-JOB_STATUS_CHANGE-QMP-event.patch [bz#1513543] +- kvm-job-Add-lifecycle-QMP-commands.patch [bz#1513543] +- kvm-job-Add-query-jobs-QMP-command.patch [bz#1513543] +- kvm-blockjob-Remove-BlockJob.driver.patch [bz#1513543] +- kvm-iotests-Move-qmp_to_opts-to-VM.patch [bz#1513543] +- kvm-qemu-iotests-Test-job-with-block-jobs.patch [bz#1513543] +- kvm-vdi-Fix-vdi_co_do_create-return-value.patch [bz#1513543] +- kvm-vhdx-Fix-vhdx_co_create-return-value.patch [bz#1513543] +- kvm-job-Add-error-message-for-failing-jobs.patch [bz#1513543] +- kvm-block-create-Make-x-blockdev-create-a-job.patch [bz#1513543] +- kvm-qemu-iotests-Add-VM.get_qmp_events_filtered.patch [bz#1513543] +- kvm-qemu-iotests-Add-VM.qmp_log.patch [bz#1513543] +- kvm-qemu-iotests-Add-iotests.img_info_log.patch [bz#1513543] +- kvm-qemu-iotests-Add-VM.run_job.patch [bz#1513543] +- kvm-qemu-iotests-iotests.py-helper-for-non-file-protocol.patch [bz#1513543] +- kvm-qemu-iotests-Rewrite-206-for-blockdev-create-job.patch [bz#1513543] +- kvm-qemu-iotests-Rewrite-207-for-blockdev-create-job.patch [bz#1513543] +- kvm-qemu-iotests-Rewrite-210-for-blockdev-create-job.patch [bz#1513543] +- kvm-qemu-iotests-Rewrite-211-for-blockdev-create-job.patch [bz#1513543] +- kvm-qemu-iotests-Rewrite-212-for-blockdev-create-job.patch [bz#1513543] +- kvm-qemu-iotests-Rewrite-213-for-blockdev-create-job.patch [bz#1513543] +- kvm-block-create-Mark-blockdev-create-stable.patch [bz#1513543] +- kvm-jobs-fix-stale-wording.patch [bz#1513543] +- kvm-jobs-fix-verb-references-in-docs.patch [bz#1513543] +- kvm-iotests-Fix-219-s-timing.patch [bz#1513543] +- kvm-iotests-improve-pause_job.patch [bz#1513543] +- kvm-rpm-Whitelist-copy-on-read-block-driver.patch [bz#1518738] +- kvm-rpm-add-throttle-driver-to-rw-whitelist.patch [bz#1591076] +- kvm-usb-host-skip-open-on-pending-postload-bh.patch [bz#1572851] +- kvm-i386-Define-the-Virt-SSBD-MSR-and-handling-of-it-CVE.patch [bz#1574216] +- kvm-i386-define-the-AMD-virt-ssbd-CPUID-feature-bit-CVE-.patch [bz#1574216] +- kvm-block-file-posix-Pass-FD-to-locking-helpers.patch [bz#1519144] +- kvm-block-file-posix-File-locking-during-creation.patch [bz#1519144] +- kvm-iotests-Add-creation-test-to-153.patch [bz#1519144] +- kvm-vhost-user-add-Net-prefix-to-internal-state-structur.patch [bz#1526645] +- kvm-virtio-support-setting-memory-region-based-host-noti.patch [bz#1526645] +- kvm-vhost-user-support-receiving-file-descriptors-in-sla.patch [bz#1526645] +- kvm-osdep-add-wait.h-compat-macros.patch [bz#1526645] +- kvm-vhost-user-bridge-support-host-notifier.patch [bz#1526645] +- kvm-vhost-allow-backends-to-filter-memory-sections.patch [bz#1526645] +- kvm-vhost-user-allow-slave-to-send-fds-via-slave-channel.patch [bz#1526645] +- kvm-vhost-user-introduce-shared-vhost-user-state.patch [bz#1526645] +- kvm-vhost-user-support-registering-external-host-notifie.patch [bz#1526645] +- kvm-libvhost-user-support-host-notifier.patch [bz#1526645] +- kvm-block-Introduce-API-for-copy-offloading.patch [bz#1482537] +- kvm-raw-Check-byte-range-uniformly.patch [bz#1482537] +- kvm-raw-Implement-copy-offloading.patch [bz#1482537] +- kvm-qcow2-Implement-copy-offloading.patch [bz#1482537] +- kvm-file-posix-Implement-bdrv_co_copy_range.patch [bz#1482537] +- kvm-iscsi-Query-and-save-device-designator-when-opening.patch [bz#1482537] +- kvm-iscsi-Create-and-use-iscsi_co_wait_for_task.patch [bz#1482537] +- kvm-iscsi-Implement-copy-offloading.patch [bz#1482537] +- kvm-block-backend-Add-blk_co_copy_range.patch [bz#1482537] +- kvm-qemu-img-Convert-with-copy-offloading.patch [bz#1482537] +- kvm-qcow2-Fix-src_offset-in-copy-offloading.patch [bz#1482537] +- kvm-iscsi-Don-t-blindly-use-designator-length-in-respons.patch [bz#1482537] +- kvm-file-posix-Fix-EINTR-handling.patch [bz#1482537] +- kvm-usb-storage-Add-rerror-werror-properties.patch [bz#1595180] +- kvm-numa-clarify-error-message-when-node-index-is-out-of.patch [bz#1578381] +- kvm-qemu-iotests-Update-026.out.nocache-reference-output.patch [bz#1528541] +- kvm-qcow2-Free-allocated-clusters-on-write-error.patch [bz#1528541] +- kvm-qemu-iotests-Test-qcow2-not-leaking-clusters-on-writ.patch [bz#1528541] +- kvm-qemu-options-Add-missing-newline-to-accel-help-text.patch [bz#1586313] +- kvm-xhci-fix-guest-triggerable-assert.patch [bz#1594135] +- kvm-virtio-gpu-tweak-scanout-disable.patch [bz#1589634] +- kvm-virtio-gpu-update-old-resource-too.patch [bz#1589634] +- kvm-virtio-gpu-disable-scanout-when-backing-resource-is-.patch [bz#1589634] +- kvm-block-Don-t-silently-truncate-node-names.patch [bz#1549654] +- kvm-pr-helper-fix-socket-path-default-in-help.patch [bz#1533158] +- kvm-pr-helper-fix-assertion-failure-on-failed-multipath-.patch [bz#1533158] +- kvm-pr-manager-helper-avoid-SIGSEGV-when-writing-to-the-.patch [bz#1533158] +- kvm-pr-manager-put-stubs-in-.c-file.patch [bz#1533158] +- kvm-pr-manager-add-query-pr-managers-QMP-command.patch [bz#1533158] +- kvm-pr-manager-helper-report-event-on-connection-disconn.patch [bz#1533158] +- kvm-pr-helper-avoid-error-on-PR-IN-command-with-zero-req.patch [bz#1533158] +- kvm-pr-helper-Rework-socket-path-handling.patch [bz#1533158] +- kvm-pr-manager-helper-fix-memory-leak-on-event.patch [bz#1533158] +- kvm-object-fix-OBJ_PROP_LINK_UNREF_ON_RELEASE-ambivalenc.patch [bz#1556678] +- kvm-usb-hcd-xhci-test-add-a-test-for-ccid-hotplug.patch [bz#1556678] +- kvm-Revert-usb-release-the-created-buses.patch [bz#1556678] +- kvm-file-posix-Fix-creation-locking.patch [bz#1599335] +- kvm-file-posix-Unlock-FD-after-creation.patch [bz#1599335] +- kvm-ahci-trim-signatures-on-raise-lower.patch [bz#1584914] +- kvm-ahci-fix-PxCI-register-race.patch [bz#1584914] +- kvm-ahci-don-t-schedule-unnecessary-BH.patch [bz#1584914] +- kvm-qcow2-Fix-qcow2_truncate-error-return-value.patch [bz#1595173] +- kvm-block-Convert-.bdrv_truncate-callback-to-coroutine_f.patch [bz#1595173] +- kvm-qcow2-Remove-coroutine-trampoline-for-preallocate_co.patch [bz#1595173] +- kvm-block-Move-bdrv_truncate-implementation-to-io.c.patch [bz#1595173] +- kvm-block-Use-tracked-request-for-truncate.patch [bz#1595173] +- kvm-file-posix-Make-.bdrv_co_truncate-asynchronous.patch [bz#1595173] +- kvm-block-Fix-copy-on-read-crash-with-partial-final-clus.patch [bz#1590640] +- kvm-block-fix-QEMU-crash-with-scsi-hd-and-drive_del.patch [bz#1599515] +- kvm-virtio-rng-process-pending-requests-on-DRIVER_OK.patch [bz#1576743] +- kvm-file-posix-specify-expected-filetypes.patch [bz#1525829] +- kvm-iotests-add-test-226-for-file-driver-types.patch [bz#1525829] +- kvm-block-dirty-bitmap-add-lock-to-bdrv_enable-disable_d.patch [bz#1207657] +- kvm-qapi-add-x-block-dirty-bitmap-enable-disable.patch [bz#1207657] +- kvm-qmp-transaction-support-for-x-block-dirty-bitmap-ena.patch [bz#1207657] +- kvm-qapi-add-x-block-dirty-bitmap-merge.patch [bz#1207657] +- kvm-qapi-add-disabled-parameter-to-block-dirty-bitmap-ad.patch [bz#1207657] +- kvm-block-dirty-bitmap-add-bdrv_enable_dirty_bitmap_lock.patch [bz#1207657] +- kvm-dirty-bitmap-fix-double-lock-on-bitmap-enabling.patch [bz#1207657] +- kvm-block-qcow2-bitmap-fix-free_bitmap_clusters.patch [bz#1207657] +- kvm-qcow2-add-overlap-check-for-bitmap-directory.patch [bz#1207657] +- kvm-blockdev-enable-non-root-nodes-for-backup-source.patch [bz#1207657] +- kvm-iotests-add-222-to-test-basic-fleecing.patch [bz#1207657] +- kvm-qcow2-Remove-dead-check-on-ret.patch [bz#1207657] +- kvm-block-Move-request-tracking-to-children-in-copy-offl.patch [bz#1207657] +- kvm-block-Fix-parameter-checking-in-bdrv_co_copy_range_i.patch [bz#1207657] +- kvm-block-Honour-BDRV_REQ_NO_SERIALISING-in-copy-range.patch [bz#1207657] +- kvm-backup-Use-copy-offloading.patch [bz#1207657] +- kvm-block-backup-disable-copy-offloading-for-backup.patch [bz#1207657] +- kvm-iotests-222-Don-t-run-with-luks.patch [bz#1207657] +- kvm-block-io-fix-copy_range.patch [bz#1207657] +- kvm-block-split-flags-in-copy_range.patch [bz#1207657] +- kvm-block-add-BDRV_REQ_SERIALISING-flag.patch [bz#1207657] +- kvm-block-backup-fix-fleecing-scheme-use-serialized-writ.patch [bz#1207657] +- kvm-nbd-server-Reject-0-length-block-status-request.patch [bz#1207657] +- kvm-nbd-server-fix-trace.patch [bz#1207657] +- kvm-nbd-server-refactor-NBDExportMetaContexts.patch [bz#1207657] +- kvm-nbd-server-add-nbd_meta_empty_or_pattern-helper.patch [bz#1207657] +- kvm-nbd-server-implement-dirty-bitmap-export.patch [bz#1207657] +- kvm-qapi-new-qmp-command-nbd-server-add-bitmap.patch [bz#1207657] +- kvm-docs-interop-add-nbd.txt.patch [bz#1207657] +- kvm-nbd-server-introduce-NBD_CMD_CACHE.patch [bz#1207657] +- kvm-nbd-server-Silence-gcc-false-positive.patch [bz#1207657] +- kvm-nbd-server-Fix-dirty-bitmap-logic-regression.patch [bz#1207657] +- kvm-nbd-server-fix-nbd_co_send_block_status.patch [bz#1207657] +- kvm-nbd-client-Add-x-dirty-bitmap-to-query-bitmap-from-s.patch [bz#1207657] +- kvm-iotests-New-test-223-for-exporting-dirty-bitmap-over.patch [bz#1207657] +- kvm-hw-char-serial-Only-retry-if-qemu_chr_fe_write-retur.patch [bz#1592817] +- kvm-hw-char-serial-retry-write-if-EAGAIN.patch [bz#1592817] +- kvm-throttle-groups-fix-hang-when-group-member-leaves.patch [bz#1535914] +- kvm-Disable-aarch64-devices-reappeared-after-2.12-rebase.patch [bz#1586357] +- kvm-Disable-split-irq-device.patch [bz#1586357] +- kvm-Disable-AT24Cx-i2c-eeprom.patch [bz#1586357] +- kvm-Disable-CAN-bus-devices.patch [bz#1586357] +- kvm-Disable-new-superio-devices.patch [bz#1586357] +- kvm-Disable-new-pvrdma-device.patch [bz#1586357] +- kvm-qdev-add-HotplugHandler-post_plug-callback.patch [bz#1607891] +- kvm-virtio-scsi-fix-hotplug-reset-vs-event-race.patch [bz#1607891] +- kvm-e1000-Fix-tso_props-compat-for-82540em.patch [bz#1608778] +- kvm-slirp-correct-size-computation-while-concatenating-m.patch [bz#1586255] +- kvm-s390x-sclp-fix-maxram-calculation.patch [bz#1595740] +- kvm-redhat-Make-gitpublish-profile-the-default-one.patch [bz#1425820] +- Resolves: bz#1168213 + (main-loop: WARNING: I/O thread spun for 1000 iterations while doing stream block device.) +- Resolves: bz#1207657 + (RFE: QEMU Incremental live backup - push and pull modes) +- Resolves: bz#1416180 + (QEMU VFIO based block driver for NVMe devices) +- Resolves: bz#1425820 + (Improve QEMU packaging layout with modularization of the block layer) +- Resolves: bz#1482537 + ([RFE] qemu-img copy-offloading (convert command)) +- Resolves: bz#1505664 + ("qemu-kvm: System page size 0x1000000 is not enabled in page_size_mask (0x11000). Performance may be slow" show up while using hugepage as guest's memory) +- Resolves: bz#1513543 + ([RFE] Add block job to create format on a storage device) +- Resolves: bz#1518738 + (Add 'copy-on-read' filter driver for use with blockdev-add) +- Resolves: bz#1519144 + (qemu-img: image locking doesn't cover image creation) +- Resolves: bz#1519617 + (The exit code should be non-zero when qemu-io reports an error) +- Resolves: bz#1523065 + ("qemu-img resize" should fail to decrease the size of logical partition/lvm/iSCSI image with raw format) +- Resolves: bz#1525829 + (can not boot up a scsi-block passthrough disk via -blockdev with error "cannot get SG_IO version number: Operation not supported. Is this a SCSI device?") +- Resolves: bz#1526645 + ([Intel 7.6 FEAT] vHost Data Plane Acceleration (vDPA) - vhost user client - qemu-kvm-rhev) +- Resolves: bz#1527085 + (The copied flag should be updated during '-r leaks') +- Resolves: bz#1527898 + ([RFE] qemu-img should leave cluster unallocated if it's read as zero throughout the backing chain) +- Resolves: bz#1528541 + (qemu-img check reports tons of leaked clusters after re-start nfs service to resume writing data in guest) +- Resolves: bz#1533158 + (QEMU support for libvirtd restarting qemu-pr-helper) +- Resolves: bz#1535914 + (Disable io throttling for one member disk of a group during io will induce the other one hang with io) +- Resolves: bz#1537956 + (RFE: qemu-img amend should list the true supported options) +- Resolves: bz#1542080 + (Qemu core dump at cirrus_invalidate_region) +- Resolves: bz#1549654 + (Reject node-names which would be truncated by the block layer commands) +- Resolves: bz#1556678 + (Hot plug usb-ccid for the 2nd time with the same ID as the 1st time failed) +- Resolves: bz#1557995 + (QAPI schema for RBD storage misses the 'password-secret' option) +- Resolves: bz#1558723 + (Create RHEL-7.6 QEMU machine type for AArch64) +- Resolves: bz#1560847 + ([Power8][FW b0320a_1812.861][rhel7.5rc2 3.10.0-861.el7.ppc64le][qemu-kvm-{ma,rhev}-2.10.0-21.el7_5.1.ppc64le] KVM guest does not default to ori type flush even with pseries-rhel7.5.0-sxxm) +- Resolves: bz#1564576 + (Pegas 1.1 - Require to backport qemu-kvm patch that fixes expected_downtime calculation during migration) +- Resolves: bz#1566153 + (IOERROR pause code lost after resuming a VM while I/O error is still present) +- Resolves: bz#1567733 + (qemu abort when migrate during guest reboot) +- Resolves: bz#1569835 + (qemu-img get wrong backing file path after rebasing image with relative path) +- Resolves: bz#1572851 + (Core dumped after migration when with usb-host) +- Resolves: bz#1572856 + ('block-job-cancel' can not cancel a "drive-mirror" job) +- Resolves: bz#1574216 + (CVE-2018-3639 qemu-kvm-rhev: hw: cpu: speculative store bypass [rhel-7.6]) +- Resolves: bz#1575541 + (qemu core dump while installing win10 guest) +- Resolves: bz#1576598 + (Segfault in qemu-io and qemu-img with -U --image-opts force-share=off) +- Resolves: bz#1576743 + (virtio-rng hangs when running on recent (2.x) QEMU versions) +- Resolves: bz#1578381 + (Error message need update when specify numa distance with node index >=128) +- Resolves: bz#1583959 + (Incorrect vcpu count limit for 7.4 machine types for windows guests) +- Resolves: bz#1584914 + (SATA emulator lags and hangs) +- Resolves: bz#1584984 + (Vm starts failed with 'passthrough' smartcard) +- Resolves: bz#1586255 + (CVE-2018-11806 qemu-kvm-rhev: QEMU: slirp: heap buffer overflow while reassembling fragmented datagrams [rhel-7.6]) +- Resolves: bz#1586313 + (-smp option is not easily found in the output of qemu help) +- Resolves: bz#1586357 + (Disable new devices in 2.12) +- Resolves: bz#1588039 + (Possible assertion failure in qemu when a corrupted image is used during an incoming migration) +- Resolves: bz#1589634 + (Migration failed when rebooting guest with multiple virtio videos) +- Resolves: bz#1590640 + (qemu-kvm: block/io.c:1098: bdrv_co_do_copy_on_readv: Assertion `skip_bytes < pnum' failed.) +- Resolves: bz#1591076 + (The driver of 'throttle' is not whitelisted) +- Resolves: bz#1592817 + (Retrying on serial_xmit if the pipe is broken may compromise the Guest) +- Resolves: bz#1594135 + (system_reset many times linux guests cause qemu process Aborted) +- Resolves: bz#1595173 + (blockdev-create is blocking) +- Resolves: bz#1595180 + (Can't set rerror/werror with usb-storage) +- Resolves: bz#1595740 + (RHEL-Alt-7.6 - qemu has error during migration of larger guests) +- Resolves: bz#1599335 + (Image creation locking is too tight and is not properly released) +- Resolves: bz#1599515 + (qemu core-dump with aio_read via hmp (util/qemu-thread-posix.c:64: qemu_mutex_lock_impl: Assertion `mutex->initialized' failed)) +- Resolves: bz#1607891 + (Hotplug events are sometimes lost with virtio-scsi + iothread) +- Resolves: bz#1608778 + (qemu/migration: migrate failed from RHEL.7.6 to RHEL.7.5 with e1000-82540em) + +* Mon Aug 06 2018 Danilo Cesar Lemes de Paula - 2.12.0-17.el8 +- kvm-linux-headers-Update-to-include-KVM_CAP_S390_HPAGE_1.patch [bz#1610906] +- kvm-s390x-Enable-KVM-huge-page-backing-support.patch [bz#1610906] +- kvm-redhat-s390x-add-hpage-1-to-kvm.conf.patch [bz#1610906] +- Resolves: bz#1610906 + ([IBM 8.0 FEAT] KVM: Huge Pages - libhugetlbfs Enablement - qemu-kvm part) + +* Tue Jul 31 2018 Danilo Cesar Lemes de Paula - 2.12.0-16.el8 +- kvm-spapr-Correct-inverted-test-in-spapr_pc_dimm_node.patch [bz#1601671] +- kvm-osdep-powerpc64-align-memory-to-allow-2MB-radix-THP-.patch [bz#1601317] +- kvm-RHEL-8.0-Add-pseries-rhel7.6.0-sxxm-machine-type.patch [bz#1595501] +- kvm-i386-Helpers-to-encode-cache-information-consistentl.patch [bz#1597739] +- kvm-i386-Add-cache-information-in-X86CPUDefinition.patch [bz#1597739] +- kvm-i386-Initialize-cache-information-for-EPYC-family-pr.patch [bz#1597739] +- kvm-i386-Add-new-property-to-control-cache-info.patch [bz#1597739] +- kvm-i386-Clean-up-cache-CPUID-code.patch [bz#1597739] +- kvm-i386-Populate-AMD-Processor-Cache-Information-for-cp.patch [bz#1597739] +- kvm-i386-Add-support-for-CPUID_8000_001E-for-AMD.patch [bz#1597739] +- kvm-i386-Fix-up-the-Node-id-for-CPUID_8000_001E.patch [bz#1597739] +- kvm-i386-Enable-TOPOEXT-feature-on-AMD-EPYC-CPU.patch [bz#1597739] +- kvm-i386-Remove-generic-SMT-thread-check.patch [bz#1597739] +- kvm-i386-Allow-TOPOEXT-to-be-enabled-on-older-kernels.patch [bz#1597739] +- Resolves: bz#1595501 + (Create pseries-rhel7.6.0-sxxm machine type) +- Resolves: bz#1597739 + (AMD EPYC/Zen SMT support for KVM / QEMU guest (qemu-kvm)) +- Resolves: bz#1601317 + (RHEL8.0 - qemu patch to align memory to allow 2MB THP) +- Resolves: bz#1601671 + (After rebooting guest,all the hot plug memory will be assigned to the 1st numa node.) + +* Tue Jul 24 2018 Danilo Cesar Lemes de Paula - 2.12.0-15.el8 +- kvm-spapr-Add-ibm-max-associativity-domains-property.patch [bz#1599593] +- kvm-Revert-spapr-Don-t-allow-memory-hotplug-to-memory-le.patch [bz#1599593] +- kvm-simpletrace-Convert-name-from-mapping-record-to-str.patch [bz#1594969] +- kvm-tests-fix-TLS-handshake-failure-with-TLS-1.3.patch [bz#1602403] +- Resolves: bz#1594969 + (simpletrace.py fails when running with Python 3) +- Resolves: bz#1599593 + (User can't hotplug memory to less memory numa node on rhel8) +- Resolves: bz#1602403 + (test-crypto-tlssession unit test fails with assertions) + +* Mon Jul 09 2018 Danilo Cesar Lemes de Paula - 2.12.0-14.el8 +- kvm-vfio-pci-Default-display-option-to-off.patch [bz#1590511] +- kvm-python-futurize-f-libfuturize.fixes.fix_print_with_i.patch [bz#1571533] +- kvm-python-futurize-f-lib2to3.fixes.fix_except.patch [bz#1571533] +- kvm-Revert-Defining-a-shebang-for-python-scripts.patch [bz#1571533] +- kvm-spec-Fix-ambiguous-python-interpreter-name.patch [bz#1571533] +- kvm-qemu-ga-blacklisting-guest-exec-and-guest-exec-statu.patch [bz#1518132] +- kvm-redhat-rewrap-build_configure.sh-cmdline-for-the-rh-.patch [] +- kvm-redhat-remove-the-VTD-LIVE_BLOCK_OPS-and-RHV-options.patch [] +- kvm-redhat-fix-the-rh-env-prep-target-s-dependency-on-th.patch [] +- kvm-redhat-remove-dead-code-related-to-s390-not-s390x.patch [] +- kvm-redhat-sync-compiler-flags-from-the-spec-file-to-rh-.patch [] +- kvm-redhat-sync-guest-agent-enablement-and-tcmalloc-usag.patch [] +- kvm-redhat-fix-up-Python-3-dependency-for-building-QEMU.patch [] +- kvm-redhat-fix-up-Python-dependency-for-SRPM-generation.patch [] +- kvm-redhat-disable-glusterfs-dependency-support-temporar.patch [] +- Resolves: bz#1518132 + (Ensure file access RPCs are disabled by default) +- Resolves: bz#1571533 + (Convert qemu-kvm python scripts to python3) +- Resolves: bz#1590511 + (Fails to start guest with Intel vGPU device) + +* Thu Jun 21 2018 Danilo C. L. de Paula - 2.12.0-13.el8 +- Resolves: bz#1508137 + ([IBM 8.0 FEAT] KVM: Interactive Bootloader (qemu)) +- Resolves: bz#1513558 + (Remove RHEL6 machine types) +- Resolves: bz#1568600 + (pc-i440fx-rhel7.6.0 and pc-q35-rhel7.6.0 machine types (x86)) +- Resolves: bz#1570029 + ([IBM 8.0 FEAT] KVM: 3270 Connectivity - qemu part) +- Resolves: bz#1578855 + (Enable Native Ceph support on non x86_64 CPUs) +- Resolves: bz#1585651 + (RHEL 7.6 new pseries machine type (ppc64le)) +- Resolves: bz#1592337 + ([IBM 8.0 FEAT] KVM: CPU Model z14 ZR1 (qemu-kvm)) + +* Tue May 15 2018 Danilo C. L. de Paula - 2.12.0-11.el8.1 +- Resolves: bz#1576468 + (Enable vhost_user in qemu-kvm 2.12) + +* Wed May 09 2018 Danilo de Paula - 2.12.0-11.el8 +- Resolves: bz#1574406 + ([RHEL 8][qemu-kvm] Failed to find romfile "efi-virtio.rom") +- Resolves: bz#1569675 + (Backwards compatibility of pc-*-rhel7.5.0 and older machine-types) +- Resolves: bz#1576045 + (Fix build issue by using python3) +- Resolves: bz#1571145 + (qemu-kvm segfaults on RHEL 8 when run guestfsd under TCG) + +* Fri Apr 20 2018 Danilo de Paula - 2.12.0-10.el +- Fixing some issues with packaging. +- Rebasing to 2.12.0-rc4 + +* Fri Apr 13 2018 Danilo de Paula - 2.11.0-7.el8 +- Bumping epoch for RHEL8 and dropping self-obsoleting + +* Thu Apr 12 2018 Danilo de Paula - 2.11.0-6.el8 +- Rebuilding + +* Mon Mar 05 2018 Danilo de Paula - 2.11.0-5.el8 +- Prepare building on RHEL-8.0