import qemu-kvm-7.0.0-13.el9

i9 changed/i9c/qemu-kvm-7.0.0-13.el9
MSVSphere Packaging Team 2 years ago
commit 04835c576c

1
.gitignore vendored

@ -0,0 +1 @@
SOURCES/qemu-7.0.0.tar.xz

@ -0,0 +1 @@
c3fd2403106c33d0470bc9ba4fb4b946c0402248 SOURCES/qemu-7.0.0.tar.xz

@ -0,0 +1,313 @@
From fc113ecd7c99646a7ced0b99570b5927ae6d595f Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Wed, 26 May 2021 10:56:02 +0200
Subject: Initial redhat build
This patch introduces redhat build structure in redhat subdirectory. In addition,
several issues are fixed in QEMU tree:
- Change of app name for sasl_server_init in VNC code from qemu to qemu-kvm
- As we use qemu-kvm as name in all places, this is updated to be consistent
- Man page renamed from qemu to qemu-kvm
- man page is installed using make install so we have to fix it in qemu tree
We disable make check due to issues with some of the tests.
This rebase is based on qemu-kvm-6.2.0-13.el9
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
--
Rebase changes (6.1.0):
- Move build to .distro
- Move changes for support file to related commit
- Added dependency for python3-sphinx-rtd_theme
- Removed --disable-sheepdog configure option
- Added new hw-display modules
- SASL initialization moved to ui/vnc-auth-sasl.c
- Add accel-qtest-<arch> and accel-tcg-x86_64 libraries
- Added hw-usb-host module
- Disable new configure options (bpf, nvmm, slirp-smbd)
- Use -pie for ksmctl build (annocheck complain fix)
Rebase changes (6.2.0):
- removed --disable-jemalloc and --disable-tcmalloc configure options
- added audio-oss.so
- added fdt requirement for x86_64
- tests/acceptance renamed to tests/avocado
- added multiboot_dma.bin
- Add -Wno-string-plus-int to extra flags
- Updated configure options
Rebase changes (7.0.0):
- Do not use -mlittle CFLAG on ppc64le
- Used upstream handling issue with ui/clipboard.c
- Use -mlittle-endian on ppc64le instead of deleteing it in configure
- Drop --disable-libxml2 option for configure (upstream)
- Remove vof roms
- Disable AVX2 support
- Use internal meson
- Disable new configure options (dbus-display and qga-vss)
- Change permissions on installing tests/Makefile.include
- Remove ssh block driver
Merged patches (6.0.0):
- 605758c902 Limit build on Power to qemu-img and qemu-ga only
Merged patches (6.1.0):
- f04f91751f Use cached tarballs
- 6581165c65 Remove message with running VM count
- 03c3cac9fc spec-file: build qemu-kvm without SPICE and QXL
- e0ae6c1f6c spec-file: Obsolete qemu-kvm-ui-spice
- 9d2e9f9ecf spec: Do not build qemu-kvm-block-gluster
- cf470b4234 spec: Do not link pcnet and ne2k_pci roms
- e981284a6b redhat: Install the s390-netboot.img that we've built
- 24ef557f33 spec: Remove usage of Group: tag
- c40d69b4f4 spec: Drop %defattr usage
- f8e98798ce spec: Clean up BuildRequires
- 47246b43ee spec: Remove iasl BuildRequires
- 170dc1cbe0 spec: Remove redundant 0 in conditionals
- 8718f6fa11 spec: Add more have_XXX conditionals
- a001269ce9 spec: Remove binutils versioned Requires
- 34545ee641 spec: Remove diffutils BuildRequires
- c2c82beac9 spec: Remove redundant Requires:
- 9314c231f4 spec: Add XXX_version macros
- c43db0bf0f spec: Add have_block_rbd
- 3ecb0c0319 qga: drop StandardError=syslog
- 018049dc80 Remove iscsi support
- a2edf18777 redhat: Replace the kvm-setup.service with a /etc/modules-load.d config file
- 387b5fbcfe redhat: Move qemu-kvm-docs dependency to qemu-kvm
- 4ead693178 redhat: introducting qemu-kvm-hw-usbredir
- 4dc6fc3035 redhat: use the standard vhost-user JSON path
- 84757178b4 Fix local build
- 8c394227dd spec: Restrict block drivers in tools
- b6aa7c1fae Move tools to separate package
- eafd82e509 Split qemu-pr-helper to separate package
- 2c0182e2aa spec: RPM_BUILD_ROOT -> %{buildroot}
- 91bd55ca13 spec: More use of %{name} instead of 'qemu-kvm'
- 50ba299c61 spec: Use qemu-pr-helper.service from qemu.git (partial)
- ee08d4e0a3 spec: Use %{_sourcedir} for referencing sources
- 039e7f7d02 spec: Add tools_only
- 884ba71617 spec: %build: Add run_configure helper
- 8ebd864d65 spec: %build: Disable more bits with %{disable_everything} (partial)
- f23fdb53f5 spec: %build: Add macros for some 'configure' parameters
- fe951a8bd8 spec: %files: Move qemu-guest-agent and qemu-img earlier
- 353b632e37 spec: %install: Remove redundant bits
- 9d2015b752 spec: %install: Add %{modprobe_kvm_conf} macro
- 6d05134e8c spec: %install: Remove qemu-guest-agent /etc/qemu-kvm usage
- 985b226467 spec: %install: clean up qemu-ga section
- dfaf9c600d spec: %install: Use a single %{tools_only} section
- f6978ddb46 spec: Make tools_only not cross spec sections
- 071c211098 spec: %install: Limit time spent in %{qemu_kvm_build}
- 1b65c674be spec: misc syntactic merges with Fedora
- 4da16294cf spec: Use Fedora's pattern for specifying rc version
- d7ee259a79 spec: %files: don't use fine grained -docs file list
- 64cad0c60f spec: %files: Add licenses to qemu-common too
- c3de4f080a spec: %install: Drop python3 shebang fixup
- 46fc216115 Update local build to work with spec file improvements
- bab9531548 spec: Remove buildldflags
- c8360ab6a9 spec: Use %make_build macro
- f6966c66e9 spec: Drop make install sharedir and datadir usage
- 86982421bc spec: use %make_install macro
- 191c405d22 spec: parallelize `make check`
- 251a1fb958 spec: Drop explicit --build-id
- 44c7dda6c3 spec: use %{build_ldflags}
- 0009a34354 Move virtiofsd to separate package
- 34d1b200b3 Utilize --firmware configure option
- 2800e1dd03 spec: Switch toolchain to Clang/LLVM (except process-patches.sh)
- e8a70f500f spec: Use safe-stack for x86_64
- e29445d50d spec: Reenable write support for VMDK etc. in tools
- a4fe2a3e16 redhat: Disable LTO on non-x86 architectures
Merged patches (6.2.0):
- 333452440b remove sgabios dependency
- 7d3633f184 enable pulseaudio
- bd898709b0 spec: disable use of gcrypt for crypto backends in favour of gnutls
- e4f0c6dee6 spec: Remove block-curl and block-ssh dependency
- 4dc13bfe63 spec: Build the VDI block driver
- d2f2ff3c74 spec: Explicitly include compress filter
- a7d047f9c2 Move ksmtuned files to separate package
Merged patches (7.0.0):
- 098d4d08d0 spec: Rename qemu-kvm-hw-usbredir to qemu-kvm-device-usb-redirect
- c2bd0d6834 spec: Split qemu-kvm-ui-opengl
- 2c9cda805d spec: Introduce packages for virtio-gpu-* modules (changed as rhel device tree not set)
- d0414a3e0b spec: Introduce device-display-virtio-vga* packages
- 3534ec46d4 spec: Move usb-host module to separate package
- ddc14d4737 spec: Move qtest accel module to tests package
- 6f2c4befa6 spec: Extend qemu-kvm-core description
- 6f11866e4e (rhel/rhel-9.0.0) Update to qemu-kvm-6.2.0-6.el9
- da0a28758f ui/clipboard: fix use-after-free regression
- 895d4d52eb spec: Remove qemu-virtiofsd
- c8c8c8bd84 spec: Fix obsolete for spice subpackages
- d46d2710b2 spec: Obsolete old usb redir subpackage
- 6f52a50b68 spec: Obsolete ssh driver
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
.distro/85-kvm.preset | 5 -
.distro/Makefile | 100 +
.distro/Makefile.common | 40 +
.distro/README.tests | 39 +
.distro/ksm.service | 13 -
.distro/ksm.sysconfig | 4 -
.distro/ksmctl.c | 77 -
.distro/ksmtuned | 139 -
.distro/ksmtuned.conf | 21 -
.distro/ksmtuned.service | 12 -
.distro/kvm-setup | 49 -
.distro/kvm-setup.service | 14 -
.distro/modules-load.conf | 4 +
.distro/qemu-guest-agent.service | 1 -
.distro/qemu-kvm.spec.template | 4034 +++++++++++++++++++++++
.distro/rpminspect.yaml | 6 +-
.distro/scripts/extract_build_cmd.py | 12 +
.gitignore | 1 +
README.systemtap | 43 +
meson.build | 4 +-
scripts/qemu-guest-agent/fsfreeze-hook | 2 +-
scripts/systemtap/conf.d/qemu_kvm.conf | 4 +
scripts/systemtap/script.d/qemu_kvm.stp | 1 +
tests/check-block.sh | 2 +
ui/vnc-auth-sasl.c | 2 +-
25 files changed, 4290 insertions(+), 339 deletions(-)
delete mode 100644 .distro/85-kvm.preset
create mode 100644 .distro/Makefile
create mode 100644 .distro/Makefile.common
create mode 100644 .distro/README.tests
delete mode 100644 .distro/ksm.service
delete mode 100644 .distro/ksm.sysconfig
delete mode 100644 .distro/ksmctl.c
delete mode 100644 .distro/ksmtuned
delete mode 100644 .distro/ksmtuned.conf
delete mode 100644 .distro/ksmtuned.service
delete mode 100644 .distro/kvm-setup
delete mode 100644 .distro/kvm-setup.service
create mode 100644 .distro/modules-load.conf
create mode 100644 .distro/qemu-kvm.spec.template
create mode 100644 README.systemtap
create mode 100644 scripts/systemtap/conf.d/qemu_kvm.conf
create mode 100644 scripts/systemtap/script.d/qemu_kvm.stp
diff --git a/README.systemtap b/README.systemtap
new file mode 100644
index 0000000000..ad913fc990
--- /dev/null
+++ b/README.systemtap
@@ -0,0 +1,43 @@
+QEMU tracing using systemtap-initscript
+---------------------------------------
+
+You can capture QEMU trace data all the time using systemtap-initscript. This
+uses SystemTap's flight recorder mode to trace all running guests to a
+fixed-size buffer on the host. Old trace entries are overwritten by new
+entries when the buffer size wraps.
+
+1. Install the systemtap-initscript package:
+ # yum install systemtap-initscript
+
+2. Install the systemtap scripts and the conf file:
+ # cp /usr/share/qemu-kvm/systemtap/script.d/qemu_kvm.stp /etc/systemtap/script.d/
+ # cp /usr/share/qemu-kvm/systemtap/conf.d/qemu_kvm.conf /etc/systemtap/conf.d/
+
+The set of trace events to enable is given in qemu_kvm.stp. This SystemTap
+script can be customized to add or remove trace events provided in
+/usr/share/systemtap/tapset/qemu-kvm-simpletrace.stp.
+
+SystemTap customizations can be made to qemu_kvm.conf to control the flight
+recorder buffer size and whether to store traces in memory only or disk too.
+See stap(1) for option documentation.
+
+3. Start the systemtap service.
+ # service systemtap start qemu_kvm
+
+4. Make the service start at boot time.
+ # chkconfig systemtap on
+
+5. Confirm that the service works.
+ # service systemtap status qemu_kvm
+ qemu_kvm is running...
+
+When you want to inspect the trace buffer, perform the following steps:
+
+1. Dump the trace buffer.
+ # staprun -A qemu_kvm >/tmp/trace.log
+
+2. Start the systemtap service because the preceding step stops the service.
+ # service systemtap start qemu_kvm
+
+3. Translate the trace record to readable format.
+ # /usr/share/qemu-kvm/simpletrace.py --no-header /usr/share/qemu-kvm/trace-events /tmp/trace.log
diff --git a/meson.build b/meson.build
index 861de93c4f..6f7e430f0f 100644
--- a/meson.build
+++ b/meson.build
@@ -2394,7 +2394,9 @@ if capstone_opt == 'internal'
# Include all configuration defines via a header file, which will wind up
# as a dependency on the object file, and thus changes here will result
# in a rebuild.
- '-include', 'capstone-defs.h'
+ '-include', 'capstone-defs.h',
+
+ '-Wp,-D_GLIBCXX_ASSERTIONS',
]
libcapstone = static_library('capstone',
diff --git a/scripts/qemu-guest-agent/fsfreeze-hook b/scripts/qemu-guest-agent/fsfreeze-hook
index 13aafd4845..e9b84ec028 100755
--- a/scripts/qemu-guest-agent/fsfreeze-hook
+++ b/scripts/qemu-guest-agent/fsfreeze-hook
@@ -8,7 +8,7 @@
# request, it is issued with "thaw" argument after filesystem is thawed.
LOGFILE=/var/log/qga-fsfreeze-hook.log
-FSFREEZE_D=$(dirname -- "$0")/fsfreeze-hook.d
+FSFREEZE_D=$(dirname -- "$(realpath $0)")/fsfreeze-hook.d
# Check whether file $1 is a backup or rpm-generated file and should be ignored
is_ignored_file() {
diff --git a/scripts/systemtap/conf.d/qemu_kvm.conf b/scripts/systemtap/conf.d/qemu_kvm.conf
new file mode 100644
index 0000000000..372d8160a4
--- /dev/null
+++ b/scripts/systemtap/conf.d/qemu_kvm.conf
@@ -0,0 +1,4 @@
+# Force load uprobes (see BZ#1118352)
+stap -e 'probe process("/usr/libexec/qemu-kvm").function("main") { printf("") }' -c true
+
+qemu_kvm_OPT="-s4" # per-CPU buffer size, in megabytes
diff --git a/scripts/systemtap/script.d/qemu_kvm.stp b/scripts/systemtap/script.d/qemu_kvm.stp
new file mode 100644
index 0000000000..c04abf9449
--- /dev/null
+++ b/scripts/systemtap/script.d/qemu_kvm.stp
@@ -0,0 +1 @@
+probe qemu.kvm.simpletrace.handle_qmp_command,qemu.kvm.simpletrace.monitor_protocol_*,qemu.kvm.simpletrace.migrate_set_state {}
diff --git a/tests/check-block.sh b/tests/check-block.sh
index f59496396c..d900d8b35e 100755
--- a/tests/check-block.sh
+++ b/tests/check-block.sh
@@ -48,6 +48,8 @@ if LANG=C bash --version | grep -q 'GNU bash, version [123]' ; then
skip "bash version too old ==> Not running the qemu-iotests."
fi
+exit 0
+
cd tests/qemu-iotests
# QEMU_CHECK_BLOCK_AUTO is used to disable some unstable sub-tests
diff --git a/ui/vnc-auth-sasl.c b/ui/vnc-auth-sasl.c
index 47fdae5b21..2a950caa2a 100644
--- a/ui/vnc-auth-sasl.c
+++ b/ui/vnc-auth-sasl.c
@@ -42,7 +42,7 @@
bool vnc_sasl_server_init(Error **errp)
{
- int saslErr = sasl_server_init(NULL, "qemu");
+ int saslErr = sasl_server_init(NULL, "qemu-kvm");
if (saslErr != SASL_OK) {
error_setg(errp, "Failed to initialize SASL auth: %s",
--
2.31.1

@ -0,0 +1,642 @@
From 51ec7495d69fe4b4d0b61642ca6c0e7fd7a1032d Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Thu, 15 Jul 2021 03:22:36 -0400
Subject: Enable/disable devices for RHEL
This commit adds all changes related to changes in supported devices.
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
--
Rebase notes (6.1.0):
- Added CONFIG_TPM (except s390x)
- default-configs moved to configs
- Use --with-device-<ARCH> configure option to use rhel configs
Rebase notes (6.2.0):
- Add CONFIG_ISA_FDC
- Do not remove -no-hpet documentation
Rebase notes (7.0.0):
- Added CONFIG_ARM_GIC_TCG option for aarch64
- Fixes necessary for layout change fixes
- Renamed CONFIG_ARM_GIC_TCG to CONFIG_ARM_GICV3_TCG
- Removed upstream devices
Merged patches (6.1.0):
- c51bf45304 Remove SPICE and QXL from x86_64-rh-devices.mak
- 02fc745601 aarch64-rh-devices: add CONFIG_PVPANIC_PCI
- f2fe835153 aarch64-rh-devices: add CONFIG_PXB
- b5431733ad disable CONFIG_USB_STORAGE_BOT
- 478ba0cdf6 Disable TPM passthrough
- 2504d68a7c aarch64: Add USB storage devices
- 51c2a3253c disable ac97 audio
Merged patches (6.2.0):
- 9f2f9fa2ba disable sga device
Merged patches (7.0.0):
- fd7c45a5a8 redhat: Enable virtio-mem as tech-preview on x86-64
- c9e68ea451 Enable SGX -- RH Only
---
.distro/qemu-kvm.spec.template | 18 +--
.../aarch64-softmmu/aarch64-rh-devices.mak | 34 ++++++
.../ppc64-softmmu/ppc64-rh-devices.mak | 35 ++++++
configs/devices/rh-virtio.mak | 10 ++
.../s390x-softmmu/s390x-rh-devices.mak | 15 +++
.../x86_64-softmmu/x86_64-rh-devices.mak | 103 ++++++++++++++++++
hw/acpi/ich9.c | 4 +-
hw/arm/meson.build | 2 +-
hw/block/fdc.c | 10 ++
hw/cpu/meson.build | 5 +-
hw/display/cirrus_vga.c | 5 +-
hw/ide/piix.c | 5 +-
hw/input/pckbd.c | 2 +
hw/net/e1000.c | 2 +
hw/ppc/spapr_cpu_core.c | 2 +
hw/usb/meson.build | 2 +-
target/arm/cpu_tcg.c | 10 ++
target/ppc/cpu-models.c | 9 ++
target/s390x/cpu_models_sysemu.c | 3 +
target/s390x/kvm/kvm.c | 8 ++
20 files changed, 269 insertions(+), 15 deletions(-)
create mode 100644 configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
create mode 100644 configs/devices/ppc64-softmmu/ppc64-rh-devices.mak
create mode 100644 configs/devices/rh-virtio.mak
create mode 100644 configs/devices/s390x-softmmu/s390x-rh-devices.mak
create mode 100644 configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
new file mode 100644
index 0000000000..5f6ee1de5b
--- /dev/null
+++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
@@ -0,0 +1,34 @@
+include ../rh-virtio.mak
+
+CONFIG_ARM_GIC_KVM=y
+CONFIG_ARM_GICV3_TCG=y
+CONFIG_ARM_GIC=y
+CONFIG_ARM_SMMUV3=y
+CONFIG_ARM_V7M=y
+CONFIG_ARM_VIRT=y
+CONFIG_EDID=y
+CONFIG_PCIE_PORT=y
+CONFIG_PCI_DEVICES=y
+CONFIG_PCI_TESTDEV=y
+CONFIG_PFLASH_CFI01=y
+CONFIG_SCSI=y
+CONFIG_SEMIHOSTING=y
+CONFIG_USB=y
+CONFIG_USB_XHCI=y
+CONFIG_USB_XHCI_PCI=y
+CONFIG_USB_STORAGE_CORE=y
+CONFIG_USB_STORAGE_CLASSIC=y
+CONFIG_VFIO=y
+CONFIG_VFIO_PCI=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_XIO3130=y
+CONFIG_NVDIMM=y
+CONFIG_ACPI_APEI=y
+CONFIG_TPM=y
+CONFIG_TPM_EMULATOR=y
+CONFIG_TPM_TIS_SYSBUS=y
+CONFIG_PTIMER=y
+CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y
+CONFIG_PVPANIC_PCI=y
+CONFIG_PXB=y
diff --git a/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak b/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak
new file mode 100644
index 0000000000..6a3e3f0227
--- /dev/null
+++ b/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak
@@ -0,0 +1,35 @@
+include ../rh-virtio.mak
+
+CONFIG_DIMM=y
+CONFIG_MEM_DEVICE=y
+CONFIG_NVDIMM=y
+CONFIG_PCI=y
+CONFIG_PCI_DEVICES=y
+CONFIG_PCI_TESTDEV=y
+CONFIG_PCI_EXPRESS=y
+CONFIG_PSERIES=y
+CONFIG_SCSI=y
+CONFIG_SPAPR_VSCSI=y
+CONFIG_TEST_DEVICES=y
+CONFIG_USB=y
+CONFIG_USB_OHCI=y
+CONFIG_USB_OHCI_PCI=y
+CONFIG_USB_SMARTCARD=y
+CONFIG_USB_STORAGE_CORE=y
+CONFIG_USB_STORAGE_CLASSIC=y
+CONFIG_USB_XHCI=y
+CONFIG_USB_XHCI_NEC=y
+CONFIG_USB_XHCI_PCI=y
+CONFIG_VFIO=y
+CONFIG_VFIO_PCI=y
+CONFIG_VGA=y
+CONFIG_VGA_PCI=y
+CONFIG_VHOST_USER=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_VGA=y
+CONFIG_WDT_IB6300ESB=y
+CONFIG_XICS=y
+CONFIG_XIVE=y
+CONFIG_TPM=y
+CONFIG_TPM_SPAPR=y
+CONFIG_TPM_EMULATOR=y
diff --git a/configs/devices/rh-virtio.mak b/configs/devices/rh-virtio.mak
new file mode 100644
index 0000000000..94ede1b5f6
--- /dev/null
+++ b/configs/devices/rh-virtio.mak
@@ -0,0 +1,10 @@
+CONFIG_VIRTIO=y
+CONFIG_VIRTIO_BALLOON=y
+CONFIG_VIRTIO_BLK=y
+CONFIG_VIRTIO_GPU=y
+CONFIG_VIRTIO_INPUT=y
+CONFIG_VIRTIO_INPUT_HOST=y
+CONFIG_VIRTIO_NET=y
+CONFIG_VIRTIO_RNG=y
+CONFIG_VIRTIO_SCSI=y
+CONFIG_VIRTIO_SERIAL=y
diff --git a/configs/devices/s390x-softmmu/s390x-rh-devices.mak b/configs/devices/s390x-softmmu/s390x-rh-devices.mak
new file mode 100644
index 0000000000..d3b38312e1
--- /dev/null
+++ b/configs/devices/s390x-softmmu/s390x-rh-devices.mak
@@ -0,0 +1,15 @@
+include ../rh-virtio.mak
+
+CONFIG_PCI=y
+CONFIG_S390_CCW_VIRTIO=y
+CONFIG_S390_FLIC=y
+CONFIG_S390_FLIC_KVM=y
+CONFIG_SCLPCONSOLE=y
+CONFIG_SCSI=y
+CONFIG_VFIO=y
+CONFIG_VFIO_AP=y
+CONFIG_VFIO_CCW=y
+CONFIG_VFIO_PCI=y
+CONFIG_VHOST_USER=y
+CONFIG_VIRTIO_CCW=y
+CONFIG_WDT_DIAG288=y
diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
new file mode 100644
index 0000000000..d0c9e66641
--- /dev/null
+++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
@@ -0,0 +1,103 @@
+include ../rh-virtio.mak
+
+CONFIG_ACPI=y
+CONFIG_ACPI_PCI=y
+CONFIG_ACPI_CPU_HOTPLUG=y
+CONFIG_ACPI_MEMORY_HOTPLUG=y
+CONFIG_ACPI_NVDIMM=y
+CONFIG_ACPI_SMBUS=y
+CONFIG_ACPI_VMGENID=y
+CONFIG_ACPI_X86=y
+CONFIG_ACPI_X86_ICH=y
+CONFIG_AHCI=y
+CONFIG_APIC=y
+CONFIG_APM=y
+CONFIG_BOCHS_DISPLAY=y
+CONFIG_DIMM=y
+CONFIG_E1000E_PCI_EXPRESS=y
+CONFIG_E1000_PCI=y
+CONFIG_EDU=y
+CONFIG_FDC=y
+CONFIG_FDC_SYSBUS=y
+CONFIG_FDC_ISA=y
+CONFIG_FW_CFG_DMA=y
+CONFIG_HDA=y
+CONFIG_HYPERV=y
+CONFIG_HYPERV_TESTDEV=y
+CONFIG_I2C=y
+CONFIG_I440FX=y
+CONFIG_I8254=y
+CONFIG_I8257=y
+CONFIG_I8259=y
+CONFIG_I82801B11=y
+CONFIG_IDE_CORE=y
+CONFIG_IDE_PCI=y
+CONFIG_IDE_PIIX=y
+CONFIG_IDE_QDEV=y
+CONFIG_IOAPIC=y
+CONFIG_IOH3420=y
+CONFIG_ISA_BUS=y
+CONFIG_ISA_DEBUG=y
+CONFIG_ISA_TESTDEV=y
+CONFIG_LPC_ICH9=y
+CONFIG_MC146818RTC=y
+CONFIG_MEM_DEVICE=y
+CONFIG_NVDIMM=y
+CONFIG_OPENGL=y
+CONFIG_PAM=y
+CONFIG_PC=y
+CONFIG_PCI=y
+CONFIG_PCIE_PORT=y
+CONFIG_PCI_DEVICES=y
+CONFIG_PCI_EXPRESS=y
+CONFIG_PCI_EXPRESS_Q35=y
+CONFIG_PCI_I440FX=y
+CONFIG_PCI_TESTDEV=y
+CONFIG_PCKBD=y
+CONFIG_PCSPK=y
+CONFIG_PC_ACPI=y
+CONFIG_PC_PCI=y
+CONFIG_PFLASH_CFI01=y
+CONFIG_PVPANIC_ISA=y
+CONFIG_PXB=y
+CONFIG_Q35=y
+CONFIG_RTL8139_PCI=y
+CONFIG_SCSI=y
+CONFIG_SERIAL=y
+CONFIG_SERIAL_ISA=y
+CONFIG_SERIAL_PCI=y
+CONFIG_SEV=y
+CONFIG_SMBIOS=y
+CONFIG_SMBUS_EEPROM=y
+CONFIG_TEST_DEVICES=y
+CONFIG_USB=y
+CONFIG_USB_EHCI=y
+CONFIG_USB_EHCI_PCI=y
+CONFIG_USB_SMARTCARD=y
+CONFIG_USB_STORAGE_CORE=y
+CONFIG_USB_STORAGE_CLASSIC=y
+CONFIG_USB_UHCI=y
+CONFIG_USB_XHCI=y
+CONFIG_USB_XHCI_NEC=y
+CONFIG_USB_XHCI_PCI=y
+CONFIG_VFIO=y
+CONFIG_VFIO_PCI=y
+CONFIG_VGA=y
+CONFIG_VGA_CIRRUS=y
+CONFIG_VGA_PCI=y
+CONFIG_VHOST_USER=y
+CONFIG_VHOST_USER_BLK=y
+CONFIG_VIRTIO_MEM=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_VGA=y
+CONFIG_VMMOUSE=y
+CONFIG_VMPORT=y
+CONFIG_VTD=y
+CONFIG_WDT_IB6300ESB=y
+CONFIG_WDT_IB700=y
+CONFIG_XIO3130=y
+CONFIG_TPM=y
+CONFIG_TPM_CRB=y
+CONFIG_TPM_TIS_ISA=y
+CONFIG_TPM_EMULATOR=y
+CONFIG_SGX=y
diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c
index bd9bbade70..de1e401cdf 100644
--- a/hw/acpi/ich9.c
+++ b/hw/acpi/ich9.c
@@ -435,8 +435,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm)
static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN;
pm->acpi_memory_hotplug.is_enabled = true;
pm->cpu_hotplug_legacy = true;
- pm->disable_s3 = 0;
- pm->disable_s4 = 0;
+ pm->disable_s3 = 1;
+ pm->disable_s4 = 1;
pm->s4_val = 2;
pm->use_acpi_hotplug_bridge = true;
pm->keep_pci_slot_hpc = true;
diff --git a/hw/arm/meson.build b/hw/arm/meson.build
index 721a8eb8be..87ed4dd914 100644
--- a/hw/arm/meson.build
+++ b/hw/arm/meson.build
@@ -31,7 +31,7 @@ arm_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c'))
arm_ss.add(when: 'CONFIG_ZYNQ', if_true: files('xilinx_zynq.c'))
arm_ss.add(when: 'CONFIG_SABRELITE', if_true: files('sabrelite.c'))
-arm_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m.c'))
+#arm_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m.c'))
arm_ss.add(when: 'CONFIG_EXYNOS4', if_true: files('exynos4210.c'))
arm_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx.c', 'pxa2xx_gpio.c', 'pxa2xx_pic.c'))
arm_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic.c'))
diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 347875a0cd..ca1776121f 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -49,6 +49,8 @@
#include "qom/object.h"
#include "fdc-internal.h"
+#include "hw/boards.h"
+
/********************************************************/
/* debug Floppy devices */
@@ -2338,6 +2340,14 @@ void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, Error **errp)
FDrive *drive;
static int command_tables_inited = 0;
+ /* Restricted for Red Hat Enterprise Linux: */
+ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
+ if (!strstr(mc->name, "-rhel7.")) {
+ error_setg(errp, "Device %s is not supported with machine type %s",
+ object_get_typename(OBJECT(dev)), mc->name);
+ return;
+ }
+
if (fdctrl->fallback == FLOPPY_DRIVE_TYPE_AUTO) {
error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'");
return;
diff --git a/hw/cpu/meson.build b/hw/cpu/meson.build
index 9e52fee9e7..bb71c9f3e7 100644
--- a/hw/cpu/meson.build
+++ b/hw/cpu/meson.build
@@ -1,6 +1,7 @@
-softmmu_ss.add(files('core.c', 'cluster.c'))
+#softmmu_ss.add(files('core.c', 'cluster.c'))
+softmmu_ss.add(files('core.c'))
specific_ss.add(when: 'CONFIG_ARM11MPCORE', if_true: files('arm11mpcore.c'))
specific_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview_mpcore.c'))
specific_ss.add(when: 'CONFIG_A9MPCORE', if_true: files('a9mpcore.c'))
-specific_ss.add(when: 'CONFIG_A15MPCORE', if_true: files('a15mpcore.c'))
+#specific_ss.add(when: 'CONFIG_A15MPCORE', if_true: files('a15mpcore.c'))
diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c
index 3bb6a58698..6447fdb02e 100644
--- a/hw/display/cirrus_vga.c
+++ b/hw/display/cirrus_vga.c
@@ -2945,7 +2945,10 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp)
PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev);
int16_t device_id = pc->device_id;
- /*
+ warn_report("'cirrus-vga' is deprecated, "
+ "please use a different VGA card instead");
+
+ /*
* Follow real hardware, cirrus card emulated has 4 MB video memory.
* Also accept 8 MB/16 MB for backward compatibility.
*/
diff --git a/hw/ide/piix.c b/hw/ide/piix.c
index ce89fd0aa3..fbcf802b13 100644
--- a/hw/ide/piix.c
+++ b/hw/ide/piix.c
@@ -232,7 +232,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data)
k->device_id = PCI_DEVICE_ID_INTEL_82371SB_1;
k->class_id = PCI_CLASS_STORAGE_IDE;
set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
- dc->hotpluggable = false;
+ /* Disabled for Red Hat Enterprise Linux: */
+ dc->user_creatable = false;
}
static const TypeInfo piix3_ide_info = {
@@ -261,6 +262,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data)
k->class_id = PCI_CLASS_STORAGE_IDE;
set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
dc->hotpluggable = false;
+ /* Disabled for Red Hat Enterprise Linux: */
+ dc->user_creatable = false;
}
static const TypeInfo piix4_ide_info = {
diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c
index 4efdf75620..5143ebaa27 100644
--- a/hw/input/pckbd.c
+++ b/hw/input/pckbd.c
@@ -814,6 +814,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data)
dc->vmsd = &vmstate_kbd_isa;
isa->build_aml = i8042_build_aml;
set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
+ /* Disabled for Red Hat Enterprise Linux: */
+ dc->user_creatable = false;
}
static const TypeInfo i8042_info = {
diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index f5bc81296d..282d01e374 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -1821,6 +1821,7 @@ static const E1000Info e1000_devices[] = {
.revision = 0x03,
.phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT,
},
+#if 0 /* Disabled for Red Hat Enterprise Linux 7 */
{
.name = "e1000-82544gc",
.device_id = E1000_DEV_ID_82544GC_COPPER,
@@ -1833,6 +1834,7 @@ static const E1000Info e1000_devices[] = {
.revision = 0x03,
.phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT,
},
+#endif
};
static void e1000_register_types(void)
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index 8a4861f45a..fcb5dfe792 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -379,10 +379,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = {
.instance_size = sizeof(SpaprCpuCore),
.class_size = sizeof(SpaprCpuCoreClass),
},
+#if 0 /* Disabled for Red Hat Enterprise Linux */
DEFINE_SPAPR_CPU_CORE_TYPE("970_v2.2"),
DEFINE_SPAPR_CPU_CORE_TYPE("970mp_v1.0"),
DEFINE_SPAPR_CPU_CORE_TYPE("970mp_v1.1"),
DEFINE_SPAPR_CPU_CORE_TYPE("power5+_v2.1"),
+#endif
DEFINE_SPAPR_CPU_CORE_TYPE("power7_v2.3"),
DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"),
DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"),
diff --git a/hw/usb/meson.build b/hw/usb/meson.build
index de853d780d..0776ae6a20 100644
--- a/hw/usb/meson.build
+++ b/hw/usb/meson.build
@@ -52,7 +52,7 @@ softmmu_ss.add(when: 'CONFIG_USB_SMARTCARD', if_true: files('dev-smartcard-reade
if cacard.found()
usbsmartcard_ss = ss.source_set()
usbsmartcard_ss.add(when: 'CONFIG_USB_SMARTCARD',
- if_true: [cacard, files('ccid-card-emulated.c', 'ccid-card-passthru.c')])
+ if_true: [cacard, files('ccid-card-passthru.c')])
hw_usb_modules += {'smartcard': usbsmartcard_ss}
endif
diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c
index 13d0e9b195..3826fa5122 100644
--- a/target/arm/cpu_tcg.c
+++ b/target/arm/cpu_tcg.c
@@ -22,6 +22,7 @@
/* CPU models. These are not needed for the AArch64 linux-user build. */
#if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64)
+#if 0 /* Disabled for Red Hat Enterprise Linux */
#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG)
static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
{
@@ -375,6 +376,7 @@ static void cortex_a9_initfn(Object *obj)
cpu->ccsidr[1] = 0x200fe019; /* 16k L1 icache. */
define_arm_cp_regs(cpu, cortexa9_cp_reginfo);
}
+#endif /* disabled for RHEL */
#ifndef CONFIG_USER_ONLY
static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri)
@@ -400,6 +402,7 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = {
REGINFO_SENTINEL
};
+#if 0 /* Disabled for Red Hat Enterprise Linux */
static void cortex_a7_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
@@ -445,6 +448,7 @@ static void cortex_a7_initfn(Object *obj)
cpu->ccsidr[2] = 0x711fe07a; /* 4096K L2 unified cache */
define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */
}
+#endif /* disabled for RHEL */
static void cortex_a15_initfn(Object *obj)
{
@@ -488,6 +492,7 @@ static void cortex_a15_initfn(Object *obj)
define_arm_cp_regs(cpu, cortexa15_cp_reginfo);
}
+#if 0 /* Disabled for Red Hat Enterprise Linux */
static void cortex_m0_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
@@ -928,6 +933,7 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data)
cc->gdb_core_xml_file = "arm-m-profile.xml";
}
+#endif /* disabled for RHEL */
#ifndef TARGET_AARCH64
/*
@@ -1007,6 +1013,7 @@ static void arm_max_initfn(Object *obj)
#endif /* !TARGET_AARCH64 */
static const ARMCPUInfo arm_tcg_cpus[] = {
+#if 0 /* Disabled for Red Hat Enterprise Linux */
{ .name = "arm926", .initfn = arm926_initfn },
{ .name = "arm946", .initfn = arm946_initfn },
{ .name = "arm1026", .initfn = arm1026_initfn },
@@ -1022,7 +1029,9 @@ static const ARMCPUInfo arm_tcg_cpus[] = {
{ .name = "cortex-a7", .initfn = cortex_a7_initfn },
{ .name = "cortex-a8", .initfn = cortex_a8_initfn },
{ .name = "cortex-a9", .initfn = cortex_a9_initfn },
+#endif /* disabled for RHEL */
{ .name = "cortex-a15", .initfn = cortex_a15_initfn },
+#if 0 /* Disabled for Red Hat Enterprise Linux */
{ .name = "cortex-m0", .initfn = cortex_m0_initfn,
.class_init = arm_v7m_class_init },
{ .name = "cortex-m3", .initfn = cortex_m3_initfn,
@@ -1053,6 +1062,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = {
{ .name = "pxa270-b1", .initfn = pxa270b1_initfn },
{ .name = "pxa270-c0", .initfn = pxa270c0_initfn },
{ .name = "pxa270-c5", .initfn = pxa270c5_initfn },
+#endif /* disabled for RHEL */
#ifndef TARGET_AARCH64
{ .name = "max", .initfn = arm_max_initfn },
#endif
diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c
index 976be5e0d1..dd78883410 100644
--- a/target/ppc/cpu-models.c
+++ b/target/ppc/cpu-models.c
@@ -66,6 +66,7 @@
#define POWERPC_DEF(_name, _pvr, _type, _desc) \
POWERPC_DEF_SVR(_name, _desc, _pvr, POWERPC_SVR_NONE, _type)
+#if 0 /* Embedded and 32-bit CPUs disabled for Red Hat Enterprise Linux */
/* Embedded PowerPC */
/* PowerPC 405 family */
/* PowerPC 405 cores */
@@ -698,8 +699,10 @@
"PowerPC 7447A v1.2 (G4)")
POWERPC_DEF("7457a_v1.2", CPU_POWERPC_74x7A_v12, 7455,
"PowerPC 7457A v1.2 (G4)")
+#endif
/* 64 bits PowerPC */
#if defined(TARGET_PPC64)
+#if 0 /* Disabled for Red Hat Enterprise Linux */
POWERPC_DEF("970_v2.2", CPU_POWERPC_970_v22, 970,
"PowerPC 970 v2.2")
POWERPC_DEF("970fx_v1.0", CPU_POWERPC_970FX_v10, 970,
@@ -718,6 +721,7 @@
"PowerPC 970MP v1.1")
POWERPC_DEF("power5+_v2.1", CPU_POWERPC_POWER5P_v21, POWER5P,
"POWER5+ v2.1")
+#endif
POWERPC_DEF("power7_v2.3", CPU_POWERPC_POWER7_v23, POWER7,
"POWER7 v2.3")
POWERPC_DEF("power7+_v2.1", CPU_POWERPC_POWER7P_v21, POWER7,
@@ -897,12 +901,15 @@ PowerPCCPUAlias ppc_cpu_aliases[] = {
{ "7447a", "7447a_v1.2" },
{ "7457a", "7457a_v1.2" },
{ "apollo7pm", "7457a_v1.0" },
+#endif
#if defined(TARGET_PPC64)
+#if 0 /* Disabled for Red Hat Enterprise Linux */
{ "970", "970_v2.2" },
{ "970fx", "970fx_v3.1" },
{ "970mp", "970mp_v1.1" },
{ "power5+", "power5+_v2.1" },
{ "power5gs", "power5+_v2.1" },
+#endif
{ "power7", "power7_v2.3" },
{ "power7+", "power7+_v2.1" },
{ "power8e", "power8e_v2.1" },
@@ -912,6 +919,7 @@ PowerPCCPUAlias ppc_cpu_aliases[] = {
{ "power10", "power10_v2.0" },
#endif
+#if 0 /* Disabled for Red Hat Enterprise Linux */
/* Generic PowerPCs */
#if defined(TARGET_PPC64)
{ "ppc64", "970fx_v3.1" },
@@ -919,5 +927,6 @@ PowerPCCPUAlias ppc_cpu_aliases[] = {
{ "ppc32", "604" },
{ "ppc", "604" },
{ "default", "604" },
+#endif
{ NULL, NULL }
};
diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c
index 05c3ccaaff..6a04ccab1b 100644
--- a/target/s390x/cpu_models_sysemu.c
+++ b/target/s390x/cpu_models_sysemu.c
@@ -36,6 +36,9 @@ static void check_unavailable_features(const S390CPUModel *max_model,
(max_model->def->gen == model->def->gen &&
max_model->def->ec_ga < model->def->ec_ga)) {
list_add_feat("type", unavailable);
+ } else if (model->def->gen < 11 && kvm_enabled()) {
+ /* Older CPU models are not supported on Red Hat Enterprise Linux */
+ list_add_feat("type", unavailable);
}
/* detect missing features if any to properly report them */
diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
index 6acf14d5ec..74f089d87f 100644
--- a/target/s390x/kvm/kvm.c
+++ b/target/s390x/kvm/kvm.c
@@ -2512,6 +2512,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp)
error_setg(errp, "KVM doesn't support CPU models");
return;
}
+
+ /* Older CPU models are not supported on Red Hat Enterprise Linux */
+ if (model->def->gen < 11) {
+ error_setg(errp, "KVM: Unsupported CPU type specified: %s",
+ MACHINE(qdev_get_machine())->cpu_type);
+ return;
+ }
+
prop.cpuid = s390_cpuid_from_cpu_model(model);
prop.ibc = s390_ibc_from_cpu_model(model);
/* configure cpu features indicated via STFL(e) */
--
2.31.1

@ -0,0 +1,619 @@
From a525db3951dc68c469d1f51bdc69ab6e75e72c37 Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Fri, 11 Jan 2019 09:54:45 +0100
Subject: Machine type related general changes
This patch is first part of original "Add RHEL machine types" patch we
split to allow easier review. It contains changes not related to any
architecture.
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
--
Rebase notes (6.2.0):
- Do not duplicate minimal_version_id for piix4_pm
- Remove empty line chunks in serial.c
- Remove migration.h include in serial.c
- Update hw_compat_rhel_8_5 (from MR 66)
Rebase notes (7.0.0):
- Remove downstream changes leftovers in hw/rtc/mc146818rtc.c
- Remove unnecessary change in hw/usb/hcd-uhci.c
Merged patches (6.1.0):
- f2fb42a3c6 redhat: add missing entries in hw_compat_rhel_8_4
- 1949ec258e hw/arm/virt: Disable PL011 clock migration through hw_compat_rhel_8_3
- a3995e2eff Remove RHEL 7.0.0 machine type (only generic changes)
- ad3190a79b Remove RHEL 7.1.0 machine type (only generic changes)
- 84bbe15d4e Remove RHEL 7.2.0 machine type (only generic changes)
- 0215eb3356 Remove RHEL 7.3.0 machine types (only generic changes)
- af69d1ca6e Remove RHEL 7.4.0 machine types (only generic changes)
- 8f7a74ab78 Remove RHEL 7.5.0 machine types (only generic changes)
Merged patches (6.2.0):
- d687ac13d2 redhat: Define hw_compat_rhel_8_5
Merged patches (7.0.0):
- ef5afcc86d Fix virtio-net-pci* "vectors" compat
- 168f0d56e3 compat: Update hw_compat_rhel_8_5 with 6.2.0 RC2 changes
---
hw/acpi/piix4.c | 6 +-
hw/arm/virt.c | 2 +-
hw/core/machine.c | 186 +++++++++++++++++++++++++++++++++++
hw/display/vga-isa.c | 2 +-
hw/i386/pc_piix.c | 2 +
hw/i386/pc_q35.c | 2 +
hw/net/rtl8139.c | 4 +-
hw/smbios/smbios.c | 46 ++++++++-
hw/timer/i8254_common.c | 2 +-
hw/usb/hcd-xhci-pci.c | 59 ++++++++---
hw/usb/hcd-xhci-pci.h | 1 +
include/hw/boards.h | 21 ++++
include/hw/firmware/smbios.h | 5 +-
include/hw/i386/pc.h | 3 +
14 files changed, 316 insertions(+), 25 deletions(-)
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index fe5625d07a..28544e78c3 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -287,7 +287,7 @@ static bool vmstate_test_migrate_acpi_index(void *opaque, int version_id)
static const VMStateDescription vmstate_acpi = {
.name = "piix4_pm",
.version_id = 3,
- .minimum_version_id = 3,
+ .minimum_version_id = 2,
.post_load = vmstate_acpi_post_load,
.fields = (VMStateField[]) {
VMSTATE_PCI_DEVICE(parent_obj, PIIX4PMState),
@@ -653,8 +653,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev)
static Property piix4_pm_properties[] = {
DEFINE_PROP_UINT32("smb_io_base", PIIX4PMState, smb_io_base, 0),
- DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 0),
- DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 0),
+ DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 1),
+ DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 1),
DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2),
DEFINE_PROP_BOOL(ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, PIIX4PMState,
use_acpi_hotplug_bridge, true),
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index d2e5ecd234..6a84031fd7 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1596,7 +1596,7 @@ static void virt_build_smbios(VirtMachineState *vms)
smbios_set_defaults("QEMU", product,
vmc->smbios_old_sys_ver ? "1.0" : mc->name, false,
- true, SMBIOS_ENTRY_POINT_TYPE_64);
+ true, NULL, NULL, SMBIOS_ENTRY_POINT_TYPE_64);
smbios_get_tables(MACHINE(vms), NULL, 0,
&smbios_tables, &smbios_tables_len,
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 1e23fdc14b..ea430d844e 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -37,6 +37,192 @@
#include "hw/virtio/virtio.h"
#include "hw/virtio/virtio-pci.h"
+/*
+ * Mostly the same as hw_compat_6_0 and hw_compat_6_1
+ */
+GlobalProperty hw_compat_rhel_8_5[] = {
+ /* hw_compat_rhel_8_5 from hw_compat_6_0 */
+ { "gpex-pcihost", "allow-unmapped-accesses", "false" },
+ /* hw_compat_rhel_8_5 from hw_compat_6_0 */
+ { "i8042", "extended-state", "false"},
+ /* hw_compat_rhel_8_5 from hw_compat_6_0 */
+ { "nvme-ns", "eui64-default", "off"},
+ /* hw_compat_rhel_8_5 from hw_compat_6_0 */
+ { "e1000", "init-vet", "off" },
+ /* hw_compat_rhel_8_5 from hw_compat_6_0 */
+ { "e1000e", "init-vet", "off" },
+ /* hw_compat_rhel_8_5 from hw_compat_6_0 */
+ { "vhost-vsock-device", "seqpacket", "off" },
+ /* hw_compat_rhel_8_5 from hw_compat_6_1 */
+ { "vhost-user-vsock-device", "seqpacket", "off" },
+ /* hw_compat_rhel_8_5 from hw_compat_6_1 */
+ { "nvme-ns", "shared", "off" },
+};
+const size_t hw_compat_rhel_8_5_len = G_N_ELEMENTS(hw_compat_rhel_8_5);
+
+/*
+ * Mostly the same as hw_compat_5_2
+ */
+GlobalProperty hw_compat_rhel_8_4[] = {
+ /* hw_compat_rhel_8_4 from hw_compat_5_2 */
+ { "ICH9-LPC", "smm-compat", "on"},
+ /* hw_compat_rhel_8_4 from hw_compat_5_2 */
+ { "PIIX4_PM", "smm-compat", "on"},
+ /* hw_compat_rhel_8_4 from hw_compat_5_2 */
+ { "virtio-blk-device", "report-discard-granularity", "off" },
+ /* hw_compat_rhel_8_4 from hw_compat_5_2 */
+ /*
+ * Upstream incorrectly had "virtio-net-pci" instead of "virtio-net-pci-base",
+ * (https://bugzilla.redhat.com/show_bug.cgi?id=1999141)
+ */
+ { "virtio-net-pci-base", "vectors", "3"},
+};
+const size_t hw_compat_rhel_8_4_len = G_N_ELEMENTS(hw_compat_rhel_8_4);
+
+/*
+ * Mostly the same as hw_compat_5_1
+ */
+GlobalProperty hw_compat_rhel_8_3[] = {
+ /* hw_compat_rhel_8_3 from hw_compat_5_1 */
+ { "vhost-scsi", "num_queues", "1"},
+ /* hw_compat_rhel_8_3 from hw_compat_5_1 */
+ { "vhost-user-blk", "num-queues", "1"},
+ /* hw_compat_rhel_8_3 from hw_compat_5_1 */
+ { "vhost-user-scsi", "num_queues", "1"},
+ /* hw_compat_rhel_8_3 from hw_compat_5_1 */
+ { "virtio-blk-device", "num-queues", "1"},
+ /* hw_compat_rhel_8_3 from hw_compat_5_1 */
+ { "virtio-scsi-device", "num_queues", "1"},
+ /* hw_compat_rhel_8_3 from hw_compat_5_1 */
+ { "nvme", "use-intel-id", "on"},
+ /* hw_compat_rhel_8_3 from hw_compat_5_1 */
+ { "pvpanic", "events", "1"}, /* PVPANIC_PANICKED */
+ /* hw_compat_rhel_8_3 from hw_compat_5_1 */
+ { "pl011", "migrate-clk", "off" },
+ /* hw_compat_rhel_8_3 bz 1912846 */
+ { "pci-xhci", "x-rh-late-msi-cap", "off" },
+ /* hw_compat_rhel_8_3 from hw_compat_5_1 */
+ { "virtio-pci", "x-ats-page-aligned", "off"},
+};
+const size_t hw_compat_rhel_8_3_len = G_N_ELEMENTS(hw_compat_rhel_8_3);
+
+/*
+ * The same as hw_compat_4_2 + hw_compat_5_0
+ */
+GlobalProperty hw_compat_rhel_8_2[] = {
+ /* hw_compat_rhel_8_2 from hw_compat_4_2 */
+ { "virtio-blk-device", "queue-size", "128"},
+ /* hw_compat_rhel_8_2 from hw_compat_4_2 */
+ { "virtio-scsi-device", "virtqueue_size", "128"},
+ /* hw_compat_rhel_8_2 from hw_compat_4_2 */
+ { "virtio-blk-device", "x-enable-wce-if-config-wce", "off" },
+ /* hw_compat_rhel_8_2 from hw_compat_4_2 */
+ { "virtio-blk-device", "seg-max-adjust", "off"},
+ /* hw_compat_rhel_8_2 from hw_compat_4_2 */
+ { "virtio-scsi-device", "seg_max_adjust", "off"},
+ /* hw_compat_rhel_8_2 from hw_compat_4_2 */
+ { "vhost-blk-device", "seg_max_adjust", "off"},
+ /* hw_compat_rhel_8_2 from hw_compat_4_2 */
+ { "usb-host", "suppress-remote-wake", "off" },
+ /* hw_compat_rhel_8_2 from hw_compat_4_2 */
+ { "usb-redir", "suppress-remote-wake", "off" },
+ /* hw_compat_rhel_8_2 from hw_compat_4_2 */
+ { "qxl", "revision", "4" },
+ /* hw_compat_rhel_8_2 from hw_compat_4_2 */
+ { "qxl-vga", "revision", "4" },
+ /* hw_compat_rhel_8_2 from hw_compat_4_2 */
+ { "fw_cfg", "acpi-mr-restore", "false" },
+ /* hw_compat_rhel_8_2 from hw_compat_4_2 */
+ { "virtio-device", "use-disabled-flag", "false" },
+ /* hw_compat_rhel_8_2 from hw_compat_5_0 */
+ { "pci-host-bridge", "x-config-reg-migration-enabled", "off" },
+ /* hw_compat_rhel_8_2 from hw_compat_5_0 */
+ { "virtio-balloon-device", "page-poison", "false" },
+ /* hw_compat_rhel_8_2 from hw_compat_5_0 */
+ { "vmport", "x-read-set-eax", "off" },
+ /* hw_compat_rhel_8_2 from hw_compat_5_0 */
+ { "vmport", "x-signal-unsupported-cmd", "off" },
+ /* hw_compat_rhel_8_2 from hw_compat_5_0 */
+ { "vmport", "x-report-vmx-type", "off" },
+ /* hw_compat_rhel_8_2 from hw_compat_5_0 */
+ { "vmport", "x-cmds-v2", "off" },
+ /* hw_compat_rhel_8_2 from hw_compat_5_0 */
+ { "virtio-device", "x-disable-legacy-check", "true" },
+};
+const size_t hw_compat_rhel_8_2_len = G_N_ELEMENTS(hw_compat_rhel_8_2);
+
+/*
+ * The same as hw_compat_4_1
+ */
+GlobalProperty hw_compat_rhel_8_1[] = {
+ /* hw_compat_rhel_8_1 from hw_compat_4_1 */
+ { "virtio-pci", "x-pcie-flr-init", "off" },
+};
+const size_t hw_compat_rhel_8_1_len = G_N_ELEMENTS(hw_compat_rhel_8_1);
+
+/* The same as hw_compat_3_1
+ * format of array has been changed by:
+ * 6c36bddf5340 ("machine: Use shorter format for GlobalProperty arrays")
+ */
+GlobalProperty hw_compat_rhel_8_0[] = {
+ /* hw_compat_rhel_8_0 from hw_compat_3_1 */
+ { "pcie-root-port", "x-speed", "2_5" },
+ /* hw_compat_rhel_8_0 from hw_compat_3_1 */
+ { "pcie-root-port", "x-width", "1" },
+ /* hw_compat_rhel_8_0 from hw_compat_3_1 */
+ { "memory-backend-file", "x-use-canonical-path-for-ramblock-id", "true" },
+ /* hw_compat_rhel_8_0 from hw_compat_3_1 */
+ { "memory-backend-memfd", "x-use-canonical-path-for-ramblock-id", "true" },
+ /* hw_compat_rhel_8_0 from hw_compat_3_1 */
+ { "tpm-crb", "ppi", "false" },
+ /* hw_compat_rhel_8_0 from hw_compat_3_1 */
+ { "tpm-tis", "ppi", "false" },
+ /* hw_compat_rhel_8_0 from hw_compat_3_1 */
+ { "usb-kbd", "serial", "42" },
+ /* hw_compat_rhel_8_0 from hw_compat_3_1 */
+ { "usb-mouse", "serial", "42" },
+ /* hw_compat_rhel_8_0 from hw_compat_3_1 */
+ { "usb-tablet", "serial", "42" },
+ /* hw_compat_rhel_8_0 from hw_compat_3_1 */
+ { "virtio-blk-device", "discard", "false" },
+ /* hw_compat_rhel_8_0 from hw_compat_3_1 */
+ { "virtio-blk-device", "write-zeroes", "false" },
+ /* hw_compat_rhel_8_0 from hw_compat_4_0 */
+ { "VGA", "edid", "false" },
+ /* hw_compat_rhel_8_0 from hw_compat_4_0 */
+ { "secondary-vga", "edid", "false" },
+ /* hw_compat_rhel_8_0 from hw_compat_4_0 */
+ { "bochs-display", "edid", "false" },
+ /* hw_compat_rhel_8_0 from hw_compat_4_0 */
+ { "virtio-vga", "edid", "false" },
+ /* hw_compat_rhel_8_0 from hw_compat_4_0 */
+ { "virtio-gpu-device", "edid", "false" },
+ /* hw_compat_rhel_8_0 from hw_compat_4_0 */
+ { "virtio-device", "use-started", "false" },
+ /* hw_compat_rhel_8_0 from hw_compat_3_1 - that was added in 4.1 */
+ { "pcie-root-port-base", "disable-acs", "true" },
+};
+const size_t hw_compat_rhel_8_0_len = G_N_ELEMENTS(hw_compat_rhel_8_0);
+
+/* The same as hw_compat_3_0 + hw_compat_2_12
+ * except that
+ * there's nothing in 3_0
+ * migration.decompress-error-check=off was in 7.5 from bz 1584139
+ */
+GlobalProperty hw_compat_rhel_7_6[] = {
+ /* hw_compat_rhel_7_6 from hw_compat_2_12 */
+ { "hda-audio", "use-timer", "false" },
+ /* hw_compat_rhel_7_6 from hw_compat_2_12 */
+ { "cirrus-vga", "global-vmstate", "true" },
+ /* hw_compat_rhel_7_6 from hw_compat_2_12 */
+ { "VGA", "global-vmstate", "true" },
+ /* hw_compat_rhel_7_6 from hw_compat_2_12 */
+ { "vmware-svga", "global-vmstate", "true" },
+ /* hw_compat_rhel_7_6 from hw_compat_2_12 */
+ { "qxl-vga", "global-vmstate", "true" },
+};
+const size_t hw_compat_rhel_7_6_len = G_N_ELEMENTS(hw_compat_rhel_7_6);
+
GlobalProperty hw_compat_6_2[] = {
{ "PIIX4_PM", "x-not-migrate-acpi-index", "on"},
};
diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c
index 46abbc5653..505467059b 100644
--- a/hw/display/vga-isa.c
+++ b/hw/display/vga-isa.c
@@ -88,7 +88,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp)
}
static Property vga_isa_properties[] = {
- DEFINE_PROP_UINT32("vgamem_mb", ISAVGAState, state.vram_size_mb, 8),
+ DEFINE_PROP_UINT32("vgamem_mb", ISAVGAState, state.vram_size_mb, 16),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index b72c03d0a6..c797e98312 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -177,6 +177,8 @@ static void pc_init1(MachineState *machine,
smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)",
mc->name, pcmc->smbios_legacy_mode,
pcmc->smbios_uuid_encoded,
+ pcmc->smbios_stream_product,
+ pcmc->smbios_stream_version,
pcms->smbios_entry_point_type);
}
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 1780f79bc1..b695f88c45 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -200,6 +200,8 @@ static void pc_q35_init(MachineState *machine)
smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)",
mc->name, pcmc->smbios_legacy_mode,
pcmc->smbios_uuid_encoded,
+ pcmc->smbios_stream_product,
+ pcmc->smbios_stream_version,
pcms->smbios_entry_point_type);
}
diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c
index 6b65823b4b..75dacabc43 100644
--- a/hw/net/rtl8139.c
+++ b/hw/net/rtl8139.c
@@ -3179,7 +3179,7 @@ static int rtl8139_pre_save(void *opaque)
static const VMStateDescription vmstate_rtl8139 = {
.name = "rtl8139",
- .version_id = 5,
+ .version_id = 4,
.minimum_version_id = 3,
.post_load = rtl8139_post_load,
.pre_save = rtl8139_pre_save,
@@ -3260,7 +3260,9 @@ static const VMStateDescription vmstate_rtl8139 = {
VMSTATE_UINT32(tally_counters.TxMCol, RTL8139State),
VMSTATE_UINT64(tally_counters.RxOkPhy, RTL8139State),
VMSTATE_UINT64(tally_counters.RxOkBrd, RTL8139State),
+#if 0 /* Disabled for Red Hat Enterprise Linux bz 1420195 */
VMSTATE_UINT32_V(tally_counters.RxOkMul, RTL8139State, 5),
+#endif
VMSTATE_UINT16(tally_counters.TxAbt, RTL8139State),
VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State),
diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c
index 60349ee402..0edcc98434 100644
--- a/hw/smbios/smbios.c
+++ b/hw/smbios/smbios.c
@@ -57,6 +57,9 @@ static bool smbios_legacy = true;
static bool smbios_uuid_encoded = true;
/* end: legacy structures & constants for <= 2.0 machines */
+/* Set to true for modern Windows 10 HardwareID-6 compat */
+static bool smbios_type2_required;
+
uint8_t *smbios_tables;
size_t smbios_tables_len;
@@ -639,7 +642,7 @@ static void smbios_build_type_1_table(void)
static void smbios_build_type_2_table(void)
{
- SMBIOS_BUILD_TABLE_PRE(2, T2_BASE, false); /* optional */
+ SMBIOS_BUILD_TABLE_PRE(2, T2_BASE, smbios_type2_required);
SMBIOS_TABLE_SET_STR(2, manufacturer_str, type2.manufacturer);
SMBIOS_TABLE_SET_STR(2, product_str, type2.product);
@@ -914,7 +917,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features)
void smbios_set_defaults(const char *manufacturer, const char *product,
const char *version, bool legacy_mode,
- bool uuid_encoded, SmbiosEntryPointType ep_type)
+ bool uuid_encoded,
+ const char *stream_product,
+ const char *stream_version,
+ SmbiosEntryPointType ep_type)
{
smbios_have_defaults = true;
smbios_legacy = legacy_mode;
@@ -935,11 +941,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product,
g_free(smbios_entries);
}
+ /*
+ * If @stream_product & @stream_version are non-NULL, then
+ * we're following rules for new Windows driver support.
+ * The data we have to report is defined in this doc:
+ *
+ * https://docs.microsoft.com/en-us/windows-hardware/drivers/install/specifying-hardware-ids-for-a-computer
+ *
+ * The Windows drivers are written to expect use of the
+ * scheme documented as "HardwareID-6" against Windows 10,
+ * which uses SMBIOS System (Type 1) and Base Board (Type 2)
+ * tables and will match on
+ *
+ * System Manufacturer = Red Hat (@manufacturer)
+ * System SKU Number = 8.2.0 (@stream_version)
+ * Baseboard Manufacturer = Red Hat (@manufacturer)
+ * Baseboard Product = RHEL-AV (@stream_product)
+ *
+ * NB, SKU must be changed with each RHEL-AV release
+ *
+ * Other fields can be freely used by applications using
+ * QEMU. For example apps can use the "System product"
+ * and "System version" to identify themselves.
+ *
+ * We get 'System Manufacturer' and 'Baseboard Manufacturer'
+ */
SMBIOS_SET_DEFAULT(type1.manufacturer, manufacturer);
SMBIOS_SET_DEFAULT(type1.product, product);
SMBIOS_SET_DEFAULT(type1.version, version);
+ SMBIOS_SET_DEFAULT(type1.family, "Red Hat Enterprise Linux");
+ if (stream_version != NULL) {
+ SMBIOS_SET_DEFAULT(type1.sku, stream_version);
+ }
SMBIOS_SET_DEFAULT(type2.manufacturer, manufacturer);
- SMBIOS_SET_DEFAULT(type2.product, product);
+ if (stream_product != NULL) {
+ SMBIOS_SET_DEFAULT(type2.product, stream_product);
+ smbios_type2_required = true;
+ } else {
+ SMBIOS_SET_DEFAULT(type2.product, product);
+ }
SMBIOS_SET_DEFAULT(type2.version, version);
SMBIOS_SET_DEFAULT(type3.manufacturer, manufacturer);
SMBIOS_SET_DEFAULT(type3.version, version);
diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c
index 050875b497..32935da46c 100644
--- a/hw/timer/i8254_common.c
+++ b/hw/timer/i8254_common.c
@@ -231,7 +231,7 @@ static const VMStateDescription vmstate_pit_common = {
.pre_save = pit_dispatch_pre_save,
.post_load = pit_dispatch_post_load,
.fields = (VMStateField[]) {
- VMSTATE_UINT32_V(channels[0].irq_disabled, PITCommonState, 3),
+ VMSTATE_UINT32(channels[0].irq_disabled, PITCommonState), /* qemu-kvm's v2 had 'flags' here */
VMSTATE_STRUCT_ARRAY(channels, PITCommonState, 3, 2,
vmstate_pit_channel, PITChannelState),
VMSTATE_INT64(channels[0].next_transition_time,
diff --git a/hw/usb/hcd-xhci-pci.c b/hw/usb/hcd-xhci-pci.c
index e934b1a5b1..e18b05e528 100644
--- a/hw/usb/hcd-xhci-pci.c
+++ b/hw/usb/hcd-xhci-pci.c
@@ -104,6 +104,33 @@ static int xhci_pci_vmstate_post_load(void *opaque, int version_id)
return 0;
}
+/* RH bz 1912846 */
+static bool usb_xhci_pci_add_msi(struct PCIDevice *dev, Error **errp)
+{
+ int ret;
+ Error *err = NULL;
+ XHCIPciState *s = XHCI_PCI(dev);
+
+ ret = msi_init(dev, 0x70, s->xhci.numintrs, true, false, &err);
+ /*
+ * Any error other than -ENOTSUP(board's MSI support is broken)
+ * is a programming error
+ */
+ assert(!ret || ret == -ENOTSUP);
+ if (ret && s->msi == ON_OFF_AUTO_ON) {
+ /* Can't satisfy user's explicit msi=on request, fail */
+ error_append_hint(&err, "You have to use msi=auto (default) or "
+ "msi=off with this machine type.\n");
+ error_propagate(errp, err);
+ return true;
+ }
+ assert(!err || s->msi == ON_OFF_AUTO_AUTO);
+ /* With msi=auto, we fall back to MSI off silently */
+ error_free(err);
+
+ return false;
+}
+
static void usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp)
{
int ret;
@@ -125,23 +152,12 @@ static void usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp)
s->xhci.nec_quirks = true;
}
- if (s->msi != ON_OFF_AUTO_OFF) {
- ret = msi_init(dev, 0x70, s->xhci.numintrs, true, false, &err);
- /*
- * Any error other than -ENOTSUP(board's MSI support is broken)
- * is a programming error
- */
- assert(!ret || ret == -ENOTSUP);
- if (ret && s->msi == ON_OFF_AUTO_ON) {
- /* Can't satisfy user's explicit msi=on request, fail */
- error_append_hint(&err, "You have to use msi=auto (default) or "
- "msi=off with this machine type.\n");
+ if (s->msi != ON_OFF_AUTO_OFF && s->rh_late_msi_cap) {
+ /* This gives the behaviour from 5.2.0 onwards, lspci shows 90,a0,70 */
+ if (usb_xhci_pci_add_msi(dev, &err)) {
error_propagate(errp, err);
return;
}
- assert(!err || s->msi == ON_OFF_AUTO_AUTO);
- /* With msi=auto, we fall back to MSI off silently */
- error_free(err);
}
pci_register_bar(dev, 0,
PCI_BASE_ADDRESS_SPACE_MEMORY |
@@ -154,6 +170,14 @@ static void usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp)
assert(ret > 0);
}
+ /* RH bz 1912846 */
+ if (s->msi != ON_OFF_AUTO_OFF && !s->rh_late_msi_cap) {
+ /* This gives the older RH machine behaviour, lspci shows 90,70,a0 */
+ if (usb_xhci_pci_add_msi(dev, &err)) {
+ error_propagate(errp, err);
+ return;
+ }
+ }
if (s->msix != ON_OFF_AUTO_OFF) {
/* TODO check for errors, and should fail when msix=on */
msix_init(dev, s->xhci.numintrs,
@@ -198,11 +222,18 @@ static void xhci_instance_init(Object *obj)
qdev_alias_all_properties(DEVICE(&s->xhci), obj);
}
+static Property xhci_pci_properties[] = {
+ /* RH bz 1912846 */
+ DEFINE_PROP_BOOL("x-rh-late-msi-cap", XHCIPciState, rh_late_msi_cap, true),
+ DEFINE_PROP_END_OF_LIST()
+};
+
static void xhci_class_init(ObjectClass *klass, void *data)
{
PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
DeviceClass *dc = DEVICE_CLASS(klass);
+ device_class_set_props(dc, xhci_pci_properties);
dc->reset = xhci_pci_reset;
dc->vmsd = &vmstate_xhci_pci;
set_bit(DEVICE_CATEGORY_USB, dc->categories);
diff --git a/hw/usb/hcd-xhci-pci.h b/hw/usb/hcd-xhci-pci.h
index c193f79443..086a1feb1e 100644
--- a/hw/usb/hcd-xhci-pci.h
+++ b/hw/usb/hcd-xhci-pci.h
@@ -39,6 +39,7 @@ typedef struct XHCIPciState {
XHCIState xhci;
OnOffAuto msi;
OnOffAuto msix;
+ bool rh_late_msi_cap; /* bz 1912846 */
} XHCIPciState;
#endif
diff --git a/include/hw/boards.h b/include/hw/boards.h
index c92ac8815c..c90a19b4d1 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -449,4 +449,25 @@ extern const size_t hw_compat_2_2_len;
extern GlobalProperty hw_compat_2_1[];
extern const size_t hw_compat_2_1_len;
+extern GlobalProperty hw_compat_rhel_8_5[];
+extern const size_t hw_compat_rhel_8_5_len;
+
+extern GlobalProperty hw_compat_rhel_8_4[];
+extern const size_t hw_compat_rhel_8_4_len;
+
+extern GlobalProperty hw_compat_rhel_8_3[];
+extern const size_t hw_compat_rhel_8_3_len;
+
+extern GlobalProperty hw_compat_rhel_8_2[];
+extern const size_t hw_compat_rhel_8_2_len;
+
+extern GlobalProperty hw_compat_rhel_8_1[];
+extern const size_t hw_compat_rhel_8_1_len;
+
+extern GlobalProperty hw_compat_rhel_8_0[];
+extern const size_t hw_compat_rhel_8_0_len;
+
+extern GlobalProperty hw_compat_rhel_7_6[];
+extern const size_t hw_compat_rhel_7_6_len;
+
#endif
diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h
index 4b7ad77a44..9acff96a86 100644
--- a/include/hw/firmware/smbios.h
+++ b/include/hw/firmware/smbios.h
@@ -272,7 +272,10 @@ void smbios_entry_add(QemuOpts *opts, Error **errp);
void smbios_set_cpuid(uint32_t version, uint32_t features);
void smbios_set_defaults(const char *manufacturer, const char *product,
const char *version, bool legacy_mode,
- bool uuid_encoded, SmbiosEntryPointType ep_type);
+ bool uuid_encoded,
+ const char *stream_product,
+ const char *stream_version,
+ SmbiosEntryPointType ep_type);
uint8_t *smbios_get_table_legacy(MachineState *ms, size_t *length);
void smbios_get_tables(MachineState *ms,
const struct smbios_phys_mem_area *mem_array,
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 1a27de9c8b..91331059d9 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -113,6 +113,9 @@ struct PCMachineClass {
bool smbios_defaults;
bool smbios_legacy_mode;
bool smbios_uuid_encoded;
+ /* New fields needed for Windows HardwareID-6 matching */
+ const char *smbios_stream_product;
+ const char *smbios_stream_version;
/* RAM / address space compat: */
bool gigabyte_align;
--
2.31.1

@ -0,0 +1,352 @@
From 697aaa43e3c0f20fc312f06be6c1093f1ba907e1 Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Fri, 19 Oct 2018 12:53:31 +0200
Subject: Add aarch64 machine types
Adding changes to add RHEL machine types for aarch64 architecture.
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
Rebase notes (6.1.0):
- Use CONFIG_TPM check when using TPM structures
- Add support for default_bus_bypass_iommu
- ea4c0b32d9 arm/virt: Register highmem and gic-version as class properties
- 895e1fa86a hw/arm/virt: Add 8.5 and 9.0 machine types and remove older ones
Rebase notes (7.0.0):
- Added dtb-kaslr-seed option
- Set no_tcg_lpa2 to true
Merged patches (6.2.0):
- 9a3d4fde0e hw/arm/virt: Remove 9.0 machine type
- f7d04d6695 hw: arm: virt: Add hw_compat_rhel_8_5 to 8.5 machine type
Merged patches (7.0.0):
- 3b82be3dd3 redhat: virt-rhel8.5.0: Update machine type compatibility for QEMU 6.2.0 update
- c354a86c9b hw/arm/virt: Register "iommu" as a class property
- c1a2630dc9 hw/arm/virt: Register "its" as a class property
- 9d8c61dc93 hw/arm/virt: Rename default_bus_bypass_iommu
- a1d1b6eeb6 hw/arm/virt: Expose the 'RAS' option
- 47f8fe1b82 hw/arm/virt: Add 9.0 machine type and remove 8.5 one
- ed2346788f hw/arm/virt: Check no_tcg_its and minor style changes
---
hw/arm/virt.c | 234 +++++++++++++++++++++++++++++++++++++++++-
include/hw/arm/virt.h | 8 ++
2 files changed, 241 insertions(+), 1 deletion(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 6a84031fd7..e06862d22a 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -80,6 +80,7 @@
#include "hw/char/pl011.h"
#include "qemu/guest-random.h"
+#if 0 /* Disabled for Red Hat Enterprise Linux */
#define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \
static void virt_##major##_##minor##_class_init(ObjectClass *oc, \
void *data) \
@@ -106,7 +107,48 @@
DEFINE_VIRT_MACHINE_LATEST(major, minor, true)
#define DEFINE_VIRT_MACHINE(major, minor) \
DEFINE_VIRT_MACHINE_LATEST(major, minor, false)
-
+#endif /* disabled for RHEL */
+
+#define DEFINE_RHEL_MACHINE_LATEST(m, n, s, latest) \
+ static void rhel##m##n##s##_virt_class_init(ObjectClass *oc, \
+ void *data) \
+ { \
+ MachineClass *mc = MACHINE_CLASS(oc); \
+ rhel##m##n##s##_virt_options(mc); \
+ mc->desc = "RHEL " # m "." # n "." # s " ARM Virtual Machine"; \
+ if (latest) { \
+ mc->alias = "virt"; \
+ mc->is_default = 1; \
+ } \
+ } \
+ static const TypeInfo rhel##m##n##s##_machvirt_info = { \
+ .name = MACHINE_TYPE_NAME("virt-rhel" # m "." # n "." # s), \
+ .parent = TYPE_RHEL_MACHINE, \
+ .class_init = rhel##m##n##s##_virt_class_init, \
+ }; \
+ static void rhel##m##n##s##_machvirt_init(void) \
+ { \
+ type_register_static(&rhel##m##n##s##_machvirt_info); \
+ } \
+ type_init(rhel##m##n##s##_machvirt_init);
+
+#define DEFINE_RHEL_MACHINE_AS_LATEST(major, minor, subminor) \
+ DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, true)
+#define DEFINE_RHEL_MACHINE(major, minor, subminor) \
+ DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, false)
+
+/* This variable is for changes to properties that are RHEL specific,
+ * different to the current upstream and to be applied to the latest
+ * machine type.
+ */
+GlobalProperty arm_rhel_compat[] = {
+ {
+ .driver = "virtio-net-pci",
+ .property = "romfile",
+ .value = "",
+ },
+};
+const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat);
/* Number of external interrupt lines to configure the GIC with */
#define NUM_IRQS 256
@@ -2250,6 +2292,7 @@ static void machvirt_init(MachineState *machine)
qemu_add_machine_init_done_notifier(&vms->machine_done);
}
+#if 0 /* Disabled for Red Hat Enterprise Linux */
static bool virt_get_secure(Object *obj, Error **errp)
{
VirtMachineState *vms = VIRT_MACHINE(obj);
@@ -2277,6 +2320,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp)
vms->virt = value;
}
+#endif /* disabled for RHEL */
static bool virt_get_highmem(Object *obj, Error **errp)
{
@@ -2402,6 +2446,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp)
vms->ras = value;
}
+#if 0 /* Disabled for Red Hat Enterprise Linux */
static bool virt_get_mte(Object *obj, Error **errp)
{
VirtMachineState *vms = VIRT_MACHINE(obj);
@@ -2415,6 +2460,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp)
vms->mte = value;
}
+#endif /* disabled for RHEL */
static char *virt_get_gic_version(Object *obj, Error **errp)
{
@@ -2818,6 +2864,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str)
return fixed_ipa ? 0 : requested_pa_size;
}
+#if 0 /* Disabled for Red Hat Enterprise Linux */
static void virt_machine_class_init(ObjectClass *oc, void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
@@ -3206,3 +3253,188 @@ static void virt_machine_2_6_options(MachineClass *mc)
vmc->no_pmu = true;
}
DEFINE_VIRT_MACHINE(2, 6)
+#endif /* disabled for RHEL */
+
+static void rhel_machine_class_init(ObjectClass *oc, void *data)
+{
+ MachineClass *mc = MACHINE_CLASS(oc);
+ HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
+
+ mc->family = "virt-rhel-Z";
+ mc->init = machvirt_init;
+ /* Maximum supported VCPU count for all virt-rhel* machines */
+ mc->max_cpus = 384;
+#ifdef CONFIG_TPM
+ machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS);
+#endif
+ mc->block_default_type = IF_VIRTIO;
+ mc->no_cdrom = 1;
+ mc->pci_allow_0_address = true;
+ /* We know we will never create a pre-ARMv7 CPU which needs 1K pages */
+ mc->minimum_page_bits = 12;
+ mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids;
+ mc->cpu_index_to_instance_props = virt_cpu_index_to_props;
+ mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a57");
+ mc->get_default_cpu_node_id = virt_get_default_cpu_node_id;
+ mc->kvm_type = virt_kvm_type;
+ assert(!mc->get_hotplug_handler);
+ mc->get_hotplug_handler = virt_machine_get_hotplug_handler;
+ hc->pre_plug = virt_machine_device_pre_plug_cb;
+ hc->plug = virt_machine_device_plug_cb;
+ hc->unplug_request = virt_machine_device_unplug_request_cb;
+ hc->unplug = virt_machine_device_unplug_cb;
+ mc->nvdimm_supported = true;
+ mc->auto_enable_numa_with_memhp = true;
+ mc->auto_enable_numa_with_memdev = true;
+ mc->default_ram_id = "mach-virt.ram";
+
+ object_class_property_add(oc, "acpi", "OnOffAuto",
+ virt_get_acpi, virt_set_acpi,
+ NULL, NULL);
+ object_class_property_set_description(oc, "acpi",
+ "Enable ACPI");
+
+ object_class_property_add_bool(oc, "highmem", virt_get_highmem,
+ virt_set_highmem);
+ object_class_property_set_description(oc, "highmem",
+ "Set on/off to enable/disable using "
+ "physical address space above 32 bits");
+
+ object_class_property_add_str(oc, "gic-version", virt_get_gic_version,
+ virt_set_gic_version);
+ object_class_property_set_description(oc, "gic-version",
+ "Set GIC version. "
+ "Valid values are 2, 3, host and max");
+
+ object_class_property_add_str(oc, "iommu", virt_get_iommu, virt_set_iommu);
+ object_class_property_set_description(oc, "iommu",
+ "Set the IOMMU type. "
+ "Valid values are none and smmuv3");
+
+ object_class_property_add_bool(oc, "default-bus-bypass-iommu",
+ virt_get_default_bus_bypass_iommu,
+ virt_set_default_bus_bypass_iommu);
+ object_class_property_set_description(oc, "default-bus-bypass-iommu",
+ "Set on/off to enable/disable "
+ "bypass_iommu for default root bus");
+
+ object_class_property_add_bool(oc, "ras", virt_get_ras,
+ virt_set_ras);
+ object_class_property_set_description(oc, "ras",
+ "Set on/off to enable/disable reporting host memory errors "
+ "to a KVM guest using ACPI and guest external abort exceptions");
+
+ object_class_property_add_bool(oc, "its", virt_get_its,
+ virt_set_its);
+ object_class_property_set_description(oc, "its",
+ "Set on/off to enable/disable "
+ "ITS instantiation");
+
+ object_class_property_add_str(oc, "x-oem-id",
+ virt_get_oem_id,
+ virt_set_oem_id);
+ object_class_property_set_description(oc, "x-oem-id",
+ "Override the default value of field OEMID "
+ "in ACPI table header."
+ "The string may be up to 6 bytes in size");
+
+
+ object_class_property_add_str(oc, "x-oem-table-id",
+ virt_get_oem_table_id,
+ virt_set_oem_table_id);
+ object_class_property_set_description(oc, "x-oem-table-id",
+ "Override the default value of field OEM Table ID "
+ "in ACPI table header."
+ "The string may be up to 8 bytes in size");
+
+ object_class_property_add_bool(oc, "dtb-kaslr-seed",
+ virt_get_dtb_kaslr_seed,
+ virt_set_dtb_kaslr_seed);
+ object_class_property_set_description(oc, "dtb-kaslr-seed",
+ "Set off to disable passing of kaslr-seed "
+ "dtb node to guest");
+}
+
+static void rhel_virt_instance_init(Object *obj)
+{
+ VirtMachineState *vms = VIRT_MACHINE(obj);
+ VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
+
+ /* EL3 is disabled by default and non-configurable for RHEL */
+ vms->secure = false;
+
+ /* EL2 is disabled by default and non-configurable for RHEL */
+ vms->virt = false;
+
+ /* High memory is enabled by default */
+ vms->highmem = true;
+ vms->gic_version = VIRT_GIC_VERSION_NOSEL;
+
+ vms->highmem_ecam = !vmc->no_highmem_ecam;
+
+ if (vmc->no_its) {
+ vms->its = false;
+ } else {
+ /* Default allows ITS instantiation */
+ vms->its = true;
+
+ if (vmc->no_tcg_its) {
+ vms->tcg_its = false;
+ } else {
+ vms->tcg_its = true;
+ }
+ }
+
+ /* Default disallows iommu instantiation */
+ vms->iommu = VIRT_IOMMU_NONE;
+
+ /* The default root bus is attached to iommu by default */
+ vms->default_bus_bypass_iommu = false;
+
+ /* Default disallows RAS instantiation and is non-configurable for RHEL */
+ vms->ras = false;
+
+ /* MTE is disabled by default and non-configurable for RHEL */
+ vms->mte = false;
+
+ /* Supply a kaslr-seed by default */
+ vms->dtb_kaslr_seed = true;
+
+ vms->irqmap = a15irqmap;
+
+ virt_flash_create(vms);
+
+ vms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6);
+ vms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8);
+}
+
+static const TypeInfo rhel_machine_info = {
+ .name = TYPE_RHEL_MACHINE,
+ .parent = TYPE_MACHINE,
+ .abstract = true,
+ .instance_size = sizeof(VirtMachineState),
+ .class_size = sizeof(VirtMachineClass),
+ .class_init = rhel_machine_class_init,
+ .instance_init = rhel_virt_instance_init,
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_HOTPLUG_HANDLER },
+ { }
+ },
+};
+
+static void rhel_machine_init(void)
+{
+ type_register_static(&rhel_machine_info);
+}
+type_init(rhel_machine_init);
+
+static void rhel900_virt_options(MachineClass *mc)
+{
+ VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc));
+
+ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len);
+
+ /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */
+ vmc->no_tcg_lpa2 = true;
+}
+DEFINE_RHEL_MACHINE_AS_LATEST(9, 0, 0)
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 7e76ee2619..9b1efe8f0e 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -179,9 +179,17 @@ struct VirtMachineState {
#define VIRT_ECAM_ID(high) (high ? VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM)
+#if 0 /* disabled for Red Hat Enterprise Linux */
#define TYPE_VIRT_MACHINE MACHINE_TYPE_NAME("virt")
OBJECT_DECLARE_TYPE(VirtMachineState, VirtMachineClass, VIRT_MACHINE)
+#else
+#define TYPE_RHEL_MACHINE MACHINE_TYPE_NAME("virt-rhel")
+typedef struct VirtMachineClass VirtMachineClass;
+typedef struct VirtMachineState VirtMachineState;
+DECLARE_OBJ_CHECKERS(VirtMachineState, VirtMachineClass, VIRT_MACHINE, TYPE_RHEL_MACHINE)
+#endif
+
void virt_acpi_setup(VirtMachineState *vms);
bool virt_is_acpi_enabled(VirtMachineState *vms);
--
2.31.1

@ -0,0 +1,528 @@
From f61b3d7dc000886e23943457ee9baf1d4cae43b4 Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Fri, 19 Oct 2018 13:27:13 +0200
Subject: Add ppc64 machine types
Adding changes to add RHEL machine types for ppc64 architecture.
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
Rebase notes (6.2.0):
- Fixed rebase conflict relicts
- Update machine type compat for 6.2 (from MR 66)
Merged patches (6.1.0):
- c438c25ac3 redhat: Define pseries-rhel8.5.0 machine type
- a3995e2eff Remove RHEL 7.0.0 machine type (only ppc64 changes)
- ad3190a79b Remove RHEL 7.1.0 machine type (only ppc64 changes)
- 84bbe15d4e Remove RHEL 7.2.0 machine type (only ppc64 changes)
- 0215eb3356 Remove RHEL 7.3.0 machine types (only ppc64 changes)
- af69d1ca6e Remove RHEL 7.4.0 machine types (only ppc64 changes)
- 8f7a74ab78 Remove RHEL 7.5.0 machine types (only ppc64 changes)
---
hw/ppc/spapr.c | 243 ++++++++++++++++++++++++++++++++++++++++
hw/ppc/spapr_cpu_core.c | 13 +++
include/hw/ppc/spapr.h | 4 +
target/ppc/compat.c | 13 ++-
target/ppc/cpu.h | 1 +
target/ppc/kvm.c | 27 +++++
target/ppc/kvm_ppc.h | 13 +++
7 files changed, 313 insertions(+), 1 deletion(-)
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index a4372ba189..5fdf8b506d 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1622,6 +1622,9 @@ static void spapr_machine_reset(MachineState *machine)
pef_kvm_reset(machine->cgs, &error_fatal);
spapr_caps_apply(spapr);
+ if (spapr->svm_allowed) {
+ kvmppc_svm_allow(&error_fatal);
+ }
first_ppc_cpu = POWERPC_CPU(first_cpu);
if (kvm_enabled() && kvmppc_has_cap_mmu_radix() &&
@@ -3317,6 +3320,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp)
spapr->host_serial = g_strdup(value);
}
+static bool spapr_get_svm_allowed(Object *obj, Error **errp)
+{
+ SpaprMachineState *spapr = SPAPR_MACHINE(obj);
+
+ return spapr->svm_allowed;
+}
+
+static void spapr_set_svm_allowed(Object *obj, bool value, Error **errp)
+{
+ SpaprMachineState *spapr = SPAPR_MACHINE(obj);
+
+ spapr->svm_allowed = value;
+}
+
static void spapr_instance_init(Object *obj)
{
SpaprMachineState *spapr = SPAPR_MACHINE(obj);
@@ -3395,6 +3412,12 @@ static void spapr_instance_init(Object *obj)
spapr_get_host_serial, spapr_set_host_serial);
object_property_set_description(obj, "host-serial",
"Host serial number to advertise in guest device tree");
+ object_property_add_bool(obj, "x-svm-allowed",
+ spapr_get_svm_allowed,
+ spapr_set_svm_allowed);
+ object_property_set_description(obj, "x-svm-allowed",
+ "Allow the guest to become a Secure Guest"
+ " (experimental only)");
}
static void spapr_machine_finalizefn(Object *obj)
@@ -4652,6 +4675,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
vmc->client_architecture_support = spapr_vof_client_architecture_support;
vmc->quiesce = spapr_vof_quiesce;
vmc->setprop = spapr_vof_setprop;
+ smc->has_power9_support = true;
}
static const TypeInfo spapr_machine_info = {
@@ -4703,6 +4727,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc)
} \
type_init(spapr_machine_register_##suffix)
+#if 0 /* Disabled for Red Hat Enterprise Linux */
/*
* pseries-7.0
*/
@@ -4830,6 +4855,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc)
}
DEFINE_SPAPR_MACHINE(4_1, "4.1", false);
+#endif
/*
* pseries-4.0
@@ -4849,6 +4875,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index,
*nv2atsd = 0;
return true;
}
+
+#if 0 /* Disabled for Red Hat Enterprise Linux */
static void spapr_machine_4_0_class_options(MachineClass *mc)
{
SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
@@ -5176,6 +5204,221 @@ static void spapr_machine_2_1_class_options(MachineClass *mc)
compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len);
}
DEFINE_SPAPR_MACHINE(2_1, "2.1", false);
+#endif
+
+static void spapr_machine_rhel_default_class_options(MachineClass *mc)
+{
+ /*
+ * Defaults for the latest behaviour inherited from the base class
+ * can be overriden here for all pseries-rhel* machines.
+ */
+
+ /* Maximum supported VCPU count */
+ mc->max_cpus = 384;
+}
+
+/*
+ * pseries-rhel8.5.0
+ * like pseries-6.0
+ */
+
+static void spapr_machine_rhel850_class_options(MachineClass *mc)
+{
+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
+ /* The default machine type must apply the RHEL specific defaults */
+ spapr_machine_rhel_default_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_rhel_8_5,
+ hw_compat_rhel_8_5_len);
+ smc->pre_6_2_numa_affinity = true;
+ mc->smp_props.prefer_sockets = true;
+}
+
+DEFINE_SPAPR_MACHINE(rhel850, "rhel8.5.0", true);
+
+/*
+ * pseries-rhel8.4.0
+ * like pseries-5.2
+ */
+
+static void spapr_machine_rhel840_class_options(MachineClass *mc)
+{
+ spapr_machine_rhel850_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_rhel_8_4,
+ hw_compat_rhel_8_4_len);
+}
+
+DEFINE_SPAPR_MACHINE(rhel840, "rhel8.4.0", false);
+
+/*
+ * pseries-rhel8.3.0
+ * like pseries-5.1
+ */
+
+static void spapr_machine_rhel830_class_options(MachineClass *mc)
+{
+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
+ spapr_machine_rhel840_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_rhel_8_3,
+ hw_compat_rhel_8_3_len);
+
+ /* from pseries-5.1 */
+ smc->pre_5_2_numa_associativity = true;
+}
+
+DEFINE_SPAPR_MACHINE(rhel830, "rhel8.3.0", false);
+
+/*
+ * pseries-rhel8.2.0
+ * like pseries-4.2 + pseries-5.0
+ * except SPAPR_CAP_CCF_ASSIST that has been backported to pseries-rhel8.1.0
+ */
+
+static void spapr_machine_rhel820_class_options(MachineClass *mc)
+{
+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+ /* from pseries-5.0 */
+ static GlobalProperty compat[] = {
+ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pre-5.1-associativity", "on" },
+ };
+
+ spapr_machine_rhel830_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_rhel_8_2,
+ hw_compat_rhel_8_2_len);
+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
+
+ /* from pseries-4.2 */
+ smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_OFF;
+ smc->rma_limit = 16 * GiB;
+ mc->nvdimm_supported = false;
+
+ /* from pseries-5.0 */
+ mc->numa_mem_supported = true;
+ smc->pre_5_1_assoc_refpoints = true;
+}
+
+DEFINE_SPAPR_MACHINE(rhel820, "rhel8.2.0", false);
+
+/*
+ * pseries-rhel8.1.0
+ * like pseries-4.1
+ */
+
+static void spapr_machine_rhel810_class_options(MachineClass *mc)
+{
+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+ static GlobalProperty compat[] = {
+ /* Only allow 4kiB and 64kiB IOMMU pagesizes */
+ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pgsz", "0x11000" },
+ };
+
+ spapr_machine_rhel820_class_options(mc);
+
+ /* from pseries-4.1 */
+ smc->linux_pci_probe = false;
+ smc->smp_threads_vsmt = false;
+ compat_props_add(mc->compat_props, hw_compat_rhel_8_1,
+ hw_compat_rhel_8_1_len);
+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
+
+ /* from pseries-4.2 */
+ smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
+}
+
+DEFINE_SPAPR_MACHINE(rhel810, "rhel8.1.0", false);
+
+/*
+ * pseries-rhel8.0.0
+ * like pseries-3.1 and pseries-4.0
+ * except SPAPR_CAP_CFPC, SPAPR_CAP_SBBC and SPAPR_CAP_IBS
+ * that have been backported to pseries-rhel8.0.0
+ */
+
+static void spapr_machine_rhel800_class_options(MachineClass *mc)
+{
+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
+ spapr_machine_rhel810_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_rhel_8_0,
+ hw_compat_rhel_8_0_len);
+
+ /* pseries-4.0 */
+ smc->phb_placement = phb_placement_4_0;
+ smc->irq = &spapr_irq_xics;
+ smc->pre_4_1_migration = true;
+
+ /* pseries-3.1 */
+ mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0");
+ smc->update_dt_enabled = false;
+ smc->dr_phb_enabled = false;
+ smc->broken_host_serial_model = true;
+ smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF;
+}
+
+DEFINE_SPAPR_MACHINE(rhel800, "rhel8.0.0", false);
+
+/*
+ * pseries-rhel7.6.0
+ * like spapr_compat_2_12 and spapr_compat_3_0
+ * spapr_compat_0 is empty
+ */
+GlobalProperty spapr_compat_rhel7_6[] = {
+ { TYPE_POWERPC_CPU, "pre-3.0-migration", "on" },
+ { TYPE_SPAPR_CPU_CORE, "pre-3.0-migration", "on" },
+};
+const size_t spapr_compat_rhel7_6_len = G_N_ELEMENTS(spapr_compat_rhel7_6);
+
+
+static void spapr_machine_rhel760_class_options(MachineClass *mc)
+{
+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
+ spapr_machine_rhel800_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len);
+ compat_props_add(mc->compat_props, spapr_compat_rhel7_6, spapr_compat_rhel7_6_len);
+
+ /* from spapr_machine_3_0_class_options() */
+ smc->legacy_irq_allocation = true;
+ smc->nr_xirqs = 0x400;
+ smc->irq = &spapr_irq_xics_legacy;
+
+ /* from spapr_machine_2_12_class_options() */
+ /* We depend on kvm_enabled() to choose a default value for the
+ * hpt-max-page-size capability. Of course we can't do it here
+ * because this is too early and the HW accelerator isn't initialzed
+ * yet. Postpone this to machine init (see default_caps_with_cpu()).
+ */
+ smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 0;
+
+ /* SPAPR_CAP_WORKAROUND enabled in pseries-rhel800 by
+ * f21757edc554
+ * "Enable mitigations by default for pseries-4.0 machine type")
+ */
+ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_BROKEN;
+ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN;
+ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN;
+}
+
+DEFINE_SPAPR_MACHINE(rhel760, "rhel7.6.0", false);
+
+/*
+ * pseries-rhel7.6.0-sxxm
+ *
+ * pseries-rhel7.6.0 with speculative execution exploit mitigations enabled by default
+ */
+
+static void spapr_machine_rhel760sxxm_class_options(MachineClass *mc)
+{
+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
+ spapr_machine_rhel760_class_options(mc);
+ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND;
+ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND;
+ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD;
+}
+
+DEFINE_SPAPR_MACHINE(rhel760sxxm, "rhel7.6.0-sxxm", false);
static void spapr_machine_register_types(void)
{
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index fcb5dfe792..ab8fb5bf62 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -25,6 +25,7 @@
#include "sysemu/reset.h"
#include "sysemu/hw_accel.h"
#include "qemu/error-report.h"
+#include "cpu-models.h"
static void spapr_reset_vcpu(PowerPCCPU *cpu)
{
@@ -259,6 +260,7 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr,
{
CPUPPCState *env = &cpu->env;
CPUState *cs = CPU(cpu);
+ SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
if (!qdev_realize(DEVICE(cpu), NULL, errp)) {
return false;
@@ -270,6 +272,17 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr,
/* Set time-base frequency to 512 MHz. vhyp must be set first. */
cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ);
+ if (!smc->has_power9_support &&
+ (((spapr->max_compat_pvr &&
+ ppc_compat_cmp(spapr->max_compat_pvr,
+ CPU_POWERPC_LOGICAL_3_00) >= 0)) ||
+ (!spapr->max_compat_pvr &&
+ ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0, 0)))) {
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ "POWER9 CPU is not supported by this machine class");
+ return false;
+ }
+
if (spapr_irq_cpu_intc_create(spapr, cpu, errp) < 0) {
qdev_unrealize(DEVICE(cpu));
return false;
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index f5c33dcc86..4a68e0a901 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -154,6 +154,7 @@ struct SpaprMachineClass {
bool pre_5_2_numa_associativity;
bool pre_6_2_numa_affinity;
+ bool has_power9_support;
bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index,
uint64_t *buid, hwaddr *pio,
hwaddr *mmio32, hwaddr *mmio64,
@@ -241,6 +242,9 @@ struct SpaprMachineState {
/* Set by -boot */
char *boot_device;
+ /* Secure Guest support via x-svm-allowed */
+ bool svm_allowed;
+
/*< public >*/
char *kvm_type;
char *host_model;
diff --git a/target/ppc/compat.c b/target/ppc/compat.c
index 7949a24f5a..f207a9ba01 100644
--- a/target/ppc/compat.c
+++ b/target/ppc/compat.c
@@ -114,8 +114,19 @@ static const CompatInfo *compat_by_pvr(uint32_t pvr)
return NULL;
}
+long ppc_compat_cmp(uint32_t pvr1, uint32_t pvr2)
+{
+ const CompatInfo *compat1 = compat_by_pvr(pvr1);
+ const CompatInfo *compat2 = compat_by_pvr(pvr2);
+
+ g_assert(compat1);
+ g_assert(compat2);
+
+ return compat1 - compat2;
+}
+
static bool pcc_compat(PowerPCCPUClass *pcc, uint32_t compat_pvr,
- uint32_t min_compat_pvr, uint32_t max_compat_pvr)
+ uint32_t min_compat_pvr, uint32_t max_compat_pvr)
{
const CompatInfo *compat = compat_by_pvr(compat_pvr);
const CompatInfo *min = compat_by_pvr(min_compat_pvr);
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 047b24ba50..79c5ac50b9 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1462,6 +1462,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch)
/* Compatibility modes */
#if defined(TARGET_PPC64)
+long ppc_compat_cmp(uint32_t pvr1, uint32_t pvr2);
bool ppc_check_compat(PowerPCCPU *cpu, uint32_t compat_pvr,
uint32_t min_compat_pvr, uint32_t max_compat_pvr);
bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr,
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index dc93b99189..154888cce5 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -90,6 +90,7 @@ static int cap_ppc_nested_kvm_hv;
static int cap_large_decr;
static int cap_fwnmi;
static int cap_rpt_invalidate;
+static int cap_ppc_secure_guest;
static uint32_t debug_inst_opcode;
@@ -137,6 +138,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
kvmppc_get_cpu_characteristics(s);
cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
+ cap_ppc_secure_guest = kvm_vm_check_extension(s, KVM_CAP_PPC_SECURE_GUEST);
cap_large_decr = kvmppc_get_dec_bits();
cap_fwnmi = kvm_vm_check_extension(s, KVM_CAP_PPC_FWNMI);
/*
@@ -2563,6 +2565,16 @@ int kvmppc_has_cap_rpt_invalidate(void)
return cap_rpt_invalidate;
}
+bool kvmppc_has_cap_secure_guest(void)
+{
+ return !!cap_ppc_secure_guest;
+}
+
+int kvmppc_enable_cap_secure_guest(void)
+{
+ return kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SECURE_GUEST, 0, 1);
+}
+
PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
{
uint32_t host_pvr = mfpvr();
@@ -2959,3 +2971,18 @@ bool kvm_arch_cpu_check_are_resettable(void)
{
return true;
}
+
+void kvmppc_svm_allow(Error **errp)
+{
+ if (!kvm_enabled()) {
+ error_setg(errp, "No PEF support in tcg, try x-svm-allowed=off");
+ return;
+ }
+
+ if (!kvmppc_has_cap_secure_guest()) {
+ error_setg(errp, "KVM implementation does not support secure guests, "
+ "try x-svm-allowed=off");
+ } else if (kvmppc_enable_cap_secure_guest() < 0) {
+ error_setg(errp, "Error enabling x-svm-allowed, try x-svm-allowed=off");
+ }
+}
diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
index ee9325bf9a..20dbb95989 100644
--- a/target/ppc/kvm_ppc.h
+++ b/target/ppc/kvm_ppc.h
@@ -40,6 +40,7 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu);
target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
bool radix, bool gtse,
uint64_t proc_tbl);
+void kvmppc_svm_allow(Error **errp);
#ifndef CONFIG_USER_ONLY
bool kvmppc_spapr_use_multitce(void);
int kvmppc_spapr_enable_inkernel_multitce(void);
@@ -74,6 +75,8 @@ int kvmppc_get_cap_large_decr(void);
int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable);
int kvmppc_has_cap_rpt_invalidate(void);
int kvmppc_enable_hwrng(void);
+bool kvmppc_has_cap_secure_guest(void);
+int kvmppc_enable_cap_secure_guest(void);
int kvmppc_put_books_sregs(PowerPCCPU *cpu);
PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void);
void kvmppc_check_papr_resize_hpt(Error **errp);
@@ -393,6 +396,16 @@ static inline int kvmppc_has_cap_rpt_invalidate(void)
return false;
}
+static inline bool kvmppc_has_cap_secure_guest(void)
+{
+ return false;
+}
+
+static inline int kvmppc_enable_cap_secure_guest(void)
+{
+ return -1;
+}
+
static inline int kvmppc_enable_hwrng(void)
{
return -1;
--
2.31.1

@ -0,0 +1,186 @@
From 680f343e58a50a99d17bc7dedd3ee90980912023 Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Fri, 19 Oct 2018 13:47:32 +0200
Subject: Add s390x machine types
Adding changes to add RHEL machine types for s390x architecture.
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
--
Merged patches (6.1.0):
- 64a9a5c971 hw/s390x: Remove the RHEL7-only machine type
- 395516d62b redhat: s390x: add rhel-8.5.0 compat machine
Merged patches (6.2.0):
- 3bf66f4520 redhat: Add s390x machine type compatibility update for 6.1 rebase
Merged patches (7.0.0):
- e6ff4de4f7 redhat: Add s390x machine type compatibility handling for the rebase to v6.2
- 4b0efa7e21 redhat: Add rhel8.6.0 and rhel9.0.0 machine types for s390x
- dcc64971bf RHEL: mark old machine types as deprecated (partialy)
---
hw/core/machine.c | 6 +++
hw/s390x/s390-virtio-ccw.c | 104 ++++++++++++++++++++++++++++++++++++-
include/hw/boards.h | 2 +
3 files changed, 111 insertions(+), 1 deletion(-)
diff --git a/hw/core/machine.c b/hw/core/machine.c
index ea430d844e..77202a3570 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -37,6 +37,12 @@
#include "hw/virtio/virtio.h"
#include "hw/virtio/virtio-pci.h"
+/*
+ * RHEL only: machine types for previous major releases are deprecated
+ */
+const char *rhel_old_machine_deprecation =
+ "machine types for previous major releases are deprecated";
+
/*
* Mostly the same as hw_compat_6_0 and hw_compat_6_1
*/
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 90480e7cf9..ec4176a1e0 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -767,7 +767,7 @@ bool css_migration_enabled(void)
{ \
MachineClass *mc = MACHINE_CLASS(oc); \
ccw_machine_##suffix##_class_options(mc); \
- mc->desc = "VirtIO-ccw based S390 machine v" verstr; \
+ mc->desc = "VirtIO-ccw based S390 machine " verstr; \
if (latest) { \
mc->alias = "s390-ccw-virtio"; \
mc->is_default = true; \
@@ -791,6 +791,7 @@ bool css_migration_enabled(void)
} \
type_init(ccw_machine_register_##suffix)
+#if 0 /* Disabled for Red Hat Enterprise Linux */
static void ccw_machine_7_0_instance_options(MachineState *machine)
{
}
@@ -1115,6 +1116,107 @@ static void ccw_machine_2_4_class_options(MachineClass *mc)
compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
}
DEFINE_CCW_MACHINE(2_4, "2.4", false);
+#endif
+
+static void ccw_machine_rhel900_instance_options(MachineState *machine)
+{
+}
+
+static void ccw_machine_rhel900_class_options(MachineClass *mc)
+{
+}
+DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true);
+
+static void ccw_machine_rhel860_instance_options(MachineState *machine)
+{
+ /* Note: The -rhel8.6.0 and -rhel9.0.0 machines are technically identical */
+ ccw_machine_rhel900_instance_options(machine);
+}
+
+static void ccw_machine_rhel860_class_options(MachineClass *mc)
+{
+ ccw_machine_rhel900_class_options(mc);
+
+ /* All RHEL machines for prior major releases are deprecated */
+ mc->deprecation_reason = rhel_old_machine_deprecation;
+}
+DEFINE_CCW_MACHINE(rhel860, "rhel8.6.0", false);
+
+static void ccw_machine_rhel850_instance_options(MachineState *machine)
+{
+ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_0 };
+
+ ccw_machine_rhel860_instance_options(machine);
+
+ s390_set_qemu_cpu_model(0x2964, 13, 2, qemu_cpu_feat);
+
+ s390_cpudef_featoff_greater(16, 1, S390_FEAT_NNPA);
+ s390_cpudef_featoff_greater(16, 1, S390_FEAT_VECTOR_PACKED_DECIMAL_ENH2);
+ s390_cpudef_featoff_greater(16, 1, S390_FEAT_BEAR_ENH);
+ s390_cpudef_featoff_greater(16, 1, S390_FEAT_RDP);
+ s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAI);
+}
+
+static void ccw_machine_rhel850_class_options(MachineClass *mc)
+{
+ ccw_machine_rhel860_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len);
+ mc->smp_props.prefer_sockets = true;
+}
+DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", false);
+
+static void ccw_machine_rhel840_instance_options(MachineState *machine)
+{
+ ccw_machine_rhel850_instance_options(machine);
+}
+
+static void ccw_machine_rhel840_class_options(MachineClass *mc)
+{
+ ccw_machine_rhel850_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_rhel_8_4, hw_compat_rhel_8_4_len);
+}
+DEFINE_CCW_MACHINE(rhel840, "rhel8.4.0", false);
+
+static void ccw_machine_rhel820_instance_options(MachineState *machine)
+{
+ ccw_machine_rhel840_instance_options(machine);
+}
+
+static void ccw_machine_rhel820_class_options(MachineClass *mc)
+{
+ ccw_machine_rhel840_class_options(mc);
+ mc->fixup_ram_size = s390_fixup_ram_size;
+ /* we did not publish a rhel8.3.0 machine */
+ compat_props_add(mc->compat_props, hw_compat_rhel_8_3, hw_compat_rhel_8_3_len);
+ compat_props_add(mc->compat_props, hw_compat_rhel_8_2, hw_compat_rhel_8_2_len);
+}
+DEFINE_CCW_MACHINE(rhel820, "rhel8.2.0", false);
+
+static void ccw_machine_rhel760_instance_options(MachineState *machine)
+{
+ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V3_1 };
+
+ ccw_machine_rhel820_instance_options(machine);
+
+ s390_set_qemu_cpu_model(0x2827, 12, 2, qemu_cpu_feat);
+
+ /* The multiple-epoch facility was not available with rhel7.6.0 on z14GA1 */
+ s390_cpudef_featoff(14, 1, S390_FEAT_MULTIPLE_EPOCH);
+ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_QSIE);
+ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_QTOUE);
+ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_STOE);
+ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_STOUE);
+}
+
+static void ccw_machine_rhel760_class_options(MachineClass *mc)
+{
+ ccw_machine_rhel820_class_options(mc);
+ /* We never published the s390x version of RHEL-AV 8.0 and 8.1, so add this here */
+ compat_props_add(mc->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len);
+ compat_props_add(mc->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len);
+ compat_props_add(mc->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len);
+}
+DEFINE_CCW_MACHINE(rhel760, "rhel7.6.0", false);
static void ccw_machine_register_types(void)
{
diff --git a/include/hw/boards.h b/include/hw/boards.h
index c90a19b4d1..bf59275f18 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -470,4 +470,6 @@ extern const size_t hw_compat_rhel_8_0_len;
extern GlobalProperty hw_compat_rhel_7_6[];
extern const size_t hw_compat_rhel_7_6_len;
+extern const char *rhel_old_machine_deprecation;
+
#endif
--
2.31.1

@ -0,0 +1,714 @@
From 427a575ca57966bc72e1ebf218081da530d435d7 Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Fri, 19 Oct 2018 13:10:31 +0200
Subject: Add x86_64 machine types
Adding changes to add RHEL machine types for x86_64 architecture.
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
Rebase notes (6.1.0):
- Update qemu64 cpu spec
Rebase notes (7.0.0):
- Reset alias for all machine-types except latest one
Merged patches (6.1.0):
- 59c284ad3b x86: Add x86 rhel8.5 machine types
- a8868b42fe redhat: x86: Enable 'kvm-asyncpf-int' by default
- a3995e2eff Remove RHEL 7.0.0 machine type (only x86_64 changes)
- ad3190a79b Remove RHEL 7.1.0 machine type (only x86_64 changes)
- 84bbe15d4e Remove RHEL 7.2.0 machine type (only x86_64 changes)
- 0215eb3356 Remove RHEL 7.3.0 machine types (only x86_64 changes)
- af69d1ca6e Remove RHEL 7.4.0 machine types (only x86_64 changes)
- 8f7a74ab78 Remove RHEL 7.5.0 machine types (only x86_64 changes)
Merged patches (7.0.0):
- eae7d8dd3c x86/rhel machine types: Add pc_rhel_8_5_compat
- 6762f56469 x86/rhel machine types: Wire compat into q35 and i440fx
- 5762101438 rhel machine types/x86: set prefer_sockets
- 9ba9ddc632 x86: Add q35 RHEL 8.6.0 machine type
- 6110d865e5 x86: Add q35 RHEL 9.0.0 machine type
- dcc64971bf RHEL: mark old machine types as deprecated (partialy)
- 6b396f182b RHEL: disable "seqpacket" for "vhost-vsock-device" in rhel8.6.0
---
hw/core/machine.c | 10 ++
hw/i386/pc.c | 135 +++++++++++++++++++++-
hw/i386/pc_piix.c | 79 ++++++++++++-
hw/i386/pc_q35.c | 227 ++++++++++++++++++++++++++++++++++++-
hw/s390x/s390-virtio-ccw.c | 1 +
include/hw/boards.h | 5 +
include/hw/i386/pc.h | 24 ++++
target/i386/kvm/kvm-cpu.c | 1 +
target/i386/kvm/kvm.c | 4 +
tests/qtest/pvpanic-test.c | 5 +-
10 files changed, 484 insertions(+), 7 deletions(-)
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 77202a3570..28989b6e7b 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -43,6 +43,16 @@
const char *rhel_old_machine_deprecation =
"machine types for previous major releases are deprecated";
+GlobalProperty hw_compat_rhel_8_6[] = {
+ /* hw_compat_rhel_8_6 bz 2065589 */
+ /*
+ * vhost-vsock device in RHEL 8 kernels doesn't support seqpacket, so
+ * we need do disable it downstream on the latest hw_compat_rhel_8.
+ */
+ { "vhost-vsock-device", "seqpacket", "off" },
+};
+const size_t hw_compat_rhel_8_6_len = G_N_ELEMENTS(hw_compat_rhel_8_6);
+
/*
* Mostly the same as hw_compat_6_0 and hw_compat_6_1
*/
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index fd55fc725c..263d882af6 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -375,6 +375,137 @@ GlobalProperty pc_compat_1_4[] = {
};
const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4);
+/* This macro is for changes to properties that are RHEL specific,
+ * different to the current upstream and to be applied to the latest
+ * machine type.
+ */
+GlobalProperty pc_rhel_compat[] = {
+ { TYPE_X86_CPU, "host-phys-bits", "on" },
+ { TYPE_X86_CPU, "host-phys-bits-limit", "48" },
+ { TYPE_X86_CPU, "vmx-entry-load-perf-global-ctrl", "off" },
+ { TYPE_X86_CPU, "vmx-exit-load-perf-global-ctrl", "off" },
+ /* bz 1508330 */
+ { "vfio-pci", "x-no-geforce-quirks", "on" },
+ /* bz 1941397 */
+ { TYPE_X86_CPU, "kvm-asyncpf-int", "on" },
+};
+const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat);
+
+GlobalProperty pc_rhel_8_5_compat[] = {
+ /* pc_rhel_8_5_compat from pc_compat_6_0 */
+ { "qemu64" "-" TYPE_X86_CPU, "family", "6" },
+ /* pc_rhel_8_5_compat from pc_compat_6_0 */
+ { "qemu64" "-" TYPE_X86_CPU, "model", "6" },
+ /* pc_rhel_8_5_compat from pc_compat_6_0 */
+ { "qemu64" "-" TYPE_X86_CPU, "stepping", "3" },
+ /* pc_rhel_8_5_compat from pc_compat_6_0 */
+ { TYPE_X86_CPU, "x-vendor-cpuid-only", "off" },
+ /* pc_rhel_8_5_compat from pc_compat_6_0 */
+ { "ICH9-LPC", ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, "off" },
+
+ /* pc_rhel_8_5_compat from pc_compat_6_1 */
+ { TYPE_X86_CPU, "hv-version-id-build", "0x1bbc" },
+ /* pc_rhel_8_5_compat from pc_compat_6_1 */
+ { TYPE_X86_CPU, "hv-version-id-major", "0x0006" },
+ /* pc_rhel_8_5_compat from pc_compat_6_1 */
+ { TYPE_X86_CPU, "hv-version-id-minor", "0x0001" },
+};
+const size_t pc_rhel_8_5_compat_len = G_N_ELEMENTS(pc_rhel_8_5_compat);
+
+GlobalProperty pc_rhel_8_4_compat[] = {
+ /* pc_rhel_8_4_compat from pc_compat_5_2 */
+ { "ICH9-LPC", "x-smi-cpu-hotunplug", "off" },
+ { TYPE_X86_CPU, "kvm-asyncpf-int", "off" },
+};
+const size_t pc_rhel_8_4_compat_len = G_N_ELEMENTS(pc_rhel_8_4_compat);
+
+GlobalProperty pc_rhel_8_3_compat[] = {
+ /* pc_rhel_8_3_compat from pc_compat_5_1 */
+ { "ICH9-LPC", "x-smi-cpu-hotplug", "off" },
+};
+const size_t pc_rhel_8_3_compat_len = G_N_ELEMENTS(pc_rhel_8_3_compat);
+
+GlobalProperty pc_rhel_8_2_compat[] = {
+ /* pc_rhel_8_2_compat from pc_compat_4_2 */
+ { "mch", "smbase-smram", "off" },
+};
+const size_t pc_rhel_8_2_compat_len = G_N_ELEMENTS(pc_rhel_8_2_compat);
+
+/* pc_rhel_8_1_compat is empty since pc_4_1_compat is */
+GlobalProperty pc_rhel_8_1_compat[] = { };
+const size_t pc_rhel_8_1_compat_len = G_N_ELEMENTS(pc_rhel_8_1_compat);
+
+GlobalProperty pc_rhel_8_0_compat[] = {
+ /* pc_rhel_8_0_compat from pc_compat_3_1 */
+ { "intel-iommu", "dma-drain", "off" },
+ /* pc_rhel_8_0_compat from pc_compat_3_1 */
+ { "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "off" },
+ /* pc_rhel_8_0_compat from pc_compat_3_1 */
+ { "Opteron_G4" "-" TYPE_X86_CPU, "rdtscp", "off" },
+ /* pc_rhel_8_0_compat from pc_compat_3_1 */
+ { "Opteron_G4" "-" TYPE_X86_CPU, "npt", "off" },
+ /* pc_rhel_8_0_compat from pc_compat_3_1 */
+ { "Opteron_G4" "-" TYPE_X86_CPU, "nrip-save", "off" },
+ /* pc_rhel_8_0_compat from pc_compat_3_1 */
+ { "Opteron_G5" "-" TYPE_X86_CPU, "rdtscp", "off" },
+ /* pc_rhel_8_0_compat from pc_compat_3_1 */
+ { "Opteron_G5" "-" TYPE_X86_CPU, "npt", "off" },
+ /* pc_rhel_8_0_compat from pc_compat_3_1 */
+ { "Opteron_G5" "-" TYPE_X86_CPU, "nrip-save", "off" },
+ /* pc_rhel_8_0_compat from pc_compat_3_1 */
+ { "EPYC" "-" TYPE_X86_CPU, "npt", "off" },
+ /* pc_rhel_8_0_compat from pc_compat_3_1 */
+ { "EPYC" "-" TYPE_X86_CPU, "nrip-save", "off" },
+ /* pc_rhel_8_0_compat from pc_compat_3_1 */
+ { "EPYC-IBPB" "-" TYPE_X86_CPU, "npt", "off" },
+ /* pc_rhel_8_0_compat from pc_compat_3_1 */
+ { "EPYC-IBPB" "-" TYPE_X86_CPU, "nrip-save", "off" },
+ /** The mpx=on entries from pc_compat_3_1 are in pc_rhel_7_6_compat **/
+ /* pc_rhel_8_0_compat from pc_compat_3_1 */
+ { "Cascadelake-Server" "-" TYPE_X86_CPU, "stepping", "5" },
+ /* pc_rhel_8_0_compat from pc_compat_3_1 */
+ { TYPE_X86_CPU, "x-intel-pt-auto-level", "off" },
+};
+const size_t pc_rhel_8_0_compat_len = G_N_ELEMENTS(pc_rhel_8_0_compat);
+
+/* Similar to PC_COMPAT_3_0 + PC_COMPAT_2_12, but:
+ * all of the 2_12 stuff was already in 7.6 from bz 1481253
+ * x-migrate-smi-count comes from PC_COMPAT_2_11 but
+ * is really tied to kernel version so keep it off on 7.x
+ * machine types irrespective of host.
+ */
+GlobalProperty pc_rhel_7_6_compat[] = {
+ /* pc_rhel_7_6_compat from pc_compat_3_0 */
+ { TYPE_X86_CPU, "x-hv-synic-kvm-only", "on" },
+ /* pc_rhel_7_6_compat from pc_compat_3_0 */
+ { "Skylake-Server" "-" TYPE_X86_CPU, "pku", "off" },
+ /* pc_rhel_7_6_compat from pc_compat_3_0 */
+ { "Skylake-Server-IBRS" "-" TYPE_X86_CPU, "pku", "off" },
+ /* pc_rhel_7_6_compat from pc_compat_2_11 */
+ { TYPE_X86_CPU, "x-migrate-smi-count", "off" },
+ /* pc_rhel_7_6_compat from pc_compat_2_11 */
+ { "Skylake-Client" "-" TYPE_X86_CPU, "mpx", "on" },
+ /* pc_rhel_7_6_compat from pc_compat_2_11 */
+ { "Skylake-Client-IBRS" "-" TYPE_X86_CPU, "mpx", "on" },
+ /* pc_rhel_7_6_compat from pc_compat_2_11 */
+ { "Skylake-Server" "-" TYPE_X86_CPU, "mpx", "on" },
+ /* pc_rhel_7_6_compat from pc_compat_2_11 */
+ { "Skylake-Server-IBRS" "-" TYPE_X86_CPU, "mpx", "on" },
+ /* pc_rhel_7_6_compat from pc_compat_2_11 */
+ { "Cascadelake-Server" "-" TYPE_X86_CPU, "mpx", "on" },
+ /* pc_rhel_7_6_compat from pc_compat_2_11 */
+ { "Icelake-Client" "-" TYPE_X86_CPU, "mpx", "on" },
+ /* pc_rhel_7_6_compat from pc_compat_2_11 */
+ { "Icelake-Server" "-" TYPE_X86_CPU, "mpx", "on" },
+};
+const size_t pc_rhel_7_6_compat_len = G_N_ELEMENTS(pc_rhel_7_6_compat);
+
+/*
+ * The PC_RHEL_*_COMPAT serve the same purpose for RHEL-7 machine
+ * types as the PC_COMPAT_* do for upstream types.
+ * PC_RHEL_7_*_COMPAT apply both to i440fx and q35 types.
+ */
+
GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled)
{
GSIState *s;
@@ -1738,6 +1869,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
pcmc->pvh_enabled = true;
pcmc->kvmclock_create_always = true;
assert(!mc->get_hotplug_handler);
+ mc->async_pf_vmexit_disable = false;
mc->get_hotplug_handler = pc_get_hotplug_handler;
mc->hotplug_allowed = pc_hotplug_allowed;
mc->cpu_index_to_instance_props = x86_cpu_index_to_props;
@@ -1748,7 +1880,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
mc->has_hotpluggable_cpus = true;
mc->default_boot_order = "cad";
mc->block_default_type = IF_IDE;
- mc->max_cpus = 255;
+ /* 240: max CPU count for RHEL */
+ mc->max_cpus = 240;
mc->reset = pc_machine_reset;
mc->wakeup = pc_machine_wakeup;
hc->pre_plug = pc_machine_device_pre_plug_cb;
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index c797e98312..0cacc0d623 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -50,6 +50,7 @@
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "sysemu/xen.h"
+#include "migration/migration.h"
#ifdef CONFIG_XEN
#include <xen/hvm/hvm_info_table.h>
#include "hw/xen/xen_pt.h"
@@ -174,8 +175,8 @@ static void pc_init1(MachineState *machine,
if (pcmc->smbios_defaults) {
MachineClass *mc = MACHINE_GET_CLASS(machine);
/* These values are guest ABI, do not change */
- smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)",
- mc->name, pcmc->smbios_legacy_mode,
+ smbios_set_defaults("Red Hat", "KVM",
+ mc->desc, pcmc->smbios_legacy_mode,
pcmc->smbios_uuid_encoded,
pcmc->smbios_stream_product,
pcmc->smbios_stream_version,
@@ -314,6 +315,7 @@ static void pc_init1(MachineState *machine,
* hw_compat_*, pc_compat_*, or * pc_*_machine_options().
*/
+#if 0 /* Disabled for Red Hat Enterprise Linux */
static void pc_compat_2_3_fn(MachineState *machine)
{
X86MachineState *x86ms = X86_MACHINE(machine);
@@ -967,3 +969,76 @@ static void xenfv_3_1_machine_options(MachineClass *m)
DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init,
xenfv_3_1_machine_options);
#endif
+#endif /* Disabled for Red Hat Enterprise Linux */
+
+/* Red Hat Enterprise Linux machine types */
+
+/* Options for the latest rhel7 machine type */
+static void pc_machine_rhel7_options(MachineClass *m)
+{
+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
+ m->family = "pc_piix_Y";
+ m->default_machine_opts = "firmware=bios-256k.bin,hpet=off";
+ pcmc->default_nic_model = "e1000";
+ pcmc->pci_root_uid = 0;
+ m->default_display = "std";
+ m->no_parallel = 1;
+ m->numa_mem_supported = true;
+ m->auto_enable_numa_with_memdev = false;
+ machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE);
+ compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len);
+ m->alias = "pc";
+ m->is_default = 1;
+ m->smp_props.prefer_sockets = true;
+}
+
+static void pc_init_rhel760(MachineState *machine)
+{
+ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \
+ TYPE_I440FX_PCI_DEVICE);
+}
+
+static void pc_machine_rhel760_options(MachineClass *m)
+{
+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
+ pc_machine_rhel7_options(m);
+ m->desc = "RHEL 7.6.0 PC (i440FX + PIIX, 1996)";
+ m->async_pf_vmexit_disable = true;
+ m->smbus_no_migration_support = true;
+
+ /* All RHEL machines for prior major releases are deprecated */
+ m->deprecation_reason = rhel_old_machine_deprecation;
+
+ pcmc->pvh_enabled = false;
+ pcmc->default_cpu_version = CPU_VERSION_LEGACY;
+ pcmc->kvmclock_create_always = false;
+ /* From pc_i440fx_5_1_machine_options() */
+ pcmc->pci_root_uid = 1;
+ compat_props_add(m->compat_props, hw_compat_rhel_8_6,
+ hw_compat_rhel_8_6_len);
+ compat_props_add(m->compat_props, hw_compat_rhel_8_5,
+ hw_compat_rhel_8_5_len);
+ compat_props_add(m->compat_props, pc_rhel_8_5_compat,
+ pc_rhel_8_5_compat_len);
+ compat_props_add(m->compat_props, hw_compat_rhel_8_4,
+ hw_compat_rhel_8_4_len);
+ compat_props_add(m->compat_props, pc_rhel_8_4_compat,
+ pc_rhel_8_4_compat_len);
+ compat_props_add(m->compat_props, hw_compat_rhel_8_3,
+ hw_compat_rhel_8_3_len);
+ compat_props_add(m->compat_props, pc_rhel_8_3_compat,
+ pc_rhel_8_3_compat_len);
+ compat_props_add(m->compat_props, hw_compat_rhel_8_2,
+ hw_compat_rhel_8_2_len);
+ compat_props_add(m->compat_props, pc_rhel_8_2_compat,
+ pc_rhel_8_2_compat_len);
+ compat_props_add(m->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len);
+ compat_props_add(m->compat_props, pc_rhel_8_1_compat, pc_rhel_8_1_compat_len);
+ compat_props_add(m->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len);
+ compat_props_add(m->compat_props, pc_rhel_8_0_compat, pc_rhel_8_0_compat_len);
+ compat_props_add(m->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len);
+ compat_props_add(m->compat_props, pc_rhel_7_6_compat, pc_rhel_7_6_compat_len);
+}
+
+DEFINE_PC_MACHINE(rhel760, "pc-i440fx-rhel7.6.0", pc_init_rhel760,
+ pc_machine_rhel760_options);
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index b695f88c45..157160e069 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -197,8 +197,8 @@ static void pc_q35_init(MachineState *machine)
if (pcmc->smbios_defaults) {
/* These values are guest ABI, do not change */
- smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)",
- mc->name, pcmc->smbios_legacy_mode,
+ smbios_set_defaults("Red Hat", "KVM",
+ mc->desc, pcmc->smbios_legacy_mode,
pcmc->smbios_uuid_encoded,
pcmc->smbios_stream_product,
pcmc->smbios_stream_version,
@@ -342,6 +342,7 @@ static void pc_q35_init(MachineState *machine)
DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn)
+#if 0 /* Disabled for Red Hat Enterprise Linux */
static void pc_q35_machine_options(MachineClass *m)
{
PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
@@ -631,3 +632,225 @@ static void pc_q35_2_4_machine_options(MachineClass *m)
DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL,
pc_q35_2_4_machine_options);
+#endif /* Disabled for Red Hat Enterprise Linux */
+
+/* Red Hat Enterprise Linux machine types */
+
+/* Options for the latest rhel q35 machine type */
+static void pc_q35_machine_rhel_options(MachineClass *m)
+{
+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
+ pcmc->default_nic_model = "e1000e";
+ pcmc->pci_root_uid = 0;
+ m->family = "pc_q35_Z";
+ m->units_per_default_bus = 1;
+ m->default_machine_opts = "firmware=bios-256k.bin,hpet=off";
+ m->default_display = "std";
+ m->no_floppy = 1;
+ m->no_parallel = 1;
+ pcmc->default_cpu_version = 1;
+ machine_class_allow_dynamic_sysbus_dev(m, TYPE_AMD_IOMMU_DEVICE);
+ machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE);
+ machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE);
+ m->alias = "q35";
+ m->max_cpus = 710;
+ compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len);
+}
+
+static void pc_q35_init_rhel900(MachineState *machine)
+{
+ pc_q35_init(machine);
+}
+
+static void pc_q35_machine_rhel900_options(MachineClass *m)
+{
+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
+ pc_q35_machine_rhel_options(m);
+ m->desc = "RHEL-9.0.0 PC (Q35 + ICH9, 2009)";
+ pcmc->smbios_stream_product = "RHEL";
+ pcmc->smbios_stream_version = "9.0.0";
+}
+
+DEFINE_PC_MACHINE(q35_rhel900, "pc-q35-rhel9.0.0", pc_q35_init_rhel900,
+ pc_q35_machine_rhel900_options);
+
+static void pc_q35_init_rhel860(MachineState *machine)
+{
+ pc_q35_init(machine);
+}
+
+static void pc_q35_machine_rhel860_options(MachineClass *m)
+{
+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
+ pc_q35_machine_rhel900_options(m);
+ m->desc = "RHEL-8.6.0 PC (Q35 + ICH9, 2009)";
+ m->alias = NULL;
+
+ /* All RHEL machines for prior major releases are deprecated */
+ m->deprecation_reason = rhel_old_machine_deprecation;
+
+ pcmc->smbios_stream_product = "RHEL-AV";
+ pcmc->smbios_stream_version = "8.6.0";
+ compat_props_add(m->compat_props, hw_compat_rhel_8_6,
+ hw_compat_rhel_8_6_len);
+}
+
+DEFINE_PC_MACHINE(q35_rhel860, "pc-q35-rhel8.6.0", pc_q35_init_rhel860,
+ pc_q35_machine_rhel860_options);
+
+
+static void pc_q35_init_rhel850(MachineState *machine)
+{
+ pc_q35_init(machine);
+}
+
+static void pc_q35_machine_rhel850_options(MachineClass *m)
+{
+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
+ pc_q35_machine_rhel860_options(m);
+ m->desc = "RHEL-8.5.0 PC (Q35 + ICH9, 2009)";
+ m->alias = NULL;
+ pcmc->smbios_stream_product = "RHEL-AV";
+ pcmc->smbios_stream_version = "8.5.0";
+ compat_props_add(m->compat_props, hw_compat_rhel_8_5,
+ hw_compat_rhel_8_5_len);
+ compat_props_add(m->compat_props, pc_rhel_8_5_compat,
+ pc_rhel_8_5_compat_len);
+ m->smp_props.prefer_sockets = true;
+}
+
+DEFINE_PC_MACHINE(q35_rhel850, "pc-q35-rhel8.5.0", pc_q35_init_rhel850,
+ pc_q35_machine_rhel850_options);
+
+
+static void pc_q35_init_rhel840(MachineState *machine)
+{
+ pc_q35_init(machine);
+}
+
+static void pc_q35_machine_rhel840_options(MachineClass *m)
+{
+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
+ pc_q35_machine_rhel850_options(m);
+ m->desc = "RHEL-8.4.0 PC (Q35 + ICH9, 2009)";
+ m->alias = NULL;
+ pcmc->smbios_stream_product = "RHEL-AV";
+ pcmc->smbios_stream_version = "8.4.0";
+ compat_props_add(m->compat_props, hw_compat_rhel_8_4,
+ hw_compat_rhel_8_4_len);
+ compat_props_add(m->compat_props, pc_rhel_8_4_compat,
+ pc_rhel_8_4_compat_len);
+}
+
+DEFINE_PC_MACHINE(q35_rhel840, "pc-q35-rhel8.4.0", pc_q35_init_rhel840,
+ pc_q35_machine_rhel840_options);
+
+
+static void pc_q35_init_rhel830(MachineState *machine)
+{
+ pc_q35_init(machine);
+}
+
+static void pc_q35_machine_rhel830_options(MachineClass *m)
+{
+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
+ pc_q35_machine_rhel840_options(m);
+ m->desc = "RHEL-8.3.0 PC (Q35 + ICH9, 2009)";
+ m->alias = NULL;
+ pcmc->smbios_stream_product = "RHEL-AV";
+ pcmc->smbios_stream_version = "8.3.0";
+ compat_props_add(m->compat_props, hw_compat_rhel_8_3,
+ hw_compat_rhel_8_3_len);
+ compat_props_add(m->compat_props, pc_rhel_8_3_compat,
+ pc_rhel_8_3_compat_len);
+ /* From pc_q35_5_1_machine_options() */
+ pcmc->kvmclock_create_always = false;
+ /* From pc_q35_5_1_machine_options() */
+ pcmc->pci_root_uid = 1;
+}
+
+DEFINE_PC_MACHINE(q35_rhel830, "pc-q35-rhel8.3.0", pc_q35_init_rhel830,
+ pc_q35_machine_rhel830_options);
+
+static void pc_q35_init_rhel820(MachineState *machine)
+{
+ pc_q35_init(machine);
+}
+
+static void pc_q35_machine_rhel820_options(MachineClass *m)
+{
+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
+ pc_q35_machine_rhel830_options(m);
+ m->desc = "RHEL-8.2.0 PC (Q35 + ICH9, 2009)";
+ m->alias = NULL;
+ m->numa_mem_supported = true;
+ m->auto_enable_numa_with_memdev = false;
+ pcmc->smbios_stream_product = "RHEL-AV";
+ pcmc->smbios_stream_version = "8.2.0";
+ compat_props_add(m->compat_props, hw_compat_rhel_8_2,
+ hw_compat_rhel_8_2_len);
+ compat_props_add(m->compat_props, pc_rhel_8_2_compat,
+ pc_rhel_8_2_compat_len);
+}
+
+DEFINE_PC_MACHINE(q35_rhel820, "pc-q35-rhel8.2.0", pc_q35_init_rhel820,
+ pc_q35_machine_rhel820_options);
+
+static void pc_q35_init_rhel810(MachineState *machine)
+{
+ pc_q35_init(machine);
+}
+
+static void pc_q35_machine_rhel810_options(MachineClass *m)
+{
+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
+ pc_q35_machine_rhel820_options(m);
+ m->desc = "RHEL-8.1.0 PC (Q35 + ICH9, 2009)";
+ m->alias = NULL;
+ pcmc->smbios_stream_product = NULL;
+ pcmc->smbios_stream_version = NULL;
+ compat_props_add(m->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len);
+ compat_props_add(m->compat_props, pc_rhel_8_1_compat, pc_rhel_8_1_compat_len);
+}
+
+DEFINE_PC_MACHINE(q35_rhel810, "pc-q35-rhel8.1.0", pc_q35_init_rhel810,
+ pc_q35_machine_rhel810_options);
+
+static void pc_q35_init_rhel800(MachineState *machine)
+{
+ pc_q35_init(machine);
+}
+
+static void pc_q35_machine_rhel800_options(MachineClass *m)
+{
+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
+ pc_q35_machine_rhel810_options(m);
+ m->desc = "RHEL-8.0.0 PC (Q35 + ICH9, 2009)";
+ m->smbus_no_migration_support = true;
+ m->alias = NULL;
+ pcmc->pvh_enabled = false;
+ pcmc->default_cpu_version = CPU_VERSION_LEGACY;
+ compat_props_add(m->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len);
+ compat_props_add(m->compat_props, pc_rhel_8_0_compat, pc_rhel_8_0_compat_len);
+}
+
+DEFINE_PC_MACHINE(q35_rhel800, "pc-q35-rhel8.0.0", pc_q35_init_rhel800,
+ pc_q35_machine_rhel800_options);
+
+static void pc_q35_init_rhel760(MachineState *machine)
+{
+ pc_q35_init(machine);
+}
+
+static void pc_q35_machine_rhel760_options(MachineClass *m)
+{
+ pc_q35_machine_rhel800_options(m);
+ m->alias = NULL;
+ m->desc = "RHEL-7.6.0 PC (Q35 + ICH9, 2009)";
+ m->async_pf_vmexit_disable = true;
+ compat_props_add(m->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len);
+ compat_props_add(m->compat_props, pc_rhel_7_6_compat, pc_rhel_7_6_compat_len);
+}
+
+DEFINE_PC_MACHINE(q35_rhel760, "pc-q35-rhel7.6.0", pc_q35_init_rhel760,
+ pc_q35_machine_rhel760_options);
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index ec4176a1e0..465a2a09d2 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -1136,6 +1136,7 @@ static void ccw_machine_rhel860_instance_options(MachineState *machine)
static void ccw_machine_rhel860_class_options(MachineClass *mc)
{
ccw_machine_rhel900_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_rhel_8_6, hw_compat_rhel_8_6_len);
/* All RHEL machines for prior major releases are deprecated */
mc->deprecation_reason = rhel_old_machine_deprecation;
diff --git a/include/hw/boards.h b/include/hw/boards.h
index bf59275f18..d1555665df 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -266,6 +266,8 @@ struct MachineClass {
strList *allowed_dynamic_sysbus_devices;
bool auto_enable_numa_with_memhp;
bool auto_enable_numa_with_memdev;
+ /* RHEL only */
+ bool async_pf_vmexit_disable;
bool ignore_boot_device_suffixes;
bool smbus_no_migration_support;
bool nvdimm_supported;
@@ -449,6 +451,9 @@ extern const size_t hw_compat_2_2_len;
extern GlobalProperty hw_compat_2_1[];
extern const size_t hw_compat_2_1_len;
+extern GlobalProperty hw_compat_rhel_8_6[];
+extern const size_t hw_compat_rhel_8_6_len;
+
extern GlobalProperty hw_compat_rhel_8_5[];
extern const size_t hw_compat_rhel_8_5_len;
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 91331059d9..419a6ec24b 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -289,6 +289,30 @@ extern const size_t pc_compat_1_5_len;
extern GlobalProperty pc_compat_1_4[];
extern const size_t pc_compat_1_4_len;
+extern GlobalProperty pc_rhel_compat[];
+extern const size_t pc_rhel_compat_len;
+
+extern GlobalProperty pc_rhel_8_5_compat[];
+extern const size_t pc_rhel_8_5_compat_len;
+
+extern GlobalProperty pc_rhel_8_4_compat[];
+extern const size_t pc_rhel_8_4_compat_len;
+
+extern GlobalProperty pc_rhel_8_3_compat[];
+extern const size_t pc_rhel_8_3_compat_len;
+
+extern GlobalProperty pc_rhel_8_2_compat[];
+extern const size_t pc_rhel_8_2_compat_len;
+
+extern GlobalProperty pc_rhel_8_1_compat[];
+extern const size_t pc_rhel_8_1_compat_len;
+
+extern GlobalProperty pc_rhel_8_0_compat[];
+extern const size_t pc_rhel_8_0_compat_len;
+
+extern GlobalProperty pc_rhel_7_6_compat[];
+extern const size_t pc_rhel_7_6_compat_len;
+
/* Helper for setting model-id for CPU models that changed model-id
* depending on QEMU versions up to QEMU 2.4.
*/
diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c
index 5eb955ce9a..74c1396a93 100644
--- a/target/i386/kvm/kvm-cpu.c
+++ b/target/i386/kvm/kvm-cpu.c
@@ -137,6 +137,7 @@ static PropValue kvm_default_props[] = {
{ "acpi", "off" },
{ "monitor", "off" },
{ "svm", "off" },
+ { "kvm-pv-unhalt", "on" },
{ NULL, NULL },
};
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 9cf8e03669..6d1e009443 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -3488,6 +3488,7 @@ static int kvm_get_msrs(X86CPU *cpu)
struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries;
int ret, i;
uint64_t mtrr_top_bits;
+ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
kvm_msr_buf_reset(cpu);
@@ -3822,6 +3823,9 @@ static int kvm_get_msrs(X86CPU *cpu)
break;
case MSR_KVM_ASYNC_PF_EN:
env->async_pf_en_msr = msrs[i].data;
+ if (mc->async_pf_vmexit_disable) {
+ env->async_pf_en_msr &= ~(1ULL << 2);
+ }
break;
case MSR_KVM_ASYNC_PF_INT:
env->async_pf_int_msr = msrs[i].data;
diff --git a/tests/qtest/pvpanic-test.c b/tests/qtest/pvpanic-test.c
index 6dcad2db49..580c2c43d2 100644
--- a/tests/qtest/pvpanic-test.c
+++ b/tests/qtest/pvpanic-test.c
@@ -17,7 +17,7 @@ static void test_panic_nopause(void)
QDict *response, *data;
QTestState *qts;
- qts = qtest_init("-device pvpanic -action panic=none");
+ qts = qtest_init("-M q35 -device pvpanic -action panic=none");
val = qtest_inb(qts, 0x505);
g_assert_cmpuint(val, ==, 3);
@@ -40,7 +40,8 @@ static void test_panic(void)
QDict *response, *data;
QTestState *qts;
- qts = qtest_init("-device pvpanic -action panic=pause");
+ /* RHEL: Use q35 */
+ qts = qtest_init("-M q35 -device pvpanic -action panic=pause");
val = qtest_inb(qts, 0x505);
g_assert_cmpuint(val, ==, 3);
--
2.31.1

@ -0,0 +1,186 @@
From 5e419e5e0a721bdbbfa6d9b82c8be5c5b3d26a01 Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Wed, 2 Sep 2020 09:39:41 +0200
Subject: Enable make check
Fixing tests after device disabling and machine types changes and enabling
make check run during build.
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
Rebase changes (6.1.0):
- removed unnecessary test changes
Rebase changes (6.2.0):
- new way of disabling bios-table-test
Rebase changes (7.0.0):
- Disable testing virtio-iommu-pci
- Rename default_bus_bypass_iommu property to default-bus-bypass-iommu
- Disable qtest-bios-table for aarch64
- Removed redhat chunks for boot-serial-test.c, cdrom-test.c and cpu-plug-test.c qtests
- Do not disable boot-order-test, prom-env-test and boot-serial-test qtests
- Use rhel machine type for new intel hda qtest
- Remove unnecessary changes in iotest 051
- Remove changes in bios-tables-test.c and prom-env-test.c qtests
Merged patches (6.1.0):
- 2f129df7d3 redhat: Enable the 'test-block-iothread' test again
---
.distro/qemu-kvm.spec.template | 5 ++---
tests/qtest/fuzz-e1000e-test.c | 2 +-
tests/qtest/fuzz-virtio-scsi-test.c | 2 +-
tests/qtest/intel-hda-test.c | 2 +-
tests/qtest/libqos/meson.build | 2 +-
tests/qtest/lpc-ich9-test.c | 2 +-
tests/qtest/meson.build | 4 ----
tests/qtest/usb-hcd-xhci-test.c | 4 ++++
tests/qtest/virtio-net-failover.c | 1 +
9 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/tests/qtest/fuzz-e1000e-test.c b/tests/qtest/fuzz-e1000e-test.c
index 66229e6096..947fba73b7 100644
--- a/tests/qtest/fuzz-e1000e-test.c
+++ b/tests/qtest/fuzz-e1000e-test.c
@@ -17,7 +17,7 @@ static void test_lp1879531_eth_get_rss_ex_dst_addr(void)
{
QTestState *s;
- s = qtest_init("-nographic -monitor none -serial none -M pc-q35-5.0");
+ s = qtest_init("-nographic -monitor none -serial none -M pc-q35-rhel8.4.0");
qtest_outl(s, 0xcf8, 0x80001010);
qtest_outl(s, 0xcfc, 0xe1020000);
diff --git a/tests/qtest/fuzz-virtio-scsi-test.c b/tests/qtest/fuzz-virtio-scsi-test.c
index aaf6d10e18..43727d62ac 100644
--- a/tests/qtest/fuzz-virtio-scsi-test.c
+++ b/tests/qtest/fuzz-virtio-scsi-test.c
@@ -19,7 +19,7 @@ static void test_mmio_oob_from_memory_region_cache(void)
{
QTestState *s;
- s = qtest_init("-M pc-q35-5.2 -display none -m 512M "
+ s = qtest_init("-M pc-q35-rhel8.4.0 -display none -m 512M "
"-device virtio-scsi,num_queues=8,addr=03.0 ");
qtest_outl(s, 0xcf8, 0x80001811);
diff --git a/tests/qtest/intel-hda-test.c b/tests/qtest/intel-hda-test.c
index a58c98e4d1..c8387e39ce 100644
--- a/tests/qtest/intel-hda-test.c
+++ b/tests/qtest/intel-hda-test.c
@@ -38,7 +38,7 @@ static void test_issue542_ich6(void)
{
QTestState *s;
- s = qtest_init("-nographic -nodefaults -M pc-q35-6.2 "
+ s = qtest_init("-nographic -nodefaults -M pc-q35-rhel9.0.0 "
"-device intel-hda,id=" HDA_ID CODEC_DEVICES);
qtest_outl(s, 0xcf8, 0x80000804);
diff --git a/tests/qtest/libqos/meson.build b/tests/qtest/libqos/meson.build
index e988d15791..46f7dcb81a 100644
--- a/tests/qtest/libqos/meson.build
+++ b/tests/qtest/libqos/meson.build
@@ -41,7 +41,7 @@ libqos_srcs = files('../libqtest.c',
'virtio-rng.c',
'virtio-scsi.c',
'virtio-serial.c',
- 'virtio-iommu.c',
+# 'virtio-iommu.c',
# qgraph machines:
'aarch64-xlnx-zcu102-machine.c',
diff --git a/tests/qtest/lpc-ich9-test.c b/tests/qtest/lpc-ich9-test.c
index fe0bef9980..7a9d51579b 100644
--- a/tests/qtest/lpc-ich9-test.c
+++ b/tests/qtest/lpc-ich9-test.c
@@ -15,7 +15,7 @@ static void test_lp1878642_pci_bus_get_irq_level_assert(void)
{
QTestState *s;
- s = qtest_init("-M pc-q35-5.0 "
+ s = qtest_init("-M pc-q35-rhel8.4.0 "
"-nographic -monitor none -serial none");
qtest_outl(s, 0xcf8, 0x8000f840); /* PMBASE */
diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index d25f82bb5a..67cd32def1 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -73,7 +73,6 @@ qtests_i386 = \
config_all_devices.has_key('CONFIG_Q35') and \
config_all_devices.has_key('CONFIG_VIRTIO_PCI') and \
slirp.found() ? ['virtio-net-failover'] : []) + \
- (unpack_edk2_blobs ? ['bios-tables-test'] : []) + \
qtests_pci + \
['fdc-test',
'ide-test',
@@ -86,7 +85,6 @@ qtests_i386 = \
'drive_del-test',
'tco-test',
'cpu-plug-test',
- 'q35-test',
'vmgenid-test',
'migration-test',
'test-x86-cpuid-compat',
@@ -216,7 +214,6 @@ qtests_arm = \
# TODO: once aarch64 TCG is fixed on ARM 32 bit host, make bios-tables-test unconditional
qtests_aarch64 = \
- (cpu != 'arm' and unpack_edk2_blobs ? ['bios-tables-test'] : []) + \
(config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? ['tpm-tis-device-test'] : []) + \
(config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? ['tpm-tis-device-swtpm-test'] : []) + \
(config_all_devices.has_key('CONFIG_XLNX_ZYNQMP_ARM') ? ['xlnx-can-test', 'fuzz-xlnx-dp-test'] : []) + \
@@ -231,7 +228,6 @@ qtests_s390x = \
(config_host.has_key('CONFIG_POSIX') ? ['test-filter-redirector'] : []) + \
['boot-serial-test',
'drive_del-test',
- 'device-plug-test',
'virtio-ccw-test',
'cpu-plug-test',
'migration-test']
diff --git a/tests/qtest/usb-hcd-xhci-test.c b/tests/qtest/usb-hcd-xhci-test.c
index 10ef9d2a91..3855873050 100644
--- a/tests/qtest/usb-hcd-xhci-test.c
+++ b/tests/qtest/usb-hcd-xhci-test.c
@@ -21,6 +21,7 @@ static void test_xhci_hotplug(void)
usb_test_hotplug(global_qtest, "xhci", "1", NULL);
}
+#if 0 /* Disabled for Red Hat Enterprise Linux */
static void test_usb_uas_hotplug(void)
{
QTestState *qts = global_qtest;
@@ -36,6 +37,7 @@ static void test_usb_uas_hotplug(void)
qtest_qmp_device_del(qts, "scsihd");
qtest_qmp_device_del(qts, "uas");
}
+#endif
static void test_usb_ccid_hotplug(void)
{
@@ -56,7 +58,9 @@ int main(int argc, char **argv)
qtest_add_func("/xhci/pci/init", test_xhci_init);
qtest_add_func("/xhci/pci/hotplug", test_xhci_hotplug);
+#if 0 /* Disabled for Red Hat Enterprise Linux */
qtest_add_func("/xhci/pci/hotplug/usb-uas", test_usb_uas_hotplug);
+#endif
qtest_add_func("/xhci/pci/hotplug/usb-ccid", test_usb_ccid_hotplug);
qtest_start("-device nec-usb-xhci,id=xhci"
diff --git a/tests/qtest/virtio-net-failover.c b/tests/qtest/virtio-net-failover.c
index 78811f1c92..44de8af00c 100644
--- a/tests/qtest/virtio-net-failover.c
+++ b/tests/qtest/virtio-net-failover.c
@@ -25,6 +25,7 @@
#define PCI_SEL_BASE 0x0010
#define BASE_MACHINE "-M q35 -nodefaults " \
+ "-global ICH9-LPC.acpi-pci-hotplug-with-bridge-support=on " \
"-device pcie-root-port,id=root0,addr=0x1,bus=pcie.0,chassis=1 " \
"-device pcie-root-port,id=root1,addr=0x2,bus=pcie.0,chassis=2 "
--
2.31.1

@ -0,0 +1,104 @@
From c358fd4c224a9c3f64b4a8fff34cc6b1dc201fa0 Mon Sep 17 00:00:00 2001
From: Bandan Das <bsd@redhat.com>
Date: Tue, 3 Dec 2013 20:05:13 +0100
Subject: vfio: cap number of devices that can be assigned
RH-Author: Bandan Das <bsd@redhat.com>
Message-id: <1386101113-31560-3-git-send-email-bsd@redhat.com>
Patchwork-id: 55984
O-Subject: [PATCH RHEL7 qemu-kvm v2 2/2] vfio: cap number of devices that can be assigned
Bugzilla: 678368
RH-Acked-by: Alex Williamson <alex.williamson@redhat.com>
RH-Acked-by: Marcelo Tosatti <mtosatti@redhat.com>
RH-Acked-by: Michael S. Tsirkin <mst@redhat.com>
Go through all groups to get count of total number of devices
active to enforce limit
Reasoning from Alex for the limit(32) - Assuming 3 slots per
device, with 125 slots (number of memory slots for RHEL 7),
we can support almost 40 devices and still have few slots left
for other uses. Stepping down a bit, the number 32 arbitrarily
matches the number of slots on a PCI bus and is also a nice power
of two.
Count of slots increased to 509 later so we could increase limit
to 64 as some usecases require more than 32 devices.
Signed-off-by: Bandan Das <bsd@redhat.com>
---
hw/vfio/pci.c | 29 ++++++++++++++++++++++++++++-
hw/vfio/pci.h | 1 +
2 files changed, 29 insertions(+), 1 deletion(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 67a183f17b..1e20f9fd59 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -45,6 +45,9 @@
#define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug"
+/* RHEL only: Set once for the first assigned dev */
+static uint16_t device_limit;
+
static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
@@ -2810,9 +2813,30 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
ssize_t len;
struct stat st;
int groupid;
- int i, ret;
+ int ret, i = 0;
bool is_mdev;
+ if (device_limit && device_limit != vdev->assigned_device_limit) {
+ error_setg(errp, "Assigned device limit has been redefined. "
+ "Old:%d, New:%d",
+ device_limit, vdev->assigned_device_limit);
+ return;
+ } else {
+ device_limit = vdev->assigned_device_limit;
+ }
+
+ QLIST_FOREACH(group, &vfio_group_list, next) {
+ QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
+ i++;
+ }
+ }
+
+ if (i >= vdev->assigned_device_limit) {
+ error_setg(errp, "Maximum supported vfio devices (%d) "
+ "already attached", vdev->assigned_device_limit);
+ return;
+ }
+
if (!vdev->vbasedev.sysfsdev) {
if (!(~vdev->host.domain || ~vdev->host.bus ||
~vdev->host.slot || ~vdev->host.function)) {
@@ -3249,6 +3273,9 @@ static Property vfio_pci_dev_properties[] = {
DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false),
DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice,
no_geforce_quirks, false),
+ /* RHEL only */
+ DEFINE_PROP_UINT16("x-assigned-device-limit", VFIOPCIDevice,
+ assigned_device_limit, 64),
DEFINE_PROP_BOOL("x-no-kvm-ioeventfd", VFIOPCIDevice, no_kvm_ioeventfd,
false),
DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd,
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 64777516d1..e0fe6ca97e 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -139,6 +139,7 @@ struct VFIOPCIDevice {
EventNotifier err_notifier;
EventNotifier req_notifier;
int (*resetfn)(struct VFIOPCIDevice *);
+ uint16_t assigned_device_limit;
uint32_t vendor_id;
uint32_t device_id;
uint32_t sub_vendor_id;
--
2.31.1

@ -0,0 +1,55 @@
From ba0c7a5f6b9a1f75666db6b3b795ddf03695dc26 Mon Sep 17 00:00:00 2001
From: Eduardo Habkost <ehabkost@redhat.com>
Date: Wed, 4 Dec 2013 18:53:17 +0100
Subject: Add support statement to -help output
RH-Author: Eduardo Habkost <ehabkost@redhat.com>
Message-id: <1386183197-27761-1-git-send-email-ehabkost@redhat.com>
Patchwork-id: 55994
O-Subject: [qemu-kvm RHEL7 PATCH] Add support statement to -help output
Bugzilla: 972773
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: knoel@redhat.com
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Add support statement to -help output, reporting direct qemu-kvm usage
as unsupported by Red Hat, and advising users to use libvirt instead.
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
---
softmmu/vl.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/softmmu/vl.c b/softmmu/vl.c
index 6f646531a0..9d5dab43d2 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -831,9 +831,17 @@ static void version(void)
QEMU_COPYRIGHT "\n");
}
+static void print_rh_warning(void)
+{
+ printf("\nWARNING: Direct use of qemu-kvm from the command line is not supported by Red Hat.\n"
+ "WARNING: Use libvirt as the stable management interface.\n"
+ "WARNING: Some command line options listed here may not be available in future releases.\n\n");
+}
+
static void help(int exitcode)
{
version();
+ print_rh_warning();
printf("usage: %s [options] [disk_image]\n\n"
"'disk_image' is a raw hard disk image for IDE hard disk 0\n\n",
g_get_prgname());
@@ -859,6 +867,7 @@ static void help(int exitcode)
"\n"
QEMU_HELP_BOTTOM "\n");
+ print_rh_warning();
exit(exitcode);
}
--
2.31.1

@ -0,0 +1,45 @@
From 9ebfd2f6cfa8e79c92e58fd169f90cc768fb865a Mon Sep 17 00:00:00 2001
From: Andrew Jones <drjones@redhat.com>
Date: Tue, 21 Jan 2014 10:46:52 +0100
Subject: globally limit the maximum number of CPUs
We now globally limit the number of VCPUs.
Especially, there is no way one can specify more than
max_cpus VCPUs for a VM.
This allows us the restore the ppc max_cpus limitation to the upstream
default and minimize the ppc hack in kvm-all.c.
Signed-off-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
Signed-off-by: Danilo Cesar Lemes de Paula <ddepaula@redhat.com>
---
accel/kvm/kvm-all.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 5f1377ca04..fdf0e4d429 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2430,6 +2430,18 @@ static int kvm_init(MachineState *ms)
soft_vcpus_limit = kvm_recommended_vcpus(s);
hard_vcpus_limit = kvm_max_vcpus(s);
+#ifdef HOST_PPC64
+ /*
+ * On POWER, the kernel advertises a soft limit based on the
+ * number of CPU threads on the host. We want to allow exceeding
+ * this for testing purposes, so we don't want to set hard limit
+ * to soft limit as on x86.
+ */
+#else
+ /* RHEL doesn't support nr_vcpus > soft_vcpus_limit */
+ hard_vcpus_limit = soft_vcpus_limit;
+#endif
+
while (nc->name) {
if (nc->num > soft_vcpus_limit) {
warn_report("Number of %s cpus requested (%d) exceeds "
--
2.31.1

@ -0,0 +1,61 @@
From 4b6c8cdc52fdf94d4098d278defb3833dce1d189 Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Wed, 8 Jul 2020 08:35:50 +0200
Subject: Use qemu-kvm in documentation instead of qemu-system-<arch>
Patchwork-id: 62380
O-Subject: [RHEV-7.1 qemu-kvm-rhev PATCHv4] Use qemu-kvm in documentation instead of qemu-system-i386
Bugzilla: 1140620
RH-Acked-by: Laszlo Ersek <lersek@redhat.com>
RH-Acked-by: Markus Armbruster <armbru@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
From: Miroslav Rezanina <mrezanin@redhat.com>
We change the name and location of qemu-kvm binaries. Update documentation
to reflect this change. Only architectures available in RHEL are updated.
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
docs/defs.rst.inc | 4 ++--
qemu-options.hx | 10 +++++-----
2 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/docs/defs.rst.inc b/docs/defs.rst.inc
index 52d6454b93..d74dbdeca9 100644
--- a/docs/defs.rst.inc
+++ b/docs/defs.rst.inc
@@ -9,7 +9,7 @@
but the manpages will end up misrendered with following normal text
incorrectly in boldface.
-.. |qemu_system| replace:: qemu-system-x86_64
-.. |qemu_system_x86| replace:: qemu-system-x86_64
+.. |qemu_system| replace:: qemu-kvm
+.. |qemu_system_x86| replace:: qemu-kvm
.. |I2C| replace:: I\ :sup:`2`\ C
.. |I2S| replace:: I\ :sup:`2`\ S
diff --git a/qemu-options.hx b/qemu-options.hx
index 34e9b32a5c..924f61ab6d 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -3233,11 +3233,11 @@ SRST
::
- qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \
- -numa node,memdev=mem \
- -chardev socket,id=chr0,path=/path/to/socket \
- -netdev type=vhost-user,id=net0,chardev=chr0 \
- -device virtio-net-pci,netdev=net0
+ qemu-kvm -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \
+ -numa node,memdev=mem \
+ -chardev socket,id=chr0,path=/path/to/socket \
+ -netdev type=vhost-user,id=net0,chardev=chr0 \
+ -device virtio-net-pci,netdev=net0
``-netdev vhost-vdpa,vhostdev=/path/to/dev``
Establish a vhost-vdpa netdev.
--
2.31.1

@ -0,0 +1,66 @@
From b72e04cb7e417d9e1c973223747ab3a27abda8b4 Mon Sep 17 00:00:00 2001
From: Fam Zheng <famz@redhat.com>
Date: Wed, 14 Jun 2017 15:37:01 +0200
Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only]
RH-Author: Fam Zheng <famz@redhat.com>
Message-id: <20170614153701.14757-1-famz@redhat.com>
Patchwork-id: 75613
O-Subject: [RHV-7.4 qemu-kvm-rhev PATCH v3] virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only]
Bugzilla: 1378816
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Max Reitz <mreitz@redhat.com>
We need a fix for RHEL 7.4 and 7.3.z, but unfortunately upstream isn't
ready. If it were, the changes will be too invasive. To have an idea:
https://lists.gnu.org/archive/html/qemu-devel/2017-05/msg05400.html
is an incomplete attempt to fix part of the issue, and the remaining
work unfortunately involve even more complex changes.
As a band-aid, this partially reverts the effect of ef8875b
(virtio-scsi: Remove op blocker for dataplane, since v2.7). We cannot
simply revert that commit as a whole because we already shipped it in
qemu-kvm-rhev 7.3, since when, block jobs has been possible. We should
only block what has been broken. Also, faithfully reverting the above
commit means adding back the removed op blocker, but that is not enough,
because it still crashes when inserting media into an initially empty
scsi-cd.
All in all, scsi-cd on virtio-scsi-dataplane has basically been unusable
unless the scsi-cd never enters an empty state, so, disable it
altogether. Otherwise it would be much more difficult to avoid
crashing.
Signed-off-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
---
hw/scsi/virtio-scsi.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index 34a968ecfb..7f6da33a8a 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -896,6 +896,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev,
AioContext *old_context;
int ret;
+ /* XXX: Remove this check once block backend is capable of handling
+ * AioContext change upon eject/insert.
+ * s->ctx is NULL if ioeventfd is off, s->ctx is qemu_get_aio_context() if
+ * data plane is not used, both cases are safe for scsi-cd. */
+ if (s->ctx && s->ctx != qemu_get_aio_context() &&
+ object_dynamic_cast(OBJECT(dev), "scsi-cd")) {
+ error_setg(errp, "scsi-cd is not supported by data plane");
+ return;
+ }
if (s->ctx && !s->dataplane_fenced) {
if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) {
return;
--
2.31.1

@ -0,0 +1,60 @@
From 64a06662cdea0ff62efb122be4eab506b2a842d9 Mon Sep 17 00:00:00 2001
From: David Gibson <dgibson@redhat.com>
Date: Wed, 6 Feb 2019 03:58:56 +0000
Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts
RH-Author: David Gibson <dgibson@redhat.com>
Message-id: <20190206035856.19058-1-dgibson@redhat.com>
Patchwork-id: 84246
O-Subject: [RHELAV-8.0/rhel qemu-kvm PATCH] BZ1653590: Require at least 64kiB pages for downstream guests & hosts
Bugzilla: 1653590
RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
RH-Acked-by: Serhii Popovych <spopovyc@redhat.com>
RH-Acked-by: Thomas Huth <thuth@redhat.com>
Most current POWER guests require 64kiB page support, so that's the default
for the cap-hpt-max-pagesize option in qemu which limits available guest
page sizes. We warn if the value is set smaller than that, but don't
outright fail upstream, because we need to allow for the possibility of
guest (and/or host) kernels configured for 4kiB page sizes.
Downstream, however, we simply don't support 4kiB pagesize configured
kernels in guest or host, so we can have qemu simply error out in this
situation.
Testing: Attempted to start a guest with cap-hpt-max-page-size=4k and verified
it failed immediately with a qemu error
Signed-off-by: David Gibson <dgibson@redhat.com>
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
---
hw/ppc/spapr_caps.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 655ab856a0..6aa7f93df9 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -329,12 +329,19 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize,
static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr,
uint8_t val, Error **errp)
{
+#if 0 /* disabled for RHEL */
if (val < 12) {
error_setg(errp, "Require at least 4kiB hpt-max-page-size");
return;
} else if (val < 16) {
warn_report("Many guests require at least 64kiB hpt-max-page-size");
}
+#else /* Only page sizes >=64kiB supported for RHEL */
+ if (val < 16) {
+ error_setg(errp, "Require at least 64kiB hpt-max-page-size");
+ return;
+ }
+#endif
spapr_check_pagesize(spapr, qemu_minrampagesize(), errp);
}
--
2.31.1

@ -0,0 +1,77 @@
From 54f9157a918e1404f2f17ce89a9c8b9088c1bc06 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Fri, 20 Aug 2021 18:25:12 +0200
Subject: qcow2: Deprecation warning when opening v2 images rw
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 37: qcow2: Deprecation warning when opening v2 images rw
RH-Commit: [1/1] f450d0ae32d35063b28c72c4f2d2ebb9e6d8db3e (kmwolf/centos-qemu-kvm)
RH-Bugzilla: 1951814
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Hanna Reitz <hreitz@redhat.com>
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Philippe Mathieu-Daudé <philmd@redhat.com>
qcow2 v3 has been around for a long time (since QEMU 1.1/RHEL 7), so
there is no real reason any more to use it. People still using it might
do so unintentionally. Warn about it and suggest upgrading during the
RHEL 9 timeframe so that the code can possibly be disabled in RHEL 10.
The warning is restricted to read-write mode and the system emulator.
The primary motivation for not having it in qemu-img is that 'qemu-img
amend' for upgrades would warn otherwise. It also avoids having to make
too many changes to the test suite.
bdrv_uses_whitelist() is used as a proxy for deciding whether we are
running in a tool or the system emulator. This is not entirely clean,
but it's what is available and the same function qcow2_do_open() already
uses it this way for another warning.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
patch_name: kvm-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch
present_in_specfile: true
location_in_specfile: 116
---
Rebase notes (6.1.0):
- Replace bs->read_only with bdrv_is_read_only
---
block/qcow2.c | 6 ++++++
tests/qemu-iotests/common.filter | 1 +
2 files changed, 7 insertions(+)
diff --git a/block/qcow2.c b/block/qcow2.c
index b5c47931ef..a795e457ac 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1337,6 +1337,12 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
ret = -ENOTSUP;
goto fail;
}
+ if (header.version < 3 && !bdrv_is_read_only(bs) && bdrv_uses_whitelist()) {
+ warn_report_once("qcow2 v2 images are deprecated and may not be "
+ "supported in future versions. Please consider "
+ "upgrading the image with 'qemu-img amend "
+ "-o compat=v3'.");
+ }
s->qcow_version = header.version;
diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
index cc9f1a5891..6a13757177 100644
--- a/tests/qemu-iotests/common.filter
+++ b/tests/qemu-iotests/common.filter
@@ -83,6 +83,7 @@ _filter_qemu()
{
gsed -e "s#\\(^\\|(qemu) \\)$(basename $QEMU_PROG):#\1QEMU_PROG:#" \
-e 's#^QEMU [0-9]\+\.[0-9]\+\.[0-9]\+ monitor#QEMU X.Y.Z monitor#' \
+ -e "/qcow2 v2 images are deprecated/d" \
-e $'s#\r##' # QEMU monitor uses \r\n line endings
}
--
2.31.1

@ -0,0 +1,135 @@
From 1d6439527aa6ccabb58208c94417778ccc19de39 Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Wed, 9 Feb 2022 04:16:25 -0500
Subject: WRB: Introduce RHEL 9.0.0 hw compat structure
General compatibility structure for post RHEL 9.0.0 rebase.
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
hw/core/machine.c | 9 +++++++++
hw/i386/pc.c | 6 ++++++
hw/i386/pc_piix.c | 4 ++++
hw/i386/pc_q35.c | 4 ++++
hw/s390x/s390-virtio-ccw.c | 2 ++
include/hw/boards.h | 3 +++
include/hw/i386/pc.h | 3 +++
7 files changed, 31 insertions(+)
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 28989b6e7b..dffc3ef4ab 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -53,6 +53,15 @@ GlobalProperty hw_compat_rhel_8_6[] = {
};
const size_t hw_compat_rhel_8_6_len = G_N_ELEMENTS(hw_compat_rhel_8_6);
+/*
+ * Mostly the same as hw_compat_6_2
+ */
+GlobalProperty hw_compat_rhel_9_0[] = {
+ /* hw_compat_rhel_9_0 from hw_compat_6_2 */
+ { "PIIX4_PM", "x-not-migrate-acpi-index", "on"},
+};
+const size_t hw_compat_rhel_9_0_len = G_N_ELEMENTS(hw_compat_rhel_9_0);
+
/*
* Mostly the same as hw_compat_6_0 and hw_compat_6_1
*/
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 263d882af6..0886cfe3fe 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -391,6 +391,12 @@ GlobalProperty pc_rhel_compat[] = {
};
const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat);
+GlobalProperty pc_rhel_9_0_compat[] = {
+ /* pc_rhel_9_0_compat from pc_compat_6_2 */
+ { "virtio-mem", "unplugged-inaccessible", "off" },
+};
+const size_t pc_rhel_9_0_compat_len = G_N_ELEMENTS(pc_rhel_9_0_compat);
+
GlobalProperty pc_rhel_8_5_compat[] = {
/* pc_rhel_8_5_compat from pc_compat_6_0 */
{ "qemu64" "-" TYPE_X86_CPU, "family", "6" },
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 0cacc0d623..dc987fe93b 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -1014,6 +1014,10 @@ static void pc_machine_rhel760_options(MachineClass *m)
pcmc->kvmclock_create_always = false;
/* From pc_i440fx_5_1_machine_options() */
pcmc->pci_root_uid = 1;
+ compat_props_add(m->compat_props, hw_compat_rhel_9_0,
+ hw_compat_rhel_9_0_len);
+ compat_props_add(m->compat_props, pc_rhel_9_0_compat,
+ pc_rhel_9_0_compat_len);
compat_props_add(m->compat_props, hw_compat_rhel_8_6,
hw_compat_rhel_8_6_len);
compat_props_add(m->compat_props, hw_compat_rhel_8_5,
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 157160e069..52c253c570 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -669,6 +669,10 @@ static void pc_q35_machine_rhel900_options(MachineClass *m)
m->desc = "RHEL-9.0.0 PC (Q35 + ICH9, 2009)";
pcmc->smbios_stream_product = "RHEL";
pcmc->smbios_stream_version = "9.0.0";
+ compat_props_add(m->compat_props, hw_compat_rhel_9_0,
+ hw_compat_rhel_9_0_len);
+ compat_props_add(m->compat_props, pc_rhel_9_0_compat,
+ pc_rhel_9_0_compat_len);
}
DEFINE_PC_MACHINE(q35_rhel900, "pc-q35-rhel9.0.0", pc_q35_init_rhel900,
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 465a2a09d2..08e0f6a79b 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -1118,12 +1118,14 @@ static void ccw_machine_2_4_class_options(MachineClass *mc)
DEFINE_CCW_MACHINE(2_4, "2.4", false);
#endif
+
static void ccw_machine_rhel900_instance_options(MachineState *machine)
{
}
static void ccw_machine_rhel900_class_options(MachineClass *mc)
{
+ compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len);
}
DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true);
diff --git a/include/hw/boards.h b/include/hw/boards.h
index d1555665df..635e45dd71 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -451,6 +451,9 @@ extern const size_t hw_compat_2_2_len;
extern GlobalProperty hw_compat_2_1[];
extern const size_t hw_compat_2_1_len;
+extern GlobalProperty hw_compat_rhel_9_0[];
+extern const size_t hw_compat_rhel_9_0_len;
+
extern GlobalProperty hw_compat_rhel_8_6[];
extern const size_t hw_compat_rhel_8_6_len;
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 419a6ec24b..a492c420b5 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -292,6 +292,9 @@ extern const size_t pc_compat_1_4_len;
extern GlobalProperty pc_rhel_compat[];
extern const size_t pc_rhel_compat_len;
+extern GlobalProperty pc_rhel_9_0_compat[];
+extern const size_t pc_rhel_9_0_compat_len;
+
extern GlobalProperty pc_rhel_8_5_compat[];
extern const size_t pc_rhel_8_5_compat_len;
--
2.31.1

@ -0,0 +1,38 @@
From c8ad21ca31892f8798cf82508c2b2c61bf3b9895 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Mon, 4 Apr 2022 12:15:50 +0200
Subject: redhat: Update s390x machine type compatibility for rebase to QEMU
7.0.0
RH-Author: Thomas Huth <thuth@redhat.com>
RH-MergeRequest: 143: Update machine type compatibility for QEMU 7.0.0 update [s390x]
RH-Commit: [23/23] 0ecf97d7bdddc50565b5779c64744b353f715cbd
RH-Bugzilla: 2064782
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
No s390x-specific machine class property updates required this time,
only an update to the default qemu cpu model.
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
hw/s390x/s390-virtio-ccw.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 08e0f6a79b..4a491d4988 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -1121,6 +1121,9 @@ DEFINE_CCW_MACHINE(2_4, "2.4", false);
static void ccw_machine_rhel900_instance_options(MachineState *machine)
{
+ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 };
+
+ s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat);
}
static void ccw_machine_rhel900_class_options(MachineClass *mc)
--
2.31.1

@ -0,0 +1,70 @@
From 38b89dc24551258b630f09d1c654b6c72b265c79 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Thu, 14 Apr 2022 14:58:43 +0100
Subject: pc: Move s3/s4 suspend disabling to compat
RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
RH-MergeRequest: 155: 7.0 machine type fixes (x86)
RH-Commit: [26/26] 7d666032d5f5dab1444ebba085f92f2de4e86699
RH-Bugzilla: 2064771
Our downstream patches currently have tweaks in the C code to disable
s3/s4; Thomas pointed out we can just set the property.
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
hw/acpi/ich9.c | 4 ++--
hw/acpi/piix4.c | 4 ++--
hw/i386/pc.c | 6 ++++++
3 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c
index de1e401cdf..bd9bbade70 100644
--- a/hw/acpi/ich9.c
+++ b/hw/acpi/ich9.c
@@ -435,8 +435,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm)
static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN;
pm->acpi_memory_hotplug.is_enabled = true;
pm->cpu_hotplug_legacy = true;
- pm->disable_s3 = 1;
- pm->disable_s4 = 1;
+ pm->disable_s3 = 0;
+ pm->disable_s4 = 0;
pm->s4_val = 2;
pm->use_acpi_hotplug_bridge = true;
pm->keep_pci_slot_hpc = true;
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index 28544e78c3..2fb2b43248 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -653,8 +653,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev)
static Property piix4_pm_properties[] = {
DEFINE_PROP_UINT32("smb_io_base", PIIX4PMState, smb_io_base, 0),
- DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 1),
- DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 1),
+ DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 0),
+ DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 0),
DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2),
DEFINE_PROP_BOOL(ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, PIIX4PMState,
use_acpi_hotplug_bridge, true),
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 0886cfe3fe..f98f842f80 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -380,6 +380,12 @@ const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4);
* machine type.
*/
GlobalProperty pc_rhel_compat[] = {
+ /* we don't support s3/s4 suspend */
+ { "PIIX4_PM", "disable_s3", "1" },
+ { "PIIX4_PM", "disable_s4", "1" },
+ { "ICH9-LPC", "disable_s3", "1" },
+ { "ICH9-LPC", "disable_s4", "1" },
+
{ TYPE_X86_CPU, "host-phys-bits", "on" },
{ TYPE_X86_CPU, "host-phys-bits-limit", "48" },
{ TYPE_X86_CPU, "vmx-entry-load-perf-global-ctrl", "off" },
--
2.31.1

@ -0,0 +1,10 @@
# The KVM HV implementation on Power can require a significant amount
# of unswappable memory (about half of which also needs to be host
# physically contiguous) to hold the guest's Hash Page Table (HPT) -
# roughly 1/64th of the guest's RAM size, minimum 16MiB.
#
# These limits allow unprivileged users to start smallish VMs, such as
# those used by libguestfs.
#
* hard memlock 65536
* soft memlock 65536

@ -0,0 +1,2 @@
SUBSYSTEM=="virtio-ports", ATTR{name}=="org.qemu.guest_agent.0", \
TAG+="systemd" ENV{SYSTEMD_WANTS}="qemu-guest-agent.service"

@ -0,0 +1,39 @@
qemu-kvm-tests README
=====================
The qemu-kvm-tests rpm contains tests that can be used to verify the
functionality of the installed qemu-kvm package
When installed, the files from this rpm will be arranged in the following
directory structure
tests-src/
├── README
├── scripts
│   ├── qemu.py
│   └── qmp
└── tests
├── acceptance
├── Makefile.include
└── qemu-iotests
The tests/ directory within the tests-src/ directory is setup to remain a copy
of a subset of the tests/ directory from the QEMU source tree
The avocado_qemu tests and qemu-iotests, along with files required for the
execution of the avocado_qemu tests (scripts/qemu.py and scripts/qmp/) will be
installed in a new location - /usr/lib64/qemu-kvm/tests-src/
avocado_qemu tests:
The avocado_qemu tests can be executed by running the following avocado command:
avocado run -p qemu_bin=/usr/libexec/qemu-kvm /usr/lib64/qemu-kvm/tests/acceptance/
Avocado needs to be installed separately using either pip or from source as
Avocado is not being packaged for RHEL-8.
qemu-iotests:
symlinks to corresponding binaries need to be created for QEMU_PROG,
QEMU_IO_PROG, QEMU_IMG_PROG, and QEMU_NBD_PROG before the iotests can be
executed.
The primary purpose of this package is to make these tests available to be
executed as gating tests for the virt module in the RHEL-8 OSCI environment.

@ -0,0 +1 @@
allow virbr0

@ -0,0 +1,87 @@
From ac346634c5731407baa9de709dbd4d5cc6f45301 Mon Sep 17 00:00:00 2001
From: Leonardo Bras <leobras@redhat.com>
Date: Mon, 11 Jul 2022 18:11:12 -0300
Subject: [PATCH 02/11] Add dirty-sync-missed-zero-copy migration stat
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Leonardo Brás <leobras@redhat.com>
RH-MergeRequest: 111: zero-copy-send fixes & improvements
RH-Commit: [2/6] 115035fd0a4e4b9439c91fb0f5d1a2f9244ba369 (LeoBras/centos-qemu-kvm)
RH-Bugzilla: 2107466
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Leonardo Bras <leobras@redhat.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
Acked-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Message-Id: <20220711211112.18951-3-leobras@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit cf20c897338067ab4b70a4596fdccaf90c7e29a1)
Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
migration/migration.c | 2 ++
monitor/hmp-cmds.c | 5 +++++
qapi/migration.json | 7 ++++++-
3 files changed, 13 insertions(+), 1 deletion(-)
diff --git a/migration/migration.c b/migration/migration.c
index 8fb3eae910..3a3a7a4a50 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1017,6 +1017,8 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s)
info->ram->normal_bytes = ram_counters.normal * page_size;
info->ram->mbps = s->mbps;
info->ram->dirty_sync_count = ram_counters.dirty_sync_count;
+ info->ram->dirty_sync_missed_zero_copy =
+ ram_counters.dirty_sync_missed_zero_copy;
info->ram->postcopy_requests = ram_counters.postcopy_requests;
info->ram->page_size = page_size;
info->ram->multifd_bytes = ram_counters.multifd_bytes;
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 634968498b..9cec01de38 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -305,6 +305,11 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
monitor_printf(mon, "postcopy ram: %" PRIu64 " kbytes\n",
info->ram->postcopy_bytes >> 10);
}
+ if (info->ram->dirty_sync_missed_zero_copy) {
+ monitor_printf(mon,
+ "Zero-copy-send fallbacks happened: %" PRIu64 " times\n",
+ info->ram->dirty_sync_missed_zero_copy);
+ }
}
if (info->has_disk) {
diff --git a/qapi/migration.json b/qapi/migration.json
index 5105790cd0..9b38b3c21c 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -55,6 +55,10 @@
# @postcopy-bytes: The number of bytes sent during the post-copy phase
# (since 7.0).
#
+# @dirty-sync-missed-zero-copy: Number of times dirty RAM synchronization could
+# not avoid copying dirty pages. This is between
+# 0 and @dirty-sync-count * @multifd-channels.
+# (since 7.1)
# Since: 0.14
##
{ 'struct': 'MigrationStats',
@@ -65,7 +69,8 @@
'postcopy-requests' : 'int', 'page-size' : 'int',
'multifd-bytes' : 'uint64', 'pages-per-second' : 'uint64',
'precopy-bytes' : 'uint64', 'downtime-bytes' : 'uint64',
- 'postcopy-bytes' : 'uint64' } }
+ 'postcopy-bytes' : 'uint64',
+ 'dirty-sync-missed-zero-copy' : 'uint64' } }
##
# @XBZRLECacheStats:
--
2.31.1

@ -0,0 +1,41 @@
From 85781b8745fa1581a66f64011d61a4f0c4e103dc Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Fri, 6 May 2022 17:03:11 +0200
Subject: [PATCH 3/5] Enable virtio-iommu-pci on aarch64
RH-Author: Eric Auger <eric.auger@redhat.com>
RH-MergeRequest: 83: Enable virtio-iommu-pci on aarch64
RH-Commit: [1/1] 23e5c0832e52c66adf5fd6daccdc3edddc7ecb8b (eauger1/centos-qemu-kvm)
RH-Bugzilla: 1477099
RH-Acked-by: Gavin Shan <gshan@redhat.com>
RH-Acked-by: Andrew Jones <drjones@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1477099
Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45128798
Upstream Status: RHEL-only
Tested: With virtio-net-pci and virtio-block-pci
let's enable the virtio-iommu-pci device on aarch64 by
turning CONFIG_VIRTIO_IOMMU on.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
configs/devices/aarch64-softmmu/aarch64-rh-devices.mak | 1 +
1 file changed, 1 insertion(+)
diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
index 187938573f..1618d31b89 100644
--- a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
+++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
@@ -23,6 +23,7 @@ CONFIG_VFIO_PCI=y
CONFIG_VIRTIO_MMIO=y
CONFIG_VIRTIO_PCI=y
CONFIG_VIRTIO_MEM=y
+CONFIG_VIRTIO_IOMMU=y
CONFIG_XIO3130=y
CONFIG_NVDIMM=y
CONFIG_ACPI_APEI=y
--
2.31.1

@ -0,0 +1,41 @@
From c531a39171201f8a1d063e6af752e5d629c1b4bf Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Thu, 9 Jun 2022 11:35:18 +0200
Subject: [PATCH 4/6] Enable virtio-iommu-pci on x86_64
RH-Author: Eric Auger <eric.auger@redhat.com>
RH-MergeRequest: 100: Enable virtio-iommu-pci on x86_64
RH-Commit: [1/1] a164af477efc7cb9d3d76a0e644f198f7c9fb2b5 (eauger1/centos-qemu-kvm)
RH-Bugzilla: 2094252
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: MST <mst@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094252
Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45871185
Upstream Status: RHEL-only
Tested: With virtio-net-pci and virtio-block-pci
let's enable the virtio-iommu-pci device on x86_64 by
turning CONFIG_VIRTIO_IOMMU on.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 +
1 file changed, 1 insertion(+)
diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
index d0c9e66641..3850b9de72 100644
--- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
+++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
@@ -90,6 +90,7 @@ CONFIG_VHOST_USER_BLK=y
CONFIG_VIRTIO_MEM=y
CONFIG_VIRTIO_PCI=y
CONFIG_VIRTIO_VGA=y
+CONFIG_VIRTIO_IOMMU=y
CONFIG_VMMOUSE=y
CONFIG_VMPORT=y
CONFIG_VTD=y
--
2.31.1

@ -0,0 +1,503 @@
From 1163da281c178359dd7e1cf1ced5c98caa600f8e Mon Sep 17 00:00:00 2001
From: Nicolas Saenz Julienne <nsaenzju@redhat.com>
Date: Mon, 25 Apr 2022 09:57:21 +0200
Subject: [PATCH 01/16] Introduce event-loop-base abstract class
RH-Author: Nicolas Saenz Julienne <nsaenzju@redhat.com>
RH-MergeRequest: 93: util/thread-pool: Expose minimum and maximum size
RH-Commit: [1/3] 5817205d8f56cc4aa98bd5963ecac54a59bad990
RH-Bugzilla: 2031024
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
Introduce the 'event-loop-base' abstract class, it'll hold the
properties common to all event loops and provide the necessary hooks for
their creation and maintenance. Then have iothread inherit from it.
EventLoopBaseClass is defined as user creatable and provides a hook for
its children to attach themselves to the user creatable class 'complete'
function. It also provides an update_params() callback to propagate
property changes onto its children.
The new 'event-loop-base' class will live in the root directory. It is
built on its own using the 'link_whole' option (there are no direct
function dependencies between the class and its children, it all happens
trough 'constructor' magic). And also imposes new compilation
dependencies:
qom <- event-loop-base <- blockdev (iothread.c)
And in subsequent patches:
qom <- event-loop-base <- qemuutil (util/main-loop.c)
All this forced some amount of reordering in meson.build:
- Moved qom build definition before qemuutil. Doing it the other way
around (i.e. moving qemuutil after qom) isn't possible as a lot of
core libraries that live in between the two depend on it.
- Process the 'hw' subdir earlier, as it introduces files into the
'qom' source set.
No functional changes intended.
Signed-off-by: Nicolas Saenz Julienne <nsaenzju@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
Message-id: 20220425075723.20019-2-nsaenzju@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 7d5983e3c8c40b1d0668faba31d79905c4fadd7d)
---
event-loop-base.c | 104 +++++++++++++++++++++++++++++++
include/sysemu/event-loop-base.h | 36 +++++++++++
include/sysemu/iothread.h | 6 +-
iothread.c | 65 ++++++-------------
meson.build | 23 ++++---
qapi/qom.json | 22 +++++--
6 files changed, 192 insertions(+), 64 deletions(-)
create mode 100644 event-loop-base.c
create mode 100644 include/sysemu/event-loop-base.h
diff --git a/event-loop-base.c b/event-loop-base.c
new file mode 100644
index 0000000000..a924c73a7c
--- /dev/null
+++ b/event-loop-base.c
@@ -0,0 +1,104 @@
+/*
+ * QEMU event-loop base
+ *
+ * Copyright (C) 2022 Red Hat Inc
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@redhat.com>
+ * Nicolas Saenz Julienne <nsaenzju@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qom/object_interfaces.h"
+#include "qapi/error.h"
+#include "sysemu/event-loop-base.h"
+
+typedef struct {
+ const char *name;
+ ptrdiff_t offset; /* field's byte offset in EventLoopBase struct */
+} EventLoopBaseParamInfo;
+
+static EventLoopBaseParamInfo aio_max_batch_info = {
+ "aio-max-batch", offsetof(EventLoopBase, aio_max_batch),
+};
+
+static void event_loop_base_get_param(Object *obj, Visitor *v,
+ const char *name, void *opaque, Error **errp)
+{
+ EventLoopBase *event_loop_base = EVENT_LOOP_BASE(obj);
+ EventLoopBaseParamInfo *info = opaque;
+ int64_t *field = (void *)event_loop_base + info->offset;
+
+ visit_type_int64(v, name, field, errp);
+}
+
+static void event_loop_base_set_param(Object *obj, Visitor *v,
+ const char *name, void *opaque, Error **errp)
+{
+ EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(obj);
+ EventLoopBase *base = EVENT_LOOP_BASE(obj);
+ EventLoopBaseParamInfo *info = opaque;
+ int64_t *field = (void *)base + info->offset;
+ int64_t value;
+
+ if (!visit_type_int64(v, name, &value, errp)) {
+ return;
+ }
+
+ if (value < 0) {
+ error_setg(errp, "%s value must be in range [0, %" PRId64 "]",
+ info->name, INT64_MAX);
+ return;
+ }
+
+ *field = value;
+
+ if (bc->update_params) {
+ bc->update_params(base, errp);
+ }
+
+ return;
+}
+
+static void event_loop_base_complete(UserCreatable *uc, Error **errp)
+{
+ EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(uc);
+ EventLoopBase *base = EVENT_LOOP_BASE(uc);
+
+ if (bc->init) {
+ bc->init(base, errp);
+ }
+}
+
+static void event_loop_base_class_init(ObjectClass *klass, void *class_data)
+{
+ UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass);
+ ucc->complete = event_loop_base_complete;
+
+ object_class_property_add(klass, "aio-max-batch", "int",
+ event_loop_base_get_param,
+ event_loop_base_set_param,
+ NULL, &aio_max_batch_info);
+}
+
+static const TypeInfo event_loop_base_info = {
+ .name = TYPE_EVENT_LOOP_BASE,
+ .parent = TYPE_OBJECT,
+ .instance_size = sizeof(EventLoopBase),
+ .class_size = sizeof(EventLoopBaseClass),
+ .class_init = event_loop_base_class_init,
+ .abstract = true,
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_USER_CREATABLE },
+ { }
+ }
+};
+
+static void register_types(void)
+{
+ type_register_static(&event_loop_base_info);
+}
+type_init(register_types);
diff --git a/include/sysemu/event-loop-base.h b/include/sysemu/event-loop-base.h
new file mode 100644
index 0000000000..8e77d8b69f
--- /dev/null
+++ b/include/sysemu/event-loop-base.h
@@ -0,0 +1,36 @@
+/*
+ * QEMU event-loop backend
+ *
+ * Copyright (C) 2022 Red Hat Inc
+ *
+ * Authors:
+ * Nicolas Saenz Julienne <nsaenzju@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef QEMU_EVENT_LOOP_BASE_H
+#define QEMU_EVENT_LOOP_BASE_H
+
+#include "qom/object.h"
+#include "block/aio.h"
+#include "qemu/typedefs.h"
+
+#define TYPE_EVENT_LOOP_BASE "event-loop-base"
+OBJECT_DECLARE_TYPE(EventLoopBase, EventLoopBaseClass,
+ EVENT_LOOP_BASE)
+
+struct EventLoopBaseClass {
+ ObjectClass parent_class;
+
+ void (*init)(EventLoopBase *base, Error **errp);
+ void (*update_params)(EventLoopBase *base, Error **errp);
+};
+
+struct EventLoopBase {
+ Object parent;
+
+ /* AioContext AIO engine parameters */
+ int64_t aio_max_batch;
+};
+#endif
diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h
index 7f714bd136..8f8601d6ab 100644
--- a/include/sysemu/iothread.h
+++ b/include/sysemu/iothread.h
@@ -17,11 +17,12 @@
#include "block/aio.h"
#include "qemu/thread.h"
#include "qom/object.h"
+#include "sysemu/event-loop-base.h"
#define TYPE_IOTHREAD "iothread"
struct IOThread {
- Object parent_obj;
+ EventLoopBase parent_obj;
QemuThread thread;
AioContext *ctx;
@@ -37,9 +38,6 @@ struct IOThread {
int64_t poll_max_ns;
int64_t poll_grow;
int64_t poll_shrink;
-
- /* AioContext AIO engine parameters */
- int64_t aio_max_batch;
};
typedef struct IOThread IOThread;
diff --git a/iothread.c b/iothread.c
index 0f98af0f2a..8fa2f3bfb8 100644
--- a/iothread.c
+++ b/iothread.c
@@ -17,6 +17,7 @@
#include "qemu/module.h"
#include "block/aio.h"
#include "block/block.h"
+#include "sysemu/event-loop-base.h"
#include "sysemu/iothread.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-misc.h"
@@ -152,10 +153,15 @@ static void iothread_init_gcontext(IOThread *iothread)
iothread->main_loop = g_main_loop_new(iothread->worker_context, TRUE);
}
-static void iothread_set_aio_context_params(IOThread *iothread, Error **errp)
+static void iothread_set_aio_context_params(EventLoopBase *base, Error **errp)
{
+ IOThread *iothread = IOTHREAD(base);
ERRP_GUARD();
+ if (!iothread->ctx) {
+ return;
+ }
+
aio_context_set_poll_params(iothread->ctx,
iothread->poll_max_ns,
iothread->poll_grow,
@@ -166,14 +172,15 @@ static void iothread_set_aio_context_params(IOThread *iothread, Error **errp)
}
aio_context_set_aio_params(iothread->ctx,
- iothread->aio_max_batch,
+ iothread->parent_obj.aio_max_batch,
errp);
}
-static void iothread_complete(UserCreatable *obj, Error **errp)
+
+static void iothread_init(EventLoopBase *base, Error **errp)
{
Error *local_error = NULL;
- IOThread *iothread = IOTHREAD(obj);
+ IOThread *iothread = IOTHREAD(base);
char *thread_name;
iothread->stopping = false;
@@ -189,7 +196,7 @@ static void iothread_complete(UserCreatable *obj, Error **errp)
*/
iothread_init_gcontext(iothread);
- iothread_set_aio_context_params(iothread, &local_error);
+ iothread_set_aio_context_params(base, &local_error);
if (local_error) {
error_propagate(errp, local_error);
aio_context_unref(iothread->ctx);
@@ -201,7 +208,7 @@ static void iothread_complete(UserCreatable *obj, Error **errp)
* to inherit.
*/
thread_name = g_strdup_printf("IO %s",
- object_get_canonical_path_component(OBJECT(obj)));
+ object_get_canonical_path_component(OBJECT(base)));
qemu_thread_create(&iothread->thread, thread_name, iothread_run,
iothread, QEMU_THREAD_JOINABLE);
g_free(thread_name);
@@ -226,9 +233,6 @@ static IOThreadParamInfo poll_grow_info = {
static IOThreadParamInfo poll_shrink_info = {
"poll-shrink", offsetof(IOThread, poll_shrink),
};
-static IOThreadParamInfo aio_max_batch_info = {
- "aio-max-batch", offsetof(IOThread, aio_max_batch),
-};
static void iothread_get_param(Object *obj, Visitor *v,
const char *name, IOThreadParamInfo *info, Error **errp)
@@ -288,35 +292,12 @@ static void iothread_set_poll_param(Object *obj, Visitor *v,
}
}
-static void iothread_get_aio_param(Object *obj, Visitor *v,
- const char *name, void *opaque, Error **errp)
-{
- IOThreadParamInfo *info = opaque;
-
- iothread_get_param(obj, v, name, info, errp);
-}
-
-static void iothread_set_aio_param(Object *obj, Visitor *v,
- const char *name, void *opaque, Error **errp)
-{
- IOThread *iothread = IOTHREAD(obj);
- IOThreadParamInfo *info = opaque;
-
- if (!iothread_set_param(obj, v, name, info, errp)) {
- return;
- }
-
- if (iothread->ctx) {
- aio_context_set_aio_params(iothread->ctx,
- iothread->aio_max_batch,
- errp);
- }
-}
-
static void iothread_class_init(ObjectClass *klass, void *class_data)
{
- UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass);
- ucc->complete = iothread_complete;
+ EventLoopBaseClass *bc = EVENT_LOOP_BASE_CLASS(klass);
+
+ bc->init = iothread_init;
+ bc->update_params = iothread_set_aio_context_params;
object_class_property_add(klass, "poll-max-ns", "int",
iothread_get_poll_param,
@@ -330,23 +311,15 @@ static void iothread_class_init(ObjectClass *klass, void *class_data)
iothread_get_poll_param,
iothread_set_poll_param,
NULL, &poll_shrink_info);
- object_class_property_add(klass, "aio-max-batch", "int",
- iothread_get_aio_param,
- iothread_set_aio_param,
- NULL, &aio_max_batch_info);
}
static const TypeInfo iothread_info = {
.name = TYPE_IOTHREAD,
- .parent = TYPE_OBJECT,
+ .parent = TYPE_EVENT_LOOP_BASE,
.class_init = iothread_class_init,
.instance_size = sizeof(IOThread),
.instance_init = iothread_instance_init,
.instance_finalize = iothread_instance_finalize,
- .interfaces = (InterfaceInfo[]) {
- {TYPE_USER_CREATABLE},
- {}
- },
};
static void iothread_register_types(void)
@@ -383,7 +356,7 @@ static int query_one_iothread(Object *object, void *opaque)
info->poll_max_ns = iothread->poll_max_ns;
info->poll_grow = iothread->poll_grow;
info->poll_shrink = iothread->poll_shrink;
- info->aio_max_batch = iothread->aio_max_batch;
+ info->aio_max_batch = iothread->parent_obj.aio_max_batch;
QAPI_LIST_APPEND(*tail, info);
return 0;
diff --git a/meson.build b/meson.build
index 6f7e430f0f..b9c919a55e 100644
--- a/meson.build
+++ b/meson.build
@@ -2804,6 +2804,7 @@ subdir('qom')
subdir('authz')
subdir('crypto')
subdir('ui')
+subdir('hw')
if enable_modules
@@ -2811,6 +2812,18 @@ if enable_modules
modulecommon = declare_dependency(link_whole: libmodulecommon, compile_args: '-DBUILD_DSO')
endif
+qom_ss = qom_ss.apply(config_host, strict: false)
+libqom = static_library('qom', qom_ss.sources() + genh,
+ dependencies: [qom_ss.dependencies()],
+ name_suffix: 'fa')
+qom = declare_dependency(link_whole: libqom)
+
+event_loop_base = files('event-loop-base.c')
+event_loop_base = static_library('event-loop-base', sources: event_loop_base + genh,
+ build_by_default: true)
+event_loop_base = declare_dependency(link_whole: event_loop_base,
+ dependencies: [qom])
+
stub_ss = stub_ss.apply(config_all, strict: false)
util_ss.add_all(trace_ss)
@@ -2897,7 +2910,6 @@ subdir('monitor')
subdir('net')
subdir('replay')
subdir('semihosting')
-subdir('hw')
subdir('tcg')
subdir('fpu')
subdir('accel')
@@ -3022,13 +3034,6 @@ qemu_syms = custom_target('qemu.syms', output: 'qemu.syms',
capture: true,
command: [undefsym, nm, '@INPUT@'])
-qom_ss = qom_ss.apply(config_host, strict: false)
-libqom = static_library('qom', qom_ss.sources() + genh,
- dependencies: [qom_ss.dependencies()],
- name_suffix: 'fa')
-
-qom = declare_dependency(link_whole: libqom)
-
authz_ss = authz_ss.apply(config_host, strict: false)
libauthz = static_library('authz', authz_ss.sources() + genh,
dependencies: [authz_ss.dependencies()],
@@ -3081,7 +3086,7 @@ libblockdev = static_library('blockdev', blockdev_ss.sources() + genh,
build_by_default: false)
blockdev = declare_dependency(link_whole: [libblockdev],
- dependencies: [block])
+ dependencies: [block, event_loop_base])
qmp_ss = qmp_ss.apply(config_host, strict: false)
libqmp = static_library('qmp', qmp_ss.sources() + genh,
diff --git a/qapi/qom.json b/qapi/qom.json
index eeb5395ff3..a2439533c5 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -499,6 +499,20 @@
'*repeat': 'bool',
'*grab-toggle': 'GrabToggleKeys' } }
+##
+# @EventLoopBaseProperties:
+#
+# Common properties for event loops
+#
+# @aio-max-batch: maximum number of requests in a batch for the AIO engine,
+# 0 means that the engine will use its default.
+# (default: 0)
+#
+# Since: 7.1
+##
+{ 'struct': 'EventLoopBaseProperties',
+ 'data': { '*aio-max-batch': 'int' } }
+
##
# @IothreadProperties:
#
@@ -516,17 +530,15 @@
# algorithm detects it is spending too long polling without
# encountering events. 0 selects a default behaviour (default: 0)
#
-# @aio-max-batch: maximum number of requests in a batch for the AIO engine,
-# 0 means that the engine will use its default
-# (default:0, since 6.1)
+# The @aio-max-batch option is available since 6.1.
#
# Since: 2.0
##
{ 'struct': 'IothreadProperties',
+ 'base': 'EventLoopBaseProperties',
'data': { '*poll-max-ns': 'int',
'*poll-grow': 'int',
- '*poll-shrink': 'int',
- '*aio-max-batch': 'int' } }
+ '*poll-shrink': 'int' } }
##
# @MemoryBackendProperties:
--
2.31.1

@ -0,0 +1,420 @@
From cda3fcf14f2883fea633e25256f6c14a71271adf Mon Sep 17 00:00:00 2001
From: Leonardo Bras <leobras@redhat.com>
Date: Fri, 13 May 2022 03:28:31 -0300
Subject: [PATCH 08/18] QIOChannel: Add flags on io_writev and introduce
io_flush callback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Leonardo Brás <leobras@redhat.com>
RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd
RH-Commit: [2/11] 06acfb6b0cb2c25733c2eb198011f7623b5a7024 (LeoBras/centos-qemu-kvm)
RH-Bugzilla: 1968509
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
RH-Acked-by: Peter Xu <peterx@redhat.com>
Add flags to io_writev and introduce io_flush as optional callback to
QIOChannelClass, allowing the implementation of zero copy writes by
subclasses.
How to use them:
- Write data using qio_channel_writev*(...,QIO_CHANNEL_WRITE_FLAG_ZERO_COPY),
- Wait write completion with qio_channel_flush().
Notes:
As some zero copy write implementations work asynchronously, it's
recommended to keep the write buffer untouched until the return of
qio_channel_flush(), to avoid the risk of sending an updated buffer
instead of the buffer state during write.
As io_flush callback is optional, if a subclass does not implement it, then:
- io_flush will return 0 without changing anything.
Also, some functions like qio_channel_writev_full_all() were adapted to
receive a flag parameter. That allows shared code between zero copy and
non-zero copy writev, and also an easier implementation on new flags.
Signed-off-by: Leonardo Bras <leobras@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <20220513062836.965425-3-leobras@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit b88651cb4d4fa416fdbb6afaf5b26ec8c035eaad)
Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
chardev/char-io.c | 2 +-
hw/remote/mpqemu-link.c | 2 +-
include/io/channel.h | 38 +++++++++++++++++++++-
io/channel-buffer.c | 1 +
io/channel-command.c | 1 +
io/channel-file.c | 1 +
io/channel-socket.c | 2 ++
io/channel-tls.c | 1 +
io/channel-websock.c | 1 +
io/channel.c | 49 +++++++++++++++++++++++------
migration/rdma.c | 1 +
scsi/pr-manager-helper.c | 2 +-
tests/unit/test-io-channel-socket.c | 1 +
13 files changed, 88 insertions(+), 14 deletions(-)
diff --git a/chardev/char-io.c b/chardev/char-io.c
index 8ced184160..4451128cba 100644
--- a/chardev/char-io.c
+++ b/chardev/char-io.c
@@ -122,7 +122,7 @@ int io_channel_send_full(QIOChannel *ioc,
ret = qio_channel_writev_full(
ioc, &iov, 1,
- fds, nfds, NULL);
+ fds, nfds, 0, NULL);
if (ret == QIO_CHANNEL_ERR_BLOCK) {
if (offset) {
return offset;
diff --git a/hw/remote/mpqemu-link.c b/hw/remote/mpqemu-link.c
index 7e841820e5..e8f556bd27 100644
--- a/hw/remote/mpqemu-link.c
+++ b/hw/remote/mpqemu-link.c
@@ -69,7 +69,7 @@ bool mpqemu_msg_send(MPQemuMsg *msg, QIOChannel *ioc, Error **errp)
}
if (!qio_channel_writev_full_all(ioc, send, G_N_ELEMENTS(send),
- fds, nfds, errp)) {
+ fds, nfds, 0, errp)) {
ret = true;
} else {
trace_mpqemu_send_io_error(msg->cmd, msg->size, nfds);
diff --git a/include/io/channel.h b/include/io/channel.h
index 88988979f8..c680ee7480 100644
--- a/include/io/channel.h
+++ b/include/io/channel.h
@@ -32,12 +32,15 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass,
#define QIO_CHANNEL_ERR_BLOCK -2
+#define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1
+
typedef enum QIOChannelFeature QIOChannelFeature;
enum QIOChannelFeature {
QIO_CHANNEL_FEATURE_FD_PASS,
QIO_CHANNEL_FEATURE_SHUTDOWN,
QIO_CHANNEL_FEATURE_LISTEN,
+ QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY,
};
@@ -104,6 +107,7 @@ struct QIOChannelClass {
size_t niov,
int *fds,
size_t nfds,
+ int flags,
Error **errp);
ssize_t (*io_readv)(QIOChannel *ioc,
const struct iovec *iov,
@@ -136,6 +140,8 @@ struct QIOChannelClass {
IOHandler *io_read,
IOHandler *io_write,
void *opaque);
+ int (*io_flush)(QIOChannel *ioc,
+ Error **errp);
};
/* General I/O handling functions */
@@ -228,6 +234,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
* @niov: the length of the @iov array
* @fds: an array of file handles to send
* @nfds: number of file handles in @fds
+ * @flags: write flags (QIO_CHANNEL_WRITE_FLAG_*)
* @errp: pointer to a NULL-initialized error object
*
* Write data to the IO channel, reading it from the
@@ -260,6 +267,7 @@ ssize_t qio_channel_writev_full(QIOChannel *ioc,
size_t niov,
int *fds,
size_t nfds,
+ int flags,
Error **errp);
/**
@@ -837,6 +845,7 @@ int qio_channel_readv_full_all(QIOChannel *ioc,
* @niov: the length of the @iov array
* @fds: an array of file handles to send
* @nfds: number of file handles in @fds
+ * @flags: write flags (QIO_CHANNEL_WRITE_FLAG_*)
* @errp: pointer to a NULL-initialized error object
*
*
@@ -846,6 +855,14 @@ int qio_channel_readv_full_all(QIOChannel *ioc,
* to be written, yielding from the current coroutine
* if required.
*
+ * If QIO_CHANNEL_WRITE_FLAG_ZERO_COPY is passed in flags,
+ * instead of waiting for all requested data to be written,
+ * this function will wait until it's all queued for writing.
+ * In this case, if the buffer gets changed between queueing and
+ * sending, the updated buffer will be sent. If this is not a
+ * desired behavior, it's suggested to call qio_channel_flush()
+ * before reusing the buffer.
+ *
* Returns: 0 if all bytes were written, or -1 on error
*/
@@ -853,6 +870,25 @@ int qio_channel_writev_full_all(QIOChannel *ioc,
const struct iovec *iov,
size_t niov,
int *fds, size_t nfds,
- Error **errp);
+ int flags, Error **errp);
+
+/**
+ * qio_channel_flush:
+ * @ioc: the channel object
+ * @errp: pointer to a NULL-initialized error object
+ *
+ * Will block until every packet queued with
+ * qio_channel_writev_full() + QIO_CHANNEL_WRITE_FLAG_ZERO_COPY
+ * is sent, or return in case of any error.
+ *
+ * If not implemented, acts as a no-op, and returns 0.
+ *
+ * Returns -1 if any error is found,
+ * 1 if every send failed to use zero copy.
+ * 0 otherwise.
+ */
+
+int qio_channel_flush(QIOChannel *ioc,
+ Error **errp);
#endif /* QIO_CHANNEL_H */
diff --git a/io/channel-buffer.c b/io/channel-buffer.c
index baa4e2b089..bf52011be2 100644
--- a/io/channel-buffer.c
+++ b/io/channel-buffer.c
@@ -81,6 +81,7 @@ static ssize_t qio_channel_buffer_writev(QIOChannel *ioc,
size_t niov,
int *fds,
size_t nfds,
+ int flags,
Error **errp)
{
QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc);
diff --git a/io/channel-command.c b/io/channel-command.c
index 338da73ade..54560464ae 100644
--- a/io/channel-command.c
+++ b/io/channel-command.c
@@ -258,6 +258,7 @@ static ssize_t qio_channel_command_writev(QIOChannel *ioc,
size_t niov,
int *fds,
size_t nfds,
+ int flags,
Error **errp)
{
QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc);
diff --git a/io/channel-file.c b/io/channel-file.c
index d7cf6d278f..ef6807a6be 100644
--- a/io/channel-file.c
+++ b/io/channel-file.c
@@ -114,6 +114,7 @@ static ssize_t qio_channel_file_writev(QIOChannel *ioc,
size_t niov,
int *fds,
size_t nfds,
+ int flags,
Error **errp)
{
QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc);
diff --git a/io/channel-socket.c b/io/channel-socket.c
index 7a8d9f69c9..a1be2197ca 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -525,6 +525,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
size_t niov,
int *fds,
size_t nfds,
+ int flags,
Error **errp)
{
QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
@@ -620,6 +621,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
size_t niov,
int *fds,
size_t nfds,
+ int flags,
Error **errp)
{
QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
diff --git a/io/channel-tls.c b/io/channel-tls.c
index 2ae1b92fc0..4ce890a538 100644
--- a/io/channel-tls.c
+++ b/io/channel-tls.c
@@ -301,6 +301,7 @@ static ssize_t qio_channel_tls_writev(QIOChannel *ioc,
size_t niov,
int *fds,
size_t nfds,
+ int flags,
Error **errp)
{
QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc);
diff --git a/io/channel-websock.c b/io/channel-websock.c
index 55145a6a8c..9619906ac3 100644
--- a/io/channel-websock.c
+++ b/io/channel-websock.c
@@ -1127,6 +1127,7 @@ static ssize_t qio_channel_websock_writev(QIOChannel *ioc,
size_t niov,
int *fds,
size_t nfds,
+ int flags,
Error **errp)
{
QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc);
diff --git a/io/channel.c b/io/channel.c
index e8b019dc36..0640941ac5 100644
--- a/io/channel.c
+++ b/io/channel.c
@@ -72,18 +72,32 @@ ssize_t qio_channel_writev_full(QIOChannel *ioc,
size_t niov,
int *fds,
size_t nfds,
+ int flags,
Error **errp)
{
QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
- if ((fds || nfds) &&
- !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS)) {
+ if (fds || nfds) {
+ if (!qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS)) {
+ error_setg_errno(errp, EINVAL,
+ "Channel does not support file descriptor passing");
+ return -1;
+ }
+ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
+ error_setg_errno(errp, EINVAL,
+ "Zero Copy does not support file descriptor passing");
+ return -1;
+ }
+ }
+
+ if ((flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) &&
+ !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) {
error_setg_errno(errp, EINVAL,
- "Channel does not support file descriptor passing");
+ "Requested Zero Copy feature is not available");
return -1;
}
- return klass->io_writev(ioc, iov, niov, fds, nfds, errp);
+ return klass->io_writev(ioc, iov, niov, fds, nfds, flags, errp);
}
@@ -217,14 +231,14 @@ int qio_channel_writev_all(QIOChannel *ioc,
size_t niov,
Error **errp)
{
- return qio_channel_writev_full_all(ioc, iov, niov, NULL, 0, errp);
+ return qio_channel_writev_full_all(ioc, iov, niov, NULL, 0, 0, errp);
}
int qio_channel_writev_full_all(QIOChannel *ioc,
const struct iovec *iov,
size_t niov,
int *fds, size_t nfds,
- Error **errp)
+ int flags, Error **errp)
{
int ret = -1;
struct iovec *local_iov = g_new(struct iovec, niov);
@@ -237,8 +251,10 @@ int qio_channel_writev_full_all(QIOChannel *ioc,
while (nlocal_iov > 0) {
ssize_t len;
- len = qio_channel_writev_full(ioc, local_iov, nlocal_iov, fds, nfds,
- errp);
+
+ len = qio_channel_writev_full(ioc, local_iov, nlocal_iov, fds,
+ nfds, flags, errp);
+
if (len == QIO_CHANNEL_ERR_BLOCK) {
if (qemu_in_coroutine()) {
qio_channel_yield(ioc, G_IO_OUT);
@@ -277,7 +293,7 @@ ssize_t qio_channel_writev(QIOChannel *ioc,
size_t niov,
Error **errp)
{
- return qio_channel_writev_full(ioc, iov, niov, NULL, 0, errp);
+ return qio_channel_writev_full(ioc, iov, niov, NULL, 0, 0, errp);
}
@@ -297,7 +313,7 @@ ssize_t qio_channel_write(QIOChannel *ioc,
Error **errp)
{
struct iovec iov = { .iov_base = (char *)buf, .iov_len = buflen };
- return qio_channel_writev_full(ioc, &iov, 1, NULL, 0, errp);
+ return qio_channel_writev_full(ioc, &iov, 1, NULL, 0, 0, errp);
}
@@ -473,6 +489,19 @@ off_t qio_channel_io_seek(QIOChannel *ioc,
return klass->io_seek(ioc, offset, whence, errp);
}
+int qio_channel_flush(QIOChannel *ioc,
+ Error **errp)
+{
+ QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
+
+ if (!klass->io_flush ||
+ !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) {
+ return 0;
+ }
+
+ return klass->io_flush(ioc, errp);
+}
+
static void qio_channel_restart_read(void *opaque)
{
diff --git a/migration/rdma.c b/migration/rdma.c
index ef1e65ec36..672d1958a9 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -2840,6 +2840,7 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc,
size_t niov,
int *fds,
size_t nfds,
+ int flags,
Error **errp)
{
QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
diff --git a/scsi/pr-manager-helper.c b/scsi/pr-manager-helper.c
index 451c7631b7..3be52a98d5 100644
--- a/scsi/pr-manager-helper.c
+++ b/scsi/pr-manager-helper.c
@@ -77,7 +77,7 @@ static int pr_manager_helper_write(PRManagerHelper *pr_mgr,
iov.iov_base = (void *)buf;
iov.iov_len = sz;
n_written = qio_channel_writev_full(QIO_CHANNEL(pr_mgr->ioc), &iov, 1,
- nfds ? &fd : NULL, nfds, errp);
+ nfds ? &fd : NULL, nfds, 0, errp);
if (n_written <= 0) {
assert(n_written != QIO_CHANNEL_ERR_BLOCK);
diff --git a/tests/unit/test-io-channel-socket.c b/tests/unit/test-io-channel-socket.c
index c49eec1f03..6713886d02 100644
--- a/tests/unit/test-io-channel-socket.c
+++ b/tests/unit/test-io-channel-socket.c
@@ -444,6 +444,7 @@ static void test_io_channel_unix_fd_pass(void)
G_N_ELEMENTS(iosend),
fdsend,
G_N_ELEMENTS(fdsend),
+ 0,
&error_abort);
qio_channel_readv_full(dst,
--
2.35.3

@ -0,0 +1,56 @@
From cb6dc39a5e5d2d981b4b1e983042b3fbb529d5d1 Mon Sep 17 00:00:00 2001
From: Leonardo Bras <leobras@redhat.com>
Date: Thu, 4 Aug 2022 04:10:43 -0300
Subject: [PATCH 06/11] QIOChannelSocket: Add support for MSG_ZEROCOPY + IPV6
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Leonardo Brás <leobras@redhat.com>
RH-MergeRequest: 111: zero-copy-send fixes & improvements
RH-Commit: [6/6] 2eb1aba8ebf267a6f67cfba2e489dc88619c7fd4 (LeoBras/centos-qemu-kvm)
RH-Bugzilla: 2107466
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
For using MSG_ZEROCOPY, there are two steps:
1 - io_writev() the packet, which enqueues the packet for sending, and
2 - io_flush(), which gets confirmation that all packets got correctly sent
Currently, if MSG_ZEROCOPY is used to send packets over IPV6, no error will
be reported in (1), but it will fail in the first time (2) happens.
This happens because (2) currently checks for cmsg_level & cmsg_type
associated with IPV4 only, before reporting any error.
Add checks for cmsg_level & cmsg_type associated with IPV6, and thus enable
support for MSG_ZEROCOPY + IPV6
Fixes: 2bc58ffc29 ("QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX")
Signed-off-by: Leonardo Bras <leobras@redhat.com>
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
(cherry picked from commit 5258a7e2c0677d16e9e1d06845f60171adf0b290)
Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
io/channel-socket.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/io/channel-socket.c b/io/channel-socket.c
index eb7baa2184..efd5f60808 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -747,8 +747,8 @@ static int qio_channel_socket_flush(QIOChannel *ioc,
}
cm = CMSG_FIRSTHDR(&msg);
- if (cm->cmsg_level != SOL_IP &&
- cm->cmsg_type != IP_RECVERR) {
+ if (cm->cmsg_level != SOL_IP && cm->cmsg_type != IP_RECVERR &&
+ cm->cmsg_level != SOL_IPV6 && cm->cmsg_type != IPV6_RECVERR) {
error_setg_errno(errp, EPROTOTYPE,
"Wrong cmsg in errqueue");
return -1;
--
2.31.1

@ -0,0 +1,65 @@
From 678981c6bb7c964e1591f6f8aba49e9602f64852 Mon Sep 17 00:00:00 2001
From: Leonardo Bras <leobras@redhat.com>
Date: Mon, 11 Jul 2022 18:11:11 -0300
Subject: [PATCH 01/11] QIOChannelSocket: Fix zero-copy flush returning code 1
when nothing sent
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Leonardo Brás <leobras@redhat.com>
RH-MergeRequest: 111: zero-copy-send fixes & improvements
RH-Commit: [1/6] cebc887cb61de1572d8ae3232cde45e80c339404 (LeoBras/centos-qemu-kvm)
RH-Bugzilla: 2107466
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
If flush is called when no buffer was sent with MSG_ZEROCOPY, it currently
returns 1. This return code should be used only when Linux fails to use
MSG_ZEROCOPY on a lot of sendmsg().
Fix this by returning early from flush if no sendmsg(...,MSG_ZEROCOPY)
was attempted.
Fixes: 2bc58ffc2926 ("QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX")
Signed-off-by: Leonardo Bras <leobras@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Acked-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Message-Id: <20220711211112.18951-2-leobras@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 927f93e099c4f9184e60a1bc61624ac2d04d0223)
Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
io/channel-socket.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/io/channel-socket.c b/io/channel-socket.c
index 8ae8b212cf..eb7baa2184 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -717,12 +717,18 @@ static int qio_channel_socket_flush(QIOChannel *ioc,
struct cmsghdr *cm;
char control[CMSG_SPACE(sizeof(*serr))];
int received;
- int ret = 1;
+ int ret;
+
+ if (sioc->zero_copy_queued == sioc->zero_copy_sent) {
+ return 0;
+ }
msg.msg_control = control;
msg.msg_controllen = sizeof(control);
memset(control, 0, sizeof(control));
+ ret = 1;
+
while (sioc->zero_copy_sent < sioc->zero_copy_queued) {
received = recvmsg(sioc->fd, &msg, MSG_ERRQUEUE);
if (received < 0) {
--
2.31.1

@ -0,0 +1,58 @@
From e70f01749addd7d0b7aa7fa4fdedb664f98e6b9b Mon Sep 17 00:00:00 2001
From: Leonardo Bras <leobras@redhat.com>
Date: Mon, 20 Jun 2022 02:39:43 -0300
Subject: [PATCH 16/18] QIOChannelSocket: Fix zero-copy send so socket flush
works
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Leonardo Brás <leobras@redhat.com>
RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd
RH-Commit: [10/11] a2dfac987e24026b1a78e90b86234ca206b6401f (LeoBras/centos-qemu-kvm)
RH-Bugzilla: 1968509
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
RH-Acked-by: Peter Xu <peterx@redhat.com>
Somewhere between v6 and v7 the of the zero-copy-send patchset a crucial
part of the flushing mechanism got missing: incrementing zero_copy_queued.
Without that, the flushing interface becomes a no-op, and there is no
guarantee the buffer is really sent.
This can go as bad as causing a corruption in RAM during migration.
Fixes: 2bc58ffc2926 ("QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX")
Reported-by: 徐闯 <xuchuangxclwt@bytedance.com>
Signed-off-by: Leonardo Bras <leobras@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 4f5a09714c983a3471fd12e3c7f3196e95c650c1)
Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
io/channel-socket.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/io/channel-socket.c b/io/channel-socket.c
index 7490e5943d..8ae8b212cf 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -612,6 +612,11 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
"Unable to write to socket");
return -1;
}
+
+ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
+ sioc->zero_copy_queued++;
+ }
+
return ret;
}
#else /* WIN32 */
--
2.35.3

@ -0,0 +1,249 @@
From 4aeba0365d30dabe2e70dc172683f0878a4a9621 Mon Sep 17 00:00:00 2001
From: Leonardo Bras <leobras@redhat.com>
Date: Fri, 13 May 2022 03:28:32 -0300
Subject: [PATCH 09/18] QIOChannelSocket: Implement io_writev zero copy flag &
io_flush for CONFIG_LINUX
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Leonardo Brás <leobras@redhat.com>
RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd
RH-Commit: [3/11] 9afeac1f5ac7675624660a0281726c09c8321180 (LeoBras/centos-qemu-kvm)
RH-Bugzilla: 1968509
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
RH-Acked-by: Peter Xu <peterx@redhat.com>
For CONFIG_LINUX, implement the new zero copy flag and the optional callback
io_flush on QIOChannelSocket, but enables it only when MSG_ZEROCOPY
feature is available in the host kernel, which is checked on
qio_channel_socket_connect_sync()
qio_channel_socket_flush() was implemented by counting how many times
sendmsg(...,MSG_ZEROCOPY) was successfully called, and then reading the
socket's error queue, in order to find how many of them finished sending.
Flush will loop until those counters are the same, or until some error occurs.
Notes on using writev() with QIO_CHANNEL_WRITE_FLAG_ZERO_COPY:
1: Buffer
- As MSG_ZEROCOPY tells the kernel to use the same user buffer to avoid copying,
some caution is necessary to avoid overwriting any buffer before it's sent.
If something like this happen, a newer version of the buffer may be sent instead.
- If this is a problem, it's recommended to call qio_channel_flush() before freeing
or re-using the buffer.
2: Locked memory
- When using MSG_ZERCOCOPY, the buffer memory will be locked after queued, and
unlocked after it's sent.
- Depending on the size of each buffer, and how often it's sent, it may require
a larger amount of locked memory than usually available to non-root user.
- If the required amount of locked memory is not available, writev_zero_copy
will return an error, which can abort an operation like migration,
- Because of this, when an user code wants to add zero copy as a feature, it
requires a mechanism to disable it, so it can still be accessible to less
privileged users.
Signed-off-by: Leonardo Bras <leobras@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Message-Id: <20220513062836.965425-4-leobras@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 2bc58ffc2926a4efdd03edfb5909861fefc68c3d)
Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
include/io/channel-socket.h | 2 +
io/channel-socket.c | 116 ++++++++++++++++++++++++++++++++++--
2 files changed, 114 insertions(+), 4 deletions(-)
diff --git a/include/io/channel-socket.h b/include/io/channel-socket.h
index e747e63514..513c428fe4 100644
--- a/include/io/channel-socket.h
+++ b/include/io/channel-socket.h
@@ -47,6 +47,8 @@ struct QIOChannelSocket {
socklen_t localAddrLen;
struct sockaddr_storage remoteAddr;
socklen_t remoteAddrLen;
+ ssize_t zero_copy_queued;
+ ssize_t zero_copy_sent;
};
diff --git a/io/channel-socket.c b/io/channel-socket.c
index a1be2197ca..fbd2214d20 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -26,6 +26,14 @@
#include "io/channel-watch.h"
#include "trace.h"
#include "qapi/clone-visitor.h"
+#ifdef CONFIG_LINUX
+#include <linux/errqueue.h>
+#include <sys/socket.h>
+
+#if (defined(MSG_ZEROCOPY) && defined(SO_ZEROCOPY))
+#define QEMU_MSG_ZEROCOPY
+#endif
+#endif
#define SOCKET_MAX_FDS 16
@@ -55,6 +63,8 @@ qio_channel_socket_new(void)
sioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET));
sioc->fd = -1;
+ sioc->zero_copy_queued = 0;
+ sioc->zero_copy_sent = 0;
ioc = QIO_CHANNEL(sioc);
qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN);
@@ -154,6 +164,16 @@ int qio_channel_socket_connect_sync(QIOChannelSocket *ioc,
return -1;
}
+#ifdef QEMU_MSG_ZEROCOPY
+ int ret, v = 1;
+ ret = setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &v, sizeof(v));
+ if (ret == 0) {
+ /* Zero copy available on host */
+ qio_channel_set_feature(QIO_CHANNEL(ioc),
+ QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY);
+ }
+#endif
+
return 0;
}
@@ -534,6 +554,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
char control[CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)];
size_t fdsize = sizeof(int) * nfds;
struct cmsghdr *cmsg;
+ int sflags = 0;
memset(control, 0, CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS));
@@ -558,15 +579,31 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
memcpy(CMSG_DATA(cmsg), fds, fdsize);
}
+#ifdef QEMU_MSG_ZEROCOPY
+ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
+ sflags = MSG_ZEROCOPY;
+ }
+#endif
+
retry:
- ret = sendmsg(sioc->fd, &msg, 0);
+ ret = sendmsg(sioc->fd, &msg, sflags);
if (ret <= 0) {
- if (errno == EAGAIN) {
+ switch (errno) {
+ case EAGAIN:
return QIO_CHANNEL_ERR_BLOCK;
- }
- if (errno == EINTR) {
+ case EINTR:
goto retry;
+#ifdef QEMU_MSG_ZEROCOPY
+ case ENOBUFS:
+ if (sflags & MSG_ZEROCOPY) {
+ error_setg_errno(errp, errno,
+ "Process can't lock enough memory for using MSG_ZEROCOPY");
+ return -1;
+ }
+ break;
+#endif
}
+
error_setg_errno(errp, errno,
"Unable to write to socket");
return -1;
@@ -660,6 +697,74 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
}
#endif /* WIN32 */
+
+#ifdef QEMU_MSG_ZEROCOPY
+static int qio_channel_socket_flush(QIOChannel *ioc,
+ Error **errp)
+{
+ QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
+ struct msghdr msg = {};
+ struct sock_extended_err *serr;
+ struct cmsghdr *cm;
+ char control[CMSG_SPACE(sizeof(*serr))];
+ int received;
+ int ret = 1;
+
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+ memset(control, 0, sizeof(control));
+
+ while (sioc->zero_copy_sent < sioc->zero_copy_queued) {
+ received = recvmsg(sioc->fd, &msg, MSG_ERRQUEUE);
+ if (received < 0) {
+ switch (errno) {
+ case EAGAIN:
+ /* Nothing on errqueue, wait until something is available */
+ qio_channel_wait(ioc, G_IO_ERR);
+ continue;
+ case EINTR:
+ continue;
+ default:
+ error_setg_errno(errp, errno,
+ "Unable to read errqueue");
+ return -1;
+ }
+ }
+
+ cm = CMSG_FIRSTHDR(&msg);
+ if (cm->cmsg_level != SOL_IP &&
+ cm->cmsg_type != IP_RECVERR) {
+ error_setg_errno(errp, EPROTOTYPE,
+ "Wrong cmsg in errqueue");
+ return -1;
+ }
+
+ serr = (void *) CMSG_DATA(cm);
+ if (serr->ee_errno != SO_EE_ORIGIN_NONE) {
+ error_setg_errno(errp, serr->ee_errno,
+ "Error on socket");
+ return -1;
+ }
+ if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) {
+ error_setg_errno(errp, serr->ee_origin,
+ "Error not from zero copy");
+ return -1;
+ }
+
+ /* No errors, count successfully finished sendmsg()*/
+ sioc->zero_copy_sent += serr->ee_data - serr->ee_info + 1;
+
+ /* If any sendmsg() succeeded using zero copy, return 0 at the end */
+ if (serr->ee_code != SO_EE_CODE_ZEROCOPY_COPIED) {
+ ret = 0;
+ }
+ }
+
+ return ret;
+}
+
+#endif /* QEMU_MSG_ZEROCOPY */
+
static int
qio_channel_socket_set_blocking(QIOChannel *ioc,
bool enabled,
@@ -790,6 +895,9 @@ static void qio_channel_socket_class_init(ObjectClass *klass,
ioc_klass->io_set_delay = qio_channel_socket_set_delay;
ioc_klass->io_create_watch = qio_channel_socket_create_watch;
ioc_klass->io_set_aio_fd_handler = qio_channel_socket_set_aio_fd_handler;
+#ifdef QEMU_MSG_ZEROCOPY
+ ioc_klass->io_flush = qio_channel_socket_flush;
+#endif
}
static const TypeInfo qio_channel_socket_info = {
--
2.35.3

@ -0,0 +1,82 @@
From 60bf942a58db12c821f2a6a49e2e0b04b99bec30 Mon Sep 17 00:00:00 2001
From: Leonardo Bras <leobras@redhat.com>
Date: Mon, 20 Jun 2022 02:39:42 -0300
Subject: [PATCH 15/18] QIOChannelSocket: Introduce assert and reduce ifdefs to
improve readability
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Leonardo Brás <leobras@redhat.com>
RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd
RH-Commit: [9/11] eaa02d68301852ccc98bdacc7387d8d03be1cb05 (LeoBras/centos-qemu-kvm)
RH-Bugzilla: 1968509
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
RH-Acked-by: Peter Xu <peterx@redhat.com>
During implementation of MSG_ZEROCOPY feature, a lot of #ifdefs were
introduced, particularly at qio_channel_socket_writev().
Rewrite some of those changes so it's easier to read.
Also, introduce an assert to help detect incorrect zero-copy usage is when
it's disabled on build.
Signed-off-by: Leonardo Bras <leobras@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
dgilbert: Fixed up thinko'd g_assert_unreachable->g_assert_not_reached
(cherry picked from commit 803ca43e4c7fcf32f9f68c118301ccd0c83ece3f)
Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
io/channel-socket.c | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/io/channel-socket.c b/io/channel-socket.c
index fbd2214d20..7490e5943d 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -579,11 +579,17 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
memcpy(CMSG_DATA(cmsg), fds, fdsize);
}
-#ifdef QEMU_MSG_ZEROCOPY
if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
+#ifdef QEMU_MSG_ZEROCOPY
sflags = MSG_ZEROCOPY;
- }
+#else
+ /*
+ * We expect QIOChannel class entry point to have
+ * blocked this code path already
+ */
+ g_assert_not_reached();
#endif
+ }
retry:
ret = sendmsg(sioc->fd, &msg, sflags);
@@ -593,15 +599,13 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
return QIO_CHANNEL_ERR_BLOCK;
case EINTR:
goto retry;
-#ifdef QEMU_MSG_ZEROCOPY
case ENOBUFS:
- if (sflags & MSG_ZEROCOPY) {
+ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
error_setg_errno(errp, errno,
"Process can't lock enough memory for using MSG_ZEROCOPY");
return -1;
}
break;
-#endif
}
error_setg_errno(errp, errno,
--
2.35.3

@ -0,0 +1,237 @@
From 055edf068196622a3e1868c9e4c991d410272a6d Mon Sep 17 00:00:00 2001
From: Andrew Jones <drjones@redhat.com>
Date: Wed, 15 Jun 2022 15:28:27 +0200
Subject: [PATCH 03/18] RHEL-only: AArch64: Drop unsupported CPU types
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Daniel P. Berrangé <berrange@redhat.com>
RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models
RH-Commit: [3/6] 21f54c86dc87e5e75a64459b5a385686bc09640c (berrange/centos-src-qemu)
RH-Bugzilla: 2060839
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2066824
Upstream Status: RHEL only
We only need to support AArch64 cpu types and we only need three
types:
1) A base type to use with TCG, i.e. a cpu type with only base
features. 'cortex-a57' serves this role and is currently used
by libguestfs.
2) The 'max' type, which is for both KVM and TCG and is good for
tests that just specify 'max' but run under both. 'max' with
TCG also provides the VM with all the CPU features TCG
supports, which is good for VMs that need features not
provided by the basic cortex-a57.
3) The host type which is used with KVM.
Signed-off-by: Andrew Jones <drjones@redhat.com>
---
hw/arm/virt.c | 4 ++++
target/arm/cpu64.c | 6 ++++++
target/arm/cpu_tcg.c | 12 ++----------
tests/qtest/arm-cpu-features.c | 6 ++++++
4 files changed, 18 insertions(+), 10 deletions(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 95d012d6eb..74119976d3 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -239,12 +239,16 @@ static const int a15irqmap[] = {
};
static const char *valid_cpus[] = {
+#if 0 /* Disabled for Red Hat Enterprise Linux */
ARM_CPU_TYPE_NAME("cortex-a7"),
ARM_CPU_TYPE_NAME("cortex-a15"),
ARM_CPU_TYPE_NAME("cortex-a53"),
+#endif /* disabled for RHEL */
ARM_CPU_TYPE_NAME("cortex-a57"),
+#if 0 /* Disabled for Red Hat Enterprise Linux */
ARM_CPU_TYPE_NAME("cortex-a72"),
ARM_CPU_TYPE_NAME("a64fx"),
+#endif /* disabled for RHEL */
ARM_CPU_TYPE_NAME("host"),
ARM_CPU_TYPE_NAME("max"),
};
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index eb44c05822..e80b831073 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -146,6 +146,7 @@ static void aarch64_a57_initfn(Object *obj)
define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo);
}
+#if 0 /* Disabled for Red Hat Enterprise Linux */
static void aarch64_a53_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
@@ -249,6 +250,7 @@ static void aarch64_a72_initfn(Object *obj)
cpu->gic_vprebits = 5;
define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo);
}
+#endif /* disabled for RHEL */
void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
{
@@ -923,6 +925,7 @@ static void aarch64_max_initfn(Object *obj)
qdev_property_add_static(DEVICE(obj), &arm_cpu_lpa2_property);
}
+#if 0 /* Disabled for Red Hat Enterprise Linux */
static void aarch64_a64fx_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
@@ -969,12 +972,15 @@ static void aarch64_a64fx_initfn(Object *obj)
/* TODO: Add A64FX specific HPC extension registers */
}
+#endif /* disabled for RHEL */
static const ARMCPUInfo aarch64_cpus[] = {
{ .name = "cortex-a57", .initfn = aarch64_a57_initfn },
+#if 0 /* Disabled for Red Hat Enterprise Linux */
{ .name = "cortex-a53", .initfn = aarch64_a53_initfn },
{ .name = "cortex-a72", .initfn = aarch64_a72_initfn },
{ .name = "a64fx", .initfn = aarch64_a64fx_initfn },
+#endif /* disabled for RHEL */
{ .name = "max", .initfn = aarch64_max_initfn },
#if defined(CONFIG_KVM) || defined(CONFIG_HVF)
{ .name = "host", .initfn = aarch64_host_initfn },
diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c
index 3826fa5122..74727fc92c 100644
--- a/target/arm/cpu_tcg.c
+++ b/target/arm/cpu_tcg.c
@@ -19,10 +19,10 @@
#include "hw/boards.h"
#endif
+#if 0 /* Disabled for Red Hat Enterprise Linux */
/* CPU models. These are not needed for the AArch64 linux-user build. */
#if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64)
-#if 0 /* Disabled for Red Hat Enterprise Linux */
#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG)
static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
{
@@ -376,7 +376,6 @@ static void cortex_a9_initfn(Object *obj)
cpu->ccsidr[1] = 0x200fe019; /* 16k L1 icache. */
define_arm_cp_regs(cpu, cortexa9_cp_reginfo);
}
-#endif /* disabled for RHEL */
#ifndef CONFIG_USER_ONLY
static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri)
@@ -402,7 +401,6 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = {
REGINFO_SENTINEL
};
-#if 0 /* Disabled for Red Hat Enterprise Linux */
static void cortex_a7_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
@@ -448,7 +446,6 @@ static void cortex_a7_initfn(Object *obj)
cpu->ccsidr[2] = 0x711fe07a; /* 4096K L2 unified cache */
define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */
}
-#endif /* disabled for RHEL */
static void cortex_a15_initfn(Object *obj)
{
@@ -492,7 +489,6 @@ static void cortex_a15_initfn(Object *obj)
define_arm_cp_regs(cpu, cortexa15_cp_reginfo);
}
-#if 0 /* Disabled for Red Hat Enterprise Linux */
static void cortex_m0_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
@@ -933,7 +929,6 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data)
cc->gdb_core_xml_file = "arm-m-profile.xml";
}
-#endif /* disabled for RHEL */
#ifndef TARGET_AARCH64
/*
@@ -1013,7 +1008,6 @@ static void arm_max_initfn(Object *obj)
#endif /* !TARGET_AARCH64 */
static const ARMCPUInfo arm_tcg_cpus[] = {
-#if 0 /* Disabled for Red Hat Enterprise Linux */
{ .name = "arm926", .initfn = arm926_initfn },
{ .name = "arm946", .initfn = arm946_initfn },
{ .name = "arm1026", .initfn = arm1026_initfn },
@@ -1029,9 +1023,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = {
{ .name = "cortex-a7", .initfn = cortex_a7_initfn },
{ .name = "cortex-a8", .initfn = cortex_a8_initfn },
{ .name = "cortex-a9", .initfn = cortex_a9_initfn },
-#endif /* disabled for RHEL */
{ .name = "cortex-a15", .initfn = cortex_a15_initfn },
-#if 0 /* Disabled for Red Hat Enterprise Linux */
{ .name = "cortex-m0", .initfn = cortex_m0_initfn,
.class_init = arm_v7m_class_init },
{ .name = "cortex-m3", .initfn = cortex_m3_initfn,
@@ -1062,7 +1054,6 @@ static const ARMCPUInfo arm_tcg_cpus[] = {
{ .name = "pxa270-b1", .initfn = pxa270b1_initfn },
{ .name = "pxa270-c0", .initfn = pxa270c0_initfn },
{ .name = "pxa270-c5", .initfn = pxa270c5_initfn },
-#endif /* disabled for RHEL */
#ifndef TARGET_AARCH64
{ .name = "max", .initfn = arm_max_initfn },
#endif
@@ -1090,3 +1081,4 @@ static void arm_tcg_cpu_register_types(void)
type_init(arm_tcg_cpu_register_types)
#endif /* !CONFIG_USER_ONLY || !TARGET_AARCH64 */
+#endif /* disabled for RHEL */
diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c
index f76652143a..fe2a0a070d 100644
--- a/tests/qtest/arm-cpu-features.c
+++ b/tests/qtest/arm-cpu-features.c
@@ -440,8 +440,10 @@ static void test_query_cpu_model_expansion(const void *data)
assert_error(qts, "host", "The CPU type 'host' requires KVM", NULL);
/* Test expected feature presence/absence for some cpu types */
+#if 0 /* Disabled for Red Hat Enterprise Linux */
assert_has_feature_enabled(qts, "cortex-a15", "pmu");
assert_has_not_feature(qts, "cortex-a15", "aarch64");
+#endif /* disabled for RHEL */
/* Enabling and disabling pmu should always work. */
assert_has_feature_enabled(qts, "max", "pmu");
@@ -458,6 +460,7 @@ static void test_query_cpu_model_expansion(const void *data)
assert_has_feature_enabled(qts, "cortex-a57", "pmu");
assert_has_feature_enabled(qts, "cortex-a57", "aarch64");
+#if 0 /* Disabled for Red Hat Enterprise Linux */
assert_has_feature_enabled(qts, "a64fx", "pmu");
assert_has_feature_enabled(qts, "a64fx", "aarch64");
/*
@@ -470,6 +473,7 @@ static void test_query_cpu_model_expansion(const void *data)
"{ 'sve384': true }");
assert_error(qts, "a64fx", "cannot enable sve640",
"{ 'sve640': true }");
+#endif /* disabled for RHEL */
sve_tests_default(qts, "max");
pauth_tests_default(qts, "max");
@@ -505,9 +509,11 @@ static void test_query_cpu_model_expansion_kvm(const void *data)
QDict *resp;
char *error;
+#if 0 /* Disabled for Red Hat Enterprise Linux */
assert_error(qts, "cortex-a15",
"We cannot guarantee the CPU type 'cortex-a15' works "
"with KVM on this host", NULL);
+#endif /* disabled for RHEL */
assert_has_feature_enabled(qts, "host", "aarch64");
--
2.35.3

@ -0,0 +1,95 @@
From d710394f68eb0b6116dd8ac76f619c192e0d5972 Mon Sep 17 00:00:00 2001
From: Andrew Jones <drjones@redhat.com>
Date: Wed, 15 Jun 2022 15:28:27 +0200
Subject: [PATCH 02/18] RHEL-only: tests/avocado: Switch aarch64 tests from a53
to a57
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Daniel P. Berrangé <berrange@redhat.com>
RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models
RH-Commit: [2/6] e85ef69b42c411a6997e4da10ba05176368769b3 (berrange/centos-src-qemu)
RH-Bugzilla: 2060839
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2066824
Upstream Status: RHEL only
We plan to remove the cortex-a53 from the supported cpu types. Switch
all avocado tests that use it to the cortex-a57, which will work the
same and we intend to keep. We don't want to try and upstream this
change since the better upstream change would be to switch from the
a53 to 'max', but the upstream tests also need to use later guest
kernels to use 'max' (see qemu upstream commit 0942820408dc
("hw/arm/virt: Disable LPA2 for -machine virt-6.2")
Signed-off-by: Andrew Jones <drjones@redhat.com>
---
tests/avocado/replay_kernel.py | 2 +-
tests/avocado/reverse_debugging.py | 2 +-
tests/avocado/tcg_plugins.py | 6 +++---
3 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/tests/avocado/replay_kernel.py b/tests/avocado/replay_kernel.py
index 0b2b0dc692..3a7b5f0748 100644
--- a/tests/avocado/replay_kernel.py
+++ b/tests/avocado/replay_kernel.py
@@ -147,7 +147,7 @@ def test_aarch64_virt(self):
"""
:avocado: tags=arch:aarch64
:avocado: tags=machine:virt
- :avocado: tags=cpu:cortex-a53
+ :avocado: tags=cpu:cortex-a57
"""
kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora'
'/linux/releases/29/Everything/aarch64/os/images/pxeboot'
diff --git a/tests/avocado/reverse_debugging.py b/tests/avocado/reverse_debugging.py
index d2921e70c3..66d185ed42 100644
--- a/tests/avocado/reverse_debugging.py
+++ b/tests/avocado/reverse_debugging.py
@@ -198,7 +198,7 @@ def test_aarch64_virt(self):
"""
:avocado: tags=arch:aarch64
:avocado: tags=machine:virt
- :avocado: tags=cpu:cortex-a53
+ :avocado: tags=cpu:cortex-a57
"""
kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora'
'/linux/releases/29/Everything/aarch64/os/images/pxeboot'
diff --git a/tests/avocado/tcg_plugins.py b/tests/avocado/tcg_plugins.py
index 642d2e49e3..93b3afd823 100644
--- a/tests/avocado/tcg_plugins.py
+++ b/tests/avocado/tcg_plugins.py
@@ -68,7 +68,7 @@ def test_aarch64_virt_insn(self):
:avocado: tags=accel:tcg
:avocado: tags=arch:aarch64
:avocado: tags=machine:virt
- :avocado: tags=cpu:cortex-a53
+ :avocado: tags=cpu:cortex-a57
"""
kernel_path = self._grab_aarch64_kernel()
kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE +
@@ -94,7 +94,7 @@ def test_aarch64_virt_insn_icount(self):
:avocado: tags=accel:tcg
:avocado: tags=arch:aarch64
:avocado: tags=machine:virt
- :avocado: tags=cpu:cortex-a53
+ :avocado: tags=cpu:cortex-a57
"""
kernel_path = self._grab_aarch64_kernel()
kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE +
@@ -120,7 +120,7 @@ def test_aarch64_virt_mem_icount(self):
:avocado: tags=accel:tcg
:avocado: tags=arch:aarch64
:avocado: tags=machine:virt
- :avocado: tags=cpu:cortex-a53
+ :avocado: tags=cpu:cortex-a57
"""
kernel_path = self._grab_aarch64_kernel()
kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE +
--
2.35.3

@ -0,0 +1,58 @@
From 5ab8613582fd56b847fe75750acb5b7255900b35 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Thu, 9 Jun 2022 11:55:15 +0200
Subject: [PATCH 15/16] Revert "globally limit the maximum number of CPUs"
RH-Author: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-MergeRequest: 99: Revert "globally limit the maximum number of CPUs"
RH-Commit: [1/1] 13100d4a2209b2190a3654c1f9cf4ebade1e8d24 (vkuznets/qemu-kvm-c9s)
RH-Bugzilla: 2094270
RH-Acked-by: Andrew Jones <drjones@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Thomas Huth <thuth@redhat.com>
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094270
Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45871149
Upstream Status: RHEL-only
Tested: with upstream kernel
Downstream QEMU carries a patch that sets the hard limit of possible vCPUs
to the value that the KVM code of the kernel recommends as soft limit.
Upstream KVM code has been changed recently to not use an arbitrary soft
limit anymore, but to cap the value on the amount of available physical
CPUs of the host. This defeats the purpose of the downstream change in
QEMU completely. Drop the downstream-only patch to allow CPU overcommit.
This reverts commit 6669f6fa677d43144f39d6ad59725b7ba622f1c2.
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
accel/kvm/kvm-all.c | 12 ------------
1 file changed, 12 deletions(-)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index fdf0e4d429..5f1377ca04 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2430,18 +2430,6 @@ static int kvm_init(MachineState *ms)
soft_vcpus_limit = kvm_recommended_vcpus(s);
hard_vcpus_limit = kvm_max_vcpus(s);
-#ifdef HOST_PPC64
- /*
- * On POWER, the kernel advertises a soft limit based on the
- * number of CPU threads on the host. We want to allow exceeding
- * this for testing purposes, so we don't want to set hard limit
- * to soft limit as on x86.
- */
-#else
- /* RHEL doesn't support nr_vcpus > soft_vcpus_limit */
- hard_vcpus_limit = soft_vcpus_limit;
-#endif
-
while (nc->name) {
if (nc->num > soft_vcpus_limit) {
warn_report("Number of %s cpus requested (%d) exceeds "
--
2.31.1

@ -0,0 +1,134 @@
From 5ea59b17866add54e5ae8c76d3cb472c67e1fa91 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Tue, 2 Aug 2022 08:19:49 +0200
Subject: [PATCH 32/32] Revert "migration: Simplify unqueue_page()"
RH-Author: Thomas Huth <thuth@redhat.com>
RH-MergeRequest: 112: Fix postcopy migration on s390x
RH-Commit: [2/2] 3913c9ed3f27f4b66245913da29d0c46db0c6567 (thuth/qemu-kvm-cs9)
RH-Bugzilla: 2099934
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Peter Xu <peterx@redhat.com>
This reverts commit cfd66f30fb0f735df06ff4220e5000290a43dad3.
The simplification of unqueue_page() introduced a bug that sometimes
breaks migration on s390x hosts.
The problem is not fully understood yet, but since we are already in
the freeze for QEMU 7.1 and we need something working there, let's
revert this patch for the upcoming release. The optimization can be
redone later again in a proper way if necessary.
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2099934
Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <20220802061949.331576-1-thuth@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 777f53c75983dd10756f5dbfc8af50fe11da81c1)
Conflicts:
migration/trace-events
(trivial contextual conflict)
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
migration/ram.c | 37 ++++++++++++++++++++++++++-----------
migration/trace-events | 3 ++-
2 files changed, 28 insertions(+), 12 deletions(-)
diff --git a/migration/ram.c b/migration/ram.c
index fb6db54642..ee40e4a718 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1548,7 +1548,6 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
{
struct RAMSrcPageRequest *entry;
RAMBlock *block = NULL;
- size_t page_size;
if (!postcopy_has_request(rs)) {
return NULL;
@@ -1565,13 +1564,10 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
entry = QSIMPLEQ_FIRST(&rs->src_page_requests);
block = entry->rb;
*offset = entry->offset;
- page_size = qemu_ram_pagesize(block);
- /* Each page request should only be multiple page size of the ramblock */
- assert((entry->len % page_size) == 0);
- if (entry->len > page_size) {
- entry->len -= page_size;
- entry->offset += page_size;
+ if (entry->len > TARGET_PAGE_SIZE) {
+ entry->len -= TARGET_PAGE_SIZE;
+ entry->offset += TARGET_PAGE_SIZE;
} else {
memory_region_unref(block->mr);
QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
@@ -1579,9 +1575,6 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
migration_consume_urgent_request();
}
- trace_unqueue_page(block->idstr, *offset,
- test_bit((*offset >> TARGET_PAGE_BITS), block->bmap));
-
return block;
}
@@ -1956,8 +1949,30 @@ static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
{
RAMBlock *block;
ram_addr_t offset;
+ bool dirty;
+
+ do {
+ block = unqueue_page(rs, &offset);
+ /*
+ * We're sending this page, and since it's postcopy nothing else
+ * will dirty it, and we must make sure it doesn't get sent again
+ * even if this queue request was received after the background
+ * search already sent it.
+ */
+ if (block) {
+ unsigned long page;
+
+ page = offset >> TARGET_PAGE_BITS;
+ dirty = test_bit(page, block->bmap);
+ if (!dirty) {
+ trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
+ page);
+ } else {
+ trace_get_queued_page(block->idstr, (uint64_t)offset, page);
+ }
+ }
- block = unqueue_page(rs, &offset);
+ } while (block && !dirty);
if (!block) {
/*
diff --git a/migration/trace-events b/migration/trace-events
index 1aec580e92..09d61ed1f4 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -85,6 +85,8 @@ put_qlist_end(const char *field_name, const char *vmsd_name) "%s(%s)"
qemu_file_fclose(void) ""
# ram.c
+get_queued_page(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx"
+get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx"
migration_bitmap_sync_start(void) ""
migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64
migration_bitmap_clear_dirty(char *str, uint64_t start, uint64_t size, unsigned long page) "rb %s start 0x%"PRIx64" size 0x%"PRIx64" page 0x%lx"
@@ -110,7 +112,6 @@ ram_save_iterate_big_wait(uint64_t milliconds, int iterations) "big wait: %" PRI
ram_load_complete(int ret, uint64_t seq_iter) "exit_code %d seq iteration %" PRIu64
ram_write_tracking_ramblock_start(const char *block_id, size_t page_size, void *addr, size_t length) "%s: page_size: %zu addr: %p length: %zu"
ram_write_tracking_ramblock_stop(const char *block_id, size_t page_size, void *addr, size_t length) "%s: page_size: %zu addr: %p length: %zu"
-unqueue_page(char *block, uint64_t offset, bool dirty) "ramblock '%s' offset 0x%"PRIx64" dirty %d"
# multifd.c
multifd_new_send_channel_async(uint8_t id) "channel %u"
--
2.31.1

@ -0,0 +1,51 @@
From 733acef2caea0758edd74fb634b095ce09bf5914 Mon Sep 17 00:00:00 2001
From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Date: Mon, 9 May 2022 03:46:23 -0400
Subject: [PATCH 15/16] Revert "virtio-scsi: Reject scsi-cd if data plane
enabled [RHEL only]"
RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-MergeRequest: 91: Revert "virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only]"
RH-Commit: [1/1] 1af55d792bc9166e5c86272afe8093c76ab41bb4 (eesposit/qemu-kvm)
RH-Bugzilla: 1995710
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
This reverts commit 4e17b1126e.
Over time AioContext usage and coverage has increased, and now block
backend is capable of handling AioContext change upon eject and insert.
Therefore the above downstream-only commit is not necessary anymore,
and can be safely reverted.
X-downstream-only: true
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
hw/scsi/virtio-scsi.c | 9 ---------
1 file changed, 9 deletions(-)
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index 2450c9438c..db54d104be 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -937,15 +937,6 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev,
AioContext *old_context;
int ret;
- /* XXX: Remove this check once block backend is capable of handling
- * AioContext change upon eject/insert.
- * s->ctx is NULL if ioeventfd is off, s->ctx is qemu_get_aio_context() if
- * data plane is not used, both cases are safe for scsi-cd. */
- if (s->ctx && s->ctx != qemu_get_aio_context() &&
- object_dynamic_cast(OBJECT(dev), "scsi-cd")) {
- error_setg(errp, "scsi-cd is not supported by data plane");
- return;
- }
if (s->ctx && !s->dataplane_fenced) {
if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) {
return;
--
2.31.1

@ -0,0 +1,41 @@
From 3a0e9bb88e82cc76ca5efc0595ce94b5dc34749e Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Mon, 25 Apr 2022 13:42:46 +0800
Subject: [PATCH 1/2] configs/devices/aarch64-softmmu: Enable CONFIG_VIRTIO_MEM
RH-Author: Gavin Shan <gshan@redhat.com>
RH-MergeRequest: 80: Enable virtio-mem for aarch64
RH-Commit: [1/1] 1afbd08da6d7c860da8d617a0a932d3660514878 (gwshan/qemu-rhel-9)
RH-Bugzilla: 2044162
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2044162
This enables virtio-mem device on aarch64 since all needed commits
are ready.
b1b87327a9 hw/arm/virt: Support for virtio-mem-pci
1263615efe virtio-mem: Correct default THP size for ARM64
Signed-off-by: Gavin Shan <gshan@redhat.com>
---
configs/devices/aarch64-softmmu/aarch64-rh-devices.mak | 1 +
1 file changed, 1 insertion(+)
diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
index 5f6ee1de5b..187938573f 100644
--- a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
+++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
@@ -22,6 +22,7 @@ CONFIG_VFIO=y
CONFIG_VFIO_PCI=y
CONFIG_VIRTIO_MMIO=y
CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_MEM=y
CONFIG_XIO3130=y
CONFIG_NVDIMM=y
CONFIG_ACPI_APEI=y
--
2.35.1

@ -0,0 +1,101 @@
From e3cb8849862a9f0dd20f2913d540336a037d43c7 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Tue, 10 May 2022 17:10:19 +0200
Subject: [PATCH 07/16] coroutine: Rename qemu_coroutine_inc/dec_pool_size()
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 87: coroutine: Fix crashes due to too large pool batch size
RH-Commit: [1/2] 6389b11f70225f221784c270d9b90c1ea43ca8fb (kmwolf/centos-qemu-kvm)
RH-Bugzilla: 2079938
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
It's true that these functions currently affect the batch size in which
coroutines are reused (i.e. moved from the global release pool to the
allocation pool of a specific thread), but this is a bug and will be
fixed in a separate patch.
In fact, the comment in the header file already just promises that it
influences the pool size, so reflect this in the name of the functions.
As a nice side effect, the shorter function name makes some line
wrapping unnecessary.
Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20220510151020.105528-2-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 98e3ab35054b946f7c2aba5408822532b0920b53)
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
hw/block/virtio-blk.c | 6 ++----
include/qemu/coroutine.h | 6 +++---
util/qemu-coroutine.c | 4 ++--
3 files changed, 7 insertions(+), 9 deletions(-)
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 540c38f829..6a1cc41877 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -1215,8 +1215,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
for (i = 0; i < conf->num_queues; i++) {
virtio_add_queue(vdev, conf->queue_size, virtio_blk_handle_output);
}
- qemu_coroutine_increase_pool_batch_size(conf->num_queues * conf->queue_size
- / 2);
+ qemu_coroutine_inc_pool_size(conf->num_queues * conf->queue_size / 2);
virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err);
if (err != NULL) {
error_propagate(errp, err);
@@ -1253,8 +1252,7 @@ static void virtio_blk_device_unrealize(DeviceState *dev)
for (i = 0; i < conf->num_queues; i++) {
virtio_del_queue(vdev, i);
}
- qemu_coroutine_decrease_pool_batch_size(conf->num_queues * conf->queue_size
- / 2);
+ qemu_coroutine_dec_pool_size(conf->num_queues * conf->queue_size / 2);
qemu_del_vm_change_state_handler(s->change);
blockdev_mark_auto_del(s->blk);
virtio_cleanup(vdev);
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
index c828a95ee0..5b621d1295 100644
--- a/include/qemu/coroutine.h
+++ b/include/qemu/coroutine.h
@@ -334,12 +334,12 @@ void coroutine_fn yield_until_fd_readable(int fd);
/**
* Increase coroutine pool size
*/
-void qemu_coroutine_increase_pool_batch_size(unsigned int additional_pool_size);
+void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size);
/**
- * Devcrease coroutine pool size
+ * Decrease coroutine pool size
*/
-void qemu_coroutine_decrease_pool_batch_size(unsigned int additional_pool_size);
+void qemu_coroutine_dec_pool_size(unsigned int additional_pool_size);
#include "qemu/lockable.h"
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
index c03b2422ff..faca0ca97c 100644
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -205,12 +205,12 @@ AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co)
return co->ctx;
}
-void qemu_coroutine_increase_pool_batch_size(unsigned int additional_pool_size)
+void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size)
{
qatomic_add(&pool_batch_size, additional_pool_size);
}
-void qemu_coroutine_decrease_pool_batch_size(unsigned int removing_pool_size)
+void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size)
{
qatomic_sub(&pool_batch_size, removing_pool_size);
}
--
2.31.1

@ -0,0 +1,138 @@
From 345107bfd5537b51f34aaeb97d6161858bb6feee Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Tue, 10 May 2022 17:10:20 +0200
Subject: [PATCH 08/16] coroutine: Revert to constant batch size
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 87: coroutine: Fix crashes due to too large pool batch size
RH-Commit: [2/2] 8a8a39af873854cdc8333d1a70f3479a97c3ec7a (kmwolf/centos-qemu-kvm)
RH-Bugzilla: 2079938
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
Commit 4c41c69e changed the way the coroutine pool is sized because for
virtio-blk devices with a large queue size and heavy I/O, it was just
too small and caused coroutines to be deleted and reallocated soon
afterwards. The change made the size dynamic based on the number of
queues and the queue size of virtio-blk devices.
There are two important numbers here: Slightly simplified, when a
coroutine terminates, it is generally stored in the global release pool
up to a certain pool size, and if the pool is full, it is freed.
Conversely, when allocating a new coroutine, the coroutines in the
release pool are reused if the pool already has reached a certain
minimum size (the batch size), otherwise we allocate new coroutines.
The problem after commit 4c41c69e is that it not only increases the
maximum pool size (which is the intended effect), but also the batch
size for reusing coroutines (which is a bug). It means that in cases
with many devices and/or a large queue size (which defaults to the
number of vcpus for virtio-blk-pci), many thousand coroutines could be
sitting in the release pool without being reused.
This is not only a waste of memory and allocations, but it actually
makes the QEMU process likely to hit the vm.max_map_count limit on Linux
because each coroutine requires two mappings (its stack and the guard
page for the stack), causing it to abort() in qemu_alloc_stack() because
when the limit is hit, mprotect() starts to fail with ENOMEM.
In order to fix the problem, change the batch size back to 64 to avoid
uselessly accumulating coroutines in the release pool, but keep the
dynamic maximum pool size so that coroutines aren't freed too early
in heavy I/O scenarios.
Note that this fix doesn't strictly make it impossible to hit the limit,
but this would only happen if most of the coroutines are actually in use
at the same time, not just sitting in a pool. This is the same behaviour
as we already had before commit 4c41c69e. Fully preventing this would
require allowing qemu_coroutine_create() to return an error, but it
doesn't seem to be a scenario that people hit in practice.
Cc: qemu-stable@nongnu.org
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2079938
Fixes: 4c41c69e05fe28c0f95f8abd2ebf407e95a4f04b
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20220510151020.105528-3-kwolf@redhat.com>
Tested-by: Hiroki Narukawa <hnarukaw@yahoo-corp.jp>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 9ec7a59b5aad4b736871c378d30f5ef5ec51cb52)
Conflicts:
util/qemu-coroutine.c
Trivial merge conflict because we don't have commit ac387a08 downstream.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
util/qemu-coroutine.c | 22 ++++++++++++++--------
1 file changed, 14 insertions(+), 8 deletions(-)
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
index faca0ca97c..804f672e0a 100644
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -20,14 +20,20 @@
#include "qemu/coroutine_int.h"
#include "block/aio.h"
-/** Initial batch size is 64, and is increased on demand */
+/**
+ * The minimal batch size is always 64, coroutines from the release_pool are
+ * reused as soon as there are 64 coroutines in it. The maximum pool size starts
+ * with 64 and is increased on demand so that coroutines are not deleted even if
+ * they are not immediately reused.
+ */
enum {
- POOL_INITIAL_BATCH_SIZE = 64,
+ POOL_MIN_BATCH_SIZE = 64,
+ POOL_INITIAL_MAX_SIZE = 64,
};
/** Free list to speed up creation */
static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool);
-static unsigned int pool_batch_size = POOL_INITIAL_BATCH_SIZE;
+static unsigned int pool_max_size = POOL_INITIAL_MAX_SIZE;
static unsigned int release_pool_size;
static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool);
static __thread unsigned int alloc_pool_size;
@@ -51,7 +57,7 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque)
if (CONFIG_COROUTINE_POOL) {
co = QSLIST_FIRST(&alloc_pool);
if (!co) {
- if (release_pool_size > qatomic_read(&pool_batch_size)) {
+ if (release_pool_size > POOL_MIN_BATCH_SIZE) {
/* Slow path; a good place to register the destructor, too. */
if (!coroutine_pool_cleanup_notifier.notify) {
coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup;
@@ -88,12 +94,12 @@ static void coroutine_delete(Coroutine *co)
co->caller = NULL;
if (CONFIG_COROUTINE_POOL) {
- if (release_pool_size < qatomic_read(&pool_batch_size) * 2) {
+ if (release_pool_size < qatomic_read(&pool_max_size) * 2) {
QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next);
qatomic_inc(&release_pool_size);
return;
}
- if (alloc_pool_size < qatomic_read(&pool_batch_size)) {
+ if (alloc_pool_size < qatomic_read(&pool_max_size)) {
QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next);
alloc_pool_size++;
return;
@@ -207,10 +213,10 @@ AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co)
void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size)
{
- qatomic_add(&pool_batch_size, additional_pool_size);
+ qatomic_add(&pool_max_size, additional_pool_size);
}
void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size)
{
- qatomic_sub(&pool_batch_size, removing_pool_size);
+ qatomic_sub(&pool_max_size, removing_pool_size);
}
--
2.31.1

@ -0,0 +1,132 @@
From ffbd90e5f4eba620c7cd631b04f0ed31beb22ffa Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Tue, 17 May 2022 12:07:56 +0100
Subject: [PATCH 1/6] coroutine-ucontext: use QEMU_DEFINE_STATIC_CO_TLS()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
RH-MergeRequest: 89: coroutine: use coroutine TLS macros to protect thread-local variables
RH-Commit: [1/3] a9782fe8e919c4bd317b7e8744c7ff57d898add3 (stefanha/centos-stream-qemu-kvm)
RH-Bugzilla: 1952483
RH-Acked-by: Hanna Reitz <hreitz@redhat.com>
RH-Acked-by: Eric Blake <eblake@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
Thread-Local Storage variables cannot be used directly from coroutine
code because the compiler may optimize TLS variable accesses across
qemu_coroutine_yield() calls. When the coroutine is re-entered from
another thread the TLS variables from the old thread must no longer be
used.
Use QEMU_DEFINE_STATIC_CO_TLS() for the current and leader variables.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20220307153853.602859-2-stefanha@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 34145a307d849d0b6734d0222a7aa0bb9eef7407)
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
util/coroutine-ucontext.c | 38 ++++++++++++++++++++++++--------------
1 file changed, 24 insertions(+), 14 deletions(-)
diff --git a/util/coroutine-ucontext.c b/util/coroutine-ucontext.c
index 904b375192..127d5a13c8 100644
--- a/util/coroutine-ucontext.c
+++ b/util/coroutine-ucontext.c
@@ -25,6 +25,7 @@
#include "qemu/osdep.h"
#include <ucontext.h>
#include "qemu/coroutine_int.h"
+#include "qemu/coroutine-tls.h"
#ifdef CONFIG_VALGRIND_H
#include <valgrind/valgrind.h>
@@ -66,8 +67,8 @@ typedef struct {
/**
* Per-thread coroutine bookkeeping
*/
-static __thread CoroutineUContext leader;
-static __thread Coroutine *current;
+QEMU_DEFINE_STATIC_CO_TLS(Coroutine *, current);
+QEMU_DEFINE_STATIC_CO_TLS(CoroutineUContext, leader);
/*
* va_args to makecontext() must be type 'int', so passing
@@ -97,14 +98,15 @@ static inline __attribute__((always_inline))
void finish_switch_fiber(void *fake_stack_save)
{
#ifdef CONFIG_ASAN
+ CoroutineUContext *leaderp = get_ptr_leader();
const void *bottom_old;
size_t size_old;
__sanitizer_finish_switch_fiber(fake_stack_save, &bottom_old, &size_old);
- if (!leader.stack) {
- leader.stack = (void *)bottom_old;
- leader.stack_size = size_old;
+ if (!leaderp->stack) {
+ leaderp->stack = (void *)bottom_old;
+ leaderp->stack_size = size_old;
}
#endif
#ifdef CONFIG_TSAN
@@ -161,8 +163,10 @@ static void coroutine_trampoline(int i0, int i1)
/* Initialize longjmp environment and switch back the caller */
if (!sigsetjmp(self->env, 0)) {
- start_switch_fiber_asan(COROUTINE_YIELD, &fake_stack_save, leader.stack,
- leader.stack_size);
+ CoroutineUContext *leaderp = get_ptr_leader();
+
+ start_switch_fiber_asan(COROUTINE_YIELD, &fake_stack_save,
+ leaderp->stack, leaderp->stack_size);
start_switch_fiber_tsan(&fake_stack_save, self, true); /* true=caller */
siglongjmp(*(sigjmp_buf *)co->entry_arg, 1);
}
@@ -297,7 +301,7 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
int ret;
void *fake_stack_save = NULL;
- current = to_;
+ set_current(to_);
ret = sigsetjmp(from->env, 0);
if (ret == 0) {
@@ -315,18 +319,24 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
Coroutine *qemu_coroutine_self(void)
{
- if (!current) {
- current = &leader.base;
+ Coroutine *self = get_current();
+ CoroutineUContext *leaderp = get_ptr_leader();
+
+ if (!self) {
+ self = &leaderp->base;
+ set_current(self);
}
#ifdef CONFIG_TSAN
- if (!leader.tsan_co_fiber) {
- leader.tsan_co_fiber = __tsan_get_current_fiber();
+ if (!leaderp->tsan_co_fiber) {
+ leaderp->tsan_co_fiber = __tsan_get_current_fiber();
}
#endif
- return current;
+ return self;
}
bool qemu_in_coroutine(void)
{
- return current && current->caller;
+ Coroutine *self = get_current();
+
+ return self && self->caller;
}
--
2.31.1

@ -0,0 +1,139 @@
From 9c2e55d25fec6ffb21e344513b7dbeed7e21f641 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Tue, 17 May 2022 12:08:04 +0100
Subject: [PATCH 2/6] coroutine: use QEMU_DEFINE_STATIC_CO_TLS()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
RH-MergeRequest: 89: coroutine: use coroutine TLS macros to protect thread-local variables
RH-Commit: [2/3] 68a8847e406e2eace6ddc31b0c5676a60600d606 (stefanha/centos-stream-qemu-kvm)
RH-Bugzilla: 1952483
RH-Acked-by: Hanna Reitz <hreitz@redhat.com>
RH-Acked-by: Eric Blake <eblake@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
Thread-Local Storage variables cannot be used directly from coroutine
code because the compiler may optimize TLS variable accesses across
qemu_coroutine_yield() calls. When the coroutine is re-entered from
another thread the TLS variables from the old thread must no longer be
used.
Use QEMU_DEFINE_STATIC_CO_TLS() for the current and leader variables.
The alloc_pool QSLIST needs a typedef so the return value of
get_ptr_alloc_pool() can be stored in a local variable.
One example of why this code is necessary: a coroutine that yields
before calling qemu_coroutine_create() to create another coroutine is
affected by the TLS issue.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20220307153853.602859-3-stefanha@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit ac387a08a9c9f6b36757da912f0339c25f421f90)
Conflicts:
- Context conflicts due to commit 5411171c3ef4 ("coroutine: Revert to
constant batch size").
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
util/qemu-coroutine.c | 41 ++++++++++++++++++++++++-----------------
1 file changed, 24 insertions(+), 17 deletions(-)
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
index 804f672e0a..4a8bd63ef0 100644
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -18,6 +18,7 @@
#include "qemu/atomic.h"
#include "qemu/coroutine.h"
#include "qemu/coroutine_int.h"
+#include "qemu/coroutine-tls.h"
#include "block/aio.h"
/**
@@ -35,17 +36,20 @@ enum {
static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool);
static unsigned int pool_max_size = POOL_INITIAL_MAX_SIZE;
static unsigned int release_pool_size;
-static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool);
-static __thread unsigned int alloc_pool_size;
-static __thread Notifier coroutine_pool_cleanup_notifier;
+
+typedef QSLIST_HEAD(, Coroutine) CoroutineQSList;
+QEMU_DEFINE_STATIC_CO_TLS(CoroutineQSList, alloc_pool);
+QEMU_DEFINE_STATIC_CO_TLS(unsigned int, alloc_pool_size);
+QEMU_DEFINE_STATIC_CO_TLS(Notifier, coroutine_pool_cleanup_notifier);
static void coroutine_pool_cleanup(Notifier *n, void *value)
{
Coroutine *co;
Coroutine *tmp;
+ CoroutineQSList *alloc_pool = get_ptr_alloc_pool();
- QSLIST_FOREACH_SAFE(co, &alloc_pool, pool_next, tmp) {
- QSLIST_REMOVE_HEAD(&alloc_pool, pool_next);
+ QSLIST_FOREACH_SAFE(co, alloc_pool, pool_next, tmp) {
+ QSLIST_REMOVE_HEAD(alloc_pool, pool_next);
qemu_coroutine_delete(co);
}
}
@@ -55,27 +59,30 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque)
Coroutine *co = NULL;
if (CONFIG_COROUTINE_POOL) {
- co = QSLIST_FIRST(&alloc_pool);
+ CoroutineQSList *alloc_pool = get_ptr_alloc_pool();
+
+ co = QSLIST_FIRST(alloc_pool);
if (!co) {
if (release_pool_size > POOL_MIN_BATCH_SIZE) {
/* Slow path; a good place to register the destructor, too. */
- if (!coroutine_pool_cleanup_notifier.notify) {
- coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup;
- qemu_thread_atexit_add(&coroutine_pool_cleanup_notifier);
+ Notifier *notifier = get_ptr_coroutine_pool_cleanup_notifier();
+ if (!notifier->notify) {
+ notifier->notify = coroutine_pool_cleanup;
+ qemu_thread_atexit_add(notifier);
}
/* This is not exact; there could be a little skew between
* release_pool_size and the actual size of release_pool. But
* it is just a heuristic, it does not need to be perfect.
*/
- alloc_pool_size = qatomic_xchg(&release_pool_size, 0);
- QSLIST_MOVE_ATOMIC(&alloc_pool, &release_pool);
- co = QSLIST_FIRST(&alloc_pool);
+ set_alloc_pool_size(qatomic_xchg(&release_pool_size, 0));
+ QSLIST_MOVE_ATOMIC(alloc_pool, &release_pool);
+ co = QSLIST_FIRST(alloc_pool);
}
}
if (co) {
- QSLIST_REMOVE_HEAD(&alloc_pool, pool_next);
- alloc_pool_size--;
+ QSLIST_REMOVE_HEAD(alloc_pool, pool_next);
+ set_alloc_pool_size(get_alloc_pool_size() - 1);
}
}
@@ -99,9 +106,9 @@ static void coroutine_delete(Coroutine *co)
qatomic_inc(&release_pool_size);
return;
}
- if (alloc_pool_size < qatomic_read(&pool_max_size)) {
- QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next);
- alloc_pool_size++;
+ if (get_alloc_pool_size() < qatomic_read(&pool_max_size)) {
+ QSLIST_INSERT_HEAD(get_ptr_alloc_pool(), co, pool_next);
+ set_alloc_pool_size(get_alloc_pool_size() + 1);
return;
}
}
--
2.31.1

@ -0,0 +1,99 @@
From 336581e6e9ace3f1ddd24ad0a258db9785f9b0ed Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Tue, 17 May 2022 12:08:12 +0100
Subject: [PATCH 3/6] coroutine-win32: use QEMU_DEFINE_STATIC_CO_TLS()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
RH-MergeRequest: 89: coroutine: use coroutine TLS macros to protect thread-local variables
RH-Commit: [3/3] 55b35dfdae1bc7d6f614ac9f81a92f5c6431f713 (stefanha/centos-stream-qemu-kvm)
RH-Bugzilla: 1952483
RH-Acked-by: Hanna Reitz <hreitz@redhat.com>
RH-Acked-by: Eric Blake <eblake@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
Thread-Local Storage variables cannot be used directly from coroutine
code because the compiler may optimize TLS variable accesses across
qemu_coroutine_yield() calls. When the coroutine is re-entered from
another thread the TLS variables from the old thread must no longer be
used.
Use QEMU_DEFINE_STATIC_CO_TLS() for the current and leader variables.
I think coroutine-win32.c could get away with __thread because the
variables are only used in situations where either the stale value is
correct (current) or outside coroutine context (loading leader when
current is NULL). Due to the difficulty of being sure that this is
really safe in all scenarios it seems worth converting it anyway.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20220307153853.602859-4-stefanha@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit c1fe694357a328c807ae3cc6961c19e923448fcc)
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
util/coroutine-win32.c | 18 +++++++++++++-----
1 file changed, 13 insertions(+), 5 deletions(-)
diff --git a/util/coroutine-win32.c b/util/coroutine-win32.c
index de6bd4fd3e..c02a62c896 100644
--- a/util/coroutine-win32.c
+++ b/util/coroutine-win32.c
@@ -25,6 +25,7 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/coroutine_int.h"
+#include "qemu/coroutine-tls.h"
typedef struct
{
@@ -34,8 +35,8 @@ typedef struct
CoroutineAction action;
} CoroutineWin32;
-static __thread CoroutineWin32 leader;
-static __thread Coroutine *current;
+QEMU_DEFINE_STATIC_CO_TLS(CoroutineWin32, leader);
+QEMU_DEFINE_STATIC_CO_TLS(Coroutine *, current);
/* This function is marked noinline to prevent GCC from inlining it
* into coroutine_trampoline(). If we allow it to do that then it
@@ -52,7 +53,7 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
CoroutineWin32 *from = DO_UPCAST(CoroutineWin32, base, from_);
CoroutineWin32 *to = DO_UPCAST(CoroutineWin32, base, to_);
- current = to_;
+ set_current(to_);
to->action = action;
SwitchToFiber(to->fiber);
@@ -89,14 +90,21 @@ void qemu_coroutine_delete(Coroutine *co_)
Coroutine *qemu_coroutine_self(void)
{
+ Coroutine *current = get_current();
+
if (!current) {
- current = &leader.base;
- leader.fiber = ConvertThreadToFiber(NULL);
+ CoroutineWin32 *leader = get_ptr_leader();
+
+ current = &leader->base;
+ set_current(current);
+ leader->fiber = ConvertThreadToFiber(NULL);
}
return current;
}
bool qemu_in_coroutine(void)
{
+ Coroutine *current = get_current();
+
return current && current->caller;
}
--
2.31.1

@ -0,0 +1,179 @@
From 8a12049e97149056f61f7748d9869606d282d16e Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Wed, 11 May 2022 18:01:35 +0800
Subject: [PATCH 06/16] hw/acpi/aml-build: Use existing CPU topology to build
PPTT table
RH-Author: Gavin Shan <gshan@redhat.com>
RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology
RH-Commit: [6/6] 53fa376531c204cf706cc1a7a0499019756106cb (gwshan/qemu-rhel-9)
RH-Bugzilla: 2041823
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Andrew Jones <drjones@redhat.com>
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823
When the PPTT table is built, the CPU topology is re-calculated, but
it's unecessary because the CPU topology has been populated in
virt_possible_cpu_arch_ids() on arm/virt machine.
This reworks build_pptt() to avoid by reusing the existing IDs in
ms->possible_cpus. Currently, the only user of build_pptt() is
arm/virt machine.
Signed-off-by: Gavin Shan <gshan@redhat.com>
Tested-by: Yanan Wang <wangyanan55@huawei.com>
Reviewed-by: Yanan Wang <wangyanan55@huawei.com>
Acked-by: Igor Mammedov <imammedo@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Message-id: 20220503140304.855514-7-gshan@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit ae9141d4a3265553503bf07d3574b40f84615a34)
Signed-off-by: Gavin Shan <gshan@redhat.com>
---
hw/acpi/aml-build.c | 111 +++++++++++++++++++-------------------------
1 file changed, 48 insertions(+), 63 deletions(-)
diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index 4086879ebf..e6bfac95c7 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -2002,86 +2002,71 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
const char *oem_id, const char *oem_table_id)
{
MachineClass *mc = MACHINE_GET_CLASS(ms);
- GQueue *list = g_queue_new();
- guint pptt_start = table_data->len;
- guint parent_offset;
- guint length, i;
- int uid = 0;
- int socket;
+ CPUArchIdList *cpus = ms->possible_cpus;
+ int64_t socket_id = -1, cluster_id = -1, core_id = -1;
+ uint32_t socket_offset = 0, cluster_offset = 0, core_offset = 0;
+ uint32_t pptt_start = table_data->len;
+ int n;
AcpiTable table = { .sig = "PPTT", .rev = 2,
.oem_id = oem_id, .oem_table_id = oem_table_id };
acpi_table_begin(&table, table_data);
- for (socket = 0; socket < ms->smp.sockets; socket++) {
- g_queue_push_tail(list,
- GUINT_TO_POINTER(table_data->len - pptt_start));
- build_processor_hierarchy_node(
- table_data,
- /*
- * Physical package - represents the boundary
- * of a physical package
- */
- (1 << 0),
- 0, socket, NULL, 0);
- }
-
- if (mc->smp_props.clusters_supported) {
- length = g_queue_get_length(list);
- for (i = 0; i < length; i++) {
- int cluster;
-
- parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
- for (cluster = 0; cluster < ms->smp.clusters; cluster++) {
- g_queue_push_tail(list,
- GUINT_TO_POINTER(table_data->len - pptt_start));
- build_processor_hierarchy_node(
- table_data,
- (0 << 0), /* not a physical package */
- parent_offset, cluster, NULL, 0);
- }
+ /*
+ * This works with the assumption that cpus[n].props.*_id has been
+ * sorted from top to down levels in mc->possible_cpu_arch_ids().
+ * Otherwise, the unexpected and duplicated containers will be
+ * created.
+ */
+ for (n = 0; n < cpus->len; n++) {
+ if (cpus->cpus[n].props.socket_id != socket_id) {
+ assert(cpus->cpus[n].props.socket_id > socket_id);
+ socket_id = cpus->cpus[n].props.socket_id;
+ cluster_id = -1;
+ core_id = -1;
+ socket_offset = table_data->len - pptt_start;
+ build_processor_hierarchy_node(table_data,
+ (1 << 0), /* Physical package */
+ 0, socket_id, NULL, 0);
}
- }
- length = g_queue_get_length(list);
- for (i = 0; i < length; i++) {
- int core;
-
- parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
- for (core = 0; core < ms->smp.cores; core++) {
- if (ms->smp.threads > 1) {
- g_queue_push_tail(list,
- GUINT_TO_POINTER(table_data->len - pptt_start));
- build_processor_hierarchy_node(
- table_data,
- (0 << 0), /* not a physical package */
- parent_offset, core, NULL, 0);
- } else {
- build_processor_hierarchy_node(
- table_data,
- (1 << 1) | /* ACPI Processor ID valid */
- (1 << 3), /* Node is a Leaf */
- parent_offset, uid++, NULL, 0);
+ if (mc->smp_props.clusters_supported) {
+ if (cpus->cpus[n].props.cluster_id != cluster_id) {
+ assert(cpus->cpus[n].props.cluster_id > cluster_id);
+ cluster_id = cpus->cpus[n].props.cluster_id;
+ core_id = -1;
+ cluster_offset = table_data->len - pptt_start;
+ build_processor_hierarchy_node(table_data,
+ (0 << 0), /* Not a physical package */
+ socket_offset, cluster_id, NULL, 0);
}
+ } else {
+ cluster_offset = socket_offset;
}
- }
- length = g_queue_get_length(list);
- for (i = 0; i < length; i++) {
- int thread;
+ if (ms->smp.threads == 1) {
+ build_processor_hierarchy_node(table_data,
+ (1 << 1) | /* ACPI Processor ID valid */
+ (1 << 3), /* Node is a Leaf */
+ cluster_offset, n, NULL, 0);
+ } else {
+ if (cpus->cpus[n].props.core_id != core_id) {
+ assert(cpus->cpus[n].props.core_id > core_id);
+ core_id = cpus->cpus[n].props.core_id;
+ core_offset = table_data->len - pptt_start;
+ build_processor_hierarchy_node(table_data,
+ (0 << 0), /* Not a physical package */
+ cluster_offset, core_id, NULL, 0);
+ }
- parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
- for (thread = 0; thread < ms->smp.threads; thread++) {
- build_processor_hierarchy_node(
- table_data,
+ build_processor_hierarchy_node(table_data,
(1 << 1) | /* ACPI Processor ID valid */
(1 << 2) | /* Processor is a Thread */
(1 << 3), /* Node is a Leaf */
- parent_offset, uid++, NULL, 0);
+ core_offset, n, NULL, 0);
}
}
- g_queue_free(list);
acpi_table_end(linker, &table);
}
--
2.31.1

@ -0,0 +1,74 @@
From 3b05d3464945295112b5d02d142422f524a52054 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Wed, 11 May 2022 18:01:35 +0800
Subject: [PATCH 03/16] hw/arm/virt: Consider SMP configuration in CPU topology
RH-Author: Gavin Shan <gshan@redhat.com>
RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology
RH-Commit: [3/6] 7125b41f038c2b1cb33377d0ef1222f1ea42b648 (gwshan/qemu-rhel-9)
RH-Bugzilla: 2041823
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Andrew Jones <drjones@redhat.com>
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823
Currently, the SMP configuration isn't considered when the CPU
topology is populated. In this case, it's impossible to provide
the default CPU-to-NUMA mapping or association based on the socket
ID of the given CPU.
This takes account of SMP configuration when the CPU topology
is populated. The die ID for the given CPU isn't assigned since
it's not supported on arm/virt machine. Besides, the used SMP
configuration in qtest/numa-test/aarch64_numa_cpu() is corrcted
to avoid testing failure
Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Yanan Wang <wangyanan55@huawei.com>
Acked-by: Igor Mammedov <imammedo@redhat.com>
Message-id: 20220503140304.855514-4-gshan@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit c9ec4cb5e4936f980889e717524e73896b0200ed)
Signed-off-by: Gavin Shan <gshan@redhat.com>
---
hw/arm/virt.c | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 8be12e121d..a87c8d396a 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2553,6 +2553,7 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
int n;
unsigned int max_cpus = ms->smp.max_cpus;
VirtMachineState *vms = VIRT_MACHINE(ms);
+ MachineClass *mc = MACHINE_GET_CLASS(vms);
if (ms->possible_cpus) {
assert(ms->possible_cpus->len == max_cpus);
@@ -2566,8 +2567,20 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
ms->possible_cpus->cpus[n].type = ms->cpu_type;
ms->possible_cpus->cpus[n].arch_id =
virt_cpu_mp_affinity(vms, n);
+
+ assert(!mc->smp_props.dies_supported);
+ ms->possible_cpus->cpus[n].props.has_socket_id = true;
+ ms->possible_cpus->cpus[n].props.socket_id =
+ n / (ms->smp.clusters * ms->smp.cores * ms->smp.threads);
+ ms->possible_cpus->cpus[n].props.has_cluster_id = true;
+ ms->possible_cpus->cpus[n].props.cluster_id =
+ (n / (ms->smp.cores * ms->smp.threads)) % ms->smp.clusters;
+ ms->possible_cpus->cpus[n].props.has_core_id = true;
+ ms->possible_cpus->cpus[n].props.core_id =
+ (n / ms->smp.threads) % ms->smp.cores;
ms->possible_cpus->cpus[n].props.has_thread_id = true;
- ms->possible_cpus->cpus[n].props.thread_id = n;
+ ms->possible_cpus->cpus[n].props.thread_id =
+ n % ms->smp.threads;
}
return ms->possible_cpus;
}
--
2.31.1

@ -0,0 +1,88 @@
From 14e49ad3b98f01c1ad6fe456469d40a96a43dc3c Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Wed, 11 May 2022 18:01:35 +0800
Subject: [PATCH 05/16] hw/arm/virt: Fix CPU's default NUMA node ID
RH-Author: Gavin Shan <gshan@redhat.com>
RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology
RH-Commit: [5/6] 5336f62bc0c53c0417db1d71ef89544907bc28c0 (gwshan/qemu-rhel-9)
RH-Bugzilla: 2041823
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Andrew Jones <drjones@redhat.com>
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823
When CPU-to-NUMA association isn't explicitly provided by users,
the default one is given by mc->get_default_cpu_node_id(). However,
the CPU topology isn't fully considered in the default association
and this causes CPU topology broken warnings on booting Linux guest.
For example, the following warning messages are observed when the
Linux guest is booted with the following command lines.
/home/gavin/sandbox/qemu.main/build/qemu-system-aarch64 \
-accel kvm -machine virt,gic-version=host \
-cpu host \
-smp 6,sockets=2,cores=3,threads=1 \
-m 1024M,slots=16,maxmem=64G \
-object memory-backend-ram,id=mem0,size=128M \
-object memory-backend-ram,id=mem1,size=128M \
-object memory-backend-ram,id=mem2,size=128M \
-object memory-backend-ram,id=mem3,size=128M \
-object memory-backend-ram,id=mem4,size=128M \
-object memory-backend-ram,id=mem4,size=384M \
-numa node,nodeid=0,memdev=mem0 \
-numa node,nodeid=1,memdev=mem1 \
-numa node,nodeid=2,memdev=mem2 \
-numa node,nodeid=3,memdev=mem3 \
-numa node,nodeid=4,memdev=mem4 \
-numa node,nodeid=5,memdev=mem5
:
alternatives: patching kernel code
BUG: arch topology borken
the CLS domain not a subset of the MC domain
<the above error log repeats>
BUG: arch topology borken
the DIE domain not a subset of the NODE domain
With current implementation of mc->get_default_cpu_node_id(),
CPU#0 to CPU#5 are associated with NODE#0 to NODE#5 separately.
That's incorrect because CPU#0/1/2 should be associated with same
NUMA node because they're seated in same socket.
This fixes the issue by considering the socket ID when the default
CPU-to-NUMA association is provided in virt_possible_cpu_arch_ids().
With this applied, no more CPU topology broken warnings are seen
from the Linux guest. The 6 CPUs are associated with NODE#0/1, but
there are no CPUs associated with NODE#2/3/4/5.
Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Yanan Wang <wangyanan55@huawei.com>
Message-id: 20220503140304.855514-6-gshan@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 4c18bc192386dfbca530e7f550e0992df657818a)
Signed-off-by: Gavin Shan <gshan@redhat.com>
---
hw/arm/virt.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index a87c8d396a..95d012d6eb 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2545,7 +2545,9 @@ virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index)
static int64_t virt_get_default_cpu_node_id(const MachineState *ms, int idx)
{
- return idx % ms->numa_state->num_nodes;
+ int64_t socket_id = ms->possible_cpus->cpus[idx].props.socket_id;
+
+ return socket_id % ms->numa_state->num_nodes;
}
static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
--
2.31.1

@ -0,0 +1,56 @@
From e25c40735d2f022c07481b548d20476222006657 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Wed, 4 May 2022 11:11:54 +0200
Subject: [PATCH 2/5] hw/arm/virt: Fix missing initialization in
instance/class_init()
RH-Author: Eric Auger <eric.auger@redhat.com>
RH-MergeRequest: 82: hw/arm/virt: Remove the dtb-kaslr-seed machine option
RH-Commit: [2/2] 22cbbfc30cf57a09b8acfb25d8a4dff2754c630c (eauger1/centos-qemu-kvm)
RH-Bugzilla: 2046029
RH-Acked-by: Gavin Shan <gshan@redhat.com>
RH-Acked-by: Andrew Jones <drjones@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2046029
Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45133161
Upstream Status: RHEL-only
Tested: Boot RHEL guest and check migration from 8.6 to 9.1
(with custom additions)
During the 7.0 rebase, the initialization of highmem_mmio and
highmem_redists was forgotten in rhel_virt_instance_init().
Fix it to match virt_instance_init() code.
Also mc->smp_props.clusters_supported was missing in
rhel_machine_class_init().
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
hw/arm/virt.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index bde4f77994..8be12e121d 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -3286,6 +3286,7 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data)
hc->unplug_request = virt_machine_device_unplug_request_cb;
hc->unplug = virt_machine_device_unplug_cb;
mc->nvdimm_supported = true;
+ mc->smp_props.clusters_supported = true;
mc->auto_enable_numa_with_memhp = true;
mc->auto_enable_numa_with_memdev = true;
mc->default_ram_id = "mach-virt.ram";
@@ -3366,6 +3367,8 @@ static void rhel_virt_instance_init(Object *obj)
vms->gic_version = VIRT_GIC_VERSION_NOSEL;
vms->highmem_ecam = !vmc->no_highmem_ecam;
+ vms->highmem_mmio = true;
+ vms->highmem_redists = true;
if (vmc->no_its) {
vms->its = false;
--
2.31.1

@ -0,0 +1,76 @@
From 69f771c3dc641431f3e98497cbd3832edb69284f Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Tue, 3 May 2022 08:56:52 +0200
Subject: [PATCH 1/5] hw/arm/virt: Remove the dtb-kaslr-seed machine option
RH-Author: Eric Auger <eric.auger@redhat.com>
RH-MergeRequest: 82: hw/arm/virt: Remove the dtb-kaslr-seed machine option
RH-Commit: [1/2] a89dcd7f22e04ae39de99795d3f34cdd0b831bc0 (eauger1/centos-qemu-kvm)
RH-Bugzilla: 2046029
RH-Acked-by: Gavin Shan <gshan@redhat.com>
RH-Acked-by: Andrew Jones <drjones@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2046029
Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45133161
Upstream Status: RHEL-only
Tested: Boot RHEL guest and check the option is not available
In RHEL we do not want to expose the dtb-kaslr-seed virt machine
option. Indeed the default 'on' value matches our need as
random data in the DTB does not cause any boot failure and we
want to support KASLR for the guest.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
---
hw/arm/virt.c | 11 +++--------
1 file changed, 3 insertions(+), 8 deletions(-)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index e06862d22a..bde4f77994 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2350,6 +2350,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp)
vms->its = value;
}
+#if 0 /* Disabled for Red Hat Enterprise Linux */
static bool virt_get_dtb_kaslr_seed(Object *obj, Error **errp)
{
VirtMachineState *vms = VIRT_MACHINE(obj);
@@ -2363,6 +2364,7 @@ static void virt_set_dtb_kaslr_seed(Object *obj, bool value, Error **errp)
vms->dtb_kaslr_seed = value;
}
+#endif /* disabled for RHEL */
static char *virt_get_oem_id(Object *obj, Error **errp)
{
@@ -3346,13 +3348,6 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data)
"Override the default value of field OEM Table ID "
"in ACPI table header."
"The string may be up to 8 bytes in size");
-
- object_class_property_add_bool(oc, "dtb-kaslr-seed",
- virt_get_dtb_kaslr_seed,
- virt_set_dtb_kaslr_seed);
- object_class_property_set_description(oc, "dtb-kaslr-seed",
- "Set off to disable passing of kaslr-seed "
- "dtb node to guest");
}
static void rhel_virt_instance_init(Object *obj)
@@ -3397,7 +3392,7 @@ static void rhel_virt_instance_init(Object *obj)
/* MTE is disabled by default and non-configurable for RHEL */
vms->mte = false;
- /* Supply a kaslr-seed by default */
+ /* Supply a kaslr-seed by default and non-configurable for RHEL */
vms->dtb_kaslr_seed = true;
vms->irqmap = a15irqmap;
--
2.31.1

@ -0,0 +1,96 @@
From 6ee4a8718dcce2d6da43ee200534b75baf1d7bbe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= <philmd@redhat.com>
Date: Thu, 18 Nov 2021 12:57:32 +0100
Subject: [PATCH 16/17] hw/block/fdc: Prevent end-of-track overrun
(CVE-2021-3507)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Jon Maloy <jmaloy@redhat.com>
RH-MergeRequest: 107: hw/block/fdc: Prevent end-of-track overrun (CVE-2021-3507)
RH-Commit: [1/2] 9ffc5290348884d20b894fa79f4d0c8089247f8b (mrezanin/centos-src-qemu-kvm)
RH-Bugzilla: 1951522
RH-Acked-by: Hanna Reitz <hreitz@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
Per the 82078 datasheet, if the end-of-track (EOT byte in
the FIFO) is more than the number of sectors per side, the
command is terminated unsuccessfully:
* 5.2.5 DATA TRANSFER TERMINATION
The 82078 supports terminal count explicitly through
the TC pin and implicitly through the underrun/over-
run and end-of-track (EOT) functions. For full sector
transfers, the EOT parameter can define the last
sector to be transferred in a single or multisector
transfer. If the last sector to be transferred is a par-
tial sector, the host can stop transferring the data in
mid-sector, and the 82078 will continue to complete
the sector as if a hardware TC was received. The
only difference between these implicit functions and
TC is that they return "abnormal termination" result
status. Such status indications can be ignored if they
were expected.
* 6.1.3 READ TRACK
This command terminates when the EOT specified
number of sectors have been read. If the 82078
does not find an I D Address Mark on the diskette
after the second· occurrence of a pulse on the
INDX# pin, then it sets the IC code in Status Regis-
ter 0 to "01" (Abnormal termination), sets the MA bit
in Status Register 1 to "1", and terminates the com-
mand.
* 6.1.6 VERIFY
Refer to Table 6-6 and Table 6-7 for information
concerning the values of MT and EC versus SC and
EOT value.
* Table 6·6. Result Phase Table
* Table 6-7. Verify Command Result Phase Table
Fix by aborting the transfer when EOT > # Sectors Per Side.
Cc: qemu-stable@nongnu.org
Cc: Hervé Poussineau <hpoussin@reactos.org>
Fixes: baca51faff0 ("floppy driver: disk geometry auto detect")
Reported-by: Alexander Bulekov <alxndr@bu.edu>
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/339
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-Id: <20211118115733.4038610-2-philmd@redhat.com>
Reviewed-by: Hanna Reitz <hreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit defac5e2fbddf8423a354ff0454283a2115e1367)
Signed-off-by: Jon Maloy <jmaloy@redhat.com>
---
hw/block/fdc.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index ca1776121f..6481ec0cfb 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -1532,6 +1532,14 @@ static void fdctrl_start_transfer(FDCtrl *fdctrl, int direction)
int tmp;
fdctrl->data_len = 128 << (fdctrl->fifo[5] > 7 ? 7 : fdctrl->fifo[5]);
tmp = (fdctrl->fifo[6] - ks + 1);
+ if (tmp < 0) {
+ FLOPPY_DPRINTF("invalid EOT: %d\n", tmp);
+ fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM, FD_SR1_MA, 0x00);
+ fdctrl->fifo[3] = kt;
+ fdctrl->fifo[4] = kh;
+ fdctrl->fifo[5] = ks;
+ return;
+ }
if (fdctrl->fifo[0] & 0x80)
tmp += fdctrl->fifo[6];
fdctrl->data_len *= tmp;
--
2.31.1

@ -0,0 +1,95 @@
From 4dad0e9abbc843fba4e5fee6e7aa1b0db13f5898 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= <eperezma@redhat.com>
Date: Thu, 21 Jul 2022 15:27:35 +0200
Subject: [PATCH 03/32] hw/virtio: Replace g_memdup() by g_memdup2()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Eugenio Pérez <eperezma@redhat.com>
RH-MergeRequest: 108: Net Control Virtqueue shadow Support
RH-Commit: [3/27] ae196903eb1a7aebbf999100e997cf82e5024cb6 (eperezmartin/qemu-kvm)
RH-Bugzilla: 1939363
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Acked-by: Cindy Lu <lulu@redhat.com>
RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
Bugzilla: https://bugzilla.redhat.com/1939363
Upstream Status: git://git.qemu.org/qemu.git
commit d792199de55ca5cb5334016884039c740290b5c7
Author: Philippe Mathieu-Daudé <f4bug@amsat.org>
Date: Thu May 12 19:57:46 2022 +0200
hw/virtio: Replace g_memdup() by g_memdup2()
Per https://discourse.gnome.org/t/port-your-module-from-g-memdup-to-g-memdup2-now/5538
The old API took the size of the memory to duplicate as a guint,
whereas most memory functions take memory sizes as a gsize. This
made it easy to accidentally pass a gsize to g_memdup(). For large
values, that would lead to a silent truncation of the size from 64
to 32 bits, and result in a heap area being returned which is
significantly smaller than what the caller expects. This can likely
be exploited in various modules to cause a heap buffer overflow.
Replace g_memdup() by the safer g_memdup2() wrapper.
Acked-by: Jason Wang <jasowang@redhat.com>
Acked-by: Eugenio Pérez <eperezma@redhat.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-Id: <20220512175747.142058-6-eperezma@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
hw/net/virtio-net.c | 3 ++-
hw/virtio/virtio-crypto.c | 6 +++---
2 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 099e65036d..633de61513 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1458,7 +1458,8 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
}
iov_cnt = elem->out_num;
- iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
+ iov2 = iov = g_memdup2(elem->out_sg,
+ sizeof(struct iovec) * elem->out_num);
s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
if (s != sizeof(ctrl)) {
diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c
index dcd80b904d..0e31e3cc04 100644
--- a/hw/virtio/virtio-crypto.c
+++ b/hw/virtio/virtio-crypto.c
@@ -242,7 +242,7 @@ static void virtio_crypto_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
}
out_num = elem->out_num;
- out_iov_copy = g_memdup(elem->out_sg, sizeof(out_iov[0]) * out_num);
+ out_iov_copy = g_memdup2(elem->out_sg, sizeof(out_iov[0]) * out_num);
out_iov = out_iov_copy;
in_num = elem->in_num;
@@ -605,11 +605,11 @@ virtio_crypto_handle_request(VirtIOCryptoReq *request)
}
out_num = elem->out_num;
- out_iov_copy = g_memdup(elem->out_sg, sizeof(out_iov[0]) * out_num);
+ out_iov_copy = g_memdup2(elem->out_sg, sizeof(out_iov[0]) * out_num);
out_iov = out_iov_copy;
in_num = elem->in_num;
- in_iov_copy = g_memdup(elem->in_sg, sizeof(in_iov[0]) * in_num);
+ in_iov_copy = g_memdup2(elem->in_sg, sizeof(in_iov[0]) * in_num);
in_iov = in_iov_copy;
if (unlikely(iov_to_buf(out_iov, out_num, 0, &req, sizeof(req))
--
2.31.1

@ -0,0 +1,68 @@
From 8452a7925e18d6d57e2ac787b192097d4136b104 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Thu, 18 Aug 2022 17:01:13 +0200
Subject: [PATCH 2/2] i386: do kvm_put_msr_feature_control() first thing when
vCPU is reset
RH-Author: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-MergeRequest: 217: i386: fix 'system_reset' when the VM is in VMX root operation
RH-Bugzilla: 2117546
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Commit: [2/2] 08d5992691ba70561ce0a5b7f4504618f96a2ee6
kvm_put_sregs2() fails to reset 'locked' CR4/CR0 bits upon vCPU reset when
it is in VMX root operation. Do kvm_put_msr_feature_control() before
kvm_put_sregs2() to (possibly) kick vCPU out of VMX root operation. It also
seems logical to do kvm_put_msr_feature_control() before
kvm_put_nested_state() and not after it, especially when 'real' nested
state is set.
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20220818150113.479917-3-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 45ed68a1a3a19754ade954d75a3c9d13ff560e5c)
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
target/i386/kvm/kvm.c | 17 ++++++++++++-----
1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 9feb98fe0b..ef70e2c85f 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -4356,6 +4356,18 @@ int kvm_arch_put_registers(CPUState *cpu, int level)
assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
+ /*
+ * Put MSR_IA32_FEATURE_CONTROL first, this ensures the VM gets out of VMX
+ * root operation upon vCPU reset. kvm_put_msr_feature_control() should also
+ * preceed kvm_put_nested_state() when 'real' nested state is set.
+ */
+ if (level >= KVM_PUT_RESET_STATE) {
+ ret = kvm_put_msr_feature_control(x86_cpu);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
/* must be before kvm_put_nested_state so that EFER.SVME is set */
ret = has_sregs2 ? kvm_put_sregs2(x86_cpu) : kvm_put_sregs(x86_cpu);
if (ret < 0) {
@@ -4367,11 +4379,6 @@ int kvm_arch_put_registers(CPUState *cpu, int level)
if (ret < 0) {
return ret;
}
-
- ret = kvm_put_msr_feature_control(x86_cpu);
- if (ret < 0) {
- return ret;
- }
}
if (level == KVM_PUT_FULL_STATE) {
--
2.31.1

@ -0,0 +1,95 @@
From b84bb71165c97b475548edc1c07decccca53cf16 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Thu, 18 Aug 2022 17:01:12 +0200
Subject: [PATCH 1/2] i386: reset KVM nested state upon CPU reset
RH-Author: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-MergeRequest: 217: i386: fix 'system_reset' when the VM is in VMX root operation
RH-Bugzilla: 2117546
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Commit: [1/2] b329f053a027761f50187e4ca7fd6b50ac32d2ad
Make sure env->nested_state is cleaned up when a vCPU is reset, it may
be stale after an incoming migration, kvm_arch_put_registers() may
end up failing or putting vCPU in a weird state.
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20220818150113.479917-2-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 3cafdb67504a34a0305260f0c86a73d5a3fb000b)
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
target/i386/kvm/kvm.c | 37 +++++++++++++++++++++++++++----------
1 file changed, 27 insertions(+), 10 deletions(-)
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 6d1e009443..9feb98fe0b 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -1617,6 +1617,30 @@ static void kvm_init_xsave(CPUX86State *env)
env->xsave_buf_len);
}
+static void kvm_init_nested_state(CPUX86State *env)
+{
+ struct kvm_vmx_nested_state_hdr *vmx_hdr;
+ uint32_t size;
+
+ if (!env->nested_state) {
+ return;
+ }
+
+ size = env->nested_state->size;
+
+ memset(env->nested_state, 0, size);
+ env->nested_state->size = size;
+
+ if (cpu_has_vmx(env)) {
+ env->nested_state->format = KVM_STATE_NESTED_FORMAT_VMX;
+ vmx_hdr = &env->nested_state->hdr.vmx;
+ vmx_hdr->vmxon_pa = -1ull;
+ vmx_hdr->vmcs12_pa = -1ull;
+ } else if (cpu_has_svm(env)) {
+ env->nested_state->format = KVM_STATE_NESTED_FORMAT_SVM;
+ }
+}
+
int kvm_arch_init_vcpu(CPUState *cs)
{
struct {
@@ -2044,19 +2068,10 @@ int kvm_arch_init_vcpu(CPUState *cs)
assert(max_nested_state_len >= offsetof(struct kvm_nested_state, data));
if (cpu_has_vmx(env) || cpu_has_svm(env)) {
- struct kvm_vmx_nested_state_hdr *vmx_hdr;
-
env->nested_state = g_malloc0(max_nested_state_len);
env->nested_state->size = max_nested_state_len;
- if (cpu_has_vmx(env)) {
- env->nested_state->format = KVM_STATE_NESTED_FORMAT_VMX;
- vmx_hdr = &env->nested_state->hdr.vmx;
- vmx_hdr->vmxon_pa = -1ull;
- vmx_hdr->vmcs12_pa = -1ull;
- } else {
- env->nested_state->format = KVM_STATE_NESTED_FORMAT_SVM;
- }
+ kvm_init_nested_state(env);
}
}
@@ -2121,6 +2136,8 @@ void kvm_arch_reset_vcpu(X86CPU *cpu)
/* enabled by default */
env->poll_control_msr = 1;
+ kvm_init_nested_state(env);
+
sev_es_set_reset_vector(CPU(cpu));
}
--
2.31.1

@ -0,0 +1,52 @@
From 447bca651c9156d7aba6b7495c75f19b5e4ed53f Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Thu, 21 Apr 2022 16:24:35 +0200
Subject: [PATCH 07/16] iotests/108: Fix when missing user_allow_other
RH-Author: Hanna Reitz <hreitz@redhat.com>
RH-MergeRequest: 96: qcow2: Improve refcount structure rebuilding
RH-Commit: [4/4] a51ab8606fc9d8dea2b6539f4e795d5813892a5c (hreitz/qemu-kvm-c-9-s)
RH-Bugzilla: 2072379
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Eric Blake <eblake@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
FUSE exports' allow-other option defaults to "auto", which means that it
will try passing allow_other as a mount option, and fall back to not
using it when an error occurs. We make no effort to hide fusermount's
error message (because it would be difficult, and because users might
want to know about the fallback occurring), and so when allow_other does
not work (primarily when /etc/fuse.conf does not contain
user_allow_other), this error message will appear and break the
reference output.
We do not need allow_other here, though, so we can just pass
allow-other=off to fix that.
Reported-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20220421142435.569600-1-hreitz@redhat.com>
Tested-by: Markus Armbruster <armbru@redhat.com>
Tested-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit 348a0740afc5b313599533eb69bbb2b95d2f1bba)
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
tests/qemu-iotests/108 | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/qemu-iotests/108 b/tests/qemu-iotests/108
index a3090e2875..4681c7c769 100755
--- a/tests/qemu-iotests/108
+++ b/tests/qemu-iotests/108
@@ -326,7 +326,7 @@ else
$QSD \
--blockdev file,node-name=export-node,filename="$TEST_IMG" \
- --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off \
+ --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off,allow-other=off \
--pidfile "$TEST_DIR/qsd.pid" \
&
--
2.31.1

@ -0,0 +1,445 @@
From ed69e01352b5e9a06173daab53bfa373c8535732 Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Tue, 5 Apr 2022 15:46:51 +0200
Subject: [PATCH 05/16] iotests/108: Test new refcount rebuild algorithm
RH-Author: Hanna Reitz <hreitz@redhat.com>
RH-MergeRequest: 96: qcow2: Improve refcount structure rebuilding
RH-Commit: [2/4] b68310a9fee8465dd3f568c8e867e1b7ae52bdaf (hreitz/qemu-kvm-c-9-s)
RH-Bugzilla: 2072379
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Eric Blake <eblake@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
One clear problem with how qcow2's refcount structure rebuild algorithm
used to be before "qcow2: Improve refcount structure rebuilding" was
that it is prone to failure for qcow2 images on block devices: There is
generally unused space after the actual image, and if that exceeds what
one refblock covers, the old algorithm would invariably write the
reftable past the block device's end, which cannot work. The new
algorithm does not have this problem.
Test it with three tests:
(1) Create an image with more empty space at the end than what one
refblock covers, see whether rebuilding the refcount structures
results in a change in the image file length. (It should not.)
(2) Leave precisely enough space somewhere at the beginning of the image
for the new reftable (and the refblock for that place), see whether
the new algorithm puts the reftable there. (It should.)
(3) Test the original problem: Create (something like) a block device
with a fixed size, then create a qcow2 image in there, write some
data, and then have qemu-img check rebuild the refcount structures.
Before HEAD^, the reftable would have been written past the image
file end, i.e. outside of what the block device provides, which
cannot work. HEAD^ should have fixed that.
("Something like a block device" means a loop device if we can use
one ("sudo -n losetup" works), or a FUSE block export with
growable=false otherwise.)
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20220405134652.19278-3-hreitz@redhat.com>
(cherry picked from commit 9ffd6d646d1d5ee9087a8cbf0b7d2f96c5656162)
Conflicts:
- 108: The downstream qemu-storage-daemon does not support --daemonize,
so this switch has been replaced by a loop waiting for the PID file to
appear
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
tests/qemu-iotests/108 | 263 ++++++++++++++++++++++++++++++++++++-
tests/qemu-iotests/108.out | 81 ++++++++++++
2 files changed, 343 insertions(+), 1 deletion(-)
diff --git a/tests/qemu-iotests/108 b/tests/qemu-iotests/108
index 56339ab2c5..a3090e2875 100755
--- a/tests/qemu-iotests/108
+++ b/tests/qemu-iotests/108
@@ -30,13 +30,20 @@ status=1 # failure is the default!
_cleanup()
{
- _cleanup_test_img
+ _cleanup_test_img
+ if [ -f "$TEST_DIR/qsd.pid" ]; then
+ qsd_pid=$(cat "$TEST_DIR/qsd.pid")
+ kill -KILL "$qsd_pid"
+ fusermount -u "$TEST_DIR/fuse-export" &>/dev/null
+ fi
+ rm -f "$TEST_DIR/fuse-export"
}
trap "_cleanup; exit \$status" 0 1 2 3 15
# get standard environment, filters and checks
. ./common.rc
. ./common.filter
+. ./common.qemu
# This tests qcow2-specific low-level functionality
_supported_fmt qcow2
@@ -47,6 +54,22 @@ _supported_os Linux
# files
_unsupported_imgopts 'refcount_bits=\([^1]\|.\([^6]\|$\)\)' data_file
+# This test either needs sudo -n losetup or FUSE exports to work
+if sudo -n losetup &>/dev/null; then
+ loopdev=true
+else
+ loopdev=false
+
+ # QSD --export fuse will either yield "Parameter 'id' is missing"
+ # or "Invalid parameter 'fuse'", depending on whether there is
+ # FUSE support or not.
+ error=$($QSD --export fuse 2>&1)
+ if [[ $error = *"'fuse'"* ]]; then
+ _notrun 'Passwordless sudo for losetup or FUSE support required, but' \
+ 'neither is available'
+ fi
+fi
+
echo
echo '=== Repairing an image without any refcount table ==='
echo
@@ -138,6 +161,244 @@ _make_test_img 64M
poke_file "$TEST_IMG" $((0x10008)) "\xff\xff\xff\xff\xff\xff\x00\x00"
_check_test_img -r all
+echo
+echo '=== Check rebuilt reftable location ==='
+
+# In an earlier version of the refcount rebuild algorithm, the
+# reftable was generally placed at the image end (unless something was
+# allocated in the area covered by the refblock right before the image
+# file end, then we would try to place the reftable in that refblock).
+# This was later changed so the reftable would be placed in the
+# earliest possible location. Test this.
+
+echo
+echo '--- Does the image size increase? ---'
+echo
+
+# First test: Just create some image, write some data to it, and
+# resize it so there is free space at the end of the image (enough
+# that it spans at least one full refblock, which for cluster_size=512
+# images, spans 128k). With the old algorithm, the reftable would
+# have then been placed at the end of the image file, but with the new
+# one, it will be put in that free space.
+# We want to check whether the size of the image file increases due to
+# rebuilding the refcount structures (it should not).
+
+_make_test_img -o 'cluster_size=512' 1M
+# Write something
+$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io
+
+# Add free space
+file_len=$(stat -c '%s' "$TEST_IMG")
+truncate -s $((file_len + 256 * 1024)) "$TEST_IMG"
+
+# Corrupt the image by saying the image header was not allocated
+rt_offset=$(peek_file_be "$TEST_IMG" 48 8)
+rb_offset=$(peek_file_be "$TEST_IMG" $rt_offset 8)
+poke_file "$TEST_IMG" $rb_offset "\x00\x00"
+
+# Check whether rebuilding the refcount structures increases the image
+# file size
+file_len=$(stat -c '%s' "$TEST_IMG")
+echo
+# The only leaks there can be are the old refcount structures that are
+# leaked during rebuilding, no need to clutter the output with them
+_check_test_img -r all | grep -v '^Repairing cluster.*refcount=1 reference=0'
+echo
+post_repair_file_len=$(stat -c '%s' "$TEST_IMG")
+
+if [[ $file_len -eq $post_repair_file_len ]]; then
+ echo 'OK: Image size did not change'
+else
+ echo 'ERROR: Image size differs' \
+ "($file_len before, $post_repair_file_len after)"
+fi
+
+echo
+echo '--- Will the reftable occupy a hole specifically left for it? ---'
+echo
+
+# Note: With cluster_size=512, every refblock covers 128k.
+# The reftable covers 8M per reftable cluster.
+
+# Create an image that requires two reftable clusters (just because
+# this is more interesting than a single-clustered reftable).
+_make_test_img -o 'cluster_size=512' 9M
+$QEMU_IO -c 'write 0 8M' "$TEST_IMG" | _filter_qemu_io
+
+# Writing 8M will have resized the reftable. Unfortunately, doing so
+# will leave holes in the file, so we need to fill them up so we can
+# be sure the whole file is allocated. Do that by writing
+# consecutively smaller chunks starting from 8 MB, until the file
+# length increases even with a chunk size of 512. Then we must have
+# filled all holes.
+ofs=$((8 * 1024 * 1024))
+block_len=$((16 * 1024))
+while [[ $block_len -ge 512 ]]; do
+ file_len=$(stat -c '%s' "$TEST_IMG")
+ while [[ $(stat -c '%s' "$TEST_IMG") -eq $file_len ]]; do
+ # Do not include this in the reference output, it does not
+ # really matter which qemu-io calls we do here exactly
+ $QEMU_IO -c "write $ofs $block_len" "$TEST_IMG" >/dev/null
+ ofs=$((ofs + block_len))
+ done
+ block_len=$((block_len / 2))
+done
+
+# Fill up to 9M (do not include this in the reference output either,
+# $ofs is random for all we know)
+$QEMU_IO -c "write $ofs $((9 * 1024 * 1024 - ofs))" "$TEST_IMG" >/dev/null
+
+# Make space as follows:
+# - For the first refblock: Right at the beginning of the image (this
+# refblock is placed in the first place possible),
+# - For the reftable somewhere soon afterwards, still near the
+# beginning of the image (i.e. covered by the first refblock); the
+# reftable too is placed in the first place possible, but only after
+# all refblocks have been placed)
+# No space is needed for the other refblocks, because no refblock is
+# put before the space it covers. In this test case, we do not mind
+# if they are placed at the image file's end.
+
+# Before we make that space, we have to find out the host offset of
+# the area that belonged to the two data clusters at guest offset 4k,
+# because we expect the reftable to be placed there, and we will have
+# to verify that it is.
+
+l1_offset=$(peek_file_be "$TEST_IMG" 40 8)
+l2_offset=$(peek_file_be "$TEST_IMG" $l1_offset 8)
+l2_offset=$((l2_offset & 0x00fffffffffffe00))
+data_4k_offset=$(peek_file_be "$TEST_IMG" \
+ $((l2_offset + 4096 / 512 * 8)) 8)
+data_4k_offset=$((data_4k_offset & 0x00fffffffffffe00))
+
+$QEMU_IO -c "discard 0 512" -c "discard 4k 1k" "$TEST_IMG" | _filter_qemu_io
+
+# Corrupt the image by saying the image header was not allocated
+rt_offset=$(peek_file_be "$TEST_IMG" 48 8)
+rb_offset=$(peek_file_be "$TEST_IMG" $rt_offset 8)
+poke_file "$TEST_IMG" $rb_offset "\x00\x00"
+
+echo
+# The only leaks there can be are the old refcount structures that are
+# leaked during rebuilding, no need to clutter the output with them
+_check_test_img -r all | grep -v '^Repairing cluster.*refcount=1 reference=0'
+echo
+
+# Check whether the reftable was put where we expected
+rt_offset=$(peek_file_be "$TEST_IMG" 48 8)
+if [[ $rt_offset -eq $data_4k_offset ]]; then
+ echo 'OK: Reftable is where we expect it'
+else
+ echo "ERROR: Reftable is at $rt_offset, but was expected at $data_4k_offset"
+fi
+
+echo
+echo '--- Rebuilding refcount structures on block devices ---'
+echo
+
+# A block device cannot really grow, at least not during qemu-img
+# check. As mentioned in the above cases, rebuilding the refcount
+# structure may lead to new refcount structures being written after
+# the end of the image, and in the past that happened even if there
+# was more than sufficient space in the image. Such post-EOF writes
+# will not work on block devices, so test that the new algorithm
+# avoids it.
+
+# If we have passwordless sudo and losetup, we can use those to create
+# a block device. Otherwise, we can resort to qemu's FUSE export to
+# create a file that isn't growable, which effectively tests the same
+# thing.
+
+_cleanup_test_img
+truncate -s $((64 * 1024 * 1024)) "$TEST_IMG"
+
+if $loopdev; then
+ export_mp=$(sudo -n losetup --show -f "$TEST_IMG")
+ export_mp_driver=host_device
+ sudo -n chmod go+rw "$export_mp"
+else
+ # Create non-growable FUSE export that is a bit like an empty
+ # block device
+ export_mp="$TEST_DIR/fuse-export"
+ export_mp_driver=file
+ touch "$export_mp"
+
+ $QSD \
+ --blockdev file,node-name=export-node,filename="$TEST_IMG" \
+ --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off \
+ --pidfile "$TEST_DIR/qsd.pid" \
+ &
+
+ while [ ! -f "$TEST_DIR/qsd.pid" ]; do
+ sleep 0.1
+ done
+fi
+
+# Now create a qcow2 image on the device -- unfortunately, qemu-img
+# create force-creates the file, so we have to resort to the
+# blockdev-create job.
+_launch_qemu \
+ --blockdev $export_mp_driver,node-name=file,filename="$export_mp"
+
+_send_qemu_cmd \
+ $QEMU_HANDLE \
+ '{ "execute": "qmp_capabilities" }' \
+ 'return'
+
+# Small cluster size again, so the image needs multiple refblocks
+_send_qemu_cmd \
+ $QEMU_HANDLE \
+ '{ "execute": "blockdev-create",
+ "arguments": {
+ "job-id": "create",
+ "options": {
+ "driver": "qcow2",
+ "file": "file",
+ "size": '$((64 * 1024 * 1024))',
+ "cluster-size": 512
+ } } }' \
+ '"concluded"'
+
+_send_qemu_cmd \
+ $QEMU_HANDLE \
+ '{ "execute": "job-dismiss", "arguments": { "id": "create" } }' \
+ 'return'
+
+_send_qemu_cmd \
+ $QEMU_HANDLE \
+ '{ "execute": "quit" }' \
+ 'return'
+
+wait=y _cleanup_qemu
+echo
+
+# Write some data
+$QEMU_IO -c 'write 0 64k' "$export_mp" | _filter_qemu_io
+
+# Corrupt the image by saying the image header was not allocated
+rt_offset=$(peek_file_be "$export_mp" 48 8)
+rb_offset=$(peek_file_be "$export_mp" $rt_offset 8)
+poke_file "$export_mp" $rb_offset "\x00\x00"
+
+# Repairing such a simple case should just work
+# (We used to put the reftable at the end of the image file, which can
+# never work for non-growable devices.)
+echo
+TEST_IMG="$export_mp" _check_test_img -r all \
+ | grep -v '^Repairing cluster.*refcount=1 reference=0'
+
+if $loopdev; then
+ sudo -n losetup -d "$export_mp"
+else
+ qsd_pid=$(cat "$TEST_DIR/qsd.pid")
+ kill -TERM "$qsd_pid"
+ # Wait for process to exit (cannot `wait` because the QSD is daemonized)
+ while [ -f "$TEST_DIR/qsd.pid" ]; do
+ true
+ done
+fi
+
# success, all done
echo '*** done'
rm -f $seq.full
diff --git a/tests/qemu-iotests/108.out b/tests/qemu-iotests/108.out
index 75bab8dc84..b5401d788d 100644
--- a/tests/qemu-iotests/108.out
+++ b/tests/qemu-iotests/108.out
@@ -105,6 +105,87 @@ The following inconsistencies were found and repaired:
0 leaked clusters
1 corruptions
+Double checking the fixed image now...
+No errors were found on the image.
+
+=== Check rebuilt reftable location ===
+
+--- Does the image size increase? ---
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+ERROR cluster 0 refcount=0 reference=1
+Rebuilding refcount structure
+The following inconsistencies were found and repaired:
+
+ 0 leaked clusters
+ 1 corruptions
+
+Double checking the fixed image now...
+No errors were found on the image.
+
+OK: Image size did not change
+
+--- Will the reftable occupy a hole specifically left for it? ---
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=9437184
+wrote 8388608/8388608 bytes at offset 0
+8 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+discard 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+discard 1024/1024 bytes at offset 4096
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+ERROR cluster 0 refcount=0 reference=1
+Rebuilding refcount structure
+The following inconsistencies were found and repaired:
+
+ 0 leaked clusters
+ 1 corruptions
+
+Double checking the fixed image now...
+No errors were found on the image.
+
+OK: Reftable is where we expect it
+
+--- Rebuilding refcount structures on block devices ---
+
+{ "execute": "qmp_capabilities" }
+{"return": {}}
+{ "execute": "blockdev-create",
+ "arguments": {
+ "job-id": "create",
+ "options": {
+ "driver": "IMGFMT",
+ "file": "file",
+ "size": 67108864,
+ "cluster-size": 512
+ } } }
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "create"}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "create"}}
+{"return": {}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "create"}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "create"}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "create"}}
+{ "execute": "job-dismiss", "arguments": { "id": "create" } }
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "create"}}
+{"return": {}}
+{ "execute": "quit" }
+{"return": {}}
+{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
+
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+ERROR cluster 0 refcount=0 reference=1
+Rebuilding refcount structure
+The following inconsistencies were found and repaired:
+
+ 0 leaked clusters
+ 1 corruptions
+
Double checking the fixed image now...
No errors were found on the image.
*** done
--
2.31.1

@ -0,0 +1,62 @@
From 9ddefaedf423ec03eadaf17496c14e0d7b2381c8 Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cohuck@redhat.com>
Date: Thu, 28 Jul 2022 16:24:46 +0200
Subject: [PATCH 30/32] kvm: don't use perror() without useful errno
RH-Author: Cornelia Huck <cohuck@redhat.com>
RH-MergeRequest: 110: kvm: don't use perror() without useful errno
RH-Commit: [1/1] 20e51aac6767c1f89f74c7d692d1fb7689eff5f0 (cohuck/qemu-kvm-c9s)
RH-Bugzilla: 2095608
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Gavin Shan <gshan@redhat.com>
perror() is designed to append the decoded errno value to a
string. This, however, only makes sense if we called something that
actually sets errno prior to that.
For the callers that check for split irqchip support that is not the
case, and we end up with confusing error messages that end in
"success". Use error_report() instead.
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
Message-Id: <20220728142446.438177-1-cohuck@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
https://bugzilla.redhat.com/show_bug.cgi?id=2095608
(cherry picked from commit 47c182fe8b03c0c40059fb95840923e65c9bdb4f)
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
---
accel/kvm/kvm-all.c | 2 +-
target/arm/kvm.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 5f1377ca04..e9c7947640 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2254,7 +2254,7 @@ static void kvm_irqchip_create(KVMState *s)
ret = kvm_arch_irqchip_create(s);
if (ret == 0) {
if (s->kernel_irqchip_split == ON_OFF_AUTO_ON) {
- perror("Split IRQ chip mode not supported.");
+ error_report("Split IRQ chip mode not supported.");
exit(1);
} else {
ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP);
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index bbf1ce7ba3..0a2ba1f8e3 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -960,7 +960,7 @@ void kvm_arch_init_irq_routing(KVMState *s)
int kvm_arch_irqchip_create(KVMState *s)
{
if (kvm_kernel_irqchip_split()) {
- perror("-machine kernel_irqchip=split is not supported on ARM.");
+ error_report("-machine kernel_irqchip=split is not supported on ARM.");
exit(1);
}
--
2.31.1

@ -0,0 +1,49 @@
From 49d9c9dced7278517105e9cfec34ea4af716432d Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 9 Jun 2022 17:47:12 +0100
Subject: [PATCH 6/6] linux-aio: explain why max batch is checked in
laio_io_unplug()
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
RH-MergeRequest: 102: linux-aio: fix unbalanced plugged counter in laio_io_unplug()
RH-Commit: [2/2] b3d6421086bde50d4baad2343b2df89c5f66950e (stefanha/centos-stream-qemu-kvm)
RH-Bugzilla: 2092788
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
It may not be obvious why laio_io_unplug() checks max batch. I discussed
this with Stefano and have added a comment summarizing the reason.
Cc: Stefano Garzarella <sgarzare@redhat.com>
Cc: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20220609164712.1539045-3-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 99b969fbe105117f5af6060d3afef40ca39cc9c1)
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
block/linux-aio.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/block/linux-aio.c b/block/linux-aio.c
index 6078da7e42..9c2393a2f7 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -365,6 +365,12 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s,
assert(s->io_q.plugged);
s->io_q.plugged--;
+ /*
+ * Why max batch checking is performed here:
+ * Another BDS may have queued requests with a higher dev_max_batch and
+ * therefore in_queue could now exceed our dev_max_batch. Re-check the max
+ * batch so we can honor our device's dev_max_batch.
+ */
if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) ||
(!s->io_q.plugged &&
!s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) {
--
2.31.1

@ -0,0 +1,56 @@
From e7326c3a7e0fc022aa5c0ae07bc1e19ad1b6f2ed Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 9 Jun 2022 17:47:11 +0100
Subject: [PATCH 5/6] linux-aio: fix unbalanced plugged counter in
laio_io_unplug()
RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
RH-MergeRequest: 102: linux-aio: fix unbalanced plugged counter in laio_io_unplug()
RH-Commit: [1/2] 8a71da371c72521f1d70b8767ee564575e0d522b (stefanha/centos-stream-qemu-kvm)
RH-Bugzilla: 2092788
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
Every laio_io_plug() call has a matching laio_io_unplug() call. There is
a plugged counter that tracks the number of levels of plugging and
allows for nesting.
The plugged counter must reflect the balance between laio_io_plug() and
laio_io_unplug() calls accurately. Otherwise I/O stalls occur since
io_submit(2) calls are skipped while plugged.
Reported-by: Nikolay Tenev <nt@storpool.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20220609164712.1539045-2-stefanha@redhat.com
Cc: Stefano Garzarella <sgarzare@redhat.com>
Fixes: 68d7946648 ("linux-aio: add `dev_max_batch` parameter to laio_io_unplug()")
[Stefano Garzarella suggested adding a Fixes tag.
--Stefan]
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit f387cac5af030a58ac5a0dacf64cab5e5a4fe5c7)
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
block/linux-aio.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/block/linux-aio.c b/block/linux-aio.c
index 4c423fcccf..6078da7e42 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -363,8 +363,10 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s,
uint64_t dev_max_batch)
{
assert(s->io_q.plugged);
+ s->io_q.plugged--;
+
if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) ||
- (--s->io_q.plugged == 0 &&
+ (!s->io_q.plugged &&
!s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) {
ioq_submit(s);
}
--
2.31.1

@ -0,0 +1,154 @@
From 51c310097832724bafac26aed81399da40128400 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= <eperezma@redhat.com>
Date: Thu, 21 Jul 2022 15:50:43 +0200
Subject: [PATCH 05/32] meson: create have_vhost_* variables
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Eugenio Pérez <eperezma@redhat.com>
RH-MergeRequest: 108: Net Control Virtqueue shadow Support
RH-Commit: [5/27] 3b30f89e6d639923dc9d9a92a4261bb4509e5c83 (eperezmartin/qemu-kvm)
RH-Bugzilla: 1939363
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Acked-by: Cindy Lu <lulu@redhat.com>
RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
Bugzilla: https://bugzilla.redhat.com/1939363
Upstream Status: git://git.qemu.org/qemu.git
commit 2a3129a37652e5e81d12f6e16dd3c447f09831f9
Author: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed Apr 20 17:34:05 2022 +0200
meson: create have_vhost_* variables
When using Meson options rather than config-host.h, the "when" clauses
have to be changed to if statements (which is not necessarily great,
though at least it highlights which parts of the build are per-target
and which are not).
Do that before moving vhost logic to meson.build, though for now
the variables are just based on config-host.mak data.
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
meson.build | 30 ++++++++++++++++++++----------
tests/meson.build | 2 +-
tools/meson.build | 2 +-
3 files changed, 22 insertions(+), 12 deletions(-)
diff --git a/meson.build b/meson.build
index 13e3323380..735f538497 100644
--- a/meson.build
+++ b/meson.build
@@ -298,6 +298,15 @@ have_tpm = get_option('tpm') \
.require(targetos != 'windows', error_message: 'TPM emulation only available on POSIX systems') \
.allowed()
+# vhost
+have_vhost_user = 'CONFIG_VHOST_USER' in config_host
+have_vhost_vdpa = 'CONFIG_VHOST_VDPA' in config_host
+have_vhost_kernel = 'CONFIG_VHOST_KERNEL' in config_host
+have_vhost_net_user = 'CONFIG_VHOST_NET_USER' in config_host
+have_vhost_net_vdpa = 'CONFIG_VHOST_NET_VDPA' in config_host
+have_vhost_net = 'CONFIG_VHOST_NET' in config_host
+have_vhost_user_crypto = 'CONFIG_VHOST_CRYPTO' in config_host
+
# Target-specific libraries and flags
libm = cc.find_library('m', required: false)
threads = dependency('threads')
@@ -1335,7 +1344,7 @@ has_statx_mnt_id = cc.links(statx_mnt_id_test)
have_vhost_user_blk_server = get_option('vhost_user_blk_server') \
.require(targetos == 'linux',
error_message: 'vhost_user_blk_server requires linux') \
- .require('CONFIG_VHOST_USER' in config_host,
+ .require(have_vhost_user,
error_message: 'vhost_user_blk_server requires vhost-user support') \
.disable_auto_if(not have_system) \
.allowed()
@@ -2116,9 +2125,9 @@ host_kconfig = \
(have_ivshmem ? ['CONFIG_IVSHMEM=y'] : []) + \
('CONFIG_OPENGL' in config_host ? ['CONFIG_OPENGL=y'] : []) + \
(x11.found() ? ['CONFIG_X11=y'] : []) + \
- ('CONFIG_VHOST_USER' in config_host ? ['CONFIG_VHOST_USER=y'] : []) + \
- ('CONFIG_VHOST_VDPA' in config_host ? ['CONFIG_VHOST_VDPA=y'] : []) + \
- ('CONFIG_VHOST_KERNEL' in config_host ? ['CONFIG_VHOST_KERNEL=y'] : []) + \
+ (have_vhost_user ? ['CONFIG_VHOST_USER=y'] : []) + \
+ (have_vhost_vdpa ? ['CONFIG_VHOST_VDPA=y'] : []) + \
+ (have_vhost_kernel ? ['CONFIG_VHOST_KERNEL=y'] : []) + \
(have_virtfs ? ['CONFIG_VIRTFS=y'] : []) + \
('CONFIG_LINUX' in config_host ? ['CONFIG_LINUX=y'] : []) + \
('CONFIG_PVRDMA' in config_host ? ['CONFIG_PVRDMA=y'] : []) + \
@@ -2799,7 +2808,7 @@ if have_system or have_user
endif
vhost_user = not_found
-if targetos == 'linux' and 'CONFIG_VHOST_USER' in config_host
+if targetos == 'linux' and have_vhost_user
libvhost_user = subproject('libvhost-user')
vhost_user = libvhost_user.get_variable('vhost_user_dep')
endif
@@ -3386,7 +3395,7 @@ if have_tools
dependencies: qemuutil,
install: true)
- if 'CONFIG_VHOST_USER' in config_host
+ if have_vhost_user
subdir('contrib/vhost-user-blk')
subdir('contrib/vhost-user-gpu')
subdir('contrib/vhost-user-input')
@@ -3516,15 +3525,16 @@ if 'simple' in get_option('trace_backends')
endif
summary_info += {'D-Bus display': dbus_display}
summary_info += {'QOM debugging': get_option('qom_cast_debug')}
-summary_info += {'vhost-kernel support': config_host.has_key('CONFIG_VHOST_KERNEL')}
-summary_info += {'vhost-net support': config_host.has_key('CONFIG_VHOST_NET')}
-summary_info += {'vhost-crypto support': config_host.has_key('CONFIG_VHOST_CRYPTO')}
+summary_info += {'vhost-kernel support': have_vhost_kernel}
+summary_info += {'vhost-net support': have_vhost_net}
+summary_info += {'vhost-user support': have_vhost_user}
+summary_info += {'vhost-user-crypto support': have_vhost_user_crypto}
summary_info += {'vhost-scsi support': config_host.has_key('CONFIG_VHOST_SCSI')}
summary_info += {'vhost-vsock support': config_host.has_key('CONFIG_VHOST_VSOCK')}
-summary_info += {'vhost-user support': config_host.has_key('CONFIG_VHOST_USER')}
summary_info += {'vhost-user-blk server support': have_vhost_user_blk_server}
summary_info += {'vhost-user-fs support': config_host.has_key('CONFIG_VHOST_USER_FS')}
summary_info += {'vhost-vdpa support': config_host.has_key('CONFIG_VHOST_VDPA')}
+summary_info += {'vhost-vdpa support': have_vhost_vdpa}
summary_info += {'build guest agent': have_ga}
summary(summary_info, bool_yn: true, section: 'Configurable features')
diff --git a/tests/meson.build b/tests/meson.build
index 1d05109eb4..bbe41c8559 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -70,7 +70,7 @@ test_deps = {
'test-qht-par': qht_bench,
}
-if have_tools and 'CONFIG_VHOST_USER' in config_host and 'CONFIG_LINUX' in config_host
+if have_tools and have_vhost_user and 'CONFIG_LINUX' in config_host
executable('vhost-user-bridge',
sources: files('vhost-user-bridge.c'),
dependencies: [qemuutil, vhost_user])
diff --git a/tools/meson.build b/tools/meson.build
index 46977af84f..10eb3a043f 100644
--- a/tools/meson.build
+++ b/tools/meson.build
@@ -3,7 +3,7 @@ have_virtiofsd = get_option('virtiofsd') \
error_message: 'virtiofsd requires Linux') \
.require(seccomp.found() and libcap_ng.found(),
error_message: 'virtiofsd requires libcap-ng-devel and seccomp-devel') \
- .require('CONFIG_VHOST_USER' in config_host,
+ .require(have_vhost_user,
error_message: 'virtiofsd needs vhost-user-support') \
.disable_auto_if(not have_tools and not have_system) \
.allowed()
--
2.31.1

@ -0,0 +1,213 @@
From a7d57a09e33275d5e6649273b5c9da1bc3c92491 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= <eperezma@redhat.com>
Date: Thu, 21 Jul 2022 15:51:53 +0200
Subject: [PATCH 06/32] meson: use have_vhost_* variables to pick sources
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Eugenio Pérez <eperezma@redhat.com>
RH-MergeRequest: 108: Net Control Virtqueue shadow Support
RH-Commit: [6/27] bc3db1efb759c0bc97fde2f4fbb3d6dc404c8d3d (eperezmartin/qemu-kvm)
RH-Bugzilla: 1939363
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Acked-by: Cindy Lu <lulu@redhat.com>
RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
Bugzilla: https://bugzilla.redhat.com/1939363
Upstream Status: git://git.qemu.org/qemu.git
commit 43b6d7ee1fbc5b5fb7c85d8131fdac1863214ad6
Author: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed Apr 20 17:34:06 2022 +0200
meson: use have_vhost_* variables to pick sources
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
Kconfig.host | 3 ---
backends/meson.build | 8 ++++++--
hw/net/meson.build | 8 ++++++--
hw/virtio/Kconfig | 3 ---
hw/virtio/meson.build | 25 ++++++++++++++++---------
meson.build | 1 +
net/meson.build | 12 +++++++-----
tests/qtest/meson.build | 4 +++-
8 files changed, 39 insertions(+), 25 deletions(-)
diff --git a/Kconfig.host b/Kconfig.host
index 60b9c07b5e..1165c4eacd 100644
--- a/Kconfig.host
+++ b/Kconfig.host
@@ -22,15 +22,12 @@ config TPM
config VHOST_USER
bool
- select VHOST
config VHOST_VDPA
bool
- select VHOST
config VHOST_KERNEL
bool
- select VHOST
config VIRTFS
bool
diff --git a/backends/meson.build b/backends/meson.build
index 6e68945528..cb92f639ca 100644
--- a/backends/meson.build
+++ b/backends/meson.build
@@ -12,9 +12,13 @@ softmmu_ss.add([files(
softmmu_ss.add(when: 'CONFIG_POSIX', if_true: files('rng-random.c'))
softmmu_ss.add(when: 'CONFIG_POSIX', if_true: files('hostmem-file.c'))
softmmu_ss.add(when: 'CONFIG_LINUX', if_true: files('hostmem-memfd.c'))
-softmmu_ss.add(when: ['CONFIG_VHOST_USER', 'CONFIG_VIRTIO'], if_true: files('vhost-user.c'))
+if have_vhost_user
+ softmmu_ss.add(when: 'CONFIG_VIRTIO', if_true: files('vhost-user.c'))
+endif
softmmu_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost.c'))
-softmmu_ss.add(when: ['CONFIG_VIRTIO_CRYPTO', 'CONFIG_VHOST_CRYPTO'], if_true: files('cryptodev-vhost-user.c'))
+if have_vhost_user_crypto
+ softmmu_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost-user.c'))
+endif
softmmu_ss.add(when: 'CONFIG_GIO', if_true: [files('dbus-vmstate.c'), gio])
softmmu_ss.add(when: 'CONFIG_SGX', if_true: files('hostmem-epc.c'))
diff --git a/hw/net/meson.build b/hw/net/meson.build
index 685b75badb..ebac261542 100644
--- a/hw/net/meson.build
+++ b/hw/net/meson.build
@@ -46,8 +46,12 @@ specific_ss.add(when: 'CONFIG_XILINX_ETHLITE', if_true: files('xilinx_ethlite.c'
softmmu_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('net_rx_pkt.c'))
specific_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('virtio-net.c'))
-softmmu_ss.add(when: ['CONFIG_VIRTIO_NET', 'CONFIG_VHOST_NET'], if_true: files('vhost_net.c'), if_false: files('vhost_net-stub.c'))
-softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost_net-stub.c'))
+if have_vhost_net
+ softmmu_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost_net.c'), if_false: files('vhost_net-stub.c'))
+ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost_net-stub.c'))
+else
+ softmmu_ss.add(files('vhost_net-stub.c'))
+endif
softmmu_ss.add(when: 'CONFIG_ETSEC', if_true: files(
'fsl_etsec/etsec.c',
diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig
index c144d42f9b..8ca7b3d9d6 100644
--- a/hw/virtio/Kconfig
+++ b/hw/virtio/Kconfig
@@ -1,6 +1,3 @@
-config VHOST
- bool
-
config VIRTIO
bool
diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
index 67dc77e00f..30a832eb4a 100644
--- a/hw/virtio/meson.build
+++ b/hw/virtio/meson.build
@@ -2,18 +2,22 @@ softmmu_virtio_ss = ss.source_set()
softmmu_virtio_ss.add(files('virtio-bus.c'))
softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('virtio-pci.c'))
softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_MMIO', if_true: files('virtio-mmio.c'))
-softmmu_virtio_ss.add(when: 'CONFIG_VHOST', if_false: files('vhost-stub.c'))
-
-softmmu_ss.add_all(when: 'CONFIG_VIRTIO', if_true: softmmu_virtio_ss)
-softmmu_ss.add(when: 'CONFIG_VIRTIO', if_false: files('vhost-stub.c'))
-
-softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c'))
virtio_ss = ss.source_set()
virtio_ss.add(files('virtio.c'))
-virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-iova-tree.c'))
-virtio_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user.c'))
-virtio_ss.add(when: 'CONFIG_VHOST_VDPA', if_true: files('vhost-shadow-virtqueue.c', 'vhost-vdpa.c'))
+
+if have_vhost
+ virtio_ss.add(files('vhost.c', 'vhost-backend.c', 'vhost-iova-tree.c'))
+ if have_vhost_user
+ virtio_ss.add(files('vhost-user.c'))
+ endif
+ if have_vhost_vdpa
+ virtio_ss.add(files('vhost-vdpa.c', 'vhost-shadow-virtqueue.c'))
+ endif
+else
+ softmmu_virtio_ss.add(files('vhost-stub.c'))
+endif
+
virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c'))
virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c'))
virtio_ss.add(when: ['CONFIG_VIRTIO_CRYPTO', 'CONFIG_VIRTIO_PCI'], if_true: files('virtio-crypto-pci.c'))
@@ -53,3 +57,6 @@ virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem-pci.c'))
virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss)
specific_ss.add_all(when: 'CONFIG_VIRTIO', if_true: virtio_ss)
+softmmu_ss.add_all(when: 'CONFIG_VIRTIO', if_true: softmmu_virtio_ss)
+softmmu_ss.add(when: 'CONFIG_VIRTIO', if_false: files('vhost-stub.c'))
+softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c'))
diff --git a/meson.build b/meson.build
index 735f538497..9ba675f098 100644
--- a/meson.build
+++ b/meson.build
@@ -305,6 +305,7 @@ have_vhost_kernel = 'CONFIG_VHOST_KERNEL' in config_host
have_vhost_net_user = 'CONFIG_VHOST_NET_USER' in config_host
have_vhost_net_vdpa = 'CONFIG_VHOST_NET_VDPA' in config_host
have_vhost_net = 'CONFIG_VHOST_NET' in config_host
+have_vhost = have_vhost_user or have_vhost_vdpa or have_vhost_kernel
have_vhost_user_crypto = 'CONFIG_VHOST_CRYPTO' in config_host
# Target-specific libraries and flags
diff --git a/net/meson.build b/net/meson.build
index 847bc2ac85..c965e83b26 100644
--- a/net/meson.build
+++ b/net/meson.build
@@ -26,10 +26,10 @@ softmmu_ss.add(when: vde, if_true: files('vde.c'))
if have_netmap
softmmu_ss.add(files('netmap.c'))
endif
-vhost_user_ss = ss.source_set()
-vhost_user_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost-user.c'), if_false: files('vhost-user-stub.c'))
-softmmu_ss.add_all(when: 'CONFIG_VHOST_NET_USER', if_true: vhost_user_ss)
-softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-user-stub.c'))
+if have_vhost_net_user
+ softmmu_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost-user.c'), if_false: files('vhost-user-stub.c'))
+ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-user-stub.c'))
+endif
softmmu_ss.add(when: 'CONFIG_LINUX', if_true: files('tap-linux.c'))
softmmu_ss.add(when: 'CONFIG_BSD', if_true: files('tap-bsd.c'))
@@ -40,6 +40,8 @@ if not config_host.has_key('CONFIG_LINUX') and not config_host.has_key('CONFIG_B
endif
softmmu_ss.add(when: 'CONFIG_POSIX', if_true: files(tap_posix))
softmmu_ss.add(when: 'CONFIG_WIN32', if_true: files('tap-win32.c'))
-softmmu_ss.add(when: 'CONFIG_VHOST_NET_VDPA', if_true: files('vhost-vdpa.c'))
+if have_vhost_net_vdpa
+ softmmu_ss.add(files('vhost-vdpa.c'))
+endif
subdir('can')
diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index 67cd32def1..9f550df900 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -269,7 +269,9 @@ qos_test_ss.add(
if have_virtfs
qos_test_ss.add(files('virtio-9p-test.c'))
endif
-qos_test_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user-test.c'))
+if have_vhost_user
+ qos_test_ss.add(files('vhost-user-test.c'))
+endif
if have_tools and have_vhost_user_blk_server
qos_test_ss.add(files('vhost-user-blk-test.c'))
endif
--
2.31.1

@ -0,0 +1,87 @@
From 7c489b54b0bb33445113fbf16e88feb23be68013 Mon Sep 17 00:00:00 2001
From: Leonardo Bras <leobras@redhat.com>
Date: Fri, 13 May 2022 03:28:30 -0300
Subject: [PATCH 07/18] meson.build: Fix docker-test-build@alpine when
including linux/errqueue.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Leonardo Brás <leobras@redhat.com>
RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd
RH-Commit: [1/11] f058eb846fcf611d527a1dd3b0cc399cdc17e3ee (LeoBras/centos-qemu-kvm)
RH-Bugzilla: 1968509
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
RH-Acked-by: Peter Xu <peterx@redhat.com>
A build error happens in alpine CI when linux/errqueue.h is included
in io/channel-socket.c, due to redefining of 'struct __kernel_timespec':
===
ninja: job failed: [...]
In file included from /usr/include/linux/errqueue.h:6,
from ../io/channel-socket.c:29:
/usr/include/linux/time_types.h:7:8: error: redefinition of 'struct __kernel_timespec'
7 | struct __kernel_timespec {
| ^~~~~~~~~~~~~~~~~
In file included from /usr/include/liburing.h:19,
from /builds/user/qemu/include/block/aio.h:18,
from /builds/user/qemu/include/io/channel.h:26,
from /builds/user/qemu/include/io/channel-socket.h:24,
from ../io/channel-socket.c:24:
/usr/include/liburing/compat.h:9:8: note: originally defined here
9 | struct __kernel_timespec {
| ^~~~~~~~~~~~~~~~~
ninja: subcommand failed
===
As above error message suggests, 'struct __kernel_timespec' was already
defined by liburing/compat.h.
Fix alpine CI by adding test to disable liburing in configure step if a
redefinition happens between linux/errqueue.h and liburing/compat.h.
[dgilbert: This has been fixed in Alpine issue 13813 and liburing]
Signed-off-by: Leonardo Bras <leobras@redhat.com>
Message-Id: <20220513062836.965425-2-leobras@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 354081d43de44ebd3497fe08f7f0121a5517d528)
Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
meson.build | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/meson.build b/meson.build
index 5a7c10e639..13e3323380 100644
--- a/meson.build
+++ b/meson.build
@@ -471,12 +471,23 @@ if not get_option('linux_aio').auto() or have_block
required: get_option('linux_aio'),
kwargs: static_kwargs)
endif
+
+linux_io_uring_test = '''
+ #include <liburing.h>
+ #include <linux/errqueue.h>
+
+ int main(void) { return 0; }'''
+
linux_io_uring = not_found
if not get_option('linux_io_uring').auto() or have_block
linux_io_uring = dependency('liburing', version: '>=0.3',
required: get_option('linux_io_uring'),
method: 'pkg-config', kwargs: static_kwargs)
+ if not cc.links(linux_io_uring_test)
+ linux_io_uring = not_found
+ endif
endif
+
libnfs = not_found
if not get_option('libnfs').auto() or have_block
libnfs = dependency('libnfs', version: '>=1.9.3',
--
2.35.3

@ -0,0 +1,106 @@
From 828f6c106eedcb7a48e551ffda15af56ff92a899 Mon Sep 17 00:00:00 2001
From: Leonardo Bras <leobras@redhat.com>
Date: Fri, 13 May 2022 03:28:34 -0300
Subject: [PATCH 11/18] migration: Add migrate_use_tls() helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Leonardo Brás <leobras@redhat.com>
RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd
RH-Commit: [5/11] 06e945297c3b9c0ce5864885aafcdba1e5746bc2 (LeoBras/centos-qemu-kvm)
RH-Bugzilla: 1968509
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
RH-Acked-by: Peter Xu <peterx@redhat.com>
A lot of places check parameters.tls_creds in order to evaluate if TLS is
in use, and sometimes call migrate_get_current() just for that test.
Add new helper function migrate_use_tls() in order to simplify testing
for TLS usage.
Signed-off-by: Leonardo Bras <leobras@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Message-Id: <20220513062836.965425-6-leobras@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit d2fafb6a6814a8998607d0baf691265032996a0f)
Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
migration/channel.c | 3 +--
migration/migration.c | 9 +++++++++
migration/migration.h | 1 +
migration/multifd.c | 5 +----
4 files changed, 12 insertions(+), 6 deletions(-)
diff --git a/migration/channel.c b/migration/channel.c
index c4fc000a1a..086b5c0d8b 100644
--- a/migration/channel.c
+++ b/migration/channel.c
@@ -38,8 +38,7 @@ void migration_channel_process_incoming(QIOChannel *ioc)
trace_migration_set_incoming_channel(
ioc, object_get_typename(OBJECT(ioc)));
- if (s->parameters.tls_creds &&
- *s->parameters.tls_creds &&
+ if (migrate_use_tls() &&
!object_dynamic_cast(OBJECT(ioc),
TYPE_QIO_CHANNEL_TLS)) {
migration_tls_channel_process_incoming(s, ioc, &local_err);
diff --git a/migration/migration.c b/migration/migration.c
index 0a6b3b9f4d..d91efb66fe 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2582,6 +2582,15 @@ bool migrate_use_zero_copy_send(void)
}
#endif
+int migrate_use_tls(void)
+{
+ MigrationState *s;
+
+ s = migrate_get_current();
+
+ return s->parameters.tls_creds && *s->parameters.tls_creds;
+}
+
int migrate_use_xbzrle(void)
{
MigrationState *s;
diff --git a/migration/migration.h b/migration/migration.h
index 5bcb7628ef..c2cabb8a14 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -381,6 +381,7 @@ bool migrate_use_zero_copy_send(void);
#else
#define migrate_use_zero_copy_send() (false)
#endif
+int migrate_use_tls(void);
int migrate_use_xbzrle(void);
uint64_t migrate_xbzrle_cache_size(void);
bool migrate_colo_enabled(void);
diff --git a/migration/multifd.c b/migration/multifd.c
index 76b57a7177..43998ad117 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -784,14 +784,11 @@ static bool multifd_channel_connect(MultiFDSendParams *p,
QIOChannel *ioc,
Error *error)
{
- MigrationState *s = migrate_get_current();
-
trace_multifd_set_outgoing_channel(
ioc, object_get_typename(OBJECT(ioc)), p->tls_hostname, error);
if (!error) {
- if (s->parameters.tls_creds &&
- *s->parameters.tls_creds &&
+ if (migrate_use_tls() &&
!object_dynamic_cast(OBJECT(ioc),
TYPE_QIO_CHANNEL_TLS)) {
multifd_tls_channel_connect(p, ioc, &error);
--
2.35.3

@ -0,0 +1,250 @@
From d6500340dc3c1152b5efe04ef3daa50c17a55e30 Mon Sep 17 00:00:00 2001
From: Leonardo Bras <leobras@redhat.com>
Date: Fri, 13 May 2022 03:28:33 -0300
Subject: [PATCH 10/18] migration: Add zero-copy-send parameter for QMP/HMP for
Linux
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Leonardo Brás <leobras@redhat.com>
RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd
RH-Commit: [4/11] 514d98d595992c53ff98de750035e080ded8972e (LeoBras/centos-qemu-kvm)
RH-Bugzilla: 1968509
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
RH-Acked-by: Peter Xu <peterx@redhat.com>
Add property that allows zero-copy migration of memory pages
on the sending side, and also includes a helper function
migrate_use_zero_copy_send() to check if it's enabled.
No code is introduced to actually do the migration, but it allow
future implementations to enable/disable this feature.
On non-Linux builds this parameter is compiled-out.
Signed-off-by: Leonardo Bras <leobras@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20220513062836.965425-5-leobras@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit abb6295b3ace5d17c3a65936913fc346616dbf14)
Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
migration/migration.c | 32 ++++++++++++++++++++++++++++++++
migration/migration.h | 5 +++++
migration/socket.c | 11 +++++++++--
monitor/hmp-cmds.c | 6 ++++++
qapi/migration.json | 24 ++++++++++++++++++++++++
5 files changed, 76 insertions(+), 2 deletions(-)
diff --git a/migration/migration.c b/migration/migration.c
index 695f0f2900..0a6b3b9f4d 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -899,6 +899,10 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp)
params->multifd_zlib_level = s->parameters.multifd_zlib_level;
params->has_multifd_zstd_level = true;
params->multifd_zstd_level = s->parameters.multifd_zstd_level;
+#ifdef CONFIG_LINUX
+ params->has_zero_copy_send = true;
+ params->zero_copy_send = s->parameters.zero_copy_send;
+#endif
params->has_xbzrle_cache_size = true;
params->xbzrle_cache_size = s->parameters.xbzrle_cache_size;
params->has_max_postcopy_bandwidth = true;
@@ -1555,6 +1559,11 @@ static void migrate_params_test_apply(MigrateSetParameters *params,
if (params->has_multifd_compression) {
dest->multifd_compression = params->multifd_compression;
}
+#ifdef CONFIG_LINUX
+ if (params->has_zero_copy_send) {
+ dest->zero_copy_send = params->zero_copy_send;
+ }
+#endif
if (params->has_xbzrle_cache_size) {
dest->xbzrle_cache_size = params->xbzrle_cache_size;
}
@@ -1667,6 +1676,11 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
if (params->has_multifd_compression) {
s->parameters.multifd_compression = params->multifd_compression;
}
+#ifdef CONFIG_LINUX
+ if (params->has_zero_copy_send) {
+ s->parameters.zero_copy_send = params->zero_copy_send;
+ }
+#endif
if (params->has_xbzrle_cache_size) {
s->parameters.xbzrle_cache_size = params->xbzrle_cache_size;
xbzrle_cache_resize(params->xbzrle_cache_size, errp);
@@ -2557,6 +2571,17 @@ int migrate_multifd_zstd_level(void)
return s->parameters.multifd_zstd_level;
}
+#ifdef CONFIG_LINUX
+bool migrate_use_zero_copy_send(void)
+{
+ MigrationState *s;
+
+ s = migrate_get_current();
+
+ return s->parameters.zero_copy_send;
+}
+#endif
+
int migrate_use_xbzrle(void)
{
MigrationState *s;
@@ -4200,6 +4225,10 @@ static Property migration_properties[] = {
DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState,
parameters.multifd_zstd_level,
DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL),
+#ifdef CONFIG_LINUX
+ DEFINE_PROP_BOOL("zero_copy_send", MigrationState,
+ parameters.zero_copy_send, false),
+#endif
DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState,
parameters.xbzrle_cache_size,
DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE),
@@ -4297,6 +4326,9 @@ static void migration_instance_init(Object *obj)
params->has_multifd_compression = true;
params->has_multifd_zlib_level = true;
params->has_multifd_zstd_level = true;
+#ifdef CONFIG_LINUX
+ params->has_zero_copy_send = true;
+#endif
params->has_xbzrle_cache_size = true;
params->has_max_postcopy_bandwidth = true;
params->has_max_cpu_throttle = true;
diff --git a/migration/migration.h b/migration/migration.h
index 2de861df01..5bcb7628ef 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -376,6 +376,11 @@ MultiFDCompression migrate_multifd_compression(void);
int migrate_multifd_zlib_level(void);
int migrate_multifd_zstd_level(void);
+#ifdef CONFIG_LINUX
+bool migrate_use_zero_copy_send(void);
+#else
+#define migrate_use_zero_copy_send() (false)
+#endif
int migrate_use_xbzrle(void);
uint64_t migrate_xbzrle_cache_size(void);
bool migrate_colo_enabled(void);
diff --git a/migration/socket.c b/migration/socket.c
index 05705a32d8..3754d8f72c 100644
--- a/migration/socket.c
+++ b/migration/socket.c
@@ -74,9 +74,16 @@ static void socket_outgoing_migration(QIOTask *task,
if (qio_task_propagate_error(task, &err)) {
trace_migration_socket_outgoing_error(error_get_pretty(err));
- } else {
- trace_migration_socket_outgoing_connected(data->hostname);
+ goto out;
}
+
+ trace_migration_socket_outgoing_connected(data->hostname);
+
+ if (migrate_use_zero_copy_send()) {
+ error_setg(&err, "Zero copy send not available in migration");
+ }
+
+out:
migration_channel_connect(data->s, sioc, data->hostname, err);
object_unref(OBJECT(sioc));
}
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 634968498b..55b48d3733 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -1309,6 +1309,12 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
p->has_multifd_zstd_level = true;
visit_type_uint8(v, param, &p->multifd_zstd_level, &err);
break;
+#ifdef CONFIG_LINUX
+ case MIGRATION_PARAMETER_ZERO_COPY_SEND:
+ p->has_zero_copy_send = true;
+ visit_type_bool(v, param, &p->zero_copy_send, &err);
+ break;
+#endif
case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE:
p->has_xbzrle_cache_size = true;
if (!visit_type_size(v, param, &cache_size, &err)) {
diff --git a/qapi/migration.json b/qapi/migration.json
index 27d7b28158..4d833ecdd6 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -741,6 +741,13 @@
# will consume more CPU.
# Defaults to 1. (Since 5.0)
#
+# @zero-copy-send: Controls behavior on sending memory pages on migration.
+# When true, enables a zero-copy mechanism for sending
+# memory pages, if host supports it.
+# Requires that QEMU be permitted to use locked memory
+# for guest RAM pages.
+# Defaults to false. (Since 7.1)
+#
# @block-bitmap-mapping: Maps block nodes and bitmaps on them to
# aliases for the purpose of dirty bitmap migration. Such
# aliases may for example be the corresponding names on the
@@ -780,6 +787,7 @@
'xbzrle-cache-size', 'max-postcopy-bandwidth',
'max-cpu-throttle', 'multifd-compression',
'multifd-zlib-level' ,'multifd-zstd-level',
+ { 'name': 'zero-copy-send', 'if' : 'CONFIG_LINUX'},
'block-bitmap-mapping' ] }
##
@@ -906,6 +914,13 @@
# will consume more CPU.
# Defaults to 1. (Since 5.0)
#
+# @zero-copy-send: Controls behavior on sending memory pages on migration.
+# When true, enables a zero-copy mechanism for sending
+# memory pages, if host supports it.
+# Requires that QEMU be permitted to use locked memory
+# for guest RAM pages.
+# Defaults to false. (Since 7.1)
+#
# @block-bitmap-mapping: Maps block nodes and bitmaps on them to
# aliases for the purpose of dirty bitmap migration. Such
# aliases may for example be the corresponding names on the
@@ -960,6 +975,7 @@
'*multifd-compression': 'MultiFDCompression',
'*multifd-zlib-level': 'uint8',
'*multifd-zstd-level': 'uint8',
+ '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' },
'*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } }
##
@@ -1106,6 +1122,13 @@
# will consume more CPU.
# Defaults to 1. (Since 5.0)
#
+# @zero-copy-send: Controls behavior on sending memory pages on migration.
+# When true, enables a zero-copy mechanism for sending
+# memory pages, if host supports it.
+# Requires that QEMU be permitted to use locked memory
+# for guest RAM pages.
+# Defaults to false. (Since 7.1)
+#
# @block-bitmap-mapping: Maps block nodes and bitmaps on them to
# aliases for the purpose of dirty bitmap migration. Such
# aliases may for example be the corresponding names on the
@@ -1158,6 +1181,7 @@
'*multifd-compression': 'MultiFDCompression',
'*multifd-zlib-level': 'uint8',
'*multifd-zstd-level': 'uint8',
+ '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' },
'*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } }
##
--
2.35.3

@ -0,0 +1,98 @@
From fd6f516a94e635bc42e58448f314db575814a834 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Thu, 31 Mar 2022 11:08:45 -0400
Subject: [PATCH 18/18] migration: Allow migrate-recover to run multiple times
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Peter Xu <peterx@redhat.com>
RH-MergeRequest: 104: migration: Allow migrate-recover to run multiple times
RH-Commit: [1/1] afd726e54c069ae800e2d01f34e768d6bac7dcb9 (peterx/qemu-kvm)
RH-Bugzilla: 2096143
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Hanna Reitz <hreitz@redhat.com>
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Previously migration didn't have an easy way to cleanup the listening
transport, migrate recovery only allows to execute once. That's done with a
trick flag in postcopy_recover_triggered.
Now the facility is already there.
Drop postcopy_recover_triggered and instead allows a new migrate-recover to
release the previous listener transport.
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <20220331150857.74406-8-peterx@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 08401c0426bc1a5ce4609afd1cda5dd39abbf9fa)
Signed-off-by: Peter Xu <peterx@redhat.com>
---
migration/migration.c | 13 ++-----------
migration/migration.h | 1 -
migration/savevm.c | 3 ---
3 files changed, 2 insertions(+), 15 deletions(-)
diff --git a/migration/migration.c b/migration/migration.c
index 2a141bfaf3..8fb3eae910 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2166,11 +2166,8 @@ void qmp_migrate_recover(const char *uri, Error **errp)
return;
}
- if (qatomic_cmpxchg(&mis->postcopy_recover_triggered,
- false, true) == true) {
- error_setg(errp, "Migrate recovery is triggered already");
- return;
- }
+ /* If there's an existing transport, release it */
+ migration_incoming_transport_cleanup(mis);
/*
* Note that this call will never start a real migration; it will
@@ -2178,12 +2175,6 @@ void qmp_migrate_recover(const char *uri, Error **errp)
* to continue using that newly established channel.
*/
qemu_start_incoming_migration(uri, errp);
-
- /* Safe to dereference with the assert above */
- if (*errp) {
- /* Reset the flag so user could still retry */
- qatomic_set(&mis->postcopy_recover_triggered, false);
- }
}
void qmp_migrate_pause(Error **errp)
diff --git a/migration/migration.h b/migration/migration.h
index c2cabb8a14..fbc8690ec8 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -139,7 +139,6 @@ struct MigrationIncomingState {
struct PostcopyBlocktimeContext *blocktime_ctx;
/* notify PAUSED postcopy incoming migrations to try to continue */
- bool postcopy_recover_triggered;
QemuSemaphore postcopy_pause_sem_dst;
QemuSemaphore postcopy_pause_sem_fault;
diff --git a/migration/savevm.c b/migration/savevm.c
index 02ed94c180..d9076897b8 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -2589,9 +2589,6 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis)
assert(migrate_postcopy_ram());
- /* Clear the triggered bit to allow one recovery */
- mis->postcopy_recover_triggered = false;
-
/*
* Unregister yank with either from/to src would work, since ioc behind it
* is the same
--
2.35.3

@ -0,0 +1,93 @@
From 0753565af588dfa78b3529e359b1590e15fcbdb3 Mon Sep 17 00:00:00 2001
From: Leonardo Bras <leobras@redhat.com>
Date: Tue, 19 Jul 2022 09:23:45 -0300
Subject: [PATCH 04/11] migration: Avoid false-positive on non-supported
scenarios for zero-copy-send
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Leonardo Brás <leobras@redhat.com>
RH-MergeRequest: 111: zero-copy-send fixes & improvements
RH-Commit: [4/6] f5c7ed6710d92668acb81d0118a71fab0b4e3d43 (LeoBras/centos-qemu-kvm)
RH-Bugzilla: 2107466
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Migration with zero-copy-send currently has it's limitations, as it can't
be used with TLS nor any kind of compression. In such scenarios, it should
output errors during parameter / capability setting.
But currently there are some ways of setting this not-supported scenarios
without printing the error message:
!) For 'compression' capability, it works by enabling it together with
zero-copy-send. This happens because the validity test for zero-copy uses
the helper unction migrate_use_compression(), which check for compression
presence in s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS].
The point here is: the validity test happens before the capability gets
enabled. If all of them get enabled together, this test will not return
error.
In order to fix that, replace migrate_use_compression() by directly testing
the cap_list parameter migrate_caps_check().
2) For features enabled by parameters such as TLS & 'multifd_compression',
there was also a possibility of setting non-supported scenarios: setting
zero-copy-send first, then setting the unsupported parameter.
In order to fix that, also add a check for parameters conflicting with
zero-copy-send on migrate_params_check().
3) XBZRLE is also a compression capability, so it makes sense to also add
it to the list of capabilities which are not supported with zero-copy-send.
Fixes: 1abaec9a1b2c ("migration: Change zero_copy_send from migration parameter to migration capability")
Signed-off-by: Leonardo Bras <leobras@redhat.com>
Message-Id: <20220719122345.253713-1-leobras@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 90eb69e4f1a16b388d0483543bf6bfc69a9966e4)
Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
migration/migration.c | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/migration/migration.c b/migration/migration.c
index 3a3a7a4a50..343629d59c 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1265,7 +1265,9 @@ static bool migrate_caps_check(bool *cap_list,
#ifdef CONFIG_LINUX
if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND] &&
(!cap_list[MIGRATION_CAPABILITY_MULTIFD] ||
- migrate_use_compression() ||
+ cap_list[MIGRATION_CAPABILITY_COMPRESS] ||
+ cap_list[MIGRATION_CAPABILITY_XBZRLE] ||
+ migrate_multifd_compression() ||
migrate_use_tls())) {
error_setg(errp,
"Zero copy only available for non-compressed non-TLS multifd migration");
@@ -1502,6 +1504,17 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp)
error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: ");
return false;
}
+
+#ifdef CONFIG_LINUX
+ if (migrate_use_zero_copy_send() &&
+ ((params->has_multifd_compression && params->multifd_compression) ||
+ (params->has_tls_creds && params->tls_creds && *params->tls_creds))) {
+ error_setg(errp,
+ "Zero copy only available for non-compressed non-TLS multifd migration");
+ return false;
+ }
+#endif
+
return true;
}
--
2.31.1

@ -0,0 +1,289 @@
From 7e2a037f3f349c21201152cecce32d8c8ff0bea0 Mon Sep 17 00:00:00 2001
From: Leonardo Bras <leobras@redhat.com>
Date: Mon, 20 Jun 2022 02:39:45 -0300
Subject: [PATCH 17/18] migration: Change zero_copy_send from migration
parameter to migration capability
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Leonardo Brás <leobras@redhat.com>
RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd
RH-Commit: [11/11] e4a955607947896a49398ac8400241a0adac51a1 (LeoBras/centos-qemu-kvm)
RH-Bugzilla: 1968509
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
RH-Acked-by: Peter Xu <peterx@redhat.com>
When originally implemented, zero_copy_send was designed as a Migration
paramenter.
But taking into account how is that supposed to work, and how
the difference between a capability and a parameter, it only makes sense
that zero-copy-send would work better as a capability.
Taking into account how recently the change got merged, it was decided
that it's still time to make it right, and convert zero_copy_send into
a Migration capability.
Signed-off-by: Leonardo Bras <leobras@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
Acked-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
dgilbert: always define the capability, even on non-Linux but error if
set; avoids build problems with the capability
(cherry picked from commit 1abaec9a1b2c23f7aa94709a422128d9e42c3e0b)
Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
migration/migration.c | 58 +++++++++++++++++++------------------------
monitor/hmp-cmds.c | 6 -----
qapi/migration.json | 33 +++++++-----------------
3 files changed, 34 insertions(+), 63 deletions(-)
diff --git a/migration/migration.c b/migration/migration.c
index 102236fba0..2a141bfaf3 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -163,7 +163,8 @@ INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot,
MIGRATION_CAPABILITY_COMPRESS,
MIGRATION_CAPABILITY_XBZRLE,
MIGRATION_CAPABILITY_X_COLO,
- MIGRATION_CAPABILITY_VALIDATE_UUID);
+ MIGRATION_CAPABILITY_VALIDATE_UUID,
+ MIGRATION_CAPABILITY_ZERO_COPY_SEND);
/* When we add fault tolerance, we could have several
migrations at once. For now we don't need to add
@@ -899,10 +900,6 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp)
params->multifd_zlib_level = s->parameters.multifd_zlib_level;
params->has_multifd_zstd_level = true;
params->multifd_zstd_level = s->parameters.multifd_zstd_level;
-#ifdef CONFIG_LINUX
- params->has_zero_copy_send = true;
- params->zero_copy_send = s->parameters.zero_copy_send;
-#endif
params->has_xbzrle_cache_size = true;
params->xbzrle_cache_size = s->parameters.xbzrle_cache_size;
params->has_max_postcopy_bandwidth = true;
@@ -1263,6 +1260,24 @@ static bool migrate_caps_check(bool *cap_list,
}
}
+#ifdef CONFIG_LINUX
+ if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND] &&
+ (!cap_list[MIGRATION_CAPABILITY_MULTIFD] ||
+ migrate_use_compression() ||
+ migrate_use_tls())) {
+ error_setg(errp,
+ "Zero copy only available for non-compressed non-TLS multifd migration");
+ return false;
+ }
+#else
+ if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) {
+ error_setg(errp,
+ "Zero copy currently only available on Linux");
+ return false;
+ }
+#endif
+
+
/* incoming side only */
if (runstate_check(RUN_STATE_INMIGRATE) &&
!migrate_multifd_is_allowed() &&
@@ -1485,16 +1500,6 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp)
error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: ");
return false;
}
-#ifdef CONFIG_LINUX
- if (params->zero_copy_send &&
- (!migrate_use_multifd() ||
- params->multifd_compression != MULTIFD_COMPRESSION_NONE ||
- (params->tls_creds && *params->tls_creds))) {
- error_setg(errp,
- "Zero copy only available for non-compressed non-TLS multifd migration");
- return false;
- }
-#endif
return true;
}
@@ -1568,11 +1573,6 @@ static void migrate_params_test_apply(MigrateSetParameters *params,
if (params->has_multifd_compression) {
dest->multifd_compression = params->multifd_compression;
}
-#ifdef CONFIG_LINUX
- if (params->has_zero_copy_send) {
- dest->zero_copy_send = params->zero_copy_send;
- }
-#endif
if (params->has_xbzrle_cache_size) {
dest->xbzrle_cache_size = params->xbzrle_cache_size;
}
@@ -1685,11 +1685,6 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
if (params->has_multifd_compression) {
s->parameters.multifd_compression = params->multifd_compression;
}
-#ifdef CONFIG_LINUX
- if (params->has_zero_copy_send) {
- s->parameters.zero_copy_send = params->zero_copy_send;
- }
-#endif
if (params->has_xbzrle_cache_size) {
s->parameters.xbzrle_cache_size = params->xbzrle_cache_size;
xbzrle_cache_resize(params->xbzrle_cache_size, errp);
@@ -2587,7 +2582,7 @@ bool migrate_use_zero_copy_send(void)
s = migrate_get_current();
- return s->parameters.zero_copy_send;
+ return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND];
}
#endif
@@ -4243,10 +4238,6 @@ static Property migration_properties[] = {
DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState,
parameters.multifd_zstd_level,
DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL),
-#ifdef CONFIG_LINUX
- DEFINE_PROP_BOOL("zero_copy_send", MigrationState,
- parameters.zero_copy_send, false),
-#endif
DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState,
parameters.xbzrle_cache_size,
DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE),
@@ -4284,6 +4275,10 @@ static Property migration_properties[] = {
DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD),
DEFINE_PROP_MIG_CAP("x-background-snapshot",
MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT),
+#ifdef CONFIG_LINUX
+ DEFINE_PROP_MIG_CAP("x-zero-copy-send",
+ MIGRATION_CAPABILITY_ZERO_COPY_SEND),
+#endif
DEFINE_PROP_END_OF_LIST(),
};
@@ -4344,9 +4339,6 @@ static void migration_instance_init(Object *obj)
params->has_multifd_compression = true;
params->has_multifd_zlib_level = true;
params->has_multifd_zstd_level = true;
-#ifdef CONFIG_LINUX
- params->has_zero_copy_send = true;
-#endif
params->has_xbzrle_cache_size = true;
params->has_max_postcopy_bandwidth = true;
params->has_max_cpu_throttle = true;
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 55b48d3733..634968498b 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -1309,12 +1309,6 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
p->has_multifd_zstd_level = true;
visit_type_uint8(v, param, &p->multifd_zstd_level, &err);
break;
-#ifdef CONFIG_LINUX
- case MIGRATION_PARAMETER_ZERO_COPY_SEND:
- p->has_zero_copy_send = true;
- visit_type_bool(v, param, &p->zero_copy_send, &err);
- break;
-#endif
case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE:
p->has_xbzrle_cache_size = true;
if (!visit_type_size(v, param, &cache_size, &err)) {
diff --git a/qapi/migration.json b/qapi/migration.json
index 4d833ecdd6..5105790cd0 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -463,6 +463,13 @@
# procedure starts. The VM RAM is saved with running VM.
# (since 6.0)
#
+# @zero-copy-send: Controls behavior on sending memory pages on migration.
+# When true, enables a zero-copy mechanism for sending
+# memory pages, if host supports it.
+# Requires that QEMU be permitted to use locked memory
+# for guest RAM pages.
+# (since 7.1)
+#
# Features:
# @unstable: Members @x-colo and @x-ignore-shared are experimental.
#
@@ -476,7 +483,8 @@
'block', 'return-path', 'pause-before-switchover', 'multifd',
'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate',
{ 'name': 'x-ignore-shared', 'features': [ 'unstable' ] },
- 'validate-uuid', 'background-snapshot'] }
+ 'validate-uuid', 'background-snapshot',
+ 'zero-copy-send'] }
##
# @MigrationCapabilityStatus:
@@ -741,12 +749,6 @@
# will consume more CPU.
# Defaults to 1. (Since 5.0)
#
-# @zero-copy-send: Controls behavior on sending memory pages on migration.
-# When true, enables a zero-copy mechanism for sending
-# memory pages, if host supports it.
-# Requires that QEMU be permitted to use locked memory
-# for guest RAM pages.
-# Defaults to false. (Since 7.1)
#
# @block-bitmap-mapping: Maps block nodes and bitmaps on them to
# aliases for the purpose of dirty bitmap migration. Such
@@ -787,7 +789,6 @@
'xbzrle-cache-size', 'max-postcopy-bandwidth',
'max-cpu-throttle', 'multifd-compression',
'multifd-zlib-level' ,'multifd-zstd-level',
- { 'name': 'zero-copy-send', 'if' : 'CONFIG_LINUX'},
'block-bitmap-mapping' ] }
##
@@ -914,13 +915,6 @@
# will consume more CPU.
# Defaults to 1. (Since 5.0)
#
-# @zero-copy-send: Controls behavior on sending memory pages on migration.
-# When true, enables a zero-copy mechanism for sending
-# memory pages, if host supports it.
-# Requires that QEMU be permitted to use locked memory
-# for guest RAM pages.
-# Defaults to false. (Since 7.1)
-#
# @block-bitmap-mapping: Maps block nodes and bitmaps on them to
# aliases for the purpose of dirty bitmap migration. Such
# aliases may for example be the corresponding names on the
@@ -975,7 +969,6 @@
'*multifd-compression': 'MultiFDCompression',
'*multifd-zlib-level': 'uint8',
'*multifd-zstd-level': 'uint8',
- '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' },
'*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } }
##
@@ -1122,13 +1115,6 @@
# will consume more CPU.
# Defaults to 1. (Since 5.0)
#
-# @zero-copy-send: Controls behavior on sending memory pages on migration.
-# When true, enables a zero-copy mechanism for sending
-# memory pages, if host supports it.
-# Requires that QEMU be permitted to use locked memory
-# for guest RAM pages.
-# Defaults to false. (Since 7.1)
-#
# @block-bitmap-mapping: Maps block nodes and bitmaps on them to
# aliases for the purpose of dirty bitmap migration. Such
# aliases may for example be the corresponding names on the
@@ -1181,7 +1167,6 @@
'*multifd-compression': 'MultiFDCompression',
'*multifd-zlib-level': 'uint8',
'*multifd-zstd-level': 'uint8',
- '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' },
'*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } }
##
--
2.35.3

@ -0,0 +1,47 @@
From 4bd48e784ae0c38c89f1a944b06c997fd28c4d37 Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Thu, 19 May 2022 04:15:33 -0400
Subject: [PATCH 16/16] migration: Fix operator type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Miroslav Rezanina <mrezanin@redhat.com>
RH-MergeRequest: 92: Fix build using clang 14
RH-Commit: [1/1] ad9980e64cf2e39085d68f1ff601444bf2afe228 (mrezanin/centos-src-qemu-kvm)
RH-Bugzilla: 2064530
RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Clang spotted an & that should have been an &&; fix it.
Reported by: David Binderman / https://gitlab.com/dcb
Fixes: 65dacaa04fa ("migration: introduce save_normal_page()")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/963
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <20220406102515.96320-1-dgilbert@redhat.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit f912ec5b2d65644116ff496b58d7c9145c19e4c0)
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
---
migration/ram.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/migration/ram.c b/migration/ram.c
index 3532f64ecb..0ef4bd63eb 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1289,7 +1289,7 @@ static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
offset | RAM_SAVE_FLAG_PAGE));
if (async) {
qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE,
- migrate_release_ram() &
+ migrate_release_ram() &&
migration_in_postcopy());
} else {
qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE);
--
2.31.1

@ -0,0 +1,62 @@
From 9698c0e8dd9b4f5dbc237a3f98ac46297dac85fb Mon Sep 17 00:00:00 2001
From: Leonardo Bras <leobras@redhat.com>
Date: Mon, 25 Jul 2022 22:02:35 -0300
Subject: [PATCH 05/11] migration: add remaining params->has_* = true in
migration_instance_init()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Leonardo Brás <leobras@redhat.com>
RH-MergeRequest: 111: zero-copy-send fixes & improvements
RH-Commit: [5/6] 50bbad254e2356b3ae16f6e00a3db8fd0b22dde9 (LeoBras/centos-qemu-kvm)
RH-Bugzilla: 2107466
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Some of params->has_* = true are missing in migration_instance_init, this
causes migrate_params_check() to skip some tests, allowing some
unsupported scenarios.
Fix this by adding all missing params->has_* = true in
migration_instance_init().
Fixes: 69ef1f36b0 ("migration: define 'tls-creds' and 'tls-hostname' migration parameters")
Fixes: 1d58872a91 ("migration: do not wait for free thread")
Fixes: d2f1d29b95 ("migration: add support for a "tls-authz" migration parameter")
Signed-off-by: Leonardo Bras <leobras@redhat.com>
Message-Id: <20220726010235.342927-1-leobras@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit df67aa3e61e2c83459da7d815962d9706f1528fc)
Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
migration/migration.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/migration/migration.c b/migration/migration.c
index 343629d59c..5e78028df4 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -4332,6 +4332,7 @@ static void migration_instance_init(Object *obj)
/* Set has_* up only for parameter checks */
params->has_compress_level = true;
params->has_compress_threads = true;
+ params->has_compress_wait_thread = true;
params->has_decompress_threads = true;
params->has_throttle_trigger_threshold = true;
params->has_cpu_throttle_initial = true;
@@ -4352,6 +4353,9 @@ static void migration_instance_init(Object *obj)
params->has_announce_max = true;
params->has_announce_rounds = true;
params->has_announce_step = true;
+ params->has_tls_creds = true;
+ params->has_tls_hostname = true;
+ params->has_tls_authz = true;
qemu_sem_init(&ms->postcopy_pause_sem, 0);
qemu_sem_init(&ms->postcopy_pause_rp_sem, 0);
--
2.31.1

@ -0,0 +1,83 @@
From 78bbe28d5f5691330239041448cccfb339eed779 Mon Sep 17 00:00:00 2001
From: Leonardo Bras <leobras@redhat.com>
Date: Mon, 11 Jul 2022 18:11:13 -0300
Subject: [PATCH 03/11] migration/multifd: Report to user when zerocopy not
working
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Leonardo Brás <leobras@redhat.com>
RH-MergeRequest: 111: zero-copy-send fixes & improvements
RH-Commit: [3/6] 4f9165325b3cb8ff16d8b3b7649ff780fae0e2ad (LeoBras/centos-qemu-kvm)
RH-Bugzilla: 2107466
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Some errors, like the lack of Scatter-Gather support by the network
interface(NETIF_F_SG) may cause sendmsg(...,MSG_ZEROCOPY) to fail on using
zero-copy, which causes it to fall back to the default copying mechanism.
After each full dirty-bitmap scan there should be a zero-copy flush
happening, which checks for errors each of the previous calls to
sendmsg(...,MSG_ZEROCOPY). If all of them failed to use zero-copy, then
increment dirty_sync_missed_zero_copy migration stat to let the user know
about it.
Signed-off-by: Leonardo Bras <leobras@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Acked-by: Peter Xu <peterx@redhat.com>
Message-Id: <20220711211112.18951-4-leobras@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit d59c40cc483729f2e67c80e58df769ad19976fe9)
Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
migration/multifd.c | 2 ++
migration/ram.c | 5 +++++
migration/ram.h | 2 ++
3 files changed, 9 insertions(+)
diff --git a/migration/multifd.c b/migration/multifd.c
index 0b5b41c53f..96e5f0a058 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -626,6 +626,8 @@ int multifd_send_sync_main(QEMUFile *f)
if (ret < 0) {
error_report_err(err);
return -1;
+ } else if (ret == 1) {
+ dirty_sync_missed_zero_copy();
}
}
}
diff --git a/migration/ram.c b/migration/ram.c
index ee40e4a718..c437ff1b1f 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -406,6 +406,11 @@ static void ram_transferred_add(uint64_t bytes)
ram_counters.transferred += bytes;
}
+void dirty_sync_missed_zero_copy(void)
+{
+ ram_counters.dirty_sync_missed_zero_copy++;
+}
+
/* used by the search for pages to send */
struct PageSearchStatus {
/* Current block being searched */
diff --git a/migration/ram.h b/migration/ram.h
index 2c6dc3675d..34adf5cb92 100644
--- a/migration/ram.h
+++ b/migration/ram.h
@@ -86,4 +86,6 @@ void ram_write_tracking_prepare(void);
int ram_write_tracking_start(void);
void ram_write_tracking_stop(void);
+void dirty_sync_missed_zero_copy(void);
+
#endif
--
2.31.1

@ -0,0 +1,142 @@
From 1d280070748b604c60a7be4d4c3c3a28e3964f37 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Tue, 2 Aug 2022 10:11:21 +0200
Subject: [PATCH 31/32] multifd: Copy pages before compressing them with zlib
RH-Author: Thomas Huth <thuth@redhat.com>
RH-MergeRequest: 112: Fix postcopy migration on s390x
RH-Commit: [1/2] fd5a0221e22b4563bd1cb7f8a8b95f0bfe8f5fc9 (thuth/qemu-kvm-cs9)
RH-Bugzilla: 2099934
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Peter Xu <peterx@redhat.com>
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2099934
zlib_send_prepare() compresses pages of a running VM. zlib does not
make any thread-safety guarantees with respect to changing deflate()
input concurrently with deflate() [1].
One can observe problems due to this with the IBM zEnterprise Data
Compression accelerator capable zlib [2]. When the hardware
acceleration is enabled, migration/multifd/tcp/plain/zlib test fails
intermittently [3] due to sliding window corruption. The accelerator's
architecture explicitly discourages concurrent accesses [4]:
Page 26-57, "Other Conditions":
As observed by this CPU, other CPUs, and channel
programs, references to the parameter block, first,
second, and third operands may be multiple-access
references, accesses to these storage locations are
not necessarily block-concurrent, and the sequence
of these accesses or references is undefined.
Mark Adler pointed out that vanilla zlib performs double fetches under
certain circumstances as well [5], therefore we need to copy data
before passing it to deflate().
[1] https://zlib.net/manual.html
[2] https://github.com/madler/zlib/pull/410
[3] https://lists.nongnu.org/archive/html/qemu-devel/2022-03/msg03988.html
[4] http://publibfp.dhe.ibm.com/epubs/pdf/a227832c.pdf
[5] https://lists.gnu.org/archive/html/qemu-devel/2022-07/msg00889.html
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Message-Id: <20220705203559.2960949-1-iii@linux.ibm.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 007e179ef0e97eafda4c9ff2a9d665a1947c7c6d)
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
migration/multifd-zlib.c | 38 ++++++++++++++++++++++++++++++--------
1 file changed, 30 insertions(+), 8 deletions(-)
diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c
index 3a7ae44485..18213a9513 100644
--- a/migration/multifd-zlib.c
+++ b/migration/multifd-zlib.c
@@ -27,6 +27,8 @@ struct zlib_data {
uint8_t *zbuff;
/* size of compressed buffer */
uint32_t zbuff_len;
+ /* uncompressed buffer of size qemu_target_page_size() */
+ uint8_t *buf;
};
/* Multifd zlib compression */
@@ -45,26 +47,38 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp)
{
struct zlib_data *z = g_new0(struct zlib_data, 1);
z_stream *zs = &z->zs;
+ const char *err_msg;
zs->zalloc = Z_NULL;
zs->zfree = Z_NULL;
zs->opaque = Z_NULL;
if (deflateInit(zs, migrate_multifd_zlib_level()) != Z_OK) {
- g_free(z);
- error_setg(errp, "multifd %u: deflate init failed", p->id);
- return -1;
+ err_msg = "deflate init failed";
+ goto err_free_z;
}
/* This is the maxium size of the compressed buffer */
z->zbuff_len = compressBound(MULTIFD_PACKET_SIZE);
z->zbuff = g_try_malloc(z->zbuff_len);
if (!z->zbuff) {
- deflateEnd(&z->zs);
- g_free(z);
- error_setg(errp, "multifd %u: out of memory for zbuff", p->id);
- return -1;
+ err_msg = "out of memory for zbuff";
+ goto err_deflate_end;
+ }
+ z->buf = g_try_malloc(qemu_target_page_size());
+ if (!z->buf) {
+ err_msg = "out of memory for buf";
+ goto err_free_zbuff;
}
p->data = z;
return 0;
+
+err_free_zbuff:
+ g_free(z->zbuff);
+err_deflate_end:
+ deflateEnd(&z->zs);
+err_free_z:
+ g_free(z);
+ error_setg(errp, "multifd %u: %s", p->id, err_msg);
+ return -1;
}
/**
@@ -82,6 +96,8 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp)
deflateEnd(&z->zs);
g_free(z->zbuff);
z->zbuff = NULL;
+ g_free(z->buf);
+ z->buf = NULL;
g_free(p->data);
p->data = NULL;
}
@@ -114,8 +130,14 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp)
flush = Z_SYNC_FLUSH;
}
+ /*
+ * Since the VM might be running, the page may be changing concurrently
+ * with compression. zlib does not guarantee that this is safe,
+ * therefore copy the page before calling deflate().
+ */
+ memcpy(z->buf, p->pages->block->host + p->normal[i], page_size);
zs->avail_in = page_size;
- zs->next_in = p->pages->block->host + p->normal[i];
+ zs->next_in = z->buf;
zs->avail_out = available;
zs->next_out = z->zbuff + out_size;
--
2.31.1

@ -0,0 +1,182 @@
From c1a2866d158ac67179fa0d17f1710302eb9a3866 Mon Sep 17 00:00:00 2001
From: Leonardo Bras <leobras@redhat.com>
Date: Fri, 13 May 2022 03:28:37 -0300
Subject: [PATCH 14/18] multifd: Implement zero copy write in multifd migration
(multifd-zero-copy)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Leonardo Brás <leobras@redhat.com>
RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd
RH-Commit: [8/11] b93009cc94b2cc4b464b4f68ebfb37b870dd6f7d (LeoBras/centos-qemu-kvm)
RH-Bugzilla: 1968509
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
RH-Acked-by: Peter Xu <peterx@redhat.com>
Implement zero copy send on nocomp_send_write(), by making use of QIOChannel
writev + flags & flush interface.
Change multifd_send_sync_main() so flush_zero_copy() can be called
after each iteration in order to make sure all dirty pages are sent before
a new iteration is started. It will also flush at the beginning and at the
end of migration.
Also make it return -1 if flush_zero_copy() fails, in order to cancel
the migration process, and avoid resuming the guest in the target host
without receiving all current RAM.
This will work fine on RAM migration because the RAM pages are not usually freed,
and there is no problem on changing the pages content between writev_zero_copy() and
the actual sending of the buffer, because this change will dirty the page and
cause it to be re-sent on a next iteration anyway.
A lot of locked memory may be needed in order to use multifd migration
with zero-copy enabled, so disabling the feature should be necessary for
low-privileged users trying to perform multifd migrations.
Signed-off-by: Leonardo Bras <leobras@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Message-Id: <20220513062836.965425-9-leobras@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 5b1d9bab2da4fca3a3caee97c430e5709cb32b7b)
Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
migration/migration.c | 11 ++++++++++-
migration/multifd.c | 37 +++++++++++++++++++++++++++++++++++--
migration/multifd.h | 2 ++
migration/socket.c | 5 +++--
4 files changed, 50 insertions(+), 5 deletions(-)
diff --git a/migration/migration.c b/migration/migration.c
index d91efb66fe..102236fba0 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1485,7 +1485,16 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp)
error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: ");
return false;
}
-
+#ifdef CONFIG_LINUX
+ if (params->zero_copy_send &&
+ (!migrate_use_multifd() ||
+ params->multifd_compression != MULTIFD_COMPRESSION_NONE ||
+ (params->tls_creds && *params->tls_creds))) {
+ error_setg(errp,
+ "Zero copy only available for non-compressed non-TLS multifd migration");
+ return false;
+ }
+#endif
return true;
}
diff --git a/migration/multifd.c b/migration/multifd.c
index 8fca6c970e..0b5b41c53f 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -571,6 +571,7 @@ void multifd_save_cleanup(void)
int multifd_send_sync_main(QEMUFile *f)
{
int i;
+ bool flush_zero_copy;
if (!migrate_use_multifd()) {
return 0;
@@ -581,6 +582,20 @@ int multifd_send_sync_main(QEMUFile *f)
return -1;
}
}
+
+ /*
+ * When using zero-copy, it's necessary to flush the pages before any of
+ * the pages can be sent again, so we'll make sure the new version of the
+ * pages will always arrive _later_ than the old pages.
+ *
+ * Currently we achieve this by flushing the zero-page requested writes
+ * per ram iteration, but in the future we could potentially optimize it
+ * to be less frequent, e.g. only after we finished one whole scanning of
+ * all the dirty bitmaps.
+ */
+
+ flush_zero_copy = migrate_use_zero_copy_send();
+
for (i = 0; i < migrate_multifd_channels(); i++) {
MultiFDSendParams *p = &multifd_send_state->params[i];
@@ -602,6 +617,17 @@ int multifd_send_sync_main(QEMUFile *f)
ram_counters.transferred += p->packet_len;
qemu_mutex_unlock(&p->mutex);
qemu_sem_post(&p->sem);
+
+ if (flush_zero_copy && p->c) {
+ int ret;
+ Error *err = NULL;
+
+ ret = qio_channel_flush(p->c, &err);
+ if (ret < 0) {
+ error_report_err(err);
+ return -1;
+ }
+ }
}
for (i = 0; i < migrate_multifd_channels(); i++) {
MultiFDSendParams *p = &multifd_send_state->params[i];
@@ -686,8 +712,8 @@ static void *multifd_send_thread(void *opaque)
p->iov[0].iov_base = p->packet;
}
- ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num,
- &local_err);
+ ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num, NULL,
+ 0, p->write_flags, &local_err);
if (ret != 0) {
break;
}
@@ -928,6 +954,13 @@ int multifd_save_setup(Error **errp)
/* We need one extra place for the packet header */
p->iov = g_new0(struct iovec, page_count + 1);
p->normal = g_new0(ram_addr_t, page_count);
+
+ if (migrate_use_zero_copy_send()) {
+ p->write_flags = QIO_CHANNEL_WRITE_FLAG_ZERO_COPY;
+ } else {
+ p->write_flags = 0;
+ }
+
socket_send_channel_create(multifd_new_send_channel_async, p);
}
diff --git a/migration/multifd.h b/migration/multifd.h
index cd495195ce..7ec688fb4f 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -96,6 +96,8 @@ typedef struct {
uint32_t packet_len;
/* pointer to the packet */
MultiFDPacket_t *packet;
+ /* multifd flags for sending ram */
+ int write_flags;
/* multifd flags for each packet */
uint32_t flags;
/* size of the next packet that contains pages */
diff --git a/migration/socket.c b/migration/socket.c
index 3754d8f72c..4fd5e85f50 100644
--- a/migration/socket.c
+++ b/migration/socket.c
@@ -79,8 +79,9 @@ static void socket_outgoing_migration(QIOTask *task,
trace_migration_socket_outgoing_connected(data->hostname);
- if (migrate_use_zero_copy_send()) {
- error_setg(&err, "Zero copy send not available in migration");
+ if (migrate_use_zero_copy_send() &&
+ !qio_channel_has_feature(sioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) {
+ error_setg(&err, "Zero copy send feature not detected in host kernel");
}
out:
--
2.35.3

@ -0,0 +1,102 @@
From 63255c13492f42a3236d96e706e5f8e70bb4e219 Mon Sep 17 00:00:00 2001
From: Leonardo Bras <leobras@redhat.com>
Date: Fri, 13 May 2022 03:28:36 -0300
Subject: [PATCH 13/18] multifd: Send header packet without flags if
zero-copy-send is enabled
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Leonardo Brás <leobras@redhat.com>
RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd
RH-Commit: [7/11] 137eea685e387d3d6aff187ec3fcac05bc16b6e3 (LeoBras/centos-qemu-kvm)
RH-Bugzilla: 1968509
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
RH-Acked-by: Peter Xu <peterx@redhat.com>
Since d48c3a0445 ("multifd: Use a single writev on the send side"),
sending the header packet and the memory pages happens in the same
writev, which can potentially make the migration faster.
Using channel-socket as example, this works well with the default copying
mechanism of sendmsg(), but with zero-copy-send=true, it will cause
the migration to often break.
This happens because the header packet buffer gets reused quite often,
and there is a high chance that by the time the MSG_ZEROCOPY mechanism get
to send the buffer, it has already changed, sending the wrong data and
causing the migration to abort.
It means that, as it is, the buffer for the header packet is not suitable
for sending with MSG_ZEROCOPY.
In order to enable zero copy for multifd, send the header packet on an
individual write(), without any flags, and the remanining pages with a
writev(), as it was happening before. This only changes how a migration
with zero-copy-send=true works, not changing any current behavior for
migrations with zero-copy-send=false.
Signed-off-by: Leonardo Bras <leobras@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Message-Id: <20220513062836.965425-8-leobras@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit b7dbdd8e76cd03453c234dbb9578d20969859d74)
Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
migration/multifd.c | 22 +++++++++++++++++++---
1 file changed, 19 insertions(+), 3 deletions(-)
diff --git a/migration/multifd.c b/migration/multifd.c
index cdb57439a7..8fca6c970e 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -619,6 +619,7 @@ static void *multifd_send_thread(void *opaque)
MultiFDSendParams *p = opaque;
Error *local_err = NULL;
int ret = 0;
+ bool use_zero_copy_send = migrate_use_zero_copy_send();
trace_multifd_send_thread_start(p->id);
rcu_register_thread();
@@ -641,9 +642,14 @@ static void *multifd_send_thread(void *opaque)
if (p->pending_job) {
uint64_t packet_num = p->packet_num;
uint32_t flags = p->flags;
- p->iovs_num = 1;
p->normal_num = 0;
+ if (use_zero_copy_send) {
+ p->iovs_num = 0;
+ } else {
+ p->iovs_num = 1;
+ }
+
for (int i = 0; i < p->pages->num; i++) {
p->normal[p->normal_num] = p->pages->offset[i];
p->normal_num++;
@@ -667,8 +673,18 @@ static void *multifd_send_thread(void *opaque)
trace_multifd_send(p->id, packet_num, p->normal_num, flags,
p->next_packet_size);
- p->iov[0].iov_len = p->packet_len;
- p->iov[0].iov_base = p->packet;
+ if (use_zero_copy_send) {
+ /* Send header first, without zerocopy */
+ ret = qio_channel_write_all(p->c, (void *)p->packet,
+ p->packet_len, &local_err);
+ if (ret != 0) {
+ break;
+ }
+ } else {
+ /* Send header using the same writev call */
+ p->iov[0].iov_len = p->packet_len;
+ p->iov[0].iov_base = p->packet;
+ }
ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num,
&local_err);
--
2.35.3

@ -0,0 +1,163 @@
From 4ca5375a936bc87829c6e2b4620f56c73a5efc70 Mon Sep 17 00:00:00 2001
From: Leonardo Bras <leobras@redhat.com>
Date: Fri, 13 May 2022 03:28:35 -0300
Subject: [PATCH 12/18] multifd: multifd_send_sync_main now returns negative on
error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Leonardo Brás <leobras@redhat.com>
RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd
RH-Commit: [6/11] c8ebdee4327d463c74f4b2eeb42d3c964f314c94 (LeoBras/centos-qemu-kvm)
RH-Bugzilla: 1968509
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
RH-Acked-by: Peter Xu <peterx@redhat.com>
Even though multifd_send_sync_main() currently emits error_reports, it's
callers don't really check it before continuing.
Change multifd_send_sync_main() to return -1 on error and 0 on success.
Also change all it's callers to make use of this change and possibly fail
earlier.
(This change is important to next patch on multifd zero copy
implementation, to make it sure an error in zero-copy flush does not go
unnoticed.
Signed-off-by: Leonardo Bras <leobras@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Message-Id: <20220513062836.965425-7-leobras@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 33d70973a3a6e8c6b62bcbc64d9e488961981007)
Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
migration/multifd.c | 10 ++++++----
migration/multifd.h | 2 +-
migration/ram.c | 29 ++++++++++++++++++++++-------
3 files changed, 29 insertions(+), 12 deletions(-)
diff --git a/migration/multifd.c b/migration/multifd.c
index 43998ad117..cdb57439a7 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -568,17 +568,17 @@ void multifd_save_cleanup(void)
multifd_send_state = NULL;
}
-void multifd_send_sync_main(QEMUFile *f)
+int multifd_send_sync_main(QEMUFile *f)
{
int i;
if (!migrate_use_multifd()) {
- return;
+ return 0;
}
if (multifd_send_state->pages->num) {
if (multifd_send_pages(f) < 0) {
error_report("%s: multifd_send_pages fail", __func__);
- return;
+ return -1;
}
}
for (i = 0; i < migrate_multifd_channels(); i++) {
@@ -591,7 +591,7 @@ void multifd_send_sync_main(QEMUFile *f)
if (p->quit) {
error_report("%s: channel %d has already quit", __func__, i);
qemu_mutex_unlock(&p->mutex);
- return;
+ return -1;
}
p->packet_num = multifd_send_state->packet_num++;
@@ -610,6 +610,8 @@ void multifd_send_sync_main(QEMUFile *f)
qemu_sem_wait(&p->sem_sync);
}
trace_multifd_send_sync_main(multifd_send_state->packet_num);
+
+ return 0;
}
static void *multifd_send_thread(void *opaque)
diff --git a/migration/multifd.h b/migration/multifd.h
index 4dda900a0b..cd495195ce 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -22,7 +22,7 @@ int multifd_load_cleanup(Error **errp);
bool multifd_recv_all_channels_created(void);
bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
void multifd_recv_sync_main(void);
-void multifd_send_sync_main(QEMUFile *f);
+int multifd_send_sync_main(QEMUFile *f);
int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset);
/* Multifd Compression flags */
diff --git a/migration/ram.c b/migration/ram.c
index 0ef4bd63eb..fb6db54642 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2903,6 +2903,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
{
RAMState **rsp = opaque;
RAMBlock *block;
+ int ret;
if (compress_threads_save_setup()) {
return -1;
@@ -2937,7 +2938,11 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
ram_control_before_iterate(f, RAM_CONTROL_SETUP);
ram_control_after_iterate(f, RAM_CONTROL_SETUP);
- multifd_send_sync_main(f);
+ ret = multifd_send_sync_main(f);
+ if (ret < 0) {
+ return ret;
+ }
+
qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
qemu_fflush(f);
@@ -3046,7 +3051,11 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
out:
if (ret >= 0
&& migration_is_setup_or_active(migrate_get_current()->state)) {
- multifd_send_sync_main(rs->f);
+ ret = multifd_send_sync_main(rs->f);
+ if (ret < 0) {
+ return ret;
+ }
+
qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
qemu_fflush(f);
ram_transferred_add(8);
@@ -3106,13 +3115,19 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
ram_control_after_iterate(f, RAM_CONTROL_FINISH);
}
- if (ret >= 0) {
- multifd_send_sync_main(rs->f);
- qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
- qemu_fflush(f);
+ if (ret < 0) {
+ return ret;
}
- return ret;
+ ret = multifd_send_sync_main(rs->f);
+ if (ret < 0) {
+ return ret;
+ }
+
+ qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
+ qemu_fflush(f);
+
+ return 0;
}
static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
--
2.35.3

@ -0,0 +1,381 @@
From 4a9ddf42788d3f924bdad7746f7aca615f03d7c1 Mon Sep 17 00:00:00 2001
From: Eric Blake <eblake@redhat.com>
Date: Wed, 11 May 2022 19:49:24 -0500
Subject: [PATCH 2/2] nbd/server: Allow MULTI_CONN for shared writable exports
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Eric Blake <eblake@redhat.com>
RH-MergeRequest: 90: Advertise MULTI_CONN on writeable NBD servers
RH-Commit: [2/2] 53f0e885a5ed7f6e4bb14e74fe8e7957e6afe90f (ebblake/centos-qemu-kvm)
RH-Bugzilla: 1708300
RH-Acked-by: Nir Soffer <nsoffer@redhat.com>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
According to the NBD spec, a server that advertises
NBD_FLAG_CAN_MULTI_CONN promises that multiple client connections will
not see any cache inconsistencies: when properly separated by a single
flush, actions performed by one client will be visible to another
client, regardless of which client did the flush.
We always satisfy these conditions in qemu - even when we support
multiple clients, ALL clients go through a single point of reference
into the block layer, with no local caching. The effect of one client
is instantly visible to the next client. Even if our backend were a
network device, we argue that any multi-path caching effects that
would cause inconsistencies in back-to-back actions not seeing the
effect of previous actions would be a bug in that backend, and not the
fault of caching in qemu. As such, it is safe to unconditionally
advertise CAN_MULTI_CONN for any qemu NBD server situation that
supports parallel clients.
Note, however, that we don't want to advertise CAN_MULTI_CONN when we
know that a second client cannot connect (for historical reasons,
qemu-nbd defaults to a single connection while nbd-server-add and QMP
commands default to unlimited connections; but we already have
existing means to let either style of NBD server creation alter those
defaults). This is visible by no longer advertising MULTI_CONN for
'qemu-nbd -r' without -e, as in the iotest nbd-qemu-allocation.
The harder part of this patch is setting up an iotest to demonstrate
behavior of multiple NBD clients to a single server. It might be
possible with parallel qemu-io processes, but I found it easier to do
in python with the help of libnbd, and help from Nir and Vladimir in
writing the test.
Signed-off-by: Eric Blake <eblake@redhat.com>
Suggested-by: Nir Soffer <nsoffer@redhat.com>
Suggested-by: Vladimir Sementsov-Ogievskiy <v.sementsov-og@mail.ru>
Message-Id: <20220512004924.417153-3-eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 58a6fdcc9efb2a7c1ef4893dca4aa5e8020ca3dc)
Conflicts:
nbd/server.c - context, e5fb29d5 not backported
Signed-off-by: Eric Blake <eblake@redhat.com>
---
MAINTAINERS | 1 +
blockdev-nbd.c | 5 +
docs/interop/nbd.txt | 1 +
docs/tools/qemu-nbd.rst | 3 +-
include/block/nbd.h | 3 +-
nbd/server.c | 10 +-
qapi/block-export.json | 8 +-
tests/qemu-iotests/tests/nbd-multiconn | 145 ++++++++++++++++++
tests/qemu-iotests/tests/nbd-multiconn.out | 5 +
.../tests/nbd-qemu-allocation.out | 2 +-
10 files changed, 172 insertions(+), 11 deletions(-)
create mode 100755 tests/qemu-iotests/tests/nbd-multiconn
create mode 100644 tests/qemu-iotests/tests/nbd-multiconn.out
diff --git a/MAINTAINERS b/MAINTAINERS
index 4ad2451e03..2fe20a49ab 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3370,6 +3370,7 @@ F: qemu-nbd.*
F: blockdev-nbd.c
F: docs/interop/nbd.txt
F: docs/tools/qemu-nbd.rst
+F: tests/qemu-iotests/tests/*nbd*
T: git https://repo.or.cz/qemu/ericb.git nbd
T: git https://src.openvz.org/scm/~vsementsov/qemu.git nbd
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
index add41a23af..c6d9b0324c 100644
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -44,6 +44,11 @@ bool nbd_server_is_running(void)
return nbd_server || qemu_nbd_connections >= 0;
}
+int nbd_server_max_connections(void)
+{
+ return nbd_server ? nbd_server->max_connections : qemu_nbd_connections;
+}
+
static void nbd_blockdev_client_closed(NBDClient *client, bool ignored)
{
nbd_client_put(client);
diff --git a/docs/interop/nbd.txt b/docs/interop/nbd.txt
index bdb0f2a41a..f5ca25174a 100644
--- a/docs/interop/nbd.txt
+++ b/docs/interop/nbd.txt
@@ -68,3 +68,4 @@ NBD_CMD_BLOCK_STATUS for "qemu:dirty-bitmap:", NBD_CMD_CACHE
* 4.2: NBD_FLAG_CAN_MULTI_CONN for shareable read-only exports,
NBD_CMD_FLAG_FAST_ZERO
* 5.2: NBD_CMD_BLOCK_STATUS for "qemu:allocation-depth"
+* 7.1: NBD_FLAG_CAN_MULTI_CONN for shareable writable exports
diff --git a/docs/tools/qemu-nbd.rst b/docs/tools/qemu-nbd.rst
index 4c950f6199..8e08a29e89 100644
--- a/docs/tools/qemu-nbd.rst
+++ b/docs/tools/qemu-nbd.rst
@@ -139,8 +139,7 @@ driver options if :option:`--image-opts` is specified.
.. option:: -e, --shared=NUM
Allow up to *NUM* clients to share the device (default
- ``1``), 0 for unlimited. Safe for readers, but for now,
- consistency is not guaranteed between multiple writers.
+ ``1``), 0 for unlimited.
.. option:: -t, --persistent
diff --git a/include/block/nbd.h b/include/block/nbd.h
index c5a29ce1c6..c74b7a9d2e 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2016-2020 Red Hat, Inc.
+ * Copyright (C) 2016-2022 Red Hat, Inc.
* Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws>
*
* Network Block Device
@@ -346,6 +346,7 @@ void nbd_client_put(NBDClient *client);
void nbd_server_is_qemu_nbd(int max_connections);
bool nbd_server_is_running(void);
+int nbd_server_max_connections(void);
void nbd_server_start(SocketAddress *addr, const char *tls_creds,
const char *tls_authz, uint32_t max_connections,
Error **errp);
diff --git a/nbd/server.c b/nbd/server.c
index c5644fd3f6..6e2157acfa 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2016-2021 Red Hat, Inc.
+ * Copyright (C) 2016-2022 Red Hat, Inc.
* Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws>
*
* Network Block Device Server Side
@@ -1642,7 +1642,6 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
int64_t size;
uint64_t perm, shared_perm;
bool readonly = !exp_args->writable;
- bool shared = !exp_args->writable;
strList *bitmaps;
size_t i;
int ret;
@@ -1693,11 +1692,12 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
exp->description = g_strdup(arg->description);
exp->nbdflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_FLUSH |
NBD_FLAG_SEND_FUA | NBD_FLAG_SEND_CACHE);
+
+ if (nbd_server_max_connections() != 1) {
+ exp->nbdflags |= NBD_FLAG_CAN_MULTI_CONN;
+ }
if (readonly) {
exp->nbdflags |= NBD_FLAG_READ_ONLY;
- if (shared) {
- exp->nbdflags |= NBD_FLAG_CAN_MULTI_CONN;
- }
} else {
exp->nbdflags |= (NBD_FLAG_SEND_TRIM | NBD_FLAG_SEND_WRITE_ZEROES |
NBD_FLAG_SEND_FAST_ZERO);
diff --git a/qapi/block-export.json b/qapi/block-export.json
index 1e34927f85..755ccc89b1 100644
--- a/qapi/block-export.json
+++ b/qapi/block-export.json
@@ -21,7 +21,9 @@
# recreated on the fly while the NBD server is active.
# If missing, it will default to denying access (since 4.0).
# @max-connections: The maximum number of connections to allow at the same
-# time, 0 for unlimited. (since 5.2; default: 0)
+# time, 0 for unlimited. Setting this to 1 also stops
+# the server from advertising multiple client support
+# (since 5.2; default: 0)
#
# Since: 4.2
##
@@ -50,7 +52,9 @@
# recreated on the fly while the NBD server is active.
# If missing, it will default to denying access (since 4.0).
# @max-connections: The maximum number of connections to allow at the same
-# time, 0 for unlimited. (since 5.2; default: 0)
+# time, 0 for unlimited. Setting this to 1 also stops
+# the server from advertising multiple client support
+# (since 5.2; default: 0).
#
# Returns: error if the server is already running.
#
diff --git a/tests/qemu-iotests/tests/nbd-multiconn b/tests/qemu-iotests/tests/nbd-multiconn
new file mode 100755
index 0000000000..b121f2e363
--- /dev/null
+++ b/tests/qemu-iotests/tests/nbd-multiconn
@@ -0,0 +1,145 @@
+#!/usr/bin/env python3
+# group: rw auto quick
+#
+# Test cases for NBD multi-conn advertisement
+#
+# Copyright (C) 2022 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import os
+from contextlib import contextmanager
+import iotests
+from iotests import qemu_img_create, qemu_io
+
+
+disk = os.path.join(iotests.test_dir, 'disk')
+size = '4M'
+nbd_sock = os.path.join(iotests.sock_dir, 'nbd_sock')
+nbd_uri = 'nbd+unix:///{}?socket=' + nbd_sock
+
+
+@contextmanager
+def open_nbd(export_name):
+ h = nbd.NBD()
+ try:
+ h.connect_uri(nbd_uri.format(export_name))
+ yield h
+ finally:
+ h.shutdown()
+
+class TestNbdMulticonn(iotests.QMPTestCase):
+ def setUp(self):
+ qemu_img_create('-f', iotests.imgfmt, disk, size)
+ qemu_io('-c', 'w -P 1 0 2M', '-c', 'w -P 2 2M 2M', disk)
+
+ self.vm = iotests.VM()
+ self.vm.launch()
+ result = self.vm.qmp('blockdev-add', {
+ 'driver': 'qcow2',
+ 'node-name': 'n',
+ 'file': {'driver': 'file', 'filename': disk}
+ })
+ self.assert_qmp(result, 'return', {})
+
+ def tearDown(self):
+ self.vm.shutdown()
+ os.remove(disk)
+ try:
+ os.remove(nbd_sock)
+ except OSError:
+ pass
+
+ @contextmanager
+ def run_server(self, max_connections=None):
+ args = {
+ 'addr': {
+ 'type': 'unix',
+ 'data': {'path': nbd_sock}
+ }
+ }
+ if max_connections is not None:
+ args['max-connections'] = max_connections
+
+ result = self.vm.qmp('nbd-server-start', args)
+ self.assert_qmp(result, 'return', {})
+ yield
+
+ result = self.vm.qmp('nbd-server-stop')
+ self.assert_qmp(result, 'return', {})
+
+ def add_export(self, name, writable=None):
+ args = {
+ 'type': 'nbd',
+ 'id': name,
+ 'node-name': 'n',
+ 'name': name,
+ }
+ if writable is not None:
+ args['writable'] = writable
+
+ result = self.vm.qmp('block-export-add', args)
+ self.assert_qmp(result, 'return', {})
+
+ def test_default_settings(self):
+ with self.run_server():
+ self.add_export('r')
+ self.add_export('w', writable=True)
+ with open_nbd('r') as h:
+ self.assertTrue(h.can_multi_conn())
+ with open_nbd('w') as h:
+ self.assertTrue(h.can_multi_conn())
+
+ def test_limited_connections(self):
+ with self.run_server(max_connections=1):
+ self.add_export('r')
+ self.add_export('w', writable=True)
+ with open_nbd('r') as h:
+ self.assertFalse(h.can_multi_conn())
+ with open_nbd('w') as h:
+ self.assertFalse(h.can_multi_conn())
+
+ def test_parallel_writes(self):
+ with self.run_server():
+ self.add_export('w', writable=True)
+
+ clients = [nbd.NBD() for _ in range(3)]
+ for c in clients:
+ c.connect_uri(nbd_uri.format('w'))
+ self.assertTrue(c.can_multi_conn())
+
+ initial_data = clients[0].pread(1024 * 1024, 0)
+ self.assertEqual(initial_data, b'\x01' * 1024 * 1024)
+
+ updated_data = b'\x03' * 1024 * 1024
+ clients[1].pwrite(updated_data, 0)
+ clients[2].flush()
+ current_data = clients[0].pread(1024 * 1024, 0)
+
+ self.assertEqual(updated_data, current_data)
+
+ for i in range(3):
+ clients[i].shutdown()
+
+
+if __name__ == '__main__':
+ try:
+ # Easier to use libnbd than to try and set up parallel
+ # 'qemu-nbd --list' or 'qemu-io' processes, but not all systems
+ # have libnbd installed.
+ import nbd # type: ignore
+
+ iotests.main(supported_fmts=['qcow2'])
+ except ImportError:
+ iotests.notrun('libnbd not installed')
diff --git a/tests/qemu-iotests/tests/nbd-multiconn.out b/tests/qemu-iotests/tests/nbd-multiconn.out
new file mode 100644
index 0000000000..8d7e996700
--- /dev/null
+++ b/tests/qemu-iotests/tests/nbd-multiconn.out
@@ -0,0 +1,5 @@
+...
+----------------------------------------------------------------------
+Ran 3 tests
+
+OK
diff --git a/tests/qemu-iotests/tests/nbd-qemu-allocation.out b/tests/qemu-iotests/tests/nbd-qemu-allocation.out
index 0bf1abb063..9d938db24e 100644
--- a/tests/qemu-iotests/tests/nbd-qemu-allocation.out
+++ b/tests/qemu-iotests/tests/nbd-qemu-allocation.out
@@ -17,7 +17,7 @@ wrote 2097152/2097152 bytes at offset 1048576
exports available: 1
export: ''
size: 4194304
- flags: 0x58f ( readonly flush fua df multi cache )
+ flags: 0x48f ( readonly flush fua df cache )
min block: 1
opt block: 4096
max block: 33554432
--
2.31.1

@ -0,0 +1,63 @@
From 03996a8a826c9186e4a16e1b4757f1ef5947a503 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Fri, 5 Aug 2022 11:42:14 +0200
Subject: [PATCH 07/11] pc-bios/s390-ccw: Fix booting with logical block size <
physical block size
RH-Author: Thomas Huth <thuth@redhat.com>
RH-MergeRequest: 113: pc-bios/s390-ccw: Fix booting with logical block size < physical block size
RH-Commit: [1/1] a45ff477bc7d7011ea6c4d42a1aade213d1e4690 (thuth/qemu-kvm-cs9)
RH-Bugzilla: 2112303
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Claudio Imbrenda <None>
For accessing single blocks during boot, it's the logical block size that
matters. (Physical block sizes are rather interesting e.g. for creating
file systems with the correct alignment for speed reasons etc.).
So the s390-ccw bios has to use the logical block size for calculating
sector numbers during the boot phase, the "physical_block_exp" shift
value must not be taken into account. This change fixes the boot process
when the guest hast been installed on a disk where the logical block size
differs from the physical one, e.g. if the guest has been installed
like this:
qemu-system-s390x -nographic -accel kvm -m 2G \
-drive if=none,id=d1,file=fedora.iso,format=raw,media=cdrom \
-device virtio-scsi -device scsi-cd,drive=d1 \
-drive if=none,id=d2,file=test.qcow2,format=qcow2
-device virtio-blk,drive=d2,physical_block_size=4096,logical_block_size=512
Linux correctly uses the logical block size of 512 for the installation,
but the s390-ccw bios tries to boot from a disk with 4096 block size so
far, as long as this patch has not been applied yet (well, it used to work
by accident in the past due to the virtio_assume_scsi() hack that used to
enforce 512 byte sectors on all virtio-block disks, but that hack has been
well removed in commit 5447de2619050a0a4d to fix other scenarios).
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2112303
Message-Id: <20220805094214.285223-1-thuth@redhat.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Eric Farman <farman@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit 393296de19650e1400ca265914cfdeb313725363)
---
pc-bios/s390-ccw/virtio-blkdev.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c
index 8271c47296..794f99b42c 100644
--- a/pc-bios/s390-ccw/virtio-blkdev.c
+++ b/pc-bios/s390-ccw/virtio-blkdev.c
@@ -173,7 +173,7 @@ int virtio_get_block_size(void)
switch (vdev->senseid.cu_model) {
case VIRTIO_ID_BLOCK:
- return vdev->config.blk.blk_size << vdev->config.blk.physical_block_exp;
+ return vdev->config.blk.blk_size;
case VIRTIO_ID_SCSI:
return vdev->scsi_block_size;
}
--
2.31.1

@ -0,0 +1,180 @@
From 2e38b4ec5c53b2b98539a70105d3046e1c452ab8 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Fri, 8 Jul 2022 20:49:01 +0200
Subject: [PATCH 13/17] pc-bios/s390-ccw: Split virtio-scsi code from
virtio_blk_setup_device()
RH-Author: Thomas Huth <thuth@redhat.com>
RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry
RH-Commit: [8/10] f49c5fb77e05c9dc09ed9f037e37f6a461e4bba6 (thuth/qemu-kvm-cs9)
RH-Bugzilla: 2098077
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
Bugzilla: http://bugzilla.redhat.com/2098077
commit cf30b7c4a9b2c64518be8037c2e6670aacdb00b9
Author: Thomas Huth <thuth@redhat.com>
Date: Mon Jul 4 13:19:00 2022 +0200
pc-bios/s390-ccw: Split virtio-scsi code from virtio_blk_setup_device()
The next patch is going to add more virtio-block specific code to
virtio_blk_setup_device(), and if the virtio-scsi code is also in
there, this is more cumbersome. And the calling function virtio_setup()
in main.c looks at the device type already anyway, so it's more
logical to separate the virtio-scsi stuff into a new function in
virtio-scsi.c instead.
Message-Id: <20220704111903.62400-10-thuth@redhat.com>
Reviewed-by: Eric Farman <farman@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
pc-bios/s390-ccw/main.c | 24 +++++++++++++++++-------
pc-bios/s390-ccw/virtio-blkdev.c | 20 ++------------------
pc-bios/s390-ccw/virtio-scsi.c | 19 ++++++++++++++++++-
pc-bios/s390-ccw/virtio-scsi.h | 2 +-
4 files changed, 38 insertions(+), 27 deletions(-)
diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c
index 5d2b7ba94d..13e1d8fdf7 100644
--- a/pc-bios/s390-ccw/main.c
+++ b/pc-bios/s390-ccw/main.c
@@ -14,6 +14,7 @@
#include "s390-ccw.h"
#include "cio.h"
#include "virtio.h"
+#include "virtio-scsi.h"
#include "dasd-ipl.h"
char stack[PAGE_SIZE * 8] __attribute__((__aligned__(PAGE_SIZE)));
@@ -218,6 +219,7 @@ static int virtio_setup(void)
{
VDev *vdev = virtio_get_device();
QemuIplParameters *early_qipl = (QemuIplParameters *)QIPL_ADDRESS;
+ int ret;
memcpy(&qipl, early_qipl, sizeof(QemuIplParameters));
@@ -225,18 +227,26 @@ static int virtio_setup(void)
menu_setup();
}
- if (virtio_get_device_type() == VIRTIO_ID_NET) {
+ switch (vdev->senseid.cu_model) {
+ case VIRTIO_ID_NET:
sclp_print("Network boot device detected\n");
vdev->netboot_start_addr = qipl.netboot_start_addr;
- } else {
- int ret = virtio_blk_setup_device(blk_schid);
- if (ret) {
- return ret;
- }
+ return 0;
+ case VIRTIO_ID_BLOCK:
+ ret = virtio_blk_setup_device(blk_schid);
+ break;
+ case VIRTIO_ID_SCSI:
+ ret = virtio_scsi_setup_device(blk_schid);
+ break;
+ default:
+ panic("\n! No IPL device available !\n");
+ }
+
+ if (!ret) {
IPL_assert(virtio_ipl_disk_is_valid(), "No valid IPL device detected");
}
- return 0;
+ return ret;
}
static void ipl_boot_device(void)
diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c
index db1f7f44aa..c175b66a47 100644
--- a/pc-bios/s390-ccw/virtio-blkdev.c
+++ b/pc-bios/s390-ccw/virtio-blkdev.c
@@ -222,27 +222,11 @@ uint64_t virtio_get_blocks(void)
int virtio_blk_setup_device(SubChannelId schid)
{
VDev *vdev = virtio_get_device();
- int ret = 0;
vdev->schid = schid;
virtio_setup_ccw(vdev);
- switch (vdev->senseid.cu_model) {
- case VIRTIO_ID_BLOCK:
- sclp_print("Using virtio-blk.\n");
- break;
- case VIRTIO_ID_SCSI:
- IPL_assert(vdev->config.scsi.sense_size == VIRTIO_SCSI_SENSE_SIZE,
- "Config: sense size mismatch");
- IPL_assert(vdev->config.scsi.cdb_size == VIRTIO_SCSI_CDB_SIZE,
- "Config: CDB size mismatch");
+ sclp_print("Using virtio-blk.\n");
- sclp_print("Using virtio-scsi.\n");
- ret = virtio_scsi_setup(vdev);
- break;
- default:
- panic("\n! No IPL device available !\n");
- }
-
- return ret;
+ return 0;
}
diff --git a/pc-bios/s390-ccw/virtio-scsi.c b/pc-bios/s390-ccw/virtio-scsi.c
index 2c8d0f3097..3b7069270c 100644
--- a/pc-bios/s390-ccw/virtio-scsi.c
+++ b/pc-bios/s390-ccw/virtio-scsi.c
@@ -329,7 +329,7 @@ static void scsi_parse_capacity_report(void *data,
}
}
-int virtio_scsi_setup(VDev *vdev)
+static int virtio_scsi_setup(VDev *vdev)
{
int retry_test_unit_ready = 3;
uint8_t data[256];
@@ -430,3 +430,20 @@ int virtio_scsi_setup(VDev *vdev)
return 0;
}
+
+int virtio_scsi_setup_device(SubChannelId schid)
+{
+ VDev *vdev = virtio_get_device();
+
+ vdev->schid = schid;
+ virtio_setup_ccw(vdev);
+
+ IPL_assert(vdev->config.scsi.sense_size == VIRTIO_SCSI_SENSE_SIZE,
+ "Config: sense size mismatch");
+ IPL_assert(vdev->config.scsi.cdb_size == VIRTIO_SCSI_CDB_SIZE,
+ "Config: CDB size mismatch");
+
+ sclp_print("Using virtio-scsi.\n");
+
+ return virtio_scsi_setup(vdev);
+}
diff --git a/pc-bios/s390-ccw/virtio-scsi.h b/pc-bios/s390-ccw/virtio-scsi.h
index 4b14c2c2f9..e6b6cd4815 100644
--- a/pc-bios/s390-ccw/virtio-scsi.h
+++ b/pc-bios/s390-ccw/virtio-scsi.h
@@ -67,8 +67,8 @@ static inline bool virtio_scsi_response_ok(const VirtioScsiCmdResp *r)
return r->response == VIRTIO_SCSI_S_OK && r->status == CDB_STATUS_GOOD;
}
-int virtio_scsi_setup(VDev *vdev);
int virtio_scsi_read_many(VDev *vdev,
ulong sector, void *load_addr, int sec_num);
+int virtio_scsi_setup_device(SubChannelId schid);
#endif /* VIRTIO_SCSI_H */
--
2.31.1

@ -0,0 +1,102 @@
From 64fa56e0520215e3909e442f09d8073c1870648a Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Fri, 8 Jul 2022 20:49:01 +0200
Subject: [PATCH 07/17] pc-bios/s390-ccw/bootmap: Improve the guessing logic in
zipl_load_vblk()
RH-Author: Thomas Huth <thuth@redhat.com>
RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry
RH-Commit: [2/10] ca8f5e847617cf4ac2fd6c38edb2982f32fa3eba (thuth/qemu-kvm-cs9)
RH-Bugzilla: 2098077
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
Bugzilla: http://bugzilla.redhat.com/2098077
commit 422865f6672ee1482b98d18321b55c1ecfb06c82
Author: Thomas Huth <thuth@redhat.com>
Date: Mon Jul 4 13:18:54 2022 +0200
pc-bios/s390-ccw/bootmap: Improve the guessing logic in zipl_load_vblk()
The logic of trying an final ISO or ECKD boot on virtio-block devices is
very weird: Since the geometry hardly ever matches in virtio_disk_is_scsi(),
virtio_blk_setup_device() always sets a "guessed" disk geometry via
virtio_assume_scsi() (which is certainly also wrong in a lot of cases).
zipl_load_vblk() then sees that there's been a "virtio_guessed_disk_nature"
and tries to fix up the geometry again via virtio_assume_iso9660() before
always trying to do ipl_iso_el_torito(). That's a very brain-twisting
way of attempting to boot from ISO images, which won't work anymore after
the following patches that will clean up the virtio_assume_scsi() mess
(and thus get rid of the "virtio_guessed_disk_nature" here).
Let's try a better approach instead: ISO files always have a magic
string "CD001" at offset 0x8001 (see e.g. the ECMA-119 specification)
which we can use to decide whether we should try to boot in ISO 9660
mode (which we should also try if we see a sector size of 2048).
And if we were not able to boot in ISO mode here, the final boot attempt
before panicking is to boot in ECKD mode. Since this is our last boot
attempt anyway, simply always assume the ECKD geometry here (if the sector
size was not 4096 yet), so that we also do not depend on the guessed disk
geometry from virtio_blk_setup_device() here anymore.
Message-Id: <20220704111903.62400-4-thuth@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
pc-bios/s390-ccw/bootmap.c | 27 +++++++++++++++++++++++----
1 file changed, 23 insertions(+), 4 deletions(-)
diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c
index 56411ab3b6..994e59c0b0 100644
--- a/pc-bios/s390-ccw/bootmap.c
+++ b/pc-bios/s390-ccw/bootmap.c
@@ -780,18 +780,37 @@ static void ipl_iso_el_torito(void)
}
}
+/**
+ * Detect whether we're trying to boot from an .ISO image.
+ * These always have a signature string "CD001" at offset 0x8001.
+ */
+static bool has_iso_signature(void)
+{
+ int blksize = virtio_get_block_size();
+
+ if (!blksize || virtio_read(0x8000 / blksize, sec)) {
+ return false;
+ }
+
+ return !memcmp("CD001", &sec[1], 5);
+}
+
/***********************************************************************
* Bus specific IPL sequences
*/
static void zipl_load_vblk(void)
{
- if (virtio_guessed_disk_nature()) {
- virtio_assume_iso9660();
+ int blksize = virtio_get_block_size();
+
+ if (blksize == VIRTIO_ISO_BLOCK_SIZE || has_iso_signature()) {
+ if (blksize != VIRTIO_ISO_BLOCK_SIZE) {
+ virtio_assume_iso9660();
+ }
+ ipl_iso_el_torito();
}
- ipl_iso_el_torito();
- if (virtio_guessed_disk_nature()) {
+ if (blksize != VIRTIO_DASD_DEFAULT_BLOCK_SIZE) {
sclp_print("Using guessed DASD geometry.\n");
virtio_assume_eckd();
}
--
2.31.1

@ -0,0 +1,78 @@
From 56674ee1f25f12978a6a8a1390e11b55b3e0fabe Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Fri, 8 Jul 2022 20:49:01 +0200
Subject: [PATCH 15/17] pc-bios/s390-ccw/netboot.mak: Ignore Clang's warnings
about GNU extensions
RH-Author: Thomas Huth <thuth@redhat.com>
RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry
RH-Commit: [10/10] 037dab4df23ebb2b42871bca8c842a53a7204b50 (thuth/qemu-kvm-cs9)
RH-Bugzilla: 2098077
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
Bugzilla: http://bugzilla.redhat.com/2098077
commit e2269220acb03e6c6a460c3090d804835e202239
Author: Thomas Huth <thuth@redhat.com>
Date: Mon Jul 4 13:19:03 2022 +0200
pc-bios/s390-ccw/netboot.mak: Ignore Clang's warnings about GNU extensions
When compiling the s390-ccw bios with Clang (v14.0), there is currently
an unuseful warning like this:
CC pc-bios/s390-ccw/ipv6.o
../../roms/SLOF/lib/libnet/ipv6.c:447:18: warning: variable length array
folded to constant array as an extension [-Wgnu-folding-constant]
unsigned short raw[ip6size];
^
SLOF is currently GCC-only and cannot be compiled with Clang yet, so
it is expected that such extensions sneak in there - and as long as
we don't want to compile the code with a compiler that is neither GCC
or Clang, it is also not necessary to avoid such extensions.
Thus these GNU-extension related warnings are completely useless in
the s390-ccw bios, especially in the code that is coming from SLOF,
so we should simply disable the related warnings here now.
Message-Id: <20220704111903.62400-13-thuth@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
pc-bios/s390-ccw/netboot.mak | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/pc-bios/s390-ccw/netboot.mak b/pc-bios/s390-ccw/netboot.mak
index 68b4d7edcb..ad41898cb6 100644
--- a/pc-bios/s390-ccw/netboot.mak
+++ b/pc-bios/s390-ccw/netboot.mak
@@ -16,9 +16,12 @@ s390-netboot.elf: $(NETOBJS) libnet.a libc.a
s390-netboot.img: s390-netboot.elf
$(call quiet-command,$(STRIP) --strip-unneeded $< -o $@,"STRIP","$(TARGET_DIR)$@")
+# SLOF is GCC-only, so ignore warnings about GNU extensions with Clang here
+NO_GNU_WARN := $(call cc-option,-Werror $(QEMU_CFLAGS),-Wno-gnu)
+
# libc files:
-LIBC_CFLAGS = $(QEMU_CFLAGS) $(CFLAGS) $(LIBC_INC) $(LIBNET_INC) \
+LIBC_CFLAGS = $(QEMU_CFLAGS) $(CFLAGS) $(NO_GNU_WARN) $(LIBC_INC) $(LIBNET_INC) \
-MMD -MP -MT $@ -MF $(@:%.o=%.d)
CTYPE_OBJS = isdigit.o isxdigit.o toupper.o
@@ -52,7 +55,7 @@ libc.a: $(LIBCOBJS)
LIBNETOBJS := args.o dhcp.o dns.o icmpv6.o ipv6.o tcp.o udp.o bootp.o \
dhcpv6.o ethernet.o ipv4.o ndp.o tftp.o pxelinux.o
-LIBNETCFLAGS = $(QEMU_CFLAGS) $(CFLAGS) $(LIBC_INC) $(LIBNET_INC) \
+LIBNETCFLAGS = $(QEMU_CFLAGS) $(CFLAGS) $(NO_GNU_WARN) $(LIBC_INC) $(LIBNET_INC) \
-DDHCPARCH=0x1F -MMD -MP -MT $@ -MF $(@:%.o=%.d)
%.o : $(SLOF_DIR)/lib/libnet/%.c
--
2.31.1

@ -0,0 +1,56 @@
From 430e76fd964390db86c8486f76b916a1cf7f74c2 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Fri, 8 Jul 2022 20:49:01 +0200
Subject: [PATCH 12/17] pc-bios/s390-ccw/virtio: Beautify the code for reading
virtqueue configuration
RH-Author: Thomas Huth <thuth@redhat.com>
RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry
RH-Commit: [7/10] b15c06b4c5431837672b6cb5d57d09da20718441 (thuth/qemu-kvm-cs9)
RH-Bugzilla: 2098077
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
Bugzilla: http://bugzilla.redhat.com/2098077
commit 070824885741f5d2a66626d3c4ecb2773c8e0552
Author: Thomas Huth <thuth@redhat.com>
Date: Mon Jul 4 13:18:59 2022 +0200
pc-bios/s390-ccw/virtio: Beautify the code for reading virtqueue configuration
It looks nicer if we separate the run_ccw() from the IPL_assert()
statement, and the error message should talk about "virtio device"
instead of "block device", since this code is nowadays used for
non-block (i.e. network) devices, too.
Message-Id: <20220704111903.62400-9-thuth@redhat.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Eric Farman <farman@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
pc-bios/s390-ccw/virtio.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c
index d8c2b52710..f37510f312 100644
--- a/pc-bios/s390-ccw/virtio.c
+++ b/pc-bios/s390-ccw/virtio.c
@@ -289,9 +289,8 @@ void virtio_setup_ccw(VDev *vdev)
.num = 0,
};
- IPL_assert(
- run_ccw(vdev, CCW_CMD_READ_VQ_CONF, &config, sizeof(config), false) == 0,
- "Could not get block device VQ configuration");
+ rc = run_ccw(vdev, CCW_CMD_READ_VQ_CONF, &config, sizeof(config), false);
+ IPL_assert(rc == 0, "Could not get virtio device VQ configuration");
info.num = config.num;
vring_init(&vdev->vrings[i], &info);
vdev->vrings[i].schid = vdev->schid;
--
2.31.1

@ -0,0 +1,63 @@
From 7d4f2454f95bfc087ad3f2fe3bc4625dcea3568e Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Fri, 8 Jul 2022 20:49:01 +0200
Subject: [PATCH 06/17] pc-bios/s390-ccw/virtio: Introduce a macro for the DASD
block size
RH-Author: Thomas Huth <thuth@redhat.com>
RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry
RH-Commit: [1/10] 71033934e1e9988bcf71362e02665ceb7449009d (thuth/qemu-kvm-cs9)
RH-Bugzilla: 2098077
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
Bugzilla: http://bugzilla.redhat.com/2098077
commit 1f2c2ee48e87ea743f8e23cc7569dd26c4cf9623
Author: Thomas Huth <thuth@redhat.com>
Date: Mon Jul 4 13:18:53 2022 +0200
pc-bios/s390-ccw/virtio: Introduce a macro for the DASD block size
Use VIRTIO_DASD_DEFAULT_BLOCK_SIZE instead of the magic value 4096.
Message-Id: <20220704111903.62400-3-thuth@redhat.com>
Reviewed-by: Eric Farman <farman@linux.ibm.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
pc-bios/s390-ccw/virtio-blkdev.c | 2 +-
pc-bios/s390-ccw/virtio.h | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c
index 7d35050292..6483307630 100644
--- a/pc-bios/s390-ccw/virtio-blkdev.c
+++ b/pc-bios/s390-ccw/virtio-blkdev.c
@@ -155,7 +155,7 @@ void virtio_assume_eckd(void)
vdev->config.blk.physical_block_exp = 0;
switch (vdev->senseid.cu_model) {
case VIRTIO_ID_BLOCK:
- vdev->config.blk.blk_size = 4096;
+ vdev->config.blk.blk_size = VIRTIO_DASD_DEFAULT_BLOCK_SIZE;
break;
case VIRTIO_ID_SCSI:
vdev->config.blk.blk_size = vdev->scsi_block_size;
diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h
index 19fceb6495..9e410bde6f 100644
--- a/pc-bios/s390-ccw/virtio.h
+++ b/pc-bios/s390-ccw/virtio.h
@@ -198,6 +198,7 @@ extern int virtio_read_many(ulong sector, void *load_addr, int sec_num);
#define VIRTIO_SECTOR_SIZE 512
#define VIRTIO_ISO_BLOCK_SIZE 2048
#define VIRTIO_SCSI_BLOCK_SIZE 512
+#define VIRTIO_DASD_DEFAULT_BLOCK_SIZE 4096
static inline ulong virtio_sector_adjust(ulong sector)
{
--
2.31.1

@ -0,0 +1,67 @@
From 20f8724d0837acbe642c8c7698a4b256f34c1209 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Fri, 8 Jul 2022 20:49:01 +0200
Subject: [PATCH 11/17] pc-bios/s390-ccw/virtio: Read device config after
feature negotiation
RH-Author: Thomas Huth <thuth@redhat.com>
RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry
RH-Commit: [6/10] 54d21e430b2dfba9e0a0823d6bb8ec7e7f8ff2ff (thuth/qemu-kvm-cs9)
RH-Bugzilla: 2098077
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
Bugzilla: http://bugzilla.redhat.com/2098077
commit aa5c69ce99411c4886bcd051f288afc02b6d968d
Author: Thomas Huth <thuth@redhat.com>
Date: Mon Jul 4 13:18:58 2022 +0200
pc-bios/s390-ccw/virtio: Read device config after feature negotiation
Feature negotiation should be done first, since some fields in the
config area can depend on the negotiated features and thus should
rather be read afterwards.
While we're at it, also adjust the error message here a little bit
(the code is nowadays used for non-block virtio devices, too).
Message-Id: <20220704111903.62400-8-thuth@redhat.com>
Reviewed-by: Eric Farman <farman@linux.ibm.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
pc-bios/s390-ccw/virtio.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c
index 4e85a2eb82..d8c2b52710 100644
--- a/pc-bios/s390-ccw/virtio.c
+++ b/pc-bios/s390-ccw/virtio.c
@@ -262,10 +262,6 @@ void virtio_setup_ccw(VDev *vdev)
rc = run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false);
IPL_assert(rc == 0, "Could not write DRIVER status to host");
- IPL_assert(
- run_ccw(vdev, CCW_CMD_READ_CONF, &vdev->config, cfg_size, false) == 0,
- "Could not get block device configuration");
-
/* Feature negotiation */
for (i = 0; i < ARRAY_SIZE(vdev->guest_features); i++) {
feats.features = 0;
@@ -278,6 +274,9 @@ void virtio_setup_ccw(VDev *vdev)
IPL_assert(rc == 0, "Could not set features bits");
}
+ rc = run_ccw(vdev, CCW_CMD_READ_CONF, &vdev->config, cfg_size, false);
+ IPL_assert(rc == 0, "Could not get virtio device configuration");
+
for (i = 0; i < vdev->nr_vqs; i++) {
VqInfo info = {
.queue = (unsigned long long) ring_area + (i * VIRTIO_RING_SIZE),
--
2.31.1

@ -0,0 +1,93 @@
From 303fb3ddcdbbd1373c5b1aa28e03f90507e217f3 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Fri, 8 Jul 2022 20:49:01 +0200
Subject: [PATCH 10/17] pc-bios/s390-ccw/virtio: Set missing status bits while
initializing
RH-Author: Thomas Huth <thuth@redhat.com>
RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry
RH-Commit: [5/10] 4bc44d9adae055fb60b79d04a2f08535b4d38d2b (thuth/qemu-kvm-cs9)
RH-Bugzilla: 2098077
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
Bugzilla: http://bugzilla.redhat.com/2098077
commit 175aa06a152ef6b58ba9b2e47a1296b024dea70c
Author: Thomas Huth <thuth@redhat.com>
Date: Mon Jul 4 13:18:57 2022 +0200
pc-bios/s390-ccw/virtio: Set missing status bits while initializing
According chapter "3.1.1 Driver Requirements: Device Initialization"
of the Virtio specification (v1.1), a driver for a device has to set
the ACKNOWLEDGE and DRIVER bits in the status field after resetting
the device. The s390-ccw bios skipped these steps so far and seems
like QEMU never cared. Anyway, it's better to follow the spec, so
let's set these bits now in the right spots, too.
Message-Id: <20220704111903.62400-7-thuth@redhat.com>
Acked-by: Christian Borntraeger <borntraeger@linux.ibm.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Eric Farman <farman@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
pc-bios/s390-ccw/virtio.c | 18 ++++++++++++++----
1 file changed, 14 insertions(+), 4 deletions(-)
diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c
index 5d2c6e3381..4e85a2eb82 100644
--- a/pc-bios/s390-ccw/virtio.c
+++ b/pc-bios/s390-ccw/virtio.c
@@ -220,7 +220,7 @@ int virtio_run(VDev *vdev, int vqid, VirtioCmd *cmd)
void virtio_setup_ccw(VDev *vdev)
{
int i, rc, cfg_size = 0;
- unsigned char status = VIRTIO_CONFIG_S_DRIVER_OK;
+ uint8_t status;
struct VirtioFeatureDesc {
uint32_t features;
uint8_t index;
@@ -234,6 +234,10 @@ void virtio_setup_ccw(VDev *vdev)
run_ccw(vdev, CCW_CMD_VDEV_RESET, NULL, 0, false);
+ status = VIRTIO_CONFIG_S_ACKNOWLEDGE;
+ rc = run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false);
+ IPL_assert(rc == 0, "Could not write ACKNOWLEDGE status to host");
+
switch (vdev->senseid.cu_model) {
case VIRTIO_ID_NET:
vdev->nr_vqs = 2;
@@ -253,6 +257,11 @@ void virtio_setup_ccw(VDev *vdev)
default:
panic("Unsupported virtio device\n");
}
+
+ status |= VIRTIO_CONFIG_S_DRIVER;
+ rc = run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false);
+ IPL_assert(rc == 0, "Could not write DRIVER status to host");
+
IPL_assert(
run_ccw(vdev, CCW_CMD_READ_CONF, &vdev->config, cfg_size, false) == 0,
"Could not get block device configuration");
@@ -291,9 +300,10 @@ void virtio_setup_ccw(VDev *vdev)
run_ccw(vdev, CCW_CMD_SET_VQ, &info, sizeof(info), false) == 0,
"Cannot set VQ info");
}
- IPL_assert(
- run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false) == 0,
- "Could not write status to host");
+
+ status |= VIRTIO_CONFIG_S_DRIVER_OK;
+ rc = run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false);
+ IPL_assert(rc == 0, "Could not write DRIVER_OK status to host");
}
bool virtio_is_supported(SubChannelId schid)
--
2.31.1

@ -0,0 +1,101 @@
From d3335a98a7b6e084aadf4907968536a67cf8e64c Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Fri, 8 Jul 2022 20:49:01 +0200
Subject: [PATCH 09/17] pc-bios/s390-ccw/virtio-blkdev: Remove
virtio_assume_scsi()
RH-Author: Thomas Huth <thuth@redhat.com>
RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry
RH-Commit: [4/10] bf27f75344f220a03475a2918ed49ec9cd5ba317 (thuth/qemu-kvm-cs9)
RH-Bugzilla: 2098077
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
Bugzilla: http://bugzilla.redhat.com/2098077
commit 5447de2619050a0a4dd480b97f88a9b58da360d1
Author: Thomas Huth <thuth@redhat.com>
Date: Mon Jul 4 13:18:56 2022 +0200
pc-bios/s390-ccw/virtio-blkdev: Remove virtio_assume_scsi()
The virtio_assume_scsi() function is very questionable: First, it
is only called for virtio-blk, and not for virtio-scsi, so the naming
is already quite confusing. Second, it is called if we detected a
"invalid" IPL disk, trying to fix it by blindly setting a sector
size of 512. This of course won't work in most cases since disks
might have a different sector size for a reason.
Thus let's remove this strange function now. The calling code can
also be removed completely, since there is another spot in main.c
that does "IPL_assert(virtio_ipl_disk_is_valid(), ...)" to make
sure that we do not try to IPL from an invalid device.
Message-Id: <20220704111903.62400-6-thuth@redhat.com>
Reviewed-by: Eric Farman <farman@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
pc-bios/s390-ccw/virtio-blkdev.c | 24 ------------------------
pc-bios/s390-ccw/virtio.h | 1 -
2 files changed, 25 deletions(-)
diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c
index 7e13155589..db1f7f44aa 100644
--- a/pc-bios/s390-ccw/virtio-blkdev.c
+++ b/pc-bios/s390-ccw/virtio-blkdev.c
@@ -112,23 +112,6 @@ VirtioGDN virtio_guessed_disk_nature(void)
return virtio_get_device()->guessed_disk_nature;
}
-void virtio_assume_scsi(void)
-{
- VDev *vdev = virtio_get_device();
-
- switch (vdev->senseid.cu_model) {
- case VIRTIO_ID_BLOCK:
- vdev->guessed_disk_nature = VIRTIO_GDN_SCSI;
- vdev->config.blk.blk_size = VIRTIO_SCSI_BLOCK_SIZE;
- vdev->config.blk.physical_block_exp = 0;
- vdev->blk_factor = 1;
- break;
- case VIRTIO_ID_SCSI:
- vdev->scsi_block_size = VIRTIO_SCSI_BLOCK_SIZE;
- break;
- }
-}
-
void virtio_assume_iso9660(void)
{
VDev *vdev = virtio_get_device();
@@ -247,13 +230,6 @@ int virtio_blk_setup_device(SubChannelId schid)
switch (vdev->senseid.cu_model) {
case VIRTIO_ID_BLOCK:
sclp_print("Using virtio-blk.\n");
- if (!virtio_ipl_disk_is_valid()) {
- /* make sure all getters but blocksize return 0 for
- * invalid IPL disk
- */
- memset(&vdev->config.blk, 0, sizeof(vdev->config.blk));
- virtio_assume_scsi();
- }
break;
case VIRTIO_ID_SCSI:
IPL_assert(vdev->config.scsi.sense_size == VIRTIO_SCSI_SENSE_SIZE,
diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h
index 241730effe..600ba5052b 100644
--- a/pc-bios/s390-ccw/virtio.h
+++ b/pc-bios/s390-ccw/virtio.h
@@ -182,7 +182,6 @@ enum guessed_disk_nature_type {
typedef enum guessed_disk_nature_type VirtioGDN;
VirtioGDN virtio_guessed_disk_nature(void);
-void virtio_assume_scsi(void);
void virtio_assume_eckd(void);
void virtio_assume_iso9660(void);
--
2.31.1

@ -0,0 +1,63 @@
From db58915fcaf3d24b64fe2c34cc15b5596b9a81bb Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Fri, 8 Jul 2022 20:49:01 +0200
Subject: [PATCH 14/17] pc-bios/s390-ccw/virtio-blkdev: Request the right
feature bits
RH-Author: Thomas Huth <thuth@redhat.com>
RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry
RH-Commit: [9/10] 9dcd8c2f659f366f9487ab6473d1f0d7778b40a7 (thuth/qemu-kvm-cs9)
RH-Bugzilla: 2098077
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
Bugzilla: http://bugzilla.redhat.com/2098077
commit 9125a314cca4a1838b09305a87d8efb98f80ab67
Author: Thomas Huth <thuth@redhat.com>
Date: Mon Jul 4 13:19:01 2022 +0200
pc-bios/s390-ccw/virtio-blkdev: Request the right feature bits
The virtio-blk code uses the block size and geometry fields in the
config area. According to the virtio-spec, these have to be negotiated
with the right feature bits during initialization, otherwise they
might not be available. QEMU is so far very forgiving and always
provides them, but we should not rely on this behavior, so let's
better request them properly via the VIRTIO_BLK_F_GEOMETRY and
VIRTIO_BLK_F_BLK_SIZE feature bits.
Message-Id: <20220704111903.62400-11-thuth@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
pc-bios/s390-ccw/virtio-blkdev.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c
index c175b66a47..8271c47296 100644
--- a/pc-bios/s390-ccw/virtio-blkdev.c
+++ b/pc-bios/s390-ccw/virtio-blkdev.c
@@ -13,6 +13,9 @@
#include "virtio.h"
#include "virtio-scsi.h"
+#define VIRTIO_BLK_F_GEOMETRY (1 << 4)
+#define VIRTIO_BLK_F_BLK_SIZE (1 << 6)
+
static int virtio_blk_read_many(VDev *vdev, ulong sector, void *load_addr,
int sec_num)
{
@@ -223,6 +226,7 @@ int virtio_blk_setup_device(SubChannelId schid)
{
VDev *vdev = virtio_get_device();
+ vdev->guest_features[0] = VIRTIO_BLK_F_GEOMETRY | VIRTIO_BLK_F_BLK_SIZE;
vdev->schid = schid;
virtio_setup_ccw(vdev);
--
2.31.1

@ -0,0 +1,124 @@
From f07e4629a7c58407f903810a038660c88c6a6315 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Fri, 8 Jul 2022 20:49:01 +0200
Subject: [PATCH 08/17] pc-bios/s390-ccw/virtio-blkdev: Simplify/fix
virtio_ipl_disk_is_valid()
RH-Author: Thomas Huth <thuth@redhat.com>
RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry
RH-Commit: [3/10] fb06830a3e50d9da3d84913b50bb227865cc44b3 (thuth/qemu-kvm-cs9)
RH-Bugzilla: 2098077
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
Bugzilla: http://bugzilla.redhat.com/2098077
commit bbf615f7b707f009ef8e757d170902ad33b90644
Author: Thomas Huth <thuth@redhat.com>
Date: Mon Jul 4 13:18:55 2022 +0200
pc-bios/s390-ccw/virtio-blkdev: Simplify/fix virtio_ipl_disk_is_valid()
The s390-ccw bios fails to boot if the boot disk is a virtio-blk
disk with a sector size of 4096. For example:
dasdfmt -b 4096 -d cdl -y -p -M quick /dev/dasdX
fdasd -a /dev/dasdX
install a guest onto /dev/dasdX1 using virtio-blk
qemu-system-s390x -nographic -hda /dev/dasdX1
The bios then bails out with:
! Cannot read block 0 !
Looking at virtio_ipl_disk_is_valid() and especially the function
virtio_disk_is_scsi(), it does not really make sense that we expect
only such a limited disk geometry (like a block size of 512) for
our boot disks. Let's relax the check and allow everything that
remotely looks like a sane disk.
Message-Id: <20220704111903.62400-5-thuth@redhat.com>
Reviewed-by: Eric Farman <farman@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
pc-bios/s390-ccw/virtio-blkdev.c | 41 ++++++--------------------------
pc-bios/s390-ccw/virtio.h | 2 --
2 files changed, 7 insertions(+), 36 deletions(-)
diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c
index 6483307630..7e13155589 100644
--- a/pc-bios/s390-ccw/virtio-blkdev.c
+++ b/pc-bios/s390-ccw/virtio-blkdev.c
@@ -166,46 +166,19 @@ void virtio_assume_eckd(void)
virtio_eckd_sectors_for_block_size(vdev->config.blk.blk_size);
}
-bool virtio_disk_is_scsi(void)
-{
- VDev *vdev = virtio_get_device();
-
- if (vdev->guessed_disk_nature == VIRTIO_GDN_SCSI) {
- return true;
- }
- switch (vdev->senseid.cu_model) {
- case VIRTIO_ID_BLOCK:
- return (vdev->config.blk.geometry.heads == 255)
- && (vdev->config.blk.geometry.sectors == 63)
- && (virtio_get_block_size() == VIRTIO_SCSI_BLOCK_SIZE);
- case VIRTIO_ID_SCSI:
- return true;
- }
- return false;
-}
-
-bool virtio_disk_is_eckd(void)
+bool virtio_ipl_disk_is_valid(void)
{
+ int blksize = virtio_get_block_size();
VDev *vdev = virtio_get_device();
- const int block_size = virtio_get_block_size();
- if (vdev->guessed_disk_nature == VIRTIO_GDN_DASD) {
+ if (vdev->guessed_disk_nature == VIRTIO_GDN_SCSI ||
+ vdev->guessed_disk_nature == VIRTIO_GDN_DASD) {
return true;
}
- switch (vdev->senseid.cu_model) {
- case VIRTIO_ID_BLOCK:
- return (vdev->config.blk.geometry.heads == 15)
- && (vdev->config.blk.geometry.sectors ==
- virtio_eckd_sectors_for_block_size(block_size));
- case VIRTIO_ID_SCSI:
- return false;
- }
- return false;
-}
-bool virtio_ipl_disk_is_valid(void)
-{
- return virtio_disk_is_scsi() || virtio_disk_is_eckd();
+ return (vdev->senseid.cu_model == VIRTIO_ID_BLOCK ||
+ vdev->senseid.cu_model == VIRTIO_ID_SCSI) &&
+ blksize >= 512 && blksize <= 4096;
}
int virtio_get_block_size(void)
diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h
index 9e410bde6f..241730effe 100644
--- a/pc-bios/s390-ccw/virtio.h
+++ b/pc-bios/s390-ccw/virtio.h
@@ -186,8 +186,6 @@ void virtio_assume_scsi(void);
void virtio_assume_eckd(void);
void virtio_assume_iso9660(void);
-extern bool virtio_disk_is_scsi(void);
-extern bool virtio_disk_is_eckd(void);
extern bool virtio_ipl_disk_is_valid(void);
extern int virtio_get_block_size(void);
extern uint8_t virtio_get_heads(void);
--
2.31.1

@ -0,0 +1,126 @@
From e97c563f7146098119839aa146a6f25070eb7148 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Wed, 11 May 2022 18:01:02 +0800
Subject: [PATCH 01/16] qapi/machine.json: Add cluster-id
RH-Author: Gavin Shan <gshan@redhat.com>
RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology
RH-Commit: [1/6] 44d7d83008c6d28485ae44f7cced792f4987b919 (gwshan/qemu-rhel-9)
RH-Bugzilla: 2041823
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Andrew Jones <drjones@redhat.com>
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823
This adds cluster-id in CPU instance properties, which will be used
by arm/virt machine. Besides, the cluster-id is also verified or
dumped in various spots:
* hw/core/machine.c::machine_set_cpu_numa_node() to associate
CPU with its NUMA node.
* hw/core/machine.c::machine_numa_finish_cpu_init() to record
CPU slots with no NUMA mapping set.
* hw/core/machine-hmp-cmds.c::hmp_hotpluggable_cpus() to dump
cluster-id.
Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Yanan Wang <wangyanan55@huawei.com>
Acked-by: Igor Mammedov <imammedo@redhat.com>
Message-id: 20220503140304.855514-2-gshan@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 1dcf7001d4bae651129d46d5628b29e93a411d0b)
Signed-off-by: Gavin Shan <gshan@redhat.com>
---
hw/core/machine-hmp-cmds.c | 4 ++++
hw/core/machine.c | 16 ++++++++++++++++
qapi/machine.json | 6 ++++--
3 files changed, 24 insertions(+), 2 deletions(-)
diff --git a/hw/core/machine-hmp-cmds.c b/hw/core/machine-hmp-cmds.c
index 4e2f319aeb..5cb5eecbfc 100644
--- a/hw/core/machine-hmp-cmds.c
+++ b/hw/core/machine-hmp-cmds.c
@@ -77,6 +77,10 @@ void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict)
if (c->has_die_id) {
monitor_printf(mon, " die-id: \"%" PRIu64 "\"\n", c->die_id);
}
+ if (c->has_cluster_id) {
+ monitor_printf(mon, " cluster-id: \"%" PRIu64 "\"\n",
+ c->cluster_id);
+ }
if (c->has_core_id) {
monitor_printf(mon, " core-id: \"%" PRIu64 "\"\n", c->core_id);
}
diff --git a/hw/core/machine.c b/hw/core/machine.c
index dffc3ef4ab..168f4de910 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -890,6 +890,11 @@ void machine_set_cpu_numa_node(MachineState *machine,
return;
}
+ if (props->has_cluster_id && !slot->props.has_cluster_id) {
+ error_setg(errp, "cluster-id is not supported");
+ return;
+ }
+
if (props->has_socket_id && !slot->props.has_socket_id) {
error_setg(errp, "socket-id is not supported");
return;
@@ -909,6 +914,11 @@ void machine_set_cpu_numa_node(MachineState *machine,
continue;
}
+ if (props->has_cluster_id &&
+ props->cluster_id != slot->props.cluster_id) {
+ continue;
+ }
+
if (props->has_die_id && props->die_id != slot->props.die_id) {
continue;
}
@@ -1203,6 +1213,12 @@ static char *cpu_slot_to_string(const CPUArchId *cpu)
}
g_string_append_printf(s, "die-id: %"PRId64, cpu->props.die_id);
}
+ if (cpu->props.has_cluster_id) {
+ if (s->len) {
+ g_string_append_printf(s, ", ");
+ }
+ g_string_append_printf(s, "cluster-id: %"PRId64, cpu->props.cluster_id);
+ }
if (cpu->props.has_core_id) {
if (s->len) {
g_string_append_printf(s, ", ");
diff --git a/qapi/machine.json b/qapi/machine.json
index d25a481ce4..4c417e32a5 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -868,10 +868,11 @@
# @node-id: NUMA node ID the CPU belongs to
# @socket-id: socket number within node/board the CPU belongs to
# @die-id: die number within socket the CPU belongs to (since 4.1)
-# @core-id: core number within die the CPU belongs to
+# @cluster-id: cluster number within die the CPU belongs to (since 7.1)
+# @core-id: core number within cluster the CPU belongs to
# @thread-id: thread number within core the CPU belongs to
#
-# Note: currently there are 5 properties that could be present
+# Note: currently there are 6 properties that could be present
# but management should be prepared to pass through other
# properties with device_add command to allow for future
# interface extension. This also requires the filed names to be kept in
@@ -883,6 +884,7 @@
'data': { '*node-id': 'int',
'*socket-id': 'int',
'*die-id': 'int',
+ '*cluster-id': 'int',
'*core-id': 'int',
'*thread-id': 'int'
}
--
2.31.1

@ -0,0 +1,162 @@
From 5e385a0e49a520550a83299632be175857b63f19 Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Tue, 5 Apr 2022 15:46:52 +0200
Subject: [PATCH 06/16] qcow2: Add errp to rebuild_refcount_structure()
RH-Author: Hanna Reitz <hreitz@redhat.com>
RH-MergeRequest: 96: qcow2: Improve refcount structure rebuilding
RH-Commit: [3/4] 937b89a7eab6ec6b18618d59bc1526976ad03290 (hreitz/qemu-kvm-c-9-s)
RH-Bugzilla: 2072379
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Eric Blake <eblake@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
Instead of fprint()-ing error messages in rebuild_refcount_structure()
and its rebuild_refcounts_write_refblocks() helper, pass them through an
Error object to qcow2_check_refcounts() (which will then print it).
Suggested-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20220405134652.19278-4-hreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit 0423f75351ab83b844a31349218b0eadd830e07a)
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
block/qcow2-refcount.c | 33 +++++++++++++++++++--------------
1 file changed, 19 insertions(+), 14 deletions(-)
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index c5669eaa51..ed0ecfaa89 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -2465,7 +2465,8 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs,
static int rebuild_refcounts_write_refblocks(
BlockDriverState *bs, void **refcount_table, int64_t *nb_clusters,
int64_t first_cluster, int64_t end_cluster,
- uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr
+ uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr,
+ Error **errp
)
{
BDRVQcow2State *s = bs->opaque;
@@ -2516,8 +2517,8 @@ static int rebuild_refcounts_write_refblocks(
nb_clusters,
&first_free_cluster);
if (refblock_offset < 0) {
- fprintf(stderr, "ERROR allocating refblock: %s\n",
- strerror(-refblock_offset));
+ error_setg_errno(errp, -refblock_offset,
+ "ERROR allocating refblock");
return refblock_offset;
}
@@ -2539,6 +2540,7 @@ static int rebuild_refcounts_write_refblocks(
on_disk_reftable_entries *
REFTABLE_ENTRY_SIZE);
if (!on_disk_reftable) {
+ error_setg(errp, "ERROR allocating reftable memory");
return -ENOMEM;
}
@@ -2562,7 +2564,7 @@ static int rebuild_refcounts_write_refblocks(
ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset,
s->cluster_size, false);
if (ret < 0) {
- fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
+ error_setg_errno(errp, -ret, "ERROR writing refblock");
return ret;
}
@@ -2578,7 +2580,7 @@ static int rebuild_refcounts_write_refblocks(
ret = bdrv_pwrite(bs->file, refblock_offset, on_disk_refblock,
s->cluster_size);
if (ret < 0) {
- fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
+ error_setg_errno(errp, -ret, "ERROR writing refblock");
return ret;
}
@@ -2601,7 +2603,8 @@ static int rebuild_refcounts_write_refblocks(
static int rebuild_refcount_structure(BlockDriverState *bs,
BdrvCheckResult *res,
void **refcount_table,
- int64_t *nb_clusters)
+ int64_t *nb_clusters,
+ Error **errp)
{
BDRVQcow2State *s = bs->opaque;
int64_t reftable_offset = -1;
@@ -2652,7 +2655,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs,
rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters,
0, *nb_clusters,
&on_disk_reftable,
- &on_disk_reftable_entries);
+ &on_disk_reftable_entries, errp);
if (reftable_size_changed < 0) {
res->check_errors++;
ret = reftable_size_changed;
@@ -2676,8 +2679,8 @@ static int rebuild_refcount_structure(BlockDriverState *bs,
refcount_table, nb_clusters,
&first_free_cluster);
if (reftable_offset < 0) {
- fprintf(stderr, "ERROR allocating reftable: %s\n",
- strerror(-reftable_offset));
+ error_setg_errno(errp, -reftable_offset,
+ "ERROR allocating reftable");
res->check_errors++;
ret = reftable_offset;
goto fail;
@@ -2695,7 +2698,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs,
reftable_start_cluster,
reftable_end_cluster,
&on_disk_reftable,
- &on_disk_reftable_entries);
+ &on_disk_reftable_entries, errp);
if (reftable_size_changed < 0) {
res->check_errors++;
ret = reftable_size_changed;
@@ -2725,7 +2728,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs,
ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, reftable_length,
false);
if (ret < 0) {
- fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
+ error_setg_errno(errp, -ret, "ERROR writing reftable");
goto fail;
}
@@ -2733,7 +2736,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs,
ret = bdrv_pwrite(bs->file, reftable_offset, on_disk_reftable,
reftable_length);
if (ret < 0) {
- fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
+ error_setg_errno(errp, -ret, "ERROR writing reftable");
goto fail;
}
@@ -2746,7 +2749,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs,
&reftable_offset_and_clusters,
sizeof(reftable_offset_and_clusters));
if (ret < 0) {
- fprintf(stderr, "ERROR setting reftable: %s\n", strerror(-ret));
+ error_setg_errno(errp, -ret, "ERROR setting reftable");
goto fail;
}
@@ -2814,11 +2817,13 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
if (rebuild && (fix & BDRV_FIX_ERRORS)) {
BdrvCheckResult old_res = *res;
int fresh_leaks = 0;
+ Error *local_err = NULL;
fprintf(stderr, "Rebuilding refcount structure\n");
ret = rebuild_refcount_structure(bs, res, &refcount_table,
- &nb_clusters);
+ &nb_clusters, &local_err);
if (ret < 0) {
+ error_report_err(local_err);
goto fail;
}
--
2.31.1

@ -0,0 +1,465 @@
From b453cf6be8429f4438d51eb24fcf49e7d9f14db6 Mon Sep 17 00:00:00 2001
From: Hanna Reitz <hreitz@redhat.com>
Date: Tue, 5 Apr 2022 15:46:50 +0200
Subject: [PATCH 04/16] qcow2: Improve refcount structure rebuilding
RH-Author: Hanna Reitz <hreitz@redhat.com>
RH-MergeRequest: 96: qcow2: Improve refcount structure rebuilding
RH-Commit: [1/4] a3606b7abcaebb4930b566e95b1090aead62dfae (hreitz/qemu-kvm-c-9-s)
RH-Bugzilla: 2072379
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Eric Blake <eblake@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
When rebuilding the refcount structures (when qemu-img check -r found
errors with refcount = 0, but reference count > 0), the new refcount
table defaults to being put at the image file end[1]. There is no good
reason for that except that it means we will not have to rewrite any
refblocks we already wrote to disk.
Changing the code to rewrite those refblocks is not too difficult,
though, so let us do that. That is beneficial for images on block
devices, where we cannot really write beyond the end of the image file.
Use this opportunity to add extensive comments to the code, and refactor
it a bit, getting rid of the backwards-jumping goto.
[1] Unless there is something allocated in the area pointed to by the
last refblock, so we have to write that refblock. In that case, we
try to put the reftable in there.
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1519071
Closes: https://gitlab.com/qemu-project/qemu/-/issues/941
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
Message-Id: <20220405134652.19278-2-hreitz@redhat.com>
(cherry picked from commit a8c07ec287554dcefd33733f0e5888a281ddc95e)
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
---
block/qcow2-refcount.c | 332 +++++++++++++++++++++++++++++------------
1 file changed, 235 insertions(+), 97 deletions(-)
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index b91499410c..c5669eaa51 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -2438,111 +2438,140 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs,
}
/*
- * Creates a new refcount structure based solely on the in-memory information
- * given through *refcount_table. All necessary allocations will be reflected
- * in that array.
+ * Helper function for rebuild_refcount_structure().
*
- * On success, the old refcount structure is leaked (it will be covered by the
- * new refcount structure).
+ * Scan the range of clusters [first_cluster, end_cluster) for allocated
+ * clusters and write all corresponding refblocks to disk. The refblock
+ * and allocation data is taken from the in-memory refcount table
+ * *refcount_table[] (of size *nb_clusters), which is basically one big
+ * (unlimited size) refblock for the whole image.
+ *
+ * For these refblocks, clusters are allocated using said in-memory
+ * refcount table. Care is taken that these allocations are reflected
+ * in the refblocks written to disk.
+ *
+ * The refblocks' offsets are written into a reftable, which is
+ * *on_disk_reftable_ptr[] (of size *on_disk_reftable_entries_ptr). If
+ * that reftable is of insufficient size, it will be resized to fit.
+ * This reftable is not written to disk.
+ *
+ * (If *on_disk_reftable_ptr is not NULL, the entries within are assumed
+ * to point to existing valid refblocks that do not need to be allocated
+ * again.)
+ *
+ * Return whether the on-disk reftable array was resized (true/false),
+ * or -errno on error.
*/
-static int rebuild_refcount_structure(BlockDriverState *bs,
- BdrvCheckResult *res,
- void **refcount_table,
- int64_t *nb_clusters)
+static int rebuild_refcounts_write_refblocks(
+ BlockDriverState *bs, void **refcount_table, int64_t *nb_clusters,
+ int64_t first_cluster, int64_t end_cluster,
+ uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr
+ )
{
BDRVQcow2State *s = bs->opaque;
- int64_t first_free_cluster = 0, reftable_offset = -1, cluster = 0;
+ int64_t cluster;
int64_t refblock_offset, refblock_start, refblock_index;
- uint32_t reftable_size = 0;
- uint64_t *on_disk_reftable = NULL;
+ int64_t first_free_cluster = 0;
+ uint64_t *on_disk_reftable = *on_disk_reftable_ptr;
+ uint32_t on_disk_reftable_entries = *on_disk_reftable_entries_ptr;
void *on_disk_refblock;
- int ret = 0;
- struct {
- uint64_t reftable_offset;
- uint32_t reftable_clusters;
- } QEMU_PACKED reftable_offset_and_clusters;
-
- qcow2_cache_empty(bs, s->refcount_block_cache);
+ bool reftable_grown = false;
+ int ret;
-write_refblocks:
- for (; cluster < *nb_clusters; cluster++) {
+ for (cluster = first_cluster; cluster < end_cluster; cluster++) {
+ /* Check all clusters to find refblocks that contain non-zero entries */
if (!s->get_refcount(*refcount_table, cluster)) {
continue;
}
+ /*
+ * This cluster is allocated, so we need to create a refblock
+ * for it. The data we will write to disk is just the
+ * respective slice from *refcount_table, so it will contain
+ * accurate refcounts for all clusters belonging to this
+ * refblock. After we have written it, we will therefore skip
+ * all remaining clusters in this refblock.
+ */
+
refblock_index = cluster >> s->refcount_block_bits;
refblock_start = refblock_index << s->refcount_block_bits;
- /* Don't allocate a cluster in a refblock already written to disk */
- if (first_free_cluster < refblock_start) {
- first_free_cluster = refblock_start;
- }
- refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table,
- nb_clusters, &first_free_cluster);
- if (refblock_offset < 0) {
- fprintf(stderr, "ERROR allocating refblock: %s\n",
- strerror(-refblock_offset));
- res->check_errors++;
- ret = refblock_offset;
- goto fail;
- }
+ if (on_disk_reftable_entries > refblock_index &&
+ on_disk_reftable[refblock_index])
+ {
+ /*
+ * We can get here after a `goto write_refblocks`: We have a
+ * reftable from a previous run, and the refblock is already
+ * allocated. No need to allocate it again.
+ */
+ refblock_offset = on_disk_reftable[refblock_index];
+ } else {
+ int64_t refblock_cluster_index;
- if (reftable_size <= refblock_index) {
- uint32_t old_reftable_size = reftable_size;
- uint64_t *new_on_disk_reftable;
+ /* Don't allocate a cluster in a refblock already written to disk */
+ if (first_free_cluster < refblock_start) {
+ first_free_cluster = refblock_start;
+ }
+ refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table,
+ nb_clusters,
+ &first_free_cluster);
+ if (refblock_offset < 0) {
+ fprintf(stderr, "ERROR allocating refblock: %s\n",
+ strerror(-refblock_offset));
+ return refblock_offset;
+ }
- reftable_size = ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE,
- s->cluster_size) / REFTABLE_ENTRY_SIZE;
- new_on_disk_reftable = g_try_realloc(on_disk_reftable,
- reftable_size *
- REFTABLE_ENTRY_SIZE);
- if (!new_on_disk_reftable) {
- res->check_errors++;
- ret = -ENOMEM;
- goto fail;
+ refblock_cluster_index = refblock_offset / s->cluster_size;
+ if (refblock_cluster_index >= end_cluster) {
+ /*
+ * We must write the refblock that holds this refblock's
+ * refcount
+ */
+ end_cluster = refblock_cluster_index + 1;
}
- on_disk_reftable = new_on_disk_reftable;
- memset(on_disk_reftable + old_reftable_size, 0,
- (reftable_size - old_reftable_size) * REFTABLE_ENTRY_SIZE);
+ if (on_disk_reftable_entries <= refblock_index) {
+ on_disk_reftable_entries =
+ ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE,
+ s->cluster_size) / REFTABLE_ENTRY_SIZE;
+ on_disk_reftable =
+ g_try_realloc(on_disk_reftable,
+ on_disk_reftable_entries *
+ REFTABLE_ENTRY_SIZE);
+ if (!on_disk_reftable) {
+ return -ENOMEM;
+ }
- /* The offset we have for the reftable is now no longer valid;
- * this will leak that range, but we can easily fix that by running
- * a leak-fixing check after this rebuild operation */
- reftable_offset = -1;
- } else {
- assert(on_disk_reftable);
- }
- on_disk_reftable[refblock_index] = refblock_offset;
+ memset(on_disk_reftable + *on_disk_reftable_entries_ptr, 0,
+ (on_disk_reftable_entries -
+ *on_disk_reftable_entries_ptr) *
+ REFTABLE_ENTRY_SIZE);
- /* If this is apparently the last refblock (for now), try to squeeze the
- * reftable in */
- if (refblock_index == (*nb_clusters - 1) >> s->refcount_block_bits &&
- reftable_offset < 0)
- {
- uint64_t reftable_clusters = size_to_clusters(s, reftable_size *
- REFTABLE_ENTRY_SIZE);
- reftable_offset = alloc_clusters_imrt(bs, reftable_clusters,
- refcount_table, nb_clusters,
- &first_free_cluster);
- if (reftable_offset < 0) {
- fprintf(stderr, "ERROR allocating reftable: %s\n",
- strerror(-reftable_offset));
- res->check_errors++;
- ret = reftable_offset;
- goto fail;
+ *on_disk_reftable_ptr = on_disk_reftable;
+ *on_disk_reftable_entries_ptr = on_disk_reftable_entries;
+
+ reftable_grown = true;
+ } else {
+ assert(on_disk_reftable);
}
+ on_disk_reftable[refblock_index] = refblock_offset;
}
+ /* Refblock is allocated, write it to disk */
+
ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset,
s->cluster_size, false);
if (ret < 0) {
fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
- goto fail;
+ return ret;
}
- /* The size of *refcount_table is always cluster-aligned, therefore the
- * write operation will not overflow */
+ /*
+ * The refblock is simply a slice of *refcount_table.
+ * Note that the size of *refcount_table is always aligned to
+ * whole clusters, so the write operation will not result in
+ * out-of-bounds accesses.
+ */
on_disk_refblock = (void *)((char *) *refcount_table +
refblock_index * s->cluster_size);
@@ -2550,23 +2579,99 @@ write_refblocks:
s->cluster_size);
if (ret < 0) {
fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
- goto fail;
+ return ret;
}
- /* Go to the end of this refblock */
+ /* This refblock is done, skip to its end */
cluster = refblock_start + s->refcount_block_size - 1;
}
- if (reftable_offset < 0) {
- uint64_t post_refblock_start, reftable_clusters;
+ return reftable_grown;
+}
+
+/*
+ * Creates a new refcount structure based solely on the in-memory information
+ * given through *refcount_table (this in-memory information is basically just
+ * the concatenation of all refblocks). All necessary allocations will be
+ * reflected in that array.
+ *
+ * On success, the old refcount structure is leaked (it will be covered by the
+ * new refcount structure).
+ */
+static int rebuild_refcount_structure(BlockDriverState *bs,
+ BdrvCheckResult *res,
+ void **refcount_table,
+ int64_t *nb_clusters)
+{
+ BDRVQcow2State *s = bs->opaque;
+ int64_t reftable_offset = -1;
+ int64_t reftable_length = 0;
+ int64_t reftable_clusters;
+ int64_t refblock_index;
+ uint32_t on_disk_reftable_entries = 0;
+ uint64_t *on_disk_reftable = NULL;
+ int ret = 0;
+ int reftable_size_changed = 0;
+ struct {
+ uint64_t reftable_offset;
+ uint32_t reftable_clusters;
+ } QEMU_PACKED reftable_offset_and_clusters;
+
+ qcow2_cache_empty(bs, s->refcount_block_cache);
+
+ /*
+ * For each refblock containing entries, we try to allocate a
+ * cluster (in the in-memory refcount table) and write its offset
+ * into on_disk_reftable[]. We then write the whole refblock to
+ * disk (as a slice of the in-memory refcount table).
+ * This is done by rebuild_refcounts_write_refblocks().
+ *
+ * Once we have scanned all clusters, we try to find space for the
+ * reftable. This will dirty the in-memory refcount table (i.e.
+ * make it differ from the refblocks we have already written), so we
+ * need to run rebuild_refcounts_write_refblocks() again for the
+ * range of clusters where the reftable has been allocated.
+ *
+ * This second run might make the reftable grow again, in which case
+ * we will need to allocate another space for it, which is why we
+ * repeat all this until the reftable stops growing.
+ *
+ * (This loop will terminate, because with every cluster the
+ * reftable grows, it can accomodate a multitude of more refcounts,
+ * so that at some point this must be able to cover the reftable
+ * and all refblocks describing it.)
+ *
+ * We then convert the reftable to big-endian and write it to disk.
+ *
+ * Note that we never free any reftable allocations. Doing so would
+ * needlessly complicate the algorithm: The eventual second check
+ * run we do will clean up all leaks we have caused.
+ */
+
+ reftable_size_changed =
+ rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters,
+ 0, *nb_clusters,
+ &on_disk_reftable,
+ &on_disk_reftable_entries);
+ if (reftable_size_changed < 0) {
+ res->check_errors++;
+ ret = reftable_size_changed;
+ goto fail;
+ }
+
+ /*
+ * There was no reftable before, so rebuild_refcounts_write_refblocks()
+ * must have increased its size (from 0 to something).
+ */
+ assert(reftable_size_changed);
+
+ do {
+ int64_t reftable_start_cluster, reftable_end_cluster;
+ int64_t first_free_cluster = 0;
+
+ reftable_length = on_disk_reftable_entries * REFTABLE_ENTRY_SIZE;
+ reftable_clusters = size_to_clusters(s, reftable_length);
- post_refblock_start = ROUND_UP(*nb_clusters, s->refcount_block_size);
- reftable_clusters =
- size_to_clusters(s, reftable_size * REFTABLE_ENTRY_SIZE);
- /* Not pretty but simple */
- if (first_free_cluster < post_refblock_start) {
- first_free_cluster = post_refblock_start;
- }
reftable_offset = alloc_clusters_imrt(bs, reftable_clusters,
refcount_table, nb_clusters,
&first_free_cluster);
@@ -2578,24 +2683,55 @@ write_refblocks:
goto fail;
}
- goto write_refblocks;
- }
+ /*
+ * We need to update the affected refblocks, so re-run the
+ * write_refblocks loop for the reftable's range of clusters.
+ */
+ assert(offset_into_cluster(s, reftable_offset) == 0);
+ reftable_start_cluster = reftable_offset / s->cluster_size;
+ reftable_end_cluster = reftable_start_cluster + reftable_clusters;
+ reftable_size_changed =
+ rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters,
+ reftable_start_cluster,
+ reftable_end_cluster,
+ &on_disk_reftable,
+ &on_disk_reftable_entries);
+ if (reftable_size_changed < 0) {
+ res->check_errors++;
+ ret = reftable_size_changed;
+ goto fail;
+ }
+
+ /*
+ * If the reftable size has changed, we will need to find a new
+ * allocation, repeating the loop.
+ */
+ } while (reftable_size_changed);
- for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) {
+ /* The above loop must have run at least once */
+ assert(reftable_offset >= 0);
+
+ /*
+ * All allocations are done, all refblocks are written, convert the
+ * reftable to big-endian and write it to disk.
+ */
+
+ for (refblock_index = 0; refblock_index < on_disk_reftable_entries;
+ refblock_index++)
+ {
cpu_to_be64s(&on_disk_reftable[refblock_index]);
}
- ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset,
- reftable_size * REFTABLE_ENTRY_SIZE,
+ ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, reftable_length,
false);
if (ret < 0) {
fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
goto fail;
}
- assert(reftable_size < INT_MAX / REFTABLE_ENTRY_SIZE);
+ assert(reftable_length < INT_MAX);
ret = bdrv_pwrite(bs->file, reftable_offset, on_disk_reftable,
- reftable_size * REFTABLE_ENTRY_SIZE);
+ reftable_length);
if (ret < 0) {
fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
goto fail;
@@ -2604,7 +2740,7 @@ write_refblocks:
/* Enter new reftable into the image header */
reftable_offset_and_clusters.reftable_offset = cpu_to_be64(reftable_offset);
reftable_offset_and_clusters.reftable_clusters =
- cpu_to_be32(size_to_clusters(s, reftable_size * REFTABLE_ENTRY_SIZE));
+ cpu_to_be32(reftable_clusters);
ret = bdrv_pwrite_sync(bs->file,
offsetof(QCowHeader, refcount_table_offset),
&reftable_offset_and_clusters,
@@ -2614,12 +2750,14 @@ write_refblocks:
goto fail;
}
- for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) {
+ for (refblock_index = 0; refblock_index < on_disk_reftable_entries;
+ refblock_index++)
+ {
be64_to_cpus(&on_disk_reftable[refblock_index]);
}
s->refcount_table = on_disk_reftable;
s->refcount_table_offset = reftable_offset;
- s->refcount_table_size = reftable_size;
+ s->refcount_table_size = on_disk_reftable_entries;
update_max_refcount_table_index(s);
return 0;
--
2.31.1

@ -0,0 +1,92 @@
From e6aae1d0368a152924c38775e517f4e83c1d898b Mon Sep 17 00:00:00 2001
From: Eric Blake <eblake@redhat.com>
Date: Wed, 11 May 2022 19:49:23 -0500
Subject: [PATCH 1/2] qemu-nbd: Pass max connections to blockdev layer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Eric Blake <eblake@redhat.com>
RH-MergeRequest: 90: Advertise MULTI_CONN on writeable NBD servers
RH-Commit: [1/2] b0e33fd125bf3523b8b9a4dead3c8bb2342bfd4e (ebblake/centos-qemu-kvm)
RH-Bugzilla: 1708300
RH-Acked-by: Nir Soffer <None>
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
The next patch wants to adjust whether the NBD server code advertises
MULTI_CONN based on whether it is known if the server limits to
exactly one client. For a server started by QMP, this information is
obtained through nbd_server_start (which can support more than one
export); but for qemu-nbd (which supports exactly one export), it is
controlled only by the command-line option -e/--shared. Since we
already have a hook function used by qemu-nbd, it's easiest to just
alter its signature to fit our needs.
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20220512004924.417153-2-eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit a5fced40212ed73c715ca298a2929dd4d99c9999)
Signed-off-by: Eric Blake <eblake@redhat.com>
---
blockdev-nbd.c | 8 ++++----
include/block/nbd.h | 2 +-
qemu-nbd.c | 2 +-
3 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
index 9840d25a82..add41a23af 100644
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -30,18 +30,18 @@ typedef struct NBDServerData {
} NBDServerData;
static NBDServerData *nbd_server;
-static bool is_qemu_nbd;
+static int qemu_nbd_connections = -1; /* Non-negative if this is qemu-nbd */
static void nbd_update_server_watch(NBDServerData *s);
-void nbd_server_is_qemu_nbd(bool value)
+void nbd_server_is_qemu_nbd(int max_connections)
{
- is_qemu_nbd = value;
+ qemu_nbd_connections = max_connections;
}
bool nbd_server_is_running(void)
{
- return nbd_server || is_qemu_nbd;
+ return nbd_server || qemu_nbd_connections >= 0;
}
static void nbd_blockdev_client_closed(NBDClient *client, bool ignored)
diff --git a/include/block/nbd.h b/include/block/nbd.h
index a98eb665da..c5a29ce1c6 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -344,7 +344,7 @@ void nbd_client_new(QIOChannelSocket *sioc,
void nbd_client_get(NBDClient *client);
void nbd_client_put(NBDClient *client);
-void nbd_server_is_qemu_nbd(bool value);
+void nbd_server_is_qemu_nbd(int max_connections);
bool nbd_server_is_running(void);
void nbd_server_start(SocketAddress *addr, const char *tls_creds,
const char *tls_authz, uint32_t max_connections,
diff --git a/qemu-nbd.c b/qemu-nbd.c
index 713e7557a9..8c25ae93df 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -1087,7 +1087,7 @@ int main(int argc, char **argv)
bs->detect_zeroes = detect_zeroes;
- nbd_server_is_qemu_nbd(true);
+ nbd_server_is_qemu_nbd(shared);
export_opts = g_new(BlockExportOptions, 1);
*export_opts = (BlockExportOptions) {
--
2.31.1

@ -0,0 +1,100 @@
From a039ed652e6d2f5edcef9d5d1d3baec17ce7f929 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Wed, 11 May 2022 18:01:35 +0800
Subject: [PATCH 04/16] qtest/numa-test: Correct CPU and NUMA association in
aarch64_numa_cpu()
RH-Author: Gavin Shan <gshan@redhat.com>
RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology
RH-Commit: [4/6] 64e9908a179eb4fb586d662f70f275a81808e50c (gwshan/qemu-rhel-9)
RH-Bugzilla: 2041823
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Andrew Jones <drjones@redhat.com>
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823
In aarch64_numa_cpu(), the CPU and NUMA association is something
like below. Two threads in the same core/cluster/socket are
associated with two individual NUMA nodes, which is unreal as
Igor Mammedov mentioned. We don't expect the association to break
NUMA-to-socket boundary, which matches with the real world.
NUMA-node socket cluster core thread
------------------------------------------
0 0 0 0 0
1 0 0 0 1
This corrects the topology for CPUs and their association with
NUMA nodes. After this patch is applied, the CPU and NUMA
association becomes something like below, which looks real.
Besides, socket/cluster/core/thread IDs are all checked when
the NUMA node IDs are verified. It helps to check if the CPU
topology is properly populated or not.
NUMA-node socket cluster core thread
------------------------------------------
0 1 0 0 0
1 0 0 0 0
Suggested-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Gavin Shan <gshan@redhat.com>
Acked-by: Igor Mammedov <imammedo@redhat.com>
Message-id: 20220503140304.855514-5-gshan@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit e280ecb39bc1629f74ea5479d464fd1608dc8f76)
Signed-off-by: Gavin Shan <gshan@redhat.com>
---
tests/qtest/numa-test.c | 18 ++++++++++++------
1 file changed, 12 insertions(+), 6 deletions(-)
diff --git a/tests/qtest/numa-test.c b/tests/qtest/numa-test.c
index aeda8c774c..32e35daaae 100644
--- a/tests/qtest/numa-test.c
+++ b/tests/qtest/numa-test.c
@@ -224,17 +224,17 @@ static void aarch64_numa_cpu(const void *data)
g_autofree char *cli = NULL;
cli = make_cli(data, "-machine "
- "smp.cpus=2,smp.sockets=1,smp.clusters=1,smp.cores=1,smp.threads=2 "
+ "smp.cpus=2,smp.sockets=2,smp.clusters=1,smp.cores=1,smp.threads=1 "
"-numa node,nodeid=0,memdev=ram -numa node,nodeid=1 "
- "-numa cpu,node-id=1,thread-id=0 "
- "-numa cpu,node-id=0,thread-id=1");
+ "-numa cpu,node-id=0,socket-id=1,cluster-id=0,core-id=0,thread-id=0 "
+ "-numa cpu,node-id=1,socket-id=0,cluster-id=0,core-id=0,thread-id=0");
qts = qtest_init(cli);
cpus = get_cpus(qts, &resp);
g_assert(cpus);
while ((e = qlist_pop(cpus))) {
QDict *cpu, *props;
- int64_t thread, node;
+ int64_t socket, cluster, core, thread, node;
cpu = qobject_to(QDict, e);
g_assert(qdict_haskey(cpu, "props"));
@@ -242,12 +242,18 @@ static void aarch64_numa_cpu(const void *data)
g_assert(qdict_haskey(props, "node-id"));
node = qdict_get_int(props, "node-id");
+ g_assert(qdict_haskey(props, "socket-id"));
+ socket = qdict_get_int(props, "socket-id");
+ g_assert(qdict_haskey(props, "cluster-id"));
+ cluster = qdict_get_int(props, "cluster-id");
+ g_assert(qdict_haskey(props, "core-id"));
+ core = qdict_get_int(props, "core-id");
g_assert(qdict_haskey(props, "thread-id"));
thread = qdict_get_int(props, "thread-id");
- if (thread == 0) {
+ if (socket == 0 && cluster == 0 && core == 0 && thread == 0) {
g_assert_cmpint(node, ==, 1);
- } else if (thread == 1) {
+ } else if (socket == 1 && cluster == 0 && core == 0 && thread == 0) {
g_assert_cmpint(node, ==, 0);
} else {
g_assert(false);
--
2.31.1

@ -0,0 +1,68 @@
From 66f3928b40991d8467a3da086688f73d061886c8 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Wed, 11 May 2022 18:01:35 +0800
Subject: [PATCH 02/16] qtest/numa-test: Specify CPU topology in
aarch64_numa_cpu()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Gavin Shan <gshan@redhat.com>
RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology
RH-Commit: [2/6] b851e7ad59e057825392ddf75e9040cc102a0385 (gwshan/qemu-rhel-9)
RH-Bugzilla: 2041823
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Andrew Jones <drjones@redhat.com>
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823
The CPU topology isn't enabled on arm/virt machine yet, but we're
going to do it in next patch. After the CPU topology is enabled by
next patch, "thread-id=1" becomes invalid because the CPU core is
preferred on arm/virt machine. It means these two CPUs have 0/1
as their core IDs, but their thread IDs are all 0. It will trigger
test failure as the following message indicates:
[14/21 qemu:qtest+qtest-aarch64 / qtest-aarch64/numa-test ERROR
1.48s killed by signal 6 SIGABRT
>>> G_TEST_DBUS_DAEMON=/home/gavin/sandbox/qemu.main/tests/dbus-vmstate-daemon.sh \
QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon \
QTEST_QEMU_BINARY=./qemu-system-aarch64 \
QTEST_QEMU_IMG=./qemu-img MALLOC_PERTURB_=83 \
/home/gavin/sandbox/qemu.main/build/tests/qtest/numa-test --tap -k
――――――――――――――――――――――――――――――――――――――――――――――
stderr:
qemu-system-aarch64: -numa cpu,node-id=0,thread-id=1: no match found
This fixes the issue by providing comprehensive SMP configurations
in aarch64_numa_cpu(). The SMP configurations aren't used before
the CPU topology is enabled in next patch.
Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Yanan Wang <wangyanan55@huawei.com>
Message-id: 20220503140304.855514-3-gshan@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit ac7199a2523ce2ccf8e685087a5d177eeca89b09)
Signed-off-by: Gavin Shan <gshan@redhat.com>
---
tests/qtest/numa-test.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/tests/qtest/numa-test.c b/tests/qtest/numa-test.c
index 90bf68a5b3..aeda8c774c 100644
--- a/tests/qtest/numa-test.c
+++ b/tests/qtest/numa-test.c
@@ -223,7 +223,8 @@ static void aarch64_numa_cpu(const void *data)
QTestState *qts;
g_autofree char *cli = NULL;
- cli = make_cli(data, "-machine smp.cpus=2 "
+ cli = make_cli(data, "-machine "
+ "smp.cpus=2,smp.sockets=1,smp.clusters=1,smp.cores=1,smp.threads=2 "
"-numa node,nodeid=0,memdev=ram -numa node,nodeid=1 "
"-numa cpu,node-id=1,thread-id=0 "
"-numa cpu,node-id=0,thread-id=1");
--
2.31.1

@ -0,0 +1,106 @@
From 236f216309261bc924e49014267998fdc2ef7f46 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Fri, 29 Jul 2022 16:55:34 +0200
Subject: [PATCH 28/32] redhat: Update linux-headers/linux/kvm.h to v5.18-rc6
RH-Author: Thomas Huth <thuth@redhat.com>
RH-MergeRequest: 109: Honor storage keys during emulation of I/O instructions
RH-Commit: [1/2] f306d7ff8efa64b14158388b95815ac556a25d8a (thuth/qemu-kvm-cs9)
RH-Bugzilla: 2111994
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Claudio Imbrenda <None>
Upstream Status: RHEL-only
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2111994
Based on upstream commit e4082063e47e9731dbeb1c26174c17f6038f577f
("linux-headers: Update to v5.18-rc6"), but this is focusing on
the file linux-headers/linux/kvm.h only (since the other changes
related to the VFIO renaming might break some stuff).
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
linux-headers/linux/kvm.h | 27 +++++++++++++++++++++------
1 file changed, 21 insertions(+), 6 deletions(-)
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index d232feaae9..0d05d02ee4 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -445,7 +445,11 @@ struct kvm_run {
#define KVM_SYSTEM_EVENT_RESET 2
#define KVM_SYSTEM_EVENT_CRASH 3
__u32 type;
- __u64 flags;
+ __u32 ndata;
+ union {
+ __u64 flags;
+ __u64 data[16];
+ };
} system_event;
/* KVM_EXIT_S390_STSI */
struct {
@@ -562,9 +566,12 @@ struct kvm_s390_mem_op {
__u32 op; /* type of operation */
__u64 buf; /* buffer in userspace */
union {
- __u8 ar; /* the access register number */
+ struct {
+ __u8 ar; /* the access register number */
+ __u8 key; /* access key, ignored if flag unset */
+ };
__u32 sida_offset; /* offset into the sida */
- __u8 reserved[32]; /* should be set to 0 */
+ __u8 reserved[32]; /* ignored */
};
};
/* types for kvm_s390_mem_op->op */
@@ -572,9 +579,12 @@ struct kvm_s390_mem_op {
#define KVM_S390_MEMOP_LOGICAL_WRITE 1
#define KVM_S390_MEMOP_SIDA_READ 2
#define KVM_S390_MEMOP_SIDA_WRITE 3
+#define KVM_S390_MEMOP_ABSOLUTE_READ 4
+#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5
/* flags for kvm_s390_mem_op->flags */
#define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0)
#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1)
+#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2)
/* for KVM_INTERRUPT */
struct kvm_interrupt {
@@ -1134,6 +1144,12 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_VM_GPA_BITS 207
#define KVM_CAP_XSAVE2 208
#define KVM_CAP_SYS_ATTRIBUTES 209
+#define KVM_CAP_PPC_AIL_MODE_3 210
+#define KVM_CAP_S390_MEM_OP_EXTENSION 211
+#define KVM_CAP_PMU_CAPABILITY 212
+#define KVM_CAP_DISABLE_QUIRKS2 213
+/* #define KVM_CAP_VM_TSC_CONTROL 214 */
+#define KVM_CAP_SYSTEM_EVENT_DATA 215
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1624,9 +1640,6 @@ struct kvm_enc_region {
#define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3)
#define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4)
-/* Available with KVM_CAP_XSAVE2 */
-#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave)
-
struct kvm_s390_pv_sec_parm {
__u64 origin;
__u64 length;
@@ -1973,6 +1986,8 @@ struct kvm_dirty_gfn {
#define KVM_BUS_LOCK_DETECTION_OFF (1 << 0)
#define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1)
+#define KVM_PMU_CAP_DISABLE (1 << 0)
+
/**
* struct kvm_stats_header - Header of per vm/vcpu binary statistics data.
* @flags: Some extra information for header, always 0 for now.
--
2.31.1

@ -0,0 +1,19 @@
# User changes in this file are preserved across upgrades.
#
# Setting "modprobe kvm nested=1" only enables Nested Virtualization until
# the next reboot or module reload. Uncomment the option below to enable
# the feature permanently.
#
#options kvm nested=1
#
#
# Setting "modprobe kvm hpage=1" only enables Huge Page Backing (1MB)
# support until the next reboot or module reload. Uncomment the option
# below to enable the feature permanently.
#
# Note: - Incompatible with "nested=1". Loading the module will fail.
# - Dirty page logging will be performed on a 1MB (not 4KB) basis,
# which can result in a lot of data having to be transferred during
# migration, and therefore taking very long to converge.
#
#options kvm hpage=1

@ -0,0 +1,96 @@
From e5360c1e76fee8b8dcbcba7efbb1e36f0b48ac40 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Mon, 22 Aug 2022 14:53:20 +0200
Subject: [PATCH 01/23] scsi-generic: Fix emulated block limits VPD page
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 115: scsi-generic: Fix emulated block limits VPD page
RH-Bugzilla: 2120275
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
RH-Acked-by: Hanna Reitz <hreitz@redhat.com>
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Commit: [1/1] 336ba583311a80beeadd1900336056404f63211a (kmwolf/centos-qemu-kvm)
Commits 01ef8185b80 amd 24b36e9813e updated the way that the maximum
transfer length is calculated for patching block limits VPD page in an
INQUIRY response.
The same updates also need to be made for the case where the host device
does not support the block limits VPD page at all and we emulate the
whole page.
Without this fix, on host block devices a maximum transfer length of
(INT_MAX - sector_size) bytes is advertised to the guest, resulting in
I/O errors when a request that exceeds the host limits is made by the
guest. (Prior to commit 24b36e9813e, this code path would use the
max_transfer value from the host instead of INT_MAX, but still miss the
fix from 01ef8185b80 where max_transfer is also capped to max_iov
host pages, so it would be less wrong, but still wrong.)
Cc: qemu-stable@nongnu.org
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2096251
Fixes: 01ef8185b809af9d287e1a03a3f9d8ea8231118a
Fixes: 24b36e9813ec15da7db62e3b3621730710c5f020
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20220822125320.48257-1-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 51e15194b0a091e5c40aab2eb234a1d36c5c58ee)
Resolved conflict: qemu_real_host_page_size() is a getter function in
current upstream, but still just a public global variable downstream.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
hw/scsi/scsi-generic.c | 21 ++++++++++++++-------
1 file changed, 14 insertions(+), 7 deletions(-)
diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c
index 0306ccc7b1..3742899839 100644
--- a/hw/scsi/scsi-generic.c
+++ b/hw/scsi/scsi-generic.c
@@ -147,6 +147,18 @@ static int execute_command(BlockBackend *blk,
return 0;
}
+static uint64_t calculate_max_transfer(SCSIDevice *s)
+{
+ uint64_t max_transfer = blk_get_max_hw_transfer(s->conf.blk);
+ uint32_t max_iov = blk_get_max_hw_iov(s->conf.blk);
+
+ assert(max_transfer);
+ max_transfer = MIN_NON_ZERO(max_transfer,
+ max_iov * qemu_real_host_page_size);
+
+ return max_transfer / s->blocksize;
+}
+
static int scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s, int len)
{
uint8_t page, page_idx;
@@ -179,12 +191,7 @@ static int scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s, int len)
(r->req.cmd.buf[1] & 0x01)) {
page = r->req.cmd.buf[2];
if (page == 0xb0) {
- uint64_t max_transfer = blk_get_max_hw_transfer(s->conf.blk);
- uint32_t max_iov = blk_get_max_hw_iov(s->conf.blk);
-
- assert(max_transfer);
- max_transfer = MIN_NON_ZERO(max_transfer, max_iov * qemu_real_host_page_size)
- / s->blocksize;
+ uint64_t max_transfer = calculate_max_transfer(s);
stl_be_p(&r->buf[8], max_transfer);
/* Also take care of the opt xfer len. */
stl_be_p(&r->buf[12],
@@ -230,7 +237,7 @@ static int scsi_generic_emulate_block_limits(SCSIGenericReq *r, SCSIDevice *s)
uint8_t buf[64];
SCSIBlockLimits bl = {
- .max_io_sectors = blk_get_max_transfer(s->conf.blk) / s->blocksize
+ .max_io_sectors = calculate_max_transfer(s),
};
memset(r->buf, 0, r->buflen);
--
2.31.1

@ -0,0 +1,54 @@
From 74b3e92dcb9e343e135a681259514b4fd28086ea Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Fri, 6 May 2022 15:25:09 +0200
Subject: [PATCH 4/5] sysemu: tpm: Add a stub function for TPM_IS_CRB
RH-Author: Eric Auger <eric.auger@redhat.com>
RH-MergeRequest: 84: vfio/common: Remove spurious tpm-crb-cmd misalignment warning
RH-Commit: [1/2] 0ab55ca1aa12a3a7cbdef5a378928f75e030e536 (eauger1/centos-qemu-kvm)
RH-Bugzilla: 2037612
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Andrew Jones <drjones@redhat.com>
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2037612
Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45166961
Upstream Status: YES
Tested: With TPM-CRB and VFIO
In a subsequent patch, VFIO will need to recognize if
a memory region owner is a TPM CRB device. Hence VFIO
needs to use TPM_IS_CRB() even if CONFIG_TPM is unset. So
let's add a stub function.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Suggested-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Stefan Berger <stefanb@linnux.ibm.com>
Link: https://lore.kernel.org/r/20220506132510.1847942-2-eric.auger@redhat.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
(cherry picked from commit 4168cdad398843ed53d650a27651868b4d3e21c9)
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
include/sysemu/tpm.h | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/include/sysemu/tpm.h b/include/sysemu/tpm.h
index 68b2206463..fb40e30ff6 100644
--- a/include/sysemu/tpm.h
+++ b/include/sysemu/tpm.h
@@ -80,6 +80,12 @@ static inline TPMVersion tpm_get_version(TPMIf *ti)
#define tpm_init() (0)
#define tpm_cleanup()
+/* needed for an alignment check in non-tpm code */
+static inline Object *TPM_IS_CRB(Object *obj)
+{
+ return NULL;
+}
+
#endif /* CONFIG_TPM */
#endif /* QEMU_TPM_H */
--
2.31.1

@ -0,0 +1,129 @@
From 1f8528b71d96c01dd6106f11681f4a4e2776ef5f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= <berrange@redhat.com>
Date: Mon, 21 Mar 2022 12:05:42 +0000
Subject: [PATCH 06/18] target/arm: deprecate named CPU models
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Daniel P. Berrangé <berrange@redhat.com>
RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models
RH-Commit: [6/6] afddeb9e898206fd04499f01c48caf7dc1a8b8ef (berrange/centos-src-qemu)
RH-Bugzilla: 2060839
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
KVM requires use of the 'host' CPU model, so named CPU models are only
needed for TCG. Since we don't consider TCG to be supported we can
deprecate all the named CPU models. TCG users can rely on 'max' model.
Note: this has the effect of deprecating the default built-in CPU
model 'cortex-a57'. Applications using QEMU are expected to make an
explicit choice about which CPU model they want, since no builtin
default can suit all purposes.
https://bugzilla.redhat.com/show_bug.cgi?id=2060839
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
---
target/arm/cpu-qom.h | 1 +
target/arm/cpu.c | 5 +++++
target/arm/cpu.h | 2 ++
target/arm/cpu64.c | 8 +++++++-
target/arm/helper.c | 2 ++
5 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h
index 64c44cef2d..82e97249bc 100644
--- a/target/arm/cpu-qom.h
+++ b/target/arm/cpu-qom.h
@@ -35,6 +35,7 @@ typedef struct ARMCPUInfo {
const char *name;
void (*initfn)(Object *obj);
void (*class_init)(ObjectClass *oc, void *data);
+ const char *deprecation_note;
} ARMCPUInfo;
void arm_cpu_register(const ARMCPUInfo *info);
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 5d4ca7a227..c74b0fb462 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -2105,8 +2105,13 @@ static void arm_cpu_instance_init(Object *obj)
static void cpu_register_class_init(ObjectClass *oc, void *data)
{
ARMCPUClass *acc = ARM_CPU_CLASS(oc);
+ CPUClass *cc = CPU_CLASS(oc);
acc->info = data;
+
+ if (acc->info->deprecation_note) {
+ cc->deprecation_note = acc->info->deprecation_note;
+ }
}
void arm_cpu_register(const ARMCPUInfo *info)
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 23879de5fa..c0c9f680e5 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -33,6 +33,8 @@
#define KVM_HAVE_MCE_INJECTION 1
#endif
+#define RHEL_CPU_DEPRECATION "use 'host' / 'max'"
+
#define EXCP_UDEF 1 /* undefined instruction */
#define EXCP_SWI 2 /* software interrupt */
#define EXCP_PREFETCH_ABORT 3
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index e80b831073..c8f152891c 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -975,7 +975,8 @@ static void aarch64_a64fx_initfn(Object *obj)
#endif /* disabled for RHEL */
static const ARMCPUInfo aarch64_cpus[] = {
- { .name = "cortex-a57", .initfn = aarch64_a57_initfn },
+ { .name = "cortex-a57", .initfn = aarch64_a57_initfn,
+ .deprecation_note = RHEL_CPU_DEPRECATION },
#if 0 /* Disabled for Red Hat Enterprise Linux */
{ .name = "cortex-a53", .initfn = aarch64_a53_initfn },
{ .name = "cortex-a72", .initfn = aarch64_a72_initfn },
@@ -1052,8 +1053,13 @@ static void aarch64_cpu_instance_init(Object *obj)
static void cpu_register_class_init(ObjectClass *oc, void *data)
{
ARMCPUClass *acc = ARM_CPU_CLASS(oc);
+ CPUClass *cc = CPU_CLASS(oc);
acc->info = data;
+
+ if (acc->info->deprecation_note) {
+ cc->deprecation_note = acc->info->deprecation_note;
+ }
}
void aarch64_cpu_register(const ARMCPUInfo *info)
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 7d14650615..3d34f63e49 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -8560,6 +8560,7 @@ void arm_cpu_list(void)
static void arm_cpu_add_definition(gpointer data, gpointer user_data)
{
ObjectClass *oc = data;
+ CPUClass *cc = CPU_CLASS(oc);
CpuDefinitionInfoList **cpu_list = user_data;
CpuDefinitionInfo *info;
const char *typename;
@@ -8569,6 +8570,7 @@ static void arm_cpu_add_definition(gpointer data, gpointer user_data)
info->name = g_strndup(typename,
strlen(typename) - strlen("-" TYPE_ARM_CPU));
info->q_typename = g_strdup(typename);
+ info->deprecated = !!cc->deprecation_note;
QAPI_LIST_PREPEND(*cpu_list, info);
}
--
2.35.3

@ -0,0 +1,273 @@
From 577b04770e47aed0f88acb4a415ed04ddbe087f1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= <berrange@redhat.com>
Date: Thu, 17 Mar 2022 17:59:22 +0000
Subject: [PATCH 04/18] target/i386: deprecate CPUs older than x86_64-v2 ABI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Daniel P. Berrangé <berrange@redhat.com>
RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models
RH-Commit: [4/6] 71f6043f11b31ffa841a2e14d24972e571c18a9e (berrange/centos-src-qemu)
RH-Bugzilla: 2060839
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RHEL-9 is compiled with the x86_64-v2 ABI. We use this as a baseline to
select which CPUs we want to support, such that there is at least one
supported guest CPU that can be launched for every physical machine
capable of running RHEL-9 KVM.
Supported CPUs:
* QEMU models
base (QEMU internal)
host (host passthrough)
max (host passthrough for KVM,
all emulated features for TCG)
* Intel models
Icelake-Server
Icelake-Server-noTSX
Cascadelake-Server (2019)
Cascadelake-Server-noTSX (2019)
Skylake-Server (2016)
Skylake-Server-IBRS (2016)
Skylake-Server-noTSX-IBRS (2016)
Skylake-Client (2015)
Skylake-Client-IBRS (2015)
Skylake-Client-noTSX-IBRS (2015)
Broadwell (2014)
Broadwell-IBRS (2014)
Broadwell-noTSX (2014)
Broadwell-noTSX-IBRS (2014)
Haswell (2013)
Haswell-IBRS (2013)
Haswell-noTSX (2013)
Haswell-noTSX-IBRS (2013)
IvyBridge (2012)
IvyBridge-IBRS (2012)
SandyBridge (2011)
SandyBridge-IBRS (2011)
Westmere (2010)
Westmere-IBRS (2010)
Nehalem (2008)
Nehalem-IBRS (2008)
Cooperlake (2020)
Snowridge (2019)
KnightsMill (2017)
Denverton (2016)
* AMD models
EPYC-Milan (2021)
EPYC-Rome (2019)
EPYC (2017)
EPYC-IBPB (2017)
Opteron_G5 (2012)
Opteron_G4 (2011)
* Other
Dhyana (2018)
(I've omitted the many -vNNN versions for brevity)
Deprecated CPUs:
486
athlon
Conroe
core2duo
coreduo
Icelake-Client (already deprecated upstream)
Icelake-Client-noTSX (already deprecated upstream)
kvm32
kvm64
n270
Opteron_G1
Opteron_G2
Opteron_G3
Penryn
pentium2
pentium3
pentium
phenom
qemu32
qemu64
The deprecated CPU models are subject to removal in a future
major version of RHEL.
Note: this has the effect of deprecating the default built-in CPU
model 'qemu64'. Applications using QEMU are expected to make an
explicit choice about which CPU model they want, since no builtin
default can suit all purposes.
https://bugzilla.redhat.com/show_bug.cgi?id=2060839
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
---
target/i386/cpu.c | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index cb6b5467d0..87cb641b5f 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1780,9 +1780,13 @@ static const CPUCaches epyc_milan_cache_info = {
* PT in VMX operation
*/
+#define RHEL_CPU_DEPRECATION \
+ "use at least 'Nehalem' / 'Opteron_G4', or 'host' / 'max'"
+
static const X86CPUDefinition builtin_x86_defs[] = {
{
.name = "qemu64",
+ .deprecation_note = RHEL_CPU_DEPRECATION,
.level = 0xd,
.vendor = CPUID_VENDOR_AMD,
.family = 15,
@@ -1803,6 +1807,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "phenom",
+ .deprecation_note = RHEL_CPU_DEPRECATION,
.level = 5,
.vendor = CPUID_VENDOR_AMD,
.family = 16,
@@ -1835,6 +1840,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "core2duo",
+ .deprecation_note = RHEL_CPU_DEPRECATION,
.level = 10,
.vendor = CPUID_VENDOR_INTEL,
.family = 6,
@@ -1877,6 +1883,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "kvm64",
+ .deprecation_note = RHEL_CPU_DEPRECATION,
.level = 0xd,
.vendor = CPUID_VENDOR_INTEL,
.family = 15,
@@ -1918,6 +1925,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "qemu32",
+ .deprecation_note = RHEL_CPU_DEPRECATION,
.level = 4,
.vendor = CPUID_VENDOR_INTEL,
.family = 6,
@@ -1932,6 +1940,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "kvm32",
+ .deprecation_note = RHEL_CPU_DEPRECATION,
.level = 5,
.vendor = CPUID_VENDOR_INTEL,
.family = 15,
@@ -1962,6 +1971,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "coreduo",
+ .deprecation_note = RHEL_CPU_DEPRECATION,
.level = 10,
.vendor = CPUID_VENDOR_INTEL,
.family = 6,
@@ -1995,6 +2005,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "486",
+ .deprecation_note = RHEL_CPU_DEPRECATION,
.level = 1,
.vendor = CPUID_VENDOR_INTEL,
.family = 4,
@@ -2007,6 +2018,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "pentium",
+ .deprecation_note = RHEL_CPU_DEPRECATION,
.level = 1,
.vendor = CPUID_VENDOR_INTEL,
.family = 5,
@@ -2019,6 +2031,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "pentium2",
+ .deprecation_note = RHEL_CPU_DEPRECATION,
.level = 2,
.vendor = CPUID_VENDOR_INTEL,
.family = 6,
@@ -2031,6 +2044,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "pentium3",
+ .deprecation_note = RHEL_CPU_DEPRECATION,
.level = 3,
.vendor = CPUID_VENDOR_INTEL,
.family = 6,
@@ -2043,6 +2057,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "athlon",
+ .deprecation_note = RHEL_CPU_DEPRECATION,
.level = 2,
.vendor = CPUID_VENDOR_AMD,
.family = 6,
@@ -2058,6 +2073,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "n270",
+ .deprecation_note = RHEL_CPU_DEPRECATION,
.level = 10,
.vendor = CPUID_VENDOR_INTEL,
.family = 6,
@@ -2083,6 +2099,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "Conroe",
+ .deprecation_note = RHEL_CPU_DEPRECATION,
.level = 10,
.vendor = CPUID_VENDOR_INTEL,
.family = 6,
@@ -2123,6 +2140,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "Penryn",
+ .deprecation_note = RHEL_CPU_DEPRECATION,
.level = 10,
.vendor = CPUID_VENDOR_INTEL,
.family = 6,
@@ -3832,6 +3850,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "Opteron_G1",
+ .deprecation_note = RHEL_CPU_DEPRECATION,
.level = 5,
.vendor = CPUID_VENDOR_AMD,
.family = 15,
@@ -3852,6 +3871,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "Opteron_G2",
+ .deprecation_note = RHEL_CPU_DEPRECATION,
.level = 5,
.vendor = CPUID_VENDOR_AMD,
.family = 15,
@@ -3874,6 +3894,7 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
{
.name = "Opteron_G3",
+ .deprecation_note = RHEL_CPU_DEPRECATION,
.level = 5,
.vendor = CPUID_VENDOR_AMD,
.family = 16,
--
2.35.3

@ -0,0 +1,48 @@
From 39642d0d37e2ef61ce7fde0bc284d37a365e4482 Mon Sep 17 00:00:00 2001
From: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
Date: Mon, 2 May 2022 17:59:11 -0300
Subject: [PATCH 2/2] target/ppc/cpu-models: Fix ppc_cpu_aliases list for RHEL
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Murilo Opsfelder Araújo <muriloo@linux.ibm.com>
RH-MergeRequest: 81: target/ppc/cpu-models: remove extraneous "#endif"
RH-Commit: [1/1] 5fff003ad3deb84c6a8e69ab90552a31edb3b058 (mopsfelder/centos-stream-src-qemu-kvm)
RH-Bugzilla: 2081022
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
The commit b9d28ecdedaf ("Enable/disable devices for RHEL") removed the
"#if 0" from the beginning of the ppc_cpu_aliases list, which broke the
build on ppc64le:
../target/ppc/cpu-models.c:904:2: error: #endif without #if
#endif
^
1 error generated.
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2081022
Fixes: b9d28ecdedaf (Enable/disable devices for RHEL)
Signed-off-by: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
---
target/ppc/cpu-models.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c
index dd78883410..528467eac1 100644
--- a/target/ppc/cpu-models.c
+++ b/target/ppc/cpu-models.c
@@ -746,6 +746,7 @@
/* PowerPC CPU aliases */
PowerPCCPUAlias ppc_cpu_aliases[] = {
+#if 0 /* Disabled for Red Hat Enterprise Linux */
{ "405", "405d4" },
{ "405cr", "405crc" },
{ "405gp", "405gpd" },
--
2.35.1

@ -0,0 +1,194 @@
From 8459c305914e2a7a19dcd1662d54a89def7acfa6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= <berrange@redhat.com>
Date: Thu, 17 Mar 2022 17:59:22 +0000
Subject: [PATCH 05/18] target/s390x: deprecate CPUs older than z14
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Daniel P. Berrangé <berrange@redhat.com>
RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models
RH-Commit: [5/6] 2da9e06cf452287673f94f880a7eb8b2b37b7278 (berrange/centos-src-qemu)
RH-Bugzilla: 2060839
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RHEL-9 is compiled with the z14 ABI. We use this as a baseline to
select which CPUs we want to support, such that there is at least one
supported guest CPU that can be launched for every physical
machine capable of running RHEL-9 KVM.
Supported CPUs:
gen15a-base
gen15a
gen15b-base
gen15b
gen16a-base
gen16a
gen16b-base
gen16b
max
qemu
z14.2-base
z14.2
z14-base
z14
z14ZR1-base
z14ZR1
Deprecated CPUs:
z10BC.2-base
z10BC.2
z10BC-base
z10BC
z10EC.2-base
z10EC.2
z10EC.3-base
z10EC.3
z10EC-base
z10EC
z114-base
z114
z13.2-base
z13.2
z13-base
z13s-base
z13s
z13
z196.2-base
z196.2
z196-base
z196
z800-base
z800
z890.2-base
z890.2
z890.3-base
z890.3
z890-base
z890
z900.2-base
z900.2
z900.3-base
z900.3
z900-base
z900
z990.2-base
z990.2
z990.3-base
z990.3
z990.4-base
z990.4
z990.5-base
z990.5
z990-base
z990
z9BC.2-base
z9BC.2
z9BC-base
z9BC
z9EC.2-base
z9EC.2
z9EC.3-base
z9EC.3
z9EC-base
z9EC
zBC12-base
zBC12
zEC12.2-base
zEC12.2
zEC12-base
zEC12
https://bugzilla.redhat.com/show_bug.cgi?id=2060839
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
---
target/s390x/cpu_models.c | 11 +++++++++++
target/s390x/cpu_models.h | 2 ++
target/s390x/cpu_models_sysemu.c | 2 ++
3 files changed, 15 insertions(+)
diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
index 6d71428056..9b9fc41676 100644
--- a/target/s390x/cpu_models.c
+++ b/target/s390x/cpu_models.c
@@ -45,6 +45,9 @@
* of a following release have been a superset of the previous release. With
* generation 15 one base feature and one optional feature have been deprecated.
*/
+
+#define RHEL_CPU_DEPRECATION "use at least 'z14', or 'host' / 'qemu' / 'max'"
+
static S390CPUDef s390_cpu_defs[] = {
CPUDEF_INIT(0x2064, 7, 1, 38, 0x00000000U, "z900", "IBM zSeries 900 GA1"),
CPUDEF_INIT(0x2064, 7, 2, 38, 0x00000000U, "z900.2", "IBM zSeries 900 GA2"),
@@ -852,22 +855,30 @@ static void s390_host_cpu_model_class_init(ObjectClass *oc, void *data)
static void s390_base_cpu_model_class_init(ObjectClass *oc, void *data)
{
S390CPUClass *xcc = S390_CPU_CLASS(oc);
+ CPUClass *cc = CPU_CLASS(oc);
/* all base models are migration safe */
xcc->cpu_def = (const S390CPUDef *) data;
xcc->is_migration_safe = true;
xcc->is_static = true;
xcc->desc = xcc->cpu_def->desc;
+ if (xcc->cpu_def->gen < 14) {
+ cc->deprecation_note = RHEL_CPU_DEPRECATION;
+ }
}
static void s390_cpu_model_class_init(ObjectClass *oc, void *data)
{
S390CPUClass *xcc = S390_CPU_CLASS(oc);
+ CPUClass *cc = CPU_CLASS(oc);
/* model that can change between QEMU versions */
xcc->cpu_def = (const S390CPUDef *) data;
xcc->is_migration_safe = true;
xcc->desc = xcc->cpu_def->desc;
+ if (xcc->cpu_def->gen < 14) {
+ cc->deprecation_note = RHEL_CPU_DEPRECATION;
+ }
}
static void s390_qemu_cpu_model_class_init(ObjectClass *oc, void *data)
diff --git a/target/s390x/cpu_models.h b/target/s390x/cpu_models.h
index 74d1f87e4f..372160bcd7 100644
--- a/target/s390x/cpu_models.h
+++ b/target/s390x/cpu_models.h
@@ -38,6 +38,8 @@ struct S390CPUDef {
S390FeatBitmap full_feat;
/* used to init full_feat from generated data */
S390FeatInit full_init;
+ /* if deprecated, provides a suggestion */
+ const char *deprecation_note;
};
/* CPU model based on a CPU definition */
diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c
index 6a04ccab1b..f3b7c304ec 100644
--- a/target/s390x/cpu_models_sysemu.c
+++ b/target/s390x/cpu_models_sysemu.c
@@ -61,6 +61,7 @@ static void create_cpu_model_list(ObjectClass *klass, void *opaque)
CpuDefinitionInfo *info;
char *name = g_strdup(object_class_get_name(klass));
S390CPUClass *scc = S390_CPU_CLASS(klass);
+ CPUClass *cc = CPU_CLASS(klass);
/* strip off the -s390x-cpu */
g_strrstr(name, "-" TYPE_S390_CPU)[0] = 0;
@@ -70,6 +71,7 @@ static void create_cpu_model_list(ObjectClass *klass, void *opaque)
info->migration_safe = scc->is_migration_safe;
info->q_static = scc->is_static;
info->q_typename = g_strdup(object_class_get_name(klass));
+ info->deprecated = !!cc->deprecation_note;
/* check for unavailable features */
if (cpu_list_data->model) {
Object *obj;
--
2.35.3

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save