From 0023a15dcc8058c97c89369e9a8eedb8780a66e4 Mon Sep 17 00:00:00 2001 From: MSVSphere Packaging Team Date: Wed, 13 Sep 2023 03:09:49 +0300 Subject: [PATCH] import qemu-kvm-7.2.0-14.el9_2.5 --- ...ce-between-epoll-upgrade-and-aio_set.patch | 90 ++++++ ...ch-if-TLS-channel-is-closed-during-h.patch | 103 +++++++ ...pa-map-shadow-vrings-with-MAP_SHARED.patch | 131 +++++++++ ...ova-tree-creation-from-init-to-start.patch | 268 ++++++++++++++++++ ...st_vdpa_net_cvq_cmd_page_len-functio.patch | 84 ++++++ ...t-cleanup-the-vdpa-vhost-net-structu.patch | 69 +++++ SPECS/qemu-kvm.spec | 32 ++- 7 files changed, 776 insertions(+), 1 deletion(-) create mode 100644 SOURCES/kvm-aio-posix-fix-race-between-epoll-upgrade-and-aio_set.patch create mode 100644 SOURCES/kvm-io-remove-io-watch-if-TLS-channel-is-closed-during-h.patch create mode 100644 SOURCES/kvm-vdpa-map-shadow-vrings-with-MAP_SHARED.patch create mode 100644 SOURCES/kvm-vdpa-net-move-iova-tree-creation-from-init-to-start.patch create mode 100644 SOURCES/kvm-vdpa-reorder-vhost_vdpa_net_cvq_cmd_page_len-functio.patch create mode 100644 SOURCES/kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch diff --git a/SOURCES/kvm-aio-posix-fix-race-between-epoll-upgrade-and-aio_set.patch b/SOURCES/kvm-aio-posix-fix-race-between-epoll-upgrade-and-aio_set.patch new file mode 100644 index 0000000..5ee3270 --- /dev/null +++ b/SOURCES/kvm-aio-posix-fix-race-between-epoll-upgrade-and-aio_set.patch @@ -0,0 +1,90 @@ +From 244e92fea388d2be9fe81a5c5912d92b8f599caa Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 23 Mar 2023 10:48:59 -0400 +Subject: [PATCH 1/2] aio-posix: fix race between epoll upgrade and + aio_set_fd_handler() + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 292: aio-posix: fix race between epoll upgrade and aio_set_fd_handler() +RH-Bugzilla: 2211923 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Paolo Bonzini +RH-Commit: [1/1] 182471bac79fa2b2ae8a34087eb6c4ab1af786e1 + +If another thread calls aio_set_fd_handler() while the IOThread event +loop is upgrading from ppoll(2) to epoll(7) then we might miss new +AioHandlers. The epollfd will not monitor the new AioHandler's fd, +resulting in hangs. + +Take the AioHandler list lock while upgrading to epoll. This prevents +AioHandlers from changing while epoll is being set up. If we cannot lock +because we're in a nested event loop, then don't upgrade to epoll (it +will happen next time we're not in a nested call). + +The downside to taking the lock is that the aio_set_fd_handler() thread +has to wait until the epoll upgrade is finished, which involves many +epoll_ctl(2) system calls. However, this scenario is rare and I couldn't +think of another solution that is still simple. + +Reported-by: Qing Wang +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2090998 +Cc: Paolo Bonzini +Cc: Fam Zheng +Signed-off-by: Stefan Hajnoczi +Message-Id: <20230323144859.1338495-1-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit e62da98527fa35fe5f532cded01a33edf9fbe7b2) +Signed-off-by: Stefan Hajnoczi +--- + util/fdmon-epoll.c | 25 ++++++++++++++++++------- + 1 file changed, 18 insertions(+), 7 deletions(-) + +diff --git a/util/fdmon-epoll.c b/util/fdmon-epoll.c +index e11a8a022e..1683aa1105 100644 +--- a/util/fdmon-epoll.c ++++ b/util/fdmon-epoll.c +@@ -127,6 +127,8 @@ static bool fdmon_epoll_try_enable(AioContext *ctx) + + bool fdmon_epoll_try_upgrade(AioContext *ctx, unsigned npfd) + { ++ bool ok; ++ + if (ctx->epollfd < 0) { + return false; + } +@@ -136,14 +138,23 @@ bool fdmon_epoll_try_upgrade(AioContext *ctx, unsigned npfd) + return false; + } + +- if (npfd >= EPOLL_ENABLE_THRESHOLD) { +- if (fdmon_epoll_try_enable(ctx)) { +- return true; +- } else { +- fdmon_epoll_disable(ctx); +- } ++ if (npfd < EPOLL_ENABLE_THRESHOLD) { ++ return false; ++ } ++ ++ /* The list must not change while we add fds to epoll */ ++ if (!qemu_lockcnt_dec_if_lock(&ctx->list_lock)) { ++ return false; ++ } ++ ++ ok = fdmon_epoll_try_enable(ctx); ++ ++ qemu_lockcnt_inc_and_unlock(&ctx->list_lock); ++ ++ if (!ok) { ++ fdmon_epoll_disable(ctx); + } +- return false; ++ return ok; + } + + void fdmon_epoll_setup(AioContext *ctx) +-- +2.39.3 + diff --git a/SOURCES/kvm-io-remove-io-watch-if-TLS-channel-is-closed-during-h.patch b/SOURCES/kvm-io-remove-io-watch-if-TLS-channel-is-closed-during-h.patch new file mode 100644 index 0000000..95ae201 --- /dev/null +++ b/SOURCES/kvm-io-remove-io-watch-if-TLS-channel-is-closed-during-h.patch @@ -0,0 +1,103 @@ +From c13b4e32be9de900e7a55ebf5c341df8363e3b4a Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 15 Aug 2023 00:08:55 +0000 +Subject: [PATCH 4/4] io: remove io watch if TLS channel is closed during + handshake +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 301: io: remove io watch if TLS channel is closed during handshake +RH-Bugzilla: 2216503 +RH-Acked-by: Peter Xu +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [1/1] 10bc3055a369a89996a1be34ce8d6c1fbc2c531e (redhat/rhel/src/qemu-kvm/jons-qemu-kvm-2) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2216503 +CVE: CVE-2023-3354 +Upstream: Merged + +commit 10be627d2b5ec2d6b3dce045144aa739eef678b4 +Author: Daniel P. Berrangé +Date: Tue Jun 20 09:45:34 2023 +0100 + + io: remove io watch if TLS channel is closed during handshake + + The TLS handshake make take some time to complete, during which time an + I/O watch might be registered with the main loop. If the owner of the + I/O channel invokes qio_channel_close() while the handshake is waiting + to continue the I/O watch must be removed. Failing to remove it will + later trigger the completion callback which the owner is not expecting + to receive. In the case of the VNC server, this results in a SEGV as + vnc_disconnect_start() tries to shutdown a client connection that is + already gone / NULL. + + CVE-2023-3354 + Reported-by: jiangyegen + Signed-off-by: Daniel P. Berrangé + +Signed-off-by: Jon Maloy +--- + include/io/channel-tls.h | 1 + + io/channel-tls.c | 18 ++++++++++++------ + 2 files changed, 13 insertions(+), 6 deletions(-) + +diff --git a/include/io/channel-tls.h b/include/io/channel-tls.h +index 5672479e9e..26c67f17e2 100644 +--- a/include/io/channel-tls.h ++++ b/include/io/channel-tls.h +@@ -48,6 +48,7 @@ struct QIOChannelTLS { + QIOChannel *master; + QCryptoTLSSession *session; + QIOChannelShutdown shutdown; ++ guint hs_ioc_tag; + }; + + /** +diff --git a/io/channel-tls.c b/io/channel-tls.c +index c730cb8ec5..bd79e78837 100644 +--- a/io/channel-tls.c ++++ b/io/channel-tls.c +@@ -195,12 +195,13 @@ static void qio_channel_tls_handshake_task(QIOChannelTLS *ioc, + } + + trace_qio_channel_tls_handshake_pending(ioc, status); +- qio_channel_add_watch_full(ioc->master, +- condition, +- qio_channel_tls_handshake_io, +- data, +- NULL, +- context); ++ ioc->hs_ioc_tag = ++ qio_channel_add_watch_full(ioc->master, ++ condition, ++ qio_channel_tls_handshake_io, ++ data, ++ NULL, ++ context); + } + } + +@@ -215,6 +216,7 @@ static gboolean qio_channel_tls_handshake_io(QIOChannel *ioc, + QIOChannelTLS *tioc = QIO_CHANNEL_TLS( + qio_task_get_source(task)); + ++ tioc->hs_ioc_tag = 0; + g_free(data); + qio_channel_tls_handshake_task(tioc, task, context); + +@@ -375,6 +377,10 @@ static int qio_channel_tls_close(QIOChannel *ioc, + { + QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc); + ++ if (tioc->hs_ioc_tag) { ++ g_clear_handle_id(&tioc->hs_ioc_tag, g_source_remove); ++ } ++ + return qio_channel_close(tioc->master, errp); + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-vdpa-map-shadow-vrings-with-MAP_SHARED.patch b/SOURCES/kvm-vdpa-map-shadow-vrings-with-MAP_SHARED.patch new file mode 100644 index 0000000..c577758 --- /dev/null +++ b/SOURCES/kvm-vdpa-map-shadow-vrings-with-MAP_SHARED.patch @@ -0,0 +1,131 @@ +From 965f27235276e3b16ebf630436eb1d7e792a3d2a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Fri, 2 Jun 2023 16:38:54 +0200 +Subject: [PATCH 3/4] vdpa: map shadow vrings with MAP_SHARED +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 298: Fix qemu core dump with "x-svq=on" when hot-plugging a NIC +RH-Jira: RHEL-1060 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/3] 673ba501d6e76bae9272847acebaf5f01689f9cf + +JIRA: https://issues.redhat.com/browse/RHEL-1060 + +The vdpa devices that use va addresses neeeds these maps shared. +Otherwise, vhost_vdpa checks will refuse to accept the maps. + +The mmap call will always return a page aligned address, so removing the +qemu_memalign call. Keeping the ROUND_UP for the size as we still need +to DMA-map them in full. + +Not applying fixes tag as it never worked with va devices. + +Signed-off-by: Eugenio Pérez +Message-Id: <20230602143854.1879091-4-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit babf8b87127ae809b31b3c0a117dcbc91aaf9aba) + +Conflicts + + because of missing commits: + + 5d410557dea4 ("vhost: fix possible wrap in SVQ descriptor ring") + 5c1ebd4c432e ("vdpa: block migration if device has unsupported features") + + and already backported commit$ + + a0d7215e339b ("vhost-vdpa: do not cleanup the vdpa/vhost-net structures if peer nic is present") + +Signed-off-by: Laurent Vivier +--- + hw/virtio/vhost-shadow-virtqueue.c | 18 +++++++++--------- + net/vhost-vdpa.c | 16 ++++++++-------- + 2 files changed, 17 insertions(+), 17 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 4307296358..9f09d435be 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -647,7 +647,7 @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd) + void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, + VirtQueue *vq, VhostIOVATree *iova_tree) + { +- size_t desc_size, driver_size, device_size; ++ size_t desc_size; + + event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call); + svq->next_guest_avail_elem = NULL; +@@ -659,14 +659,14 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, + svq->iova_tree = iova_tree; + + svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq)); +- driver_size = vhost_svq_driver_area_size(svq); +- device_size = vhost_svq_device_area_size(svq); +- svq->vring.desc = qemu_memalign(qemu_real_host_page_size(), driver_size); ++ svq->vring.desc = mmap(NULL, vhost_svq_driver_area_size(svq), ++ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, ++ -1, 0); + desc_size = sizeof(vring_desc_t) * svq->vring.num; + svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size); +- memset(svq->vring.desc, 0, driver_size); +- svq->vring.used = qemu_memalign(qemu_real_host_page_size(), device_size); +- memset(svq->vring.used, 0, device_size); ++ svq->vring.used = mmap(NULL, vhost_svq_device_area_size(svq), ++ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, ++ -1, 0); + svq->desc_state = g_new0(SVQDescState, svq->vring.num); + svq->desc_next = g_new0(uint16_t, svq->vring.num); + for (unsigned i = 0; i < svq->vring.num - 1; i++) { +@@ -705,8 +705,8 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) + svq->vq = NULL; + g_free(svq->desc_next); + g_free(svq->desc_state); +- qemu_vfree(svq->vring.desc); +- qemu_vfree(svq->vring.used); ++ munmap(svq->vring.desc, vhost_svq_driver_area_size(svq)); ++ munmap(svq->vring.used, vhost_svq_device_area_size(svq)); + event_notifier_set_handler(&svq->hdev_call, NULL); + } + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index d282c90a3d..8bfa95b801 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -203,8 +203,8 @@ static void vhost_vdpa_cleanup(NetClientState *nc) + if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_NIC) { + return; + } +- qemu_vfree(s->cvq_cmd_out_buffer); +- qemu_vfree(s->status); ++ munmap(s->cvq_cmd_out_buffer, vhost_vdpa_net_cvq_cmd_page_len()); ++ munmap(s->status, vhost_vdpa_net_cvq_cmd_page_len()); + if (s->vhost_net) { + vhost_net_cleanup(s->vhost_net); + g_free(s->vhost_net); +@@ -761,12 +761,12 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + s->vhost_vdpa.iova_range = iova_range; + s->vhost_vdpa.shadow_data = svq; + if (!is_datapath) { +- s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(), +- vhost_vdpa_net_cvq_cmd_page_len()); +- memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len()); +- s->status = qemu_memalign(qemu_real_host_page_size(), +- vhost_vdpa_net_cvq_cmd_page_len()); +- memset(s->status, 0, vhost_vdpa_net_cvq_cmd_page_len()); ++ s->cvq_cmd_out_buffer = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(), ++ PROT_READ | PROT_WRITE, ++ MAP_SHARED | MAP_ANONYMOUS, -1, 0); ++ s->status = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(), ++ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, ++ -1, 0); + + s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; + s->vhost_vdpa.shadow_vq_ops_opaque = s; +-- +2.39.3 + diff --git a/SOURCES/kvm-vdpa-net-move-iova-tree-creation-from-init-to-start.patch b/SOURCES/kvm-vdpa-net-move-iova-tree-creation-from-init-to-start.patch new file mode 100644 index 0000000..7191628 --- /dev/null +++ b/SOURCES/kvm-vdpa-net-move-iova-tree-creation-from-init-to-start.patch @@ -0,0 +1,268 @@ +From 293e249644c14b2bd19dd6a3f08a0e18ec040200 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Fri, 3 Mar 2023 18:24:32 +0100 +Subject: [PATCH 1/4] vdpa net: move iova tree creation from init to start +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 298: Fix qemu core dump with "x-svq=on" when hot-plugging a NIC +RH-Jira: RHEL-1060 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/3] f85a05eb528820adf4a3c0cad2950b4ab500d5fe + +JIRA: https://issues.redhat.com/browse/RHEL-1060 + +Only create iova_tree if and when it is needed. + +The cleanup keeps being responsible for the last VQ but this change +allows it to merge both cleanup functions. + +Signed-off-by: Eugenio Pérez +Acked-by: Jason Wang +Message-Id: <20230303172445.1089785-2-eperezma@redhat.com> +Tested-by: Lei Yang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 00ef422e9fbfef1fb40447b08826db0951d788dd) + +Conflicts + + because of missing commit + + bf7a2ad8b6df ("vdpa: harden the error path if get_iova_range failed") + +Signed-off-by: Laurent Vivier +--- + net/vhost-vdpa.c | 113 ++++++++++++++++++++++++++++++++++------------- + 1 file changed, 83 insertions(+), 30 deletions(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 1b4fec59a2..a914348e2a 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -178,7 +178,6 @@ err_init: + static void vhost_vdpa_cleanup(NetClientState *nc) + { + VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); +- struct vhost_dev *dev = &s->vhost_net->dev; + + /* + * If a peer NIC is attached, do not cleanup anything. +@@ -190,9 +189,6 @@ static void vhost_vdpa_cleanup(NetClientState *nc) + } + qemu_vfree(s->cvq_cmd_out_buffer); + qemu_vfree(s->status); +- if (dev->vq_index + dev->nvqs == dev->vq_index_end) { +- g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete); +- } + if (s->vhost_net) { + vhost_net_cleanup(s->vhost_net); + g_free(s->vhost_net); +@@ -242,10 +238,64 @@ static ssize_t vhost_vdpa_receive(NetClientState *nc, const uint8_t *buf, + return size; + } + ++/** From any vdpa net client, get the netclient of the first queue pair */ ++static VhostVDPAState *vhost_vdpa_net_first_nc_vdpa(VhostVDPAState *s) ++{ ++ NICState *nic = qemu_get_nic(s->nc.peer); ++ NetClientState *nc0 = qemu_get_peer(nic->ncs, 0); ++ ++ return DO_UPCAST(VhostVDPAState, nc, nc0); ++} ++ ++static void vhost_vdpa_net_data_start_first(VhostVDPAState *s) ++{ ++ struct vhost_vdpa *v = &s->vhost_vdpa; ++ ++ if (v->shadow_vqs_enabled) { ++ v->iova_tree = vhost_iova_tree_new(v->iova_range.first, ++ v->iova_range.last); ++ } ++} ++ ++static int vhost_vdpa_net_data_start(NetClientState *nc) ++{ ++ VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); ++ struct vhost_vdpa *v = &s->vhost_vdpa; ++ ++ assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); ++ ++ if (v->index == 0) { ++ vhost_vdpa_net_data_start_first(s); ++ return 0; ++ } ++ ++ if (v->shadow_vqs_enabled) { ++ VhostVDPAState *s0 = vhost_vdpa_net_first_nc_vdpa(s); ++ v->iova_tree = s0->vhost_vdpa.iova_tree; ++ } ++ ++ return 0; ++} ++ ++static void vhost_vdpa_net_client_stop(NetClientState *nc) ++{ ++ VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); ++ struct vhost_dev *dev; ++ ++ assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); ++ ++ dev = s->vhost_vdpa.dev; ++ if (dev->vq_index + dev->nvqs == dev->vq_index_end) { ++ g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete); ++ } ++} ++ + static NetClientInfo net_vhost_vdpa_info = { + .type = NET_CLIENT_DRIVER_VHOST_VDPA, + .size = sizeof(VhostVDPAState), + .receive = vhost_vdpa_receive, ++ .start = vhost_vdpa_net_data_start, ++ .stop = vhost_vdpa_net_client_stop, + .cleanup = vhost_vdpa_cleanup, + .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, + .has_ufo = vhost_vdpa_has_ufo, +@@ -359,7 +409,7 @@ dma_map_err: + + static int vhost_vdpa_net_cvq_start(NetClientState *nc) + { +- VhostVDPAState *s; ++ VhostVDPAState *s, *s0; + struct vhost_vdpa *v; + uint64_t backend_features; + int64_t cvq_group; +@@ -423,8 +473,6 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) + return r; + } + +- v->iova_tree = vhost_iova_tree_new(v->iova_range.first, +- v->iova_range.last); + v->shadow_vqs_enabled = true; + s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID; + +@@ -433,6 +481,27 @@ out: + return 0; + } + ++ s0 = vhost_vdpa_net_first_nc_vdpa(s); ++ if (s0->vhost_vdpa.iova_tree) { ++ /* ++ * SVQ is already configured for all virtqueues. Reuse IOVA tree for ++ * simplicity, whether CVQ shares ASID with guest or not, because: ++ * - Memory listener need access to guest's memory addresses allocated ++ * in the IOVA tree. ++ * - There should be plenty of IOVA address space for both ASID not to ++ * worry about collisions between them. Guest's translations are ++ * still validated with virtio virtqueue_pop so there is no risk for ++ * the guest to access memory that it shouldn't. ++ * ++ * To allocate a iova tree per ASID is doable but it complicates the ++ * code and it is not worth it for the moment. ++ */ ++ v->iova_tree = s0->vhost_vdpa.iova_tree; ++ } else { ++ v->iova_tree = vhost_iova_tree_new(v->iova_range.first, ++ v->iova_range.last); ++ } ++ + r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer, + vhost_vdpa_net_cvq_cmd_page_len(), false); + if (unlikely(r < 0)) { +@@ -457,15 +526,9 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc) + if (s->vhost_vdpa.shadow_vqs_enabled) { + vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer); + vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status); +- if (!s->always_svq) { +- /* +- * If only the CVQ is shadowed we can delete this safely. +- * If all the VQs are shadows this will be needed by the time the +- * device is started again to register SVQ vrings and similar. +- */ +- g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete); +- } + } ++ ++ vhost_vdpa_net_client_stop(nc); + } + + static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len, +@@ -675,8 +738,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + int nvqs, + bool is_datapath, + bool svq, +- struct vhost_vdpa_iova_range iova_range, +- VhostIOVATree *iova_tree) ++ struct vhost_vdpa_iova_range iova_range) + { + NetClientState *nc = NULL; + VhostVDPAState *s; +@@ -698,7 +760,6 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + s->vhost_vdpa.shadow_vqs_enabled = svq; + s->vhost_vdpa.iova_range = iova_range; + s->vhost_vdpa.shadow_data = svq; +- s->vhost_vdpa.iova_tree = iova_tree; + if (!is_datapath) { + s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(), + vhost_vdpa_net_cvq_cmd_page_len()); +@@ -776,7 +837,6 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + uint64_t features; + int vdpa_device_fd; + g_autofree NetClientState **ncs = NULL; +- g_autoptr(VhostIOVATree) iova_tree = NULL; + struct vhost_vdpa_iova_range iova_range; + NetClientState *nc; + int queue_pairs, r, i = 0, has_cvq = 0; +@@ -822,12 +882,8 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + } + + vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range); +- if (opts->x_svq) { +- if (!vhost_vdpa_net_valid_svq_features(features, errp)) { +- goto err_svq; +- } +- +- iova_tree = vhost_iova_tree_new(iova_range.first, iova_range.last); ++ if (opts->x_svq && !vhost_vdpa_net_valid_svq_features(features, errp)) { ++ goto err; + } + + ncs = g_malloc0(sizeof(*ncs) * queue_pairs); +@@ -835,7 +891,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + for (i = 0; i < queue_pairs; i++) { + ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, + vdpa_device_fd, i, 2, true, opts->x_svq, +- iova_range, iova_tree); ++ iova_range); + if (!ncs[i]) + goto err; + } +@@ -843,13 +899,11 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + if (has_cvq) { + nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, + vdpa_device_fd, i, 1, false, +- opts->x_svq, iova_range, iova_tree); ++ opts->x_svq, iova_range); + if (!nc) + goto err; + } + +- /* iova_tree ownership belongs to last NetClientState */ +- g_steal_pointer(&iova_tree); + return 0; + + err: +@@ -859,7 +913,6 @@ err: + } + } + +-err_svq: + qemu_close(vdpa_device_fd); + + return -1; +-- +2.39.3 + diff --git a/SOURCES/kvm-vdpa-reorder-vhost_vdpa_net_cvq_cmd_page_len-functio.patch b/SOURCES/kvm-vdpa-reorder-vhost_vdpa_net_cvq_cmd_page_len-functio.patch new file mode 100644 index 0000000..57c3f6f --- /dev/null +++ b/SOURCES/kvm-vdpa-reorder-vhost_vdpa_net_cvq_cmd_page_len-functio.patch @@ -0,0 +1,84 @@ +From af109b3c7e8d7cb3b6c7c842a92ddf5de2270a3c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Fri, 2 Jun 2023 16:38:53 +0200 +Subject: [PATCH 2/4] vdpa: reorder vhost_vdpa_net_cvq_cmd_page_len function +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 298: Fix qemu core dump with "x-svq=on" when hot-plugging a NIC +RH-Jira: RHEL-1060 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/3] df45d2faa25186a246c18f24909ced67f94cf33f + +JIRA: https://issues.redhat.com/browse/RHEL-1060 + +We need to call it from resource cleanup context, as munmap needs the +size of the mappings. + +Signed-off-by: Eugenio Pérez +Reviewed-by: Philippe Mathieu-Daudé +Message-Id: <20230602143854.1879091-3-eperezma@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 915bf6ccd7a5c9b6cbea7a72f153597d1b98834f) +Signed-off-by: Laurent Vivier +--- + net/vhost-vdpa.c | 32 ++++++++++++++++---------------- + 1 file changed, 16 insertions(+), 16 deletions(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index a914348e2a..d282c90a3d 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -110,6 +110,22 @@ VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc) + return s->vhost_net; + } + ++static size_t vhost_vdpa_net_cvq_cmd_len(void) ++{ ++ /* ++ * MAC_TABLE_SET is the ctrl command that produces the longer out buffer. ++ * In buffer is always 1 byte, so it should fit here ++ */ ++ return sizeof(struct virtio_net_ctrl_hdr) + ++ 2 * sizeof(struct virtio_net_ctrl_mac) + ++ MAC_TABLE_ENTRIES * ETH_ALEN; ++} ++ ++static size_t vhost_vdpa_net_cvq_cmd_page_len(void) ++{ ++ return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size()); ++} ++ + static bool vhost_vdpa_net_valid_svq_features(uint64_t features, Error **errp) + { + uint64_t invalid_dev_features = +@@ -362,22 +378,6 @@ static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr) + vhost_iova_tree_remove(tree, *map); + } + +-static size_t vhost_vdpa_net_cvq_cmd_len(void) +-{ +- /* +- * MAC_TABLE_SET is the ctrl command that produces the longer out buffer. +- * In buffer is always 1 byte, so it should fit here +- */ +- return sizeof(struct virtio_net_ctrl_hdr) + +- 2 * sizeof(struct virtio_net_ctrl_mac) + +- MAC_TABLE_ENTRIES * ETH_ALEN; +-} +- +-static size_t vhost_vdpa_net_cvq_cmd_page_len(void) +-{ +- return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size()); +-} +- + /** Map CVQ buffer. */ + static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size, + bool write) +-- +2.39.3 + diff --git a/SOURCES/kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch b/SOURCES/kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch new file mode 100644 index 0000000..0c82680 --- /dev/null +++ b/SOURCES/kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch @@ -0,0 +1,69 @@ +From cca66d3e5f7bc1d88d79a7653ae244ba31566ee8 Mon Sep 17 00:00:00 2001 +From: Ani Sinha +Date: Mon, 19 Jun 2023 12:22:09 +0530 +Subject: [PATCH 2/2] vhost-vdpa: do not cleanup the vdpa/vhost-net structures + if peer nic is present + +RH-Author: Ani Sinha +RH-MergeRequest: 294: vhost-vdpa: do not cleanup the vdpa/vhost-net structures if peer nic is present +RH-Bugzilla: 2227721 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] af8fa659afb3d8a2e38bb745b31d8cd665a1fc77 + +When a peer nic is still attached to the vdpa backend, it is too early to free +up the vhost-net and vdpa structures. If these structures are freed here, then +QEMU crashes when the guest is being shut down. The following call chain +would result in an assertion failure since the pointer returned from +vhost_vdpa_get_vhost_net() would be NULL: + +do_vm_stop() -> vm_state_notify() -> virtio_set_status() -> +virtio_net_vhost_status() -> get_vhost_net(). + +Therefore, we defer freeing up the structures until at guest shutdown +time when qemu_cleanup() calls net_cleanup() which then calls +qemu_del_net_client() which would eventually call vhost_vdpa_cleanup() +again to free up the structures. This time, the loop in net_cleanup() +ensures that vhost_vdpa_cleanup() will be called one last time when +all the peer nics are detached and freed. + +All unit tests pass with this change. + +CC: imammedo@redhat.com +CC: jusual@redhat.com +CC: mst@redhat.com +Fixes: CVE-2023-3301 +Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2128929 +Signed-off-by: Ani Sinha +Message-Id: <20230619065209.442185-1-anisinha@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit a0d7215e339b61c7d7a7b3fcf754954d80d93eb8) +Signed-off-by: Michael Tokarev +(Mjt: context change for stable-7.2) +(cherry picked from commit 3d12598b74ed4bcc6db8b50818a95c4b770d4487) +--- + net/vhost-vdpa.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 7d9c4ea09d..1b4fec59a2 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -180,6 +180,14 @@ static void vhost_vdpa_cleanup(NetClientState *nc) + VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); + struct vhost_dev *dev = &s->vhost_net->dev; + ++ /* ++ * If a peer NIC is attached, do not cleanup anything. ++ * Cleanup will happen as a part of qemu_cleanup() -> net_cleanup() ++ * when the guest is shutting down. ++ */ ++ if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_NIC) { ++ return; ++ } + qemu_vfree(s->cvq_cmd_out_buffer); + qemu_vfree(s->status); + if (dev->vq_index + dev->nvqs == dev->vq_index_end) { +-- +2.39.3 + diff --git a/SPECS/qemu-kvm.spec b/SPECS/qemu-kvm.spec index acd9de7..848722b 100644 --- a/SPECS/qemu-kvm.spec +++ b/SPECS/qemu-kvm.spec @@ -148,7 +148,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 7.2.0 -Release: 14%{?rcrel}%{?dist}%{?cc_suffix}.3 +Release: 14%{?rcrel}%{?dist}%{?cc_suffix}.5 # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -422,6 +422,18 @@ Patch136: kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch Patch137: kvm-vdpa-stop-all-svq-on-device-deletion.patch # For bz#2221219 - query-stats QMP command interrupts vcpus, the Max Latencies could be more than 100us (rhel 9.3.0 clone) [rhel-9.2.0.z] Patch138: kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch +# For bz#2211923 - [qemu-kvm] rhel guest failed boot with multi disks on error Failed to start udev Wait for Complete Device Initialization [rhel-9.2.0.z] +Patch139: kvm-aio-posix-fix-race-between-epoll-upgrade-and-aio_set.patch +# For bz#2227721 - [rhel9.2] hotplug/hotunplug mlx vdpa device to the occupied addr port, then qemu core dump occurs after shutdown guest [rhel-9.2.0.z] +Patch140: kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch +# For RHEL-1060 - [vhost-vdpa][rhel 9.2]Boot a guest with "x-svq=on", then hot unplug this nic, guest trigger qemu core dump [rhel-9.2.0.z] +Patch141: kvm-vdpa-net-move-iova-tree-creation-from-init-to-start.patch +# For RHEL-1060 - [vhost-vdpa][rhel 9.2]Boot a guest with "x-svq=on", then hot unplug this nic, guest trigger qemu core dump [rhel-9.2.0.z] +Patch142: kvm-vdpa-reorder-vhost_vdpa_net_cvq_cmd_page_len-functio.patch +# For RHEL-1060 - [vhost-vdpa][rhel 9.2]Boot a guest with "x-svq=on", then hot unplug this nic, guest trigger qemu core dump [rhel-9.2.0.z] +Patch143: kvm-vdpa-map-shadow-vrings-with-MAP_SHARED.patch +# For bz#2216503 - CVE-2023-3354 qemu-kvm: QEMU: VNC: improper I/O watch removal in TLS handshake can lead to remote unauthenticated denial of service [rhel-9.2.0.z] +Patch144: kvm-io-remove-io-watch-if-TLS-channel-is-closed-during-h.patch %if %{have_clang} BuildRequires: clang @@ -1452,6 +1464,24 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon Aug 21 2023 Miroslav Rezanina - 7.2.0-14.el9_2.5 +- kvm-vdpa-net-move-iova-tree-creation-from-init-to-start.patch [RHEL-1060] +- kvm-vdpa-reorder-vhost_vdpa_net_cvq_cmd_page_len-functio.patch [RHEL-1060] +- kvm-vdpa-map-shadow-vrings-with-MAP_SHARED.patch [RHEL-1060] +- kvm-io-remove-io-watch-if-TLS-channel-is-closed-during-h.patch [bz#2216503] +- Resolves: RHEL-1060 + ([vhost-vdpa][rhel 9.2]Boot a guest with "x-svq=on", then hot unplug this nic, guest trigger qemu core dump [rhel-9.2.0.z]) +- Resolves: bz#2216503 + (CVE-2023-3354 qemu-kvm: QEMU: VNC: improper I/O watch removal in TLS handshake can lead to remote unauthenticated denial of service [rhel-9.2.0.z]) + +* Mon Aug 07 2023 Miroslav Rezanina - 7.2.0-14.el9_2.4 +- kvm-aio-posix-fix-race-between-epoll-upgrade-and-aio_set.patch [bz#2211923] +- kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch [bz#2227721] +- Resolves: bz#2211923 + ([qemu-kvm] rhel guest failed boot with multi disks on error Failed to start udev Wait for Complete Device Initialization [rhel-9.2.0.z]) +- Resolves: bz#2227721 + ([rhel9.2] hotplug/hotunplug mlx vdpa device to the occupied addr port, then qemu core dump occurs after shutdown guest [rhel-9.2.0.z]) + * Tue Jul 11 2023 Miroslav Rezanina - 7.2.0-14.el9_2.3 - kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch [bz#2221219] - Resolves: bz#2221219