forked from rpms/qemu-kvm
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
218 lines
8.1 KiB
218 lines
8.1 KiB
9 months ago
|
From 25cce5df341861e8ba8ec57722558e2dee3ce56a Mon Sep 17 00:00:00 2001
|
||
|
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||
|
Date: Thu, 14 Sep 2023 10:00:58 -0400
|
||
|
Subject: [PATCH 073/101] block/file-posix: set up Linux AIO and io_uring in
|
||
|
the current thread
|
||
|
|
||
|
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||
|
RH-MergeRequest: 214: Remove AioContext lock
|
||
|
RH-Jira: RHEL-15965
|
||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||
|
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||
|
RH-Commit: [4/26] 74c7daf805daefe706378308c3afeb28d861164b (kmwolf/centos-qemu-kvm)
|
||
|
|
||
|
The file-posix block driver currently only sets up Linux AIO and
|
||
|
io_uring in the BDS's AioContext. In the multi-queue block layer we must
|
||
|
be able to submit I/O requests in AioContexts that do not have Linux AIO
|
||
|
and io_uring set up yet since any thread can call into the block driver.
|
||
|
|
||
|
Set up Linux AIO and io_uring for the current AioContext during request
|
||
|
submission. We lose the ability to return an error from
|
||
|
.bdrv_file_open() when Linux AIO or io_uring setup fails (e.g. due to
|
||
|
resource limits). Instead, the user only gets warnings and we fall back
|
||
|
to aio=threads. This is still better than a fatal error after startup.
|
||
|
|
||
|
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||
|
Message-ID: <20230914140101.1065008-2-stefanha@redhat.com>
|
||
|
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||
|
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||
|
---
|
||
|
block/file-posix.c | 103 ++++++++++++++++++++++-----------------------
|
||
|
1 file changed, 51 insertions(+), 52 deletions(-)
|
||
|
|
||
|
diff --git a/block/file-posix.c b/block/file-posix.c
|
||
|
index b862406c71..35684f7e21 100644
|
||
|
--- a/block/file-posix.c
|
||
|
+++ b/block/file-posix.c
|
||
|
@@ -712,17 +712,11 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
|
||
|
|
||
|
#ifdef CONFIG_LINUX_AIO
|
||
|
/* Currently Linux does AIO only for files opened with O_DIRECT */
|
||
|
- if (s->use_linux_aio) {
|
||
|
- if (!(s->open_flags & O_DIRECT)) {
|
||
|
- error_setg(errp, "aio=native was specified, but it requires "
|
||
|
- "cache.direct=on, which was not specified.");
|
||
|
- ret = -EINVAL;
|
||
|
- goto fail;
|
||
|
- }
|
||
|
- if (!aio_setup_linux_aio(bdrv_get_aio_context(bs), errp)) {
|
||
|
- error_prepend(errp, "Unable to use native AIO: ");
|
||
|
- goto fail;
|
||
|
- }
|
||
|
+ if (s->use_linux_aio && !(s->open_flags & O_DIRECT)) {
|
||
|
+ error_setg(errp, "aio=native was specified, but it requires "
|
||
|
+ "cache.direct=on, which was not specified.");
|
||
|
+ ret = -EINVAL;
|
||
|
+ goto fail;
|
||
|
}
|
||
|
#else
|
||
|
if (s->use_linux_aio) {
|
||
|
@@ -733,14 +727,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
|
||
|
}
|
||
|
#endif /* !defined(CONFIG_LINUX_AIO) */
|
||
|
|
||
|
-#ifdef CONFIG_LINUX_IO_URING
|
||
|
- if (s->use_linux_io_uring) {
|
||
|
- if (!aio_setup_linux_io_uring(bdrv_get_aio_context(bs), errp)) {
|
||
|
- error_prepend(errp, "Unable to use io_uring: ");
|
||
|
- goto fail;
|
||
|
- }
|
||
|
- }
|
||
|
-#else
|
||
|
+#ifndef CONFIG_LINUX_IO_URING
|
||
|
if (s->use_linux_io_uring) {
|
||
|
error_setg(errp, "aio=io_uring was specified, but is not supported "
|
||
|
"in this build.");
|
||
|
@@ -2444,6 +2431,48 @@ static bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
+#ifdef CONFIG_LINUX_IO_URING
|
||
|
+static inline bool raw_check_linux_io_uring(BDRVRawState *s)
|
||
|
+{
|
||
|
+ Error *local_err = NULL;
|
||
|
+ AioContext *ctx;
|
||
|
+
|
||
|
+ if (!s->use_linux_io_uring) {
|
||
|
+ return false;
|
||
|
+ }
|
||
|
+
|
||
|
+ ctx = qemu_get_current_aio_context();
|
||
|
+ if (unlikely(!aio_setup_linux_io_uring(ctx, &local_err))) {
|
||
|
+ error_reportf_err(local_err, "Unable to use linux io_uring, "
|
||
|
+ "falling back to thread pool: ");
|
||
|
+ s->use_linux_io_uring = false;
|
||
|
+ return false;
|
||
|
+ }
|
||
|
+ return true;
|
||
|
+}
|
||
|
+#endif
|
||
|
+
|
||
|
+#ifdef CONFIG_LINUX_AIO
|
||
|
+static inline bool raw_check_linux_aio(BDRVRawState *s)
|
||
|
+{
|
||
|
+ Error *local_err = NULL;
|
||
|
+ AioContext *ctx;
|
||
|
+
|
||
|
+ if (!s->use_linux_aio) {
|
||
|
+ return false;
|
||
|
+ }
|
||
|
+
|
||
|
+ ctx = qemu_get_current_aio_context();
|
||
|
+ if (unlikely(!aio_setup_linux_aio(ctx, &local_err))) {
|
||
|
+ error_reportf_err(local_err, "Unable to use Linux AIO, "
|
||
|
+ "falling back to thread pool: ");
|
||
|
+ s->use_linux_aio = false;
|
||
|
+ return false;
|
||
|
+ }
|
||
|
+ return true;
|
||
|
+}
|
||
|
+#endif
|
||
|
+
|
||
|
static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
|
||
|
uint64_t bytes, QEMUIOVector *qiov, int type)
|
||
|
{
|
||
|
@@ -2474,13 +2503,13 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
|
||
|
if (s->needs_alignment && !bdrv_qiov_is_aligned(bs, qiov)) {
|
||
|
type |= QEMU_AIO_MISALIGNED;
|
||
|
#ifdef CONFIG_LINUX_IO_URING
|
||
|
- } else if (s->use_linux_io_uring) {
|
||
|
+ } else if (raw_check_linux_io_uring(s)) {
|
||
|
assert(qiov->size == bytes);
|
||
|
ret = luring_co_submit(bs, s->fd, offset, qiov, type);
|
||
|
goto out;
|
||
|
#endif
|
||
|
#ifdef CONFIG_LINUX_AIO
|
||
|
- } else if (s->use_linux_aio) {
|
||
|
+ } else if (raw_check_linux_aio(s)) {
|
||
|
assert(qiov->size == bytes);
|
||
|
ret = laio_co_submit(s->fd, offset, qiov, type,
|
||
|
s->aio_max_batch);
|
||
|
@@ -2567,39 +2596,13 @@ static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)
|
||
|
};
|
||
|
|
||
|
#ifdef CONFIG_LINUX_IO_URING
|
||
|
- if (s->use_linux_io_uring) {
|
||
|
+ if (raw_check_linux_io_uring(s)) {
|
||
|
return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH);
|
||
|
}
|
||
|
#endif
|
||
|
return raw_thread_pool_submit(handle_aiocb_flush, &acb);
|
||
|
}
|
||
|
|
||
|
-static void raw_aio_attach_aio_context(BlockDriverState *bs,
|
||
|
- AioContext *new_context)
|
||
|
-{
|
||
|
- BDRVRawState __attribute__((unused)) *s = bs->opaque;
|
||
|
-#ifdef CONFIG_LINUX_AIO
|
||
|
- if (s->use_linux_aio) {
|
||
|
- Error *local_err = NULL;
|
||
|
- if (!aio_setup_linux_aio(new_context, &local_err)) {
|
||
|
- error_reportf_err(local_err, "Unable to use native AIO, "
|
||
|
- "falling back to thread pool: ");
|
||
|
- s->use_linux_aio = false;
|
||
|
- }
|
||
|
- }
|
||
|
-#endif
|
||
|
-#ifdef CONFIG_LINUX_IO_URING
|
||
|
- if (s->use_linux_io_uring) {
|
||
|
- Error *local_err = NULL;
|
||
|
- if (!aio_setup_linux_io_uring(new_context, &local_err)) {
|
||
|
- error_reportf_err(local_err, "Unable to use linux io_uring, "
|
||
|
- "falling back to thread pool: ");
|
||
|
- s->use_linux_io_uring = false;
|
||
|
- }
|
||
|
- }
|
||
|
-#endif
|
||
|
-}
|
||
|
-
|
||
|
static void raw_close(BlockDriverState *bs)
|
||
|
{
|
||
|
BDRVRawState *s = bs->opaque;
|
||
|
@@ -3896,7 +3899,6 @@ BlockDriver bdrv_file = {
|
||
|
.bdrv_co_copy_range_from = raw_co_copy_range_from,
|
||
|
.bdrv_co_copy_range_to = raw_co_copy_range_to,
|
||
|
.bdrv_refresh_limits = raw_refresh_limits,
|
||
|
- .bdrv_attach_aio_context = raw_aio_attach_aio_context,
|
||
|
|
||
|
.bdrv_co_truncate = raw_co_truncate,
|
||
|
.bdrv_co_getlength = raw_co_getlength,
|
||
|
@@ -4266,7 +4268,6 @@ static BlockDriver bdrv_host_device = {
|
||
|
.bdrv_co_copy_range_from = raw_co_copy_range_from,
|
||
|
.bdrv_co_copy_range_to = raw_co_copy_range_to,
|
||
|
.bdrv_refresh_limits = raw_refresh_limits,
|
||
|
- .bdrv_attach_aio_context = raw_aio_attach_aio_context,
|
||
|
|
||
|
.bdrv_co_truncate = raw_co_truncate,
|
||
|
.bdrv_co_getlength = raw_co_getlength,
|
||
|
@@ -4402,7 +4403,6 @@ static BlockDriver bdrv_host_cdrom = {
|
||
|
.bdrv_co_pwritev = raw_co_pwritev,
|
||
|
.bdrv_co_flush_to_disk = raw_co_flush_to_disk,
|
||
|
.bdrv_refresh_limits = cdrom_refresh_limits,
|
||
|
- .bdrv_attach_aio_context = raw_aio_attach_aio_context,
|
||
|
|
||
|
.bdrv_co_truncate = raw_co_truncate,
|
||
|
.bdrv_co_getlength = raw_co_getlength,
|
||
|
@@ -4528,7 +4528,6 @@ static BlockDriver bdrv_host_cdrom = {
|
||
|
.bdrv_co_pwritev = raw_co_pwritev,
|
||
|
.bdrv_co_flush_to_disk = raw_co_flush_to_disk,
|
||
|
.bdrv_refresh_limits = cdrom_refresh_limits,
|
||
|
- .bdrv_attach_aio_context = raw_aio_attach_aio_context,
|
||
|
|
||
|
.bdrv_co_truncate = raw_co_truncate,
|
||
|
.bdrv_co_getlength = raw_co_getlength,
|
||
|
--
|
||
|
2.39.3
|
||
|
|