You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
qemu-kvm/SOURCES/kvm-virtiofsd-Add-basic-sup...

178 lines
5.9 KiB

From 93dfffa3c354c87aae712f5d6c86be5b26d975d4 Mon Sep 17 00:00:00 2001
From: Greg Kurz <groug@kaod.org>
Date: Tue, 15 Feb 2022 19:15:29 +0100
Subject: [PATCH 01/22] virtiofsd: Add basic support for FUSE_SYNCFS request
RH-Author: German Maglione <None>
RH-MergeRequest: 278: virtiofsd: Add basic support for FUSE_SYNCFS request
RH-Bugzilla: 2196880
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [1/1] 7a0cbe70d97f13e74b2116218fccd9f79d335752
Honor the expected behavior of syncfs() to synchronously flush all data
and metadata to disk on linux systems.
If virtiofsd is started with '-o announce_submounts', the client is
expected to send a FUSE_SYNCFS request for each individual submount.
In this case, we just create a new file descriptor on the submount
inode with lo_inode_open(), call syncfs() on it and close it. The
intermediary file is needed because O_PATH descriptors aren't
backed by an actual file and syncfs() would fail with EBADF.
If virtiofsd is started without '-o announce_submounts' or if the
client doesn't have the FUSE_CAP_SUBMOUNTS capability, the client
only sends a single FUSE_SYNCFS request for the root inode. The
server would thus need to track submounts internally and call
syncfs() on each of them. This will be implemented later.
Note that syncfs() might suffer from a time penalty if the submounts
are being hammered by some unrelated workload on the host. The only
solution to prevent that is to avoid shared mounts.
Signed-off-by: Greg Kurz <groug@kaod.org>
Message-Id: <20220215181529.164070-2-groug@kaod.org>
Reviewed-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
(cherry picked from commit 45b04ef48dbbeb18d93c2631bf5584ac493de749)
Signed-off-by: German Maglione <gmaglione@redhat.com>
---
tools/virtiofsd/fuse_lowlevel.c | 11 +++++++
tools/virtiofsd/fuse_lowlevel.h | 13 ++++++++
tools/virtiofsd/passthrough_ll.c | 44 +++++++++++++++++++++++++++
tools/virtiofsd/passthrough_seccomp.c | 1 +
4 files changed, 69 insertions(+)
diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
index 5d431a7038..57f928463a 100644
--- a/tools/virtiofsd/fuse_lowlevel.c
+++ b/tools/virtiofsd/fuse_lowlevel.c
@@ -1876,6 +1876,16 @@ static void do_lseek(fuse_req_t req, fuse_ino_t nodeid,
}
}
+static void do_syncfs(fuse_req_t req, fuse_ino_t nodeid,
+ struct fuse_mbuf_iter *iter)
+{
+ if (req->se->op.syncfs) {
+ req->se->op.syncfs(req, nodeid);
+ } else {
+ fuse_reply_err(req, ENOSYS);
+ }
+}
+
static void do_init(fuse_req_t req, fuse_ino_t nodeid,
struct fuse_mbuf_iter *iter)
{
@@ -2282,6 +2292,7 @@ static struct {
[FUSE_RENAME2] = { do_rename2, "RENAME2" },
[FUSE_COPY_FILE_RANGE] = { do_copy_file_range, "COPY_FILE_RANGE" },
[FUSE_LSEEK] = { do_lseek, "LSEEK" },
+ [FUSE_SYNCFS] = { do_syncfs, "SYNCFS" },
};
#define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0]))
diff --git a/tools/virtiofsd/fuse_lowlevel.h b/tools/virtiofsd/fuse_lowlevel.h
index c55c0ca2fc..b889dae4de 100644
--- a/tools/virtiofsd/fuse_lowlevel.h
+++ b/tools/virtiofsd/fuse_lowlevel.h
@@ -1226,6 +1226,19 @@ struct fuse_lowlevel_ops {
*/
void (*lseek)(fuse_req_t req, fuse_ino_t ino, off_t off, int whence,
struct fuse_file_info *fi);
+
+ /**
+ * Synchronize file system content
+ *
+ * If this request is answered with an error code of ENOSYS,
+ * this is treated as success and future calls to syncfs() will
+ * succeed automatically without being sent to the filesystem
+ * process.
+ *
+ * @param req request handle
+ * @param ino the inode number
+ */
+ void (*syncfs)(fuse_req_t req, fuse_ino_t ino);
};
/**
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 523d8fbe1e..00ccb90a72 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -3357,6 +3357,49 @@ static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence,
}
}
+static int lo_do_syncfs(struct lo_data *lo, struct lo_inode *inode)
+{
+ int fd, ret = 0;
+
+ fuse_log(FUSE_LOG_DEBUG, "lo_do_syncfs(ino=%" PRIu64 ")\n",
+ inode->fuse_ino);
+
+ fd = lo_inode_open(lo, inode, O_RDONLY);
+ if (fd < 0) {
+ return -fd;
+ }
+
+ if (syncfs(fd) < 0) {
+ ret = errno;
+ }
+
+ close(fd);
+ return ret;
+}
+
+static void lo_syncfs(fuse_req_t req, fuse_ino_t ino)
+{
+ struct lo_data *lo = lo_data(req);
+ struct lo_inode *inode = lo_inode(req, ino);
+ int err;
+
+ if (!inode) {
+ fuse_reply_err(req, EBADF);
+ return;
+ }
+
+ err = lo_do_syncfs(lo, inode);
+ lo_inode_put(lo, &inode);
+
+ /*
+ * If submounts aren't announced, the client only sends a request to
+ * sync the root inode. TODO: Track submounts internally and iterate
+ * over them as well.
+ */
+
+ fuse_reply_err(req, err);
+}
+
static void lo_destroy(void *userdata)
{
struct lo_data *lo = (struct lo_data *)userdata;
@@ -3417,6 +3460,7 @@ static struct fuse_lowlevel_ops lo_oper = {
.copy_file_range = lo_copy_file_range,
#endif
.lseek = lo_lseek,
+ .syncfs = lo_syncfs,
.destroy = lo_destroy,
};
diff --git a/tools/virtiofsd/passthrough_seccomp.c b/tools/virtiofsd/passthrough_seccomp.c
index a3ce9f898d..3e9d6181dc 100644
--- a/tools/virtiofsd/passthrough_seccomp.c
+++ b/tools/virtiofsd/passthrough_seccomp.c
@@ -108,6 +108,7 @@ static const int syscall_allowlist[] = {
SCMP_SYS(set_robust_list),
SCMP_SYS(setxattr),
SCMP_SYS(symlinkat),
+ SCMP_SYS(syncfs),
SCMP_SYS(time), /* Rarely needed, except on static builds */
SCMP_SYS(tgkill),
SCMP_SYS(unlinkat),
--
2.37.3