Blame SOURCES/0027-libmount-improve-mountinfo-reliability.patch

22a545
From 32fe4f1dd8fbc104bd848e24de613122947f095a Mon Sep 17 00:00:00 2001
22a545
From: Karel Zak <kzak@redhat.com>
22a545
Date: Wed, 28 Aug 2019 15:47:16 +0200
22a545
Subject: [PATCH] libmount: improve mountinfo reliability
22a545
MIME-Version: 1.0
22a545
Content-Type: text/plain; charset=UTF-8
22a545
Content-Transfer-Encoding: 8bit
22a545
22a545
The standard way we read the mount table is not reliable because
22a545
during the read() syscalls the table may be modified by some other
22a545
process. Changes in the table can be detected by a poll()
22a545
event, and in this case it seems better to lseek to the beginning of the file
22a545
and read it again. It's expensive, but better than races...
22a545
22a545
This patch does not modify the mountinfo parser, but it reads the whole file into
22a545
memory (by read()+poll()) and then it creates a memory stream
22a545
from the buffer and uses it rather than a regular file stream.
22a545
22a545
It means the parser can still be used for normal files
22a545
(e.g. fstab) as well as for mountinfo and it's also portable to
22a545
systems where for some reason there is no fmemopen().
22a545
22a545
Addresses: https://github.com/systemd/systemd/issues/10872
22a545
Addresses: https://bugzilla.redhat.com/show_bug.cgi?id=1751447
22a545
Upstream: http://github.com/karelzak/util-linux/commit/e4925f591c1bfb83719418b56b952830d15b77eb
22a545
Upstream: http://github.com/karelzak/util-linux/commit/ee551c909f95437fd9fcd162f398c069d0ce9720
22a545
Reported-by: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
22a545
Signed-off-by: Karel Zak <kzak@redhat.com>
22a545
---
22a545
 configure.ac             |   1 +
22a545
 libmount/src/mountP.h    |   2 +
22a545
 libmount/src/tab_parse.c |  87 +++++++++++++++++----
22a545
 libmount/src/utils.c     | 158 +++++++++++++++++++++++++++++++++++++++
22a545
 4 files changed, 233 insertions(+), 15 deletions(-)
22a545
22a545
diff --git a/configure.ac b/configure.ac
22a545
index a05a294ad..245004890 100644
22a545
--- a/configure.ac
22a545
+++ b/configure.ac
22a545
@@ -456,6 +456,7 @@ AC_CHECK_FUNCS([ \
22a545
 	err \
22a545
 	errx \
22a545
 	explicit_bzero \
22a545
+	fmemopen \
22a545
 	fsync \
22a545
 	utimensat \
22a545
 	getdomainname \
22a545
diff --git a/libmount/src/mountP.h b/libmount/src/mountP.h
22a545
index d47d26442..52a238ef3 100644
22a545
--- a/libmount/src/mountP.h
22a545
+++ b/libmount/src/mountP.h
22a545
@@ -93,6 +93,7 @@ extern int mnt_valid_tagname(const char *tagname);
22a545
 extern int append_string(char **a, const char *b);
22a545
 
22a545
 extern const char *mnt_statfs_get_fstype(struct statfs *vfs);
22a545
+extern int is_procfs_fd(int fd);
22a545
 extern int is_file_empty(const char *name);
22a545
 
22a545
 extern int mnt_is_readonly(const char *path)
22a545
@@ -118,6 +119,7 @@ extern void mnt_free_filesystems(char **filesystems);
22a545
 extern char *mnt_get_kernel_cmdline_option(const char *name);
22a545
 extern int mnt_guess_system_root(dev_t devno, struct libmnt_cache *cache, char **path);
22a545
 extern int mnt_stat_mountpoint(const char *target, struct stat *st);
22a545
+extern FILE *mnt_get_procfs_memstream(int fd, char **membuf);
22a545
 
22a545
 /* tab.c */
22a545
 extern int is_mountinfo(struct libmnt_table *tb);
22a545
diff --git a/libmount/src/tab_parse.c b/libmount/src/tab_parse.c
22a545
index 3ed84ebc2..10fc68279 100644
22a545
--- a/libmount/src/tab_parse.c
22a545
+++ b/libmount/src/tab_parse.c
22a545
@@ -603,15 +603,7 @@ static int kernel_fs_postparse(struct libmnt_table *tb,
22a545
 	return rc;
22a545
 }
22a545
 
22a545
-/**
22a545
- * mnt_table_parse_stream:
22a545
- * @tb: tab pointer
22a545
- * @f: file stream
22a545
- * @filename: filename used for debug and error messages
22a545
- *
22a545
- * Returns: 0 on success, negative number in case of error.
22a545
- */
22a545
-int mnt_table_parse_stream(struct libmnt_table *tb, FILE *f, const char *filename)
22a545
+static int __table_parse_stream(struct libmnt_table *tb, FILE *f, const char *filename)
22a545
 {
22a545
 	int rc = -1;
22a545
 	int flags = 0;
22a545
@@ -685,6 +677,40 @@ err:
22a545
 	return rc;
22a545
 }
22a545
 
22a545
+/**
22a545
+ * mnt_table_parse_stream:
22a545
+ * @tb: tab pointer
22a545
+ * @f: file stream
22a545
+ * @filename: filename used for debug and error messages
22a545
+ *
22a545
+ * Returns: 0 on success, negative number in case of error.
22a545
+ */
22a545
+int mnt_table_parse_stream(struct libmnt_table *tb, FILE *f, const char *filename)
22a545
+{
22a545
+	int fd, rc;
22a545
+	FILE *memf = NULL;
22a545
+	char *membuf = NULL;
22a545
+
22a545
+	/*
22a545
+	 * For /proc/#/{mountinfo,mount} we read all file to memory and use it
22a545
+	 * as memory stream. For more details see mnt_read_procfs_file().
22a545
+	 */
22a545
+	if ((fd = fileno(f)) >= 0
22a545
+	    && (tb->fmt == MNT_FMT_GUESS ||
22a545
+		tb->fmt == MNT_FMT_MOUNTINFO ||
22a545
+		tb->fmt == MNT_FMT_MTAB)
22a545
+	    && is_procfs_fd(fd)
22a545
+	    && (memf = mnt_get_procfs_memstream(fd, &membuf))) {
22a545
+
22a545
+		rc = __table_parse_stream(tb, memf, filename);
22a545
+		fclose(memf);
22a545
+		free(membuf);
22a545
+	} else
22a545
+		rc = __table_parse_stream(tb, f, filename);
22a545
+
22a545
+	return rc;
22a545
+}
22a545
+
22a545
 /**
22a545
  * mnt_table_parse_file:
22a545
  * @tb: tab pointer
22a545
@@ -700,18 +726,49 @@ err:
22a545
 int mnt_table_parse_file(struct libmnt_table *tb, const char *filename)
22a545
 {
22a545
 	FILE *f;
22a545
-	int rc;
22a545
+	int rc, fd = -1;
22a545
 
22a545
 	if (!filename || !tb)
22a545
 		return -EINVAL;
22a545
 
22a545
-	f = fopen(filename, "r" UL_CLOEXECSTR);
22a545
+	/*
22a545
+	 * Try to use read()+poll() to realiably read all
22a545
+	 * /proc/#/{mount,mountinfo} file to memory
22a545
+	 */
22a545
+	if (tb->fmt != MNT_FMT_SWAPS
22a545
+	    && strncmp(filename, "/proc/", 6) == 0) {
22a545
+
22a545
+		FILE *memf;
22a545
+		char *membuf = NULL;
22a545
+
22a545
+		fd = open(filename, O_RDONLY|O_CLOEXEC);
22a545
+		if (fd < 0) {
22a545
+			rc = -errno;
22a545
+			goto done;
22a545
+		}
22a545
+		memf = mnt_get_procfs_memstream(fd, &membuf);
22a545
+		if (memf) {
22a545
+			rc = __table_parse_stream(tb, memf, filename);
22a545
+
22a545
+			fclose(memf);
22a545
+			free(membuf);
22a545
+			close(fd);
22a545
+			goto done;
22a545
+		}
22a545
+		/* else fallback to fopen/fdopen() */
22a545
+	}
22a545
+
22a545
+	if (fd >= 0)
22a545
+		f = fdopen(fd, "r" UL_CLOEXECSTR);
22a545
+	else
22a545
+		f = fopen(filename, "r" UL_CLOEXECSTR);
22a545
+
22a545
 	if (f) {
22a545
-		rc = mnt_table_parse_stream(tb, f, filename);
22a545
+		rc = __table_parse_stream(tb, f, filename);
22a545
 		fclose(f);
22a545
 	} else
22a545
 		rc = -errno;
22a545
-
22a545
+done:
22a545
 	DBG(TAB, ul_debugobj(tb, "parsing done [filename=%s, rc=%d]", filename, rc));
22a545
 	return rc;
22a545
 }
22a545
@@ -768,7 +825,7 @@ static int __mnt_table_parse_dir(struct libmnt_table *tb, const char *dirname)
22a545
 
22a545
 		f = fopen_at(dd, d->d_name, O_RDONLY|O_CLOEXEC, "r" UL_CLOEXECSTR);
22a545
 		if (f) {
22a545
-			mnt_table_parse_stream(tb, f, d->d_name);
22a545
+			__table_parse_stream(tb, f, d->d_name);
22a545
 			fclose(f);
22a545
 		}
22a545
 	}
22a545
@@ -809,7 +866,7 @@ static int __mnt_table_parse_dir(struct libmnt_table *tb, const char *dirname)
22a545
 		f = fopen_at(dirfd(dir), d->d_name,
22a545
 				O_RDONLY|O_CLOEXEC, "r" UL_CLOEXECSTR);
22a545
 		if (f) {
22a545
-			mnt_table_parse_stream(tb, f, d->d_name);
22a545
+			__table_parse_stream(tb, f, d->d_name);
22a545
 			fclose(f);
22a545
 		}
22a545
 	}
22a545
diff --git a/libmount/src/utils.c b/libmount/src/utils.c
22a545
index c36187c07..f7d85d124 100644
22a545
--- a/libmount/src/utils.c
22a545
+++ b/libmount/src/utils.c
22a545
@@ -14,6 +14,7 @@
22a545
 #include <fcntl.h>
22a545
 #include <pwd.h>
22a545
 #include <grp.h>
22a545
+#include <poll.h>
22a545
 #include <blkid.h>
22a545
 
22a545
 #include "strutils.h"
22a545
@@ -408,6 +409,12 @@ const char *mnt_statfs_get_fstype(struct statfs *vfs)
22a545
 	return NULL;
22a545
 }
22a545
 
22a545
+int is_procfs_fd(int fd)
22a545
+{
22a545
+	struct statfs sfs;
22a545
+
22a545
+	return fstatfs(fd, &sfs) == 0 && sfs.f_type == STATFS_PROC_MAGIC;
22a545
+}
22a545
 
22a545
 /**
22a545
  * mnt_match_fstype:
22a545
@@ -1117,8 +1124,158 @@ done:
22a545
 	return 1;
22a545
 }
22a545
 
22a545
+#if defined(HAVE_FMEMOPEN) || defined(TEST_PROGRAM)
22a545
+
22a545
+/*
22a545
+ * This function tries to minimize possible races when we read
22a545
+ * /proc/#/{mountinfo,mount} files.
22a545
+ *
22a545
+ * The idea is to minimize number of read()s and check by poll() that during
22a545
+ * the read the mount table has not been modified. If yes, than re-read it
22a545
+ * (with some limitations to avoid never ending loop).
22a545
+ *
22a545
+ * Returns: <0 error, 0 success, 1 too many attempts
22a545
+ */
22a545
+static int read_procfs_file(int fd, char **buf, size_t *bufsiz)
22a545
+{
22a545
+	size_t bufmax = 0;
22a545
+	int rc = 0, tries = 0, ninters = 0;
22a545
+	char *bufptr = NULL;;
22a545
+
22a545
+	assert(buf);
22a545
+	assert(bufsiz);
22a545
+
22a545
+	*bufsiz = 0;
22a545
+	*buf = NULL;
22a545
+
22a545
+	do {
22a545
+		ssize_t ret;
22a545
+
22a545
+		if (!bufptr || bufmax == *bufsiz) {
22a545
+			char *tmp;
22a545
+
22a545
+			bufmax = bufmax ? bufmax * 2 : (16 * 1024);
22a545
+			tmp = realloc(*buf, bufmax);
22a545
+			if (!tmp)
22a545
+				break;
22a545
+			*buf = tmp;
22a545
+			bufptr = tmp + *bufsiz;
22a545
+		}
22a545
+
22a545
+		errno = 0;
22a545
+		ret = read(fd, bufptr, bufmax - *bufsiz);
22a545
+
22a545
+		if (ret < 0) {
22a545
+			/* error */
22a545
+			if ((errno == EAGAIN || errno == EINTR) && (ninters++ < 5)) {
22a545
+				xusleep(200000);
22a545
+				continue;
22a545
+			}
22a545
+			break;
22a545
+
22a545
+		} else if (ret > 0) {
22a545
+			/* success -- verify no event during read */
22a545
+			struct pollfd fds[] = {
22a545
+				{ .fd = fd, .events = POLLPRI }
22a545
+			};
22a545
+
22a545
+			rc = poll(fds, 1, 0);
22a545
+			if (rc < 0)
22a545
+				break;		/* poll() error */
22a545
+			if (rc > 0) {
22a545
+				/* event -- read all again */
22a545
+				if (lseek(fd, 0, SEEK_SET) != 0)
22a545
+					break;
22a545
+				*bufsiz = 0;
22a545
+				bufptr = *buf;
22a545
+				tries++;
22a545
+
22a545
+				if (tries > 10)
22a545
+					/* busy system? -- wait */
22a545
+					xusleep(10000);
22a545
+				continue;
22a545
+			}
22a545
+
22a545
+			/* successful read() without active poll() */
22a545
+			(*bufsiz) += (size_t) ret;
22a545
+			bufptr += ret;
22a545
+			tries = ninters = 0;
22a545
+		} else {
22a545
+			/* end-of-file */
22a545
+			goto success;
22a545
+		}
22a545
+	} while (tries <= 100);
22a545
+
22a545
+	rc = errno ? -errno : 1;
22a545
+	free(*buf);
22a545
+	return rc;
22a545
+
22a545
+success:
22a545
+	return 0;
22a545
+}
22a545
+
22a545
+/*
22a545
+ * Create FILE stream for data from read_procfs_file()
22a545
+ */
22a545
+FILE *mnt_get_procfs_memstream(int fd, char **membuf)
22a545
+{
22a545
+	FILE *memf;
22a545
+	size_t sz = 0;
22a545
+	off_t cur;
22a545
+
22a545
+	/* in case of error, rewind to the original position */
22a545
+	cur = lseek(fd, 0, SEEK_CUR);
22a545
+
22a545
+	if (read_procfs_file(fd, membuf, &sz) == 0
22a545
+	    && sz > 0
22a545
+	    && (memf = fmemopen(*membuf, sz, "r")))
22a545
+		return memf;
22a545
+
22a545
+	/* error */
22a545
+	lseek(fd, cur, SEEK_SET);
22a545
+	return NULL;
22a545
+}
22a545
+#else
22a545
+FILE *mnt_get_procfs_memstream(int fd __attribute((__unused__)),
22a545
+		               char **membuf __attribute((__unused__)))
22a545
+{
22a545
+	return NULL;
22a545
+}
22a545
+#endif /* HAVE_FMEMOPEN */
22a545
+
22a545
 
22a545
 #ifdef TEST_PROGRAM
22a545
+static int test_proc_read(struct libmnt_test *ts, int argc, char *argv[])
22a545
+{
22a545
+	char *buf = NULL;
22a545
+	char *filename = argv[1];
22a545
+	size_t bufsiz = 0;
22a545
+	int rc = 0, fd = open(filename, O_RDONLY);
22a545
+
22a545
+	if (fd <= 0) {
22a545
+		warn("%s: cannot open", filename);
22a545
+		return -errno;
22a545
+	}
22a545
+
22a545
+	rc = read_procfs_file(fd, &buf, &bufsiz);
22a545
+	close(fd);
22a545
+
22a545
+	switch (rc) {
22a545
+	case 0:
22a545
+		fwrite(buf, 1, bufsiz, stdout);
22a545
+		free(buf);
22a545
+		break;
22a545
+	case 1:
22a545
+		warnx("too many attempts");
22a545
+		break;
22a545
+	default:
22a545
+		warn("%s: cannot read", filename);
22a545
+		break;
22a545
+	}
22a545
+
22a545
+	return rc;
22a545
+}
22a545
+
22a545
 static int test_match_fstype(struct libmnt_test *ts, int argc, char *argv[])
22a545
 {
22a545
 	char *type = argv[1];
22a545
@@ -1300,6 +1457,7 @@ int main(int argc, char *argv[])
22a545
 	{ "--guess-root",    test_guess_root,      "[<maj:min>]" },
22a545
 	{ "--mkdir",         test_mkdir,           "<path>" },
22a545
 	{ "--statfs-type",   test_statfs_type,     "<path>" },
22a545
+	{ "--read-procfs",   test_proc_read,       "<path>" },
22a545
 
22a545
 	{ NULL }
22a545
 	};
22a545
-- 
22a545
2.21.0
22a545