Blame SOURCES/0027-libmount-improve-mountinfo-reliability.patch

fe1ca8
From 32fe4f1dd8fbc104bd848e24de613122947f095a Mon Sep 17 00:00:00 2001
fe1ca8
From: Karel Zak <kzak@redhat.com>
fe1ca8
Date: Wed, 28 Aug 2019 15:47:16 +0200
fe1ca8
Subject: [PATCH] libmount: improve mountinfo reliability
fe1ca8
MIME-Version: 1.0
fe1ca8
Content-Type: text/plain; charset=UTF-8
fe1ca8
Content-Transfer-Encoding: 8bit
fe1ca8
fe1ca8
The standard way how we read mount table is not reliable because
fe1ca8
during the read() syscalls the table may be modified by some another
fe1ca8
process. The changes in the table is possible to detect by poll()
fe1ca8
event, and in this case it seems better to lseek to the begin of the file
fe1ca8
and read it again. It's expensive, but better than races...
fe1ca8
fe1ca8
This patch does not modify mountinfo parser, but it reads all file to
fe1ca8
memory (by read()+poll()) and than it creates memory stream
fe1ca8
from the buffer and use it rather than a regular file stream.
fe1ca8
fe1ca8
It means the parser is still possible to use for normal files
fe1ca8
(e.g. fstab) as well as for mountinfo and it's also portable to
fe1ca8
systems where for some reason is no fmemopen().
fe1ca8
fe1ca8
Addresses: https://github.com/systemd/systemd/issues/10872
fe1ca8
Addresses: https://bugzilla.redhat.com/show_bug.cgi?id=1751447
fe1ca8
Upstream: http://github.com/karelzak/util-linux/commit/e4925f591c1bfb83719418b56b952830d15b77eb
fe1ca8
Upstream: http://github.com/karelzak/util-linux/commit/ee551c909f95437fd9fcd162f398c069d0ce9720
fe1ca8
Reported-by: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
fe1ca8
Signed-off-by: Karel Zak <kzak@redhat.com>
fe1ca8
---
fe1ca8
 configure.ac             |   1 +
fe1ca8
 libmount/src/mountP.h    |   2 +
fe1ca8
 libmount/src/tab_parse.c |  87 +++++++++++++++++----
fe1ca8
 libmount/src/utils.c     | 158 +++++++++++++++++++++++++++++++++++++++
fe1ca8
 4 files changed, 233 insertions(+), 15 deletions(-)
fe1ca8
fe1ca8
diff --git a/configure.ac b/configure.ac
fe1ca8
index a05a294ad..245004890 100644
fe1ca8
--- a/configure.ac
fe1ca8
+++ b/configure.ac
fe1ca8
@@ -456,6 +456,7 @@ AC_CHECK_FUNCS([ \
fe1ca8
 	err \
fe1ca8
 	errx \
fe1ca8
 	explicit_bzero \
fe1ca8
+	fmemopen \
fe1ca8
 	fsync \
fe1ca8
 	utimensat \
fe1ca8
 	getdomainname \
fe1ca8
diff --git a/libmount/src/mountP.h b/libmount/src/mountP.h
fe1ca8
index d47d26442..52a238ef3 100644
fe1ca8
--- a/libmount/src/mountP.h
fe1ca8
+++ b/libmount/src/mountP.h
fe1ca8
@@ -93,6 +93,7 @@ extern int mnt_valid_tagname(const char *tagname);
fe1ca8
 extern int append_string(char **a, const char *b);
fe1ca8
 
fe1ca8
 extern const char *mnt_statfs_get_fstype(struct statfs *vfs);
fe1ca8
+extern int is_procfs_fd(int fd);
fe1ca8
 extern int is_file_empty(const char *name);
fe1ca8
 
fe1ca8
 extern int mnt_is_readonly(const char *path)
fe1ca8
@@ -118,6 +119,7 @@ extern void mnt_free_filesystems(char **filesystems);
fe1ca8
 extern char *mnt_get_kernel_cmdline_option(const char *name);
fe1ca8
 extern int mnt_guess_system_root(dev_t devno, struct libmnt_cache *cache, char **path);
fe1ca8
 extern int mnt_stat_mountpoint(const char *target, struct stat *st);
fe1ca8
+extern FILE *mnt_get_procfs_memstream(int fd, char **membuf);
fe1ca8
 
fe1ca8
 /* tab.c */
fe1ca8
 extern int is_mountinfo(struct libmnt_table *tb);
fe1ca8
diff --git a/libmount/src/tab_parse.c b/libmount/src/tab_parse.c
fe1ca8
index 3ed84ebc2..10fc68279 100644
fe1ca8
--- a/libmount/src/tab_parse.c
fe1ca8
+++ b/libmount/src/tab_parse.c
fe1ca8
@@ -603,15 +603,7 @@ static int kernel_fs_postparse(struct libmnt_table *tb,
fe1ca8
 	return rc;
fe1ca8
 }
fe1ca8
 
fe1ca8
-/**
fe1ca8
- * mnt_table_parse_stream:
fe1ca8
- * @tb: tab pointer
fe1ca8
- * @f: file stream
fe1ca8
- * @filename: filename used for debug and error messages
fe1ca8
- *
fe1ca8
- * Returns: 0 on success, negative number in case of error.
fe1ca8
- */
fe1ca8
-int mnt_table_parse_stream(struct libmnt_table *tb, FILE *f, const char *filename)
fe1ca8
+static int __table_parse_stream(struct libmnt_table *tb, FILE *f, const char *filename)
fe1ca8
 {
fe1ca8
 	int rc = -1;
fe1ca8
 	int flags = 0;
fe1ca8
@@ -685,6 +677,40 @@ err:
fe1ca8
 	return rc;
fe1ca8
 }
fe1ca8
 
fe1ca8
+/**
fe1ca8
+ * mnt_table_parse_stream:
fe1ca8
+ * @tb: tab pointer
fe1ca8
+ * @f: file stream
fe1ca8
+ * @filename: filename used for debug and error messages
fe1ca8
+ *
fe1ca8
+ * Returns: 0 on success, negative number in case of error.
fe1ca8
+ */
fe1ca8
+int mnt_table_parse_stream(struct libmnt_table *tb, FILE *f, const char *filename)
fe1ca8
+{
fe1ca8
+	int fd, rc;
fe1ca8
+	FILE *memf = NULL;
fe1ca8
+	char *membuf = NULL;
fe1ca8
+
fe1ca8
+	/*
fe1ca8
+	 * For /proc/#/{mountinfo,mount} we read all file to memory and use it
fe1ca8
+	 * as memory stream. For more details see mnt_read_procfs_file().
fe1ca8
+	 */
fe1ca8
+	if ((fd = fileno(f)) >= 0
fe1ca8
+	    && (tb->fmt == MNT_FMT_GUESS ||
fe1ca8
+		tb->fmt == MNT_FMT_MOUNTINFO ||
fe1ca8
+		tb->fmt == MNT_FMT_MTAB)
fe1ca8
+	    && is_procfs_fd(fd)
fe1ca8
+	    && (memf = mnt_get_procfs_memstream(fd, &membuf))) {
fe1ca8
+
fe1ca8
+		rc = __table_parse_stream(tb, memf, filename);
fe1ca8
+		fclose(memf);
fe1ca8
+		free(membuf);
fe1ca8
+	} else
fe1ca8
+		rc = __table_parse_stream(tb, f, filename);
fe1ca8
+
fe1ca8
+	return rc;
fe1ca8
+}
fe1ca8
+
fe1ca8
 /**
fe1ca8
  * mnt_table_parse_file:
fe1ca8
  * @tb: tab pointer
fe1ca8
@@ -700,18 +726,49 @@ err:
fe1ca8
 int mnt_table_parse_file(struct libmnt_table *tb, const char *filename)
fe1ca8
 {
fe1ca8
 	FILE *f;
fe1ca8
-	int rc;
fe1ca8
+	int rc, fd = -1;
fe1ca8
 
fe1ca8
 	if (!filename || !tb)
fe1ca8
 		return -EINVAL;
fe1ca8
 
fe1ca8
-	f = fopen(filename, "r" UL_CLOEXECSTR);
fe1ca8
+	/*
fe1ca8
+	 * Try to use read()+poll() to realiably read all
fe1ca8
+	 * /proc/#/{mount,mountinfo} file to memory
fe1ca8
+	 */
fe1ca8
+	if (tb->fmt != MNT_FMT_SWAPS
fe1ca8
+	    && strncmp(filename, "/proc/", 6) == 0) {
fe1ca8
+
fe1ca8
+		FILE *memf;
fe1ca8
+		char *membuf = NULL;
fe1ca8
+
fe1ca8
+		fd = open(filename, O_RDONLY|O_CLOEXEC);
fe1ca8
+		if (fd < 0) {
fe1ca8
+			rc = -errno;
fe1ca8
+			goto done;
fe1ca8
+		}
fe1ca8
+		memf = mnt_get_procfs_memstream(fd, &membuf);
fe1ca8
+		if (memf) {
fe1ca8
+			rc = __table_parse_stream(tb, memf, filename);
fe1ca8
+
fe1ca8
+			fclose(memf);
fe1ca8
+			free(membuf);
fe1ca8
+			close(fd);
fe1ca8
+			goto done;
fe1ca8
+		}
fe1ca8
+		/* else fallback to fopen/fdopen() */
fe1ca8
+	}
fe1ca8
+
fe1ca8
+	if (fd >= 0)
fe1ca8
+		f = fdopen(fd, "r" UL_CLOEXECSTR);
fe1ca8
+	else
fe1ca8
+		f = fopen(filename, "r" UL_CLOEXECSTR);
fe1ca8
+
fe1ca8
 	if (f) {
fe1ca8
-		rc = mnt_table_parse_stream(tb, f, filename);
fe1ca8
+		rc = __table_parse_stream(tb, f, filename);
fe1ca8
 		fclose(f);
fe1ca8
 	} else
fe1ca8
 		rc = -errno;
fe1ca8
-
fe1ca8
+done:
fe1ca8
 	DBG(TAB, ul_debugobj(tb, "parsing done [filename=%s, rc=%d]", filename, rc));
fe1ca8
 	return rc;
fe1ca8
 }
fe1ca8
@@ -768,7 +825,7 @@ static int __mnt_table_parse_dir(struct libmnt_table *tb, const char *dirname)
fe1ca8
 
fe1ca8
 		f = fopen_at(dd, d->d_name, O_RDONLY|O_CLOEXEC, "r" UL_CLOEXECSTR);
fe1ca8
 		if (f) {
fe1ca8
-			mnt_table_parse_stream(tb, f, d->d_name);
fe1ca8
+			__table_parse_stream(tb, f, d->d_name);
fe1ca8
 			fclose(f);
fe1ca8
 		}
fe1ca8
 	}
fe1ca8
@@ -809,7 +866,7 @@ static int __mnt_table_parse_dir(struct libmnt_table *tb, const char *dirname)
fe1ca8
 		f = fopen_at(dirfd(dir), d->d_name,
fe1ca8
 				O_RDONLY|O_CLOEXEC, "r" UL_CLOEXECSTR);
fe1ca8
 		if (f) {
fe1ca8
-			mnt_table_parse_stream(tb, f, d->d_name);
fe1ca8
+			__table_parse_stream(tb, f, d->d_name);
fe1ca8
 			fclose(f);
fe1ca8
 		}
fe1ca8
 	}
fe1ca8
diff --git a/libmount/src/utils.c b/libmount/src/utils.c
fe1ca8
index c36187c07..f7d85d124 100644
fe1ca8
--- a/libmount/src/utils.c
fe1ca8
+++ b/libmount/src/utils.c
fe1ca8
@@ -14,6 +14,7 @@
fe1ca8
 #include <fcntl.h>
fe1ca8
 #include <pwd.h>
fe1ca8
 #include <grp.h>
fe1ca8
+#include <poll.h>
fe1ca8
 #include <blkid.h>
fe1ca8
 
fe1ca8
 #include "strutils.h"
fe1ca8
@@ -408,6 +409,12 @@ const char *mnt_statfs_get_fstype(struct statfs *vfs)
fe1ca8
 	return NULL;
fe1ca8
 }
fe1ca8
 
fe1ca8
+int is_procfs_fd(int fd)
fe1ca8
+{
fe1ca8
+	struct statfs sfs;
fe1ca8
+
fe1ca8
+	return fstatfs(fd, &sfs) == 0 && sfs.f_type == STATFS_PROC_MAGIC;
fe1ca8
+}
fe1ca8
 
fe1ca8
 /**
fe1ca8
  * mnt_match_fstype:
fe1ca8
@@ -1117,8 +1124,158 @@ done:
fe1ca8
 	return 1;
fe1ca8
 }
fe1ca8
 
fe1ca8
+#if defined(HAVE_FMEMOPEN) || defined(TEST_PROGRAM)
fe1ca8
+
fe1ca8
+/*
fe1ca8
+ * This function tries to minimize possible races when we read
fe1ca8
+ * /proc/#/{mountinfo,mount} files.
fe1ca8
+ *
fe1ca8
+ * The idea is to minimize number of read()s and check by poll() that during
fe1ca8
+ * the read the mount table has not been modified. If yes, than re-read it
fe1ca8
+ * (with some limitations to avoid never ending loop).
fe1ca8
+ *
fe1ca8
+ * Returns: <0 error, 0 success, 1 too many attempts
fe1ca8
+ */
fe1ca8
+static int read_procfs_file(int fd, char **buf, size_t *bufsiz)
fe1ca8
+{
fe1ca8
+	size_t bufmax = 0;
fe1ca8
+	int rc = 0, tries = 0, ninters = 0;
fe1ca8
+	char *bufptr = NULL;;
fe1ca8
+
fe1ca8
+	assert(buf);
fe1ca8
+	assert(bufsiz);
fe1ca8
+
fe1ca8
+	*bufsiz = 0;
fe1ca8
+	*buf = NULL;
fe1ca8
+
fe1ca8
+	do {
fe1ca8
+		ssize_t ret;
fe1ca8
+
fe1ca8
+		if (!bufptr || bufmax == *bufsiz) {
fe1ca8
+			char *tmp;
fe1ca8
+
fe1ca8
+			bufmax = bufmax ? bufmax * 2 : (16 * 1024);
fe1ca8
+			tmp = realloc(*buf, bufmax);
fe1ca8
+			if (!tmp)
fe1ca8
+				break;
fe1ca8
+			*buf = tmp;
fe1ca8
+			bufptr = tmp + *bufsiz;
fe1ca8
+		}
fe1ca8
+
fe1ca8
+		errno = 0;
fe1ca8
+		ret = read(fd, bufptr, bufmax - *bufsiz);
fe1ca8
+
fe1ca8
+		if (ret < 0) {
fe1ca8
+			/* error */
fe1ca8
+			if ((errno == EAGAIN || errno == EINTR) && (ninters++ < 5)) {
fe1ca8
+				xusleep(200000);
fe1ca8
+				continue;
fe1ca8
+			}
fe1ca8
+			break;
fe1ca8
+
fe1ca8
+		} else if (ret > 0) {
fe1ca8
+			/* success -- verify no event during read */
fe1ca8
+			struct pollfd fds[] = {
fe1ca8
+				{ .fd = fd, .events = POLLPRI }
fe1ca8
+			};
fe1ca8
+
fe1ca8
+			rc = poll(fds, 1, 0);
fe1ca8
+			if (rc < 0)
fe1ca8
+				break;		/* poll() error */
fe1ca8
+			if (rc > 0) {
fe1ca8
+				/* event -- read all again */
fe1ca8
+				if (lseek(fd, 0, SEEK_SET) != 0)
fe1ca8
+					break;
fe1ca8
+				*bufsiz = 0;
fe1ca8
+				bufptr = *buf;
fe1ca8
+				tries++;
fe1ca8
+
fe1ca8
+				if (tries > 10)
fe1ca8
+					/* busy system? -- wait */
fe1ca8
+					xusleep(10000);
fe1ca8
+				continue;
fe1ca8
+			}
fe1ca8
+
fe1ca8
+			/* successful read() without active poll() */
fe1ca8
+			(*bufsiz) += (size_t) ret;
fe1ca8
+			bufptr += ret;
fe1ca8
+			tries = ninters = 0;
fe1ca8
+		} else {
fe1ca8
+			/* end-of-file */
fe1ca8
+			goto success;
fe1ca8
+		}
fe1ca8
+	} while (tries <= 100);
fe1ca8
+
fe1ca8
+	rc = errno ? -errno : 1;
fe1ca8
+	free(*buf);
fe1ca8
+	return rc;
fe1ca8
+
fe1ca8
+success:
fe1ca8
+	return 0;
fe1ca8
+}
fe1ca8
+
fe1ca8
+/*
fe1ca8
+ * Create FILE stream for data from read_procfs_file()
fe1ca8
+ */
fe1ca8
+FILE *mnt_get_procfs_memstream(int fd, char **membuf)
fe1ca8
+{
fe1ca8
+	FILE *memf;
fe1ca8
+	size_t sz = 0;
fe1ca8
+	off_t cur;
fe1ca8
+
fe1ca8
+	/* in case of error, rewind to the original position */
fe1ca8
+	cur = lseek(fd, 0, SEEK_CUR);
fe1ca8
+
fe1ca8
+	if (read_procfs_file(fd, membuf, &sz) == 0
fe1ca8
+	    && sz > 0
fe1ca8
+	    && (memf = fmemopen(*membuf, sz, "r")))
fe1ca8
+		return memf;
fe1ca8
+
fe1ca8
+	/* error */
fe1ca8
+	lseek(fd, cur, SEEK_SET);
fe1ca8
+	return NULL;
fe1ca8
+}
fe1ca8
+#else
fe1ca8
+FILE *mnt_get_procfs_memstream(int fd __attribute((__unused__)),
fe1ca8
+		               char **membuf __attribute((__unused__)))
fe1ca8
+{
fe1ca8
+	return NULL;
fe1ca8
+}
fe1ca8
+#endif /* HAVE_FMEMOPEN */
fe1ca8
+
fe1ca8
 
fe1ca8
 #ifdef TEST_PROGRAM
fe1ca8
+static int test_proc_read(struct libmnt_test *ts, int argc, char *argv[])
fe1ca8
+{
fe1ca8
+	char *buf = NULL;
fe1ca8
+	char *filename = argv[1];
fe1ca8
+	size_t bufsiz = 0;
fe1ca8
+	int rc = 0, fd = open(filename, O_RDONLY);
fe1ca8
+
fe1ca8
+	if (fd <= 0) {
fe1ca8
+		warn("%s: cannot open", filename);
fe1ca8
+		return -errno;
fe1ca8
+	}
fe1ca8
+
fe1ca8
+	rc = read_procfs_file(fd, &buf, &bufsiz);
fe1ca8
+	close(fd);
fe1ca8
+
fe1ca8
+	switch (rc) {
fe1ca8
+	case 0:
fe1ca8
+		fwrite(buf, 1, bufsiz, stdout);
fe1ca8
+		free(buf);
fe1ca8
+		break;
fe1ca8
+	case 1:
fe1ca8
+		warnx("too many attempts");
fe1ca8
+		break;
fe1ca8
+	default:
fe1ca8
+		warn("%s: cannot read", filename);
fe1ca8
+		break;
fe1ca8
+	}
fe1ca8
+
fe1ca8
+	return rc;
fe1ca8
+}
fe1ca8
+
fe1ca8
 static int test_match_fstype(struct libmnt_test *ts, int argc, char *argv[])
fe1ca8
 {
fe1ca8
 	char *type = argv[1];
fe1ca8
@@ -1300,6 +1457,7 @@ int main(int argc, char *argv[])
fe1ca8
 	{ "--guess-root",    test_guess_root,      "[<maj:min>]" },
fe1ca8
 	{ "--mkdir",         test_mkdir,           "<path>" },
fe1ca8
 	{ "--statfs-type",   test_statfs_type,     "<path>" },
fe1ca8
+	{ "--read-procfs",   test_proc_read,       "<path>" },
fe1ca8
 
fe1ca8
 	{ NULL }
fe1ca8
 	};
fe1ca8
-- 
fe1ca8
2.21.0
fe1ca8