Blame SOURCES/0027-libmount-improve-mountinfo-reliability.patch

da180f
From 32fe4f1dd8fbc104bd848e24de613122947f095a Mon Sep 17 00:00:00 2001
da180f
From: Karel Zak <kzak@redhat.com>
da180f
Date: Wed, 28 Aug 2019 15:47:16 +0200
da180f
Subject: [PATCH] libmount: improve mountinfo reliability
da180f
MIME-Version: 1.0
da180f
Content-Type: text/plain; charset=UTF-8
da180f
Content-Transfer-Encoding: 8bit
da180f
da180f
The standard way how we read mount table is not reliable because
da180f
during the read() syscalls the table may be modified by some another
da180f
process. The changes in the table is possible to detect by poll()
da180f
event, and in this case it seems better to lseek to the begin of the file
da180f
and read it again. It's expensive, but better than races...
da180f
da180f
This patch does not modify mountinfo parser, but it reads all file to
da180f
memory (by read()+poll()) and than it creates memory stream
da180f
from the buffer and use it rather than a regular file stream.
da180f
da180f
It means the parser is still possible to use for normal files
da180f
(e.g. fstab) as well as for mountinfo and it's also portable to
da180f
systems where for some reason is no fmemopen().
da180f
da180f
Addresses: https://github.com/systemd/systemd/issues/10872
da180f
Addresses: https://bugzilla.redhat.com/show_bug.cgi?id=1751447
da180f
Upstream: http://github.com/karelzak/util-linux/commit/e4925f591c1bfb83719418b56b952830d15b77eb
da180f
Upstream: http://github.com/karelzak/util-linux/commit/ee551c909f95437fd9fcd162f398c069d0ce9720
da180f
Reported-by: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
da180f
Signed-off-by: Karel Zak <kzak@redhat.com>
da180f
---
da180f
 configure.ac             |   1 +
da180f
 libmount/src/mountP.h    |   2 +
da180f
 libmount/src/tab_parse.c |  87 +++++++++++++++++----
da180f
 libmount/src/utils.c     | 158 +++++++++++++++++++++++++++++++++++++++
da180f
 4 files changed, 233 insertions(+), 15 deletions(-)
da180f
da180f
diff --git a/configure.ac b/configure.ac
da180f
index a05a294ad..245004890 100644
da180f
--- a/configure.ac
da180f
+++ b/configure.ac
da180f
@@ -456,6 +456,7 @@ AC_CHECK_FUNCS([ \
da180f
 	err \
da180f
 	errx \
da180f
 	explicit_bzero \
da180f
+	fmemopen \
da180f
 	fsync \
da180f
 	utimensat \
da180f
 	getdomainname \
da180f
diff --git a/libmount/src/mountP.h b/libmount/src/mountP.h
da180f
index d47d26442..52a238ef3 100644
da180f
--- a/libmount/src/mountP.h
da180f
+++ b/libmount/src/mountP.h
da180f
@@ -93,6 +93,7 @@ extern int mnt_valid_tagname(const char *tagname);
da180f
 extern int append_string(char **a, const char *b);
da180f
 
da180f
 extern const char *mnt_statfs_get_fstype(struct statfs *vfs);
da180f
+extern int is_procfs_fd(int fd);
da180f
 extern int is_file_empty(const char *name);
da180f
 
da180f
 extern int mnt_is_readonly(const char *path)
da180f
@@ -118,6 +119,7 @@ extern void mnt_free_filesystems(char **filesystems);
da180f
 extern char *mnt_get_kernel_cmdline_option(const char *name);
da180f
 extern int mnt_guess_system_root(dev_t devno, struct libmnt_cache *cache, char **path);
da180f
 extern int mnt_stat_mountpoint(const char *target, struct stat *st);
da180f
+extern FILE *mnt_get_procfs_memstream(int fd, char **membuf);
da180f
 
da180f
 /* tab.c */
da180f
 extern int is_mountinfo(struct libmnt_table *tb);
da180f
diff --git a/libmount/src/tab_parse.c b/libmount/src/tab_parse.c
da180f
index 3ed84ebc2..10fc68279 100644
da180f
--- a/libmount/src/tab_parse.c
da180f
+++ b/libmount/src/tab_parse.c
da180f
@@ -603,15 +603,7 @@ static int kernel_fs_postparse(struct libmnt_table *tb,
da180f
 	return rc;
da180f
 }
da180f
 
da180f
-/**
da180f
- * mnt_table_parse_stream:
da180f
- * @tb: tab pointer
da180f
- * @f: file stream
da180f
- * @filename: filename used for debug and error messages
da180f
- *
da180f
- * Returns: 0 on success, negative number in case of error.
da180f
- */
da180f
-int mnt_table_parse_stream(struct libmnt_table *tb, FILE *f, const char *filename)
da180f
+static int __table_parse_stream(struct libmnt_table *tb, FILE *f, const char *filename)
da180f
 {
da180f
 	int rc = -1;
da180f
 	int flags = 0;
da180f
@@ -685,6 +677,40 @@ err:
da180f
 	return rc;
da180f
 }
da180f
 
da180f
+/**
da180f
+ * mnt_table_parse_stream:
da180f
+ * @tb: tab pointer
da180f
+ * @f: file stream
da180f
+ * @filename: filename used for debug and error messages
da180f
+ *
da180f
+ * Returns: 0 on success, negative number in case of error.
da180f
+ */
da180f
+int mnt_table_parse_stream(struct libmnt_table *tb, FILE *f, const char *filename)
da180f
+{
da180f
+	int fd, rc;
da180f
+	FILE *memf = NULL;
da180f
+	char *membuf = NULL;
da180f
+
da180f
+	/*
da180f
+	 * For /proc/#/{mountinfo,mount} we read all file to memory and use it
da180f
+	 * as memory stream. For more details see mnt_read_procfs_file().
da180f
+	 */
da180f
+	if ((fd = fileno(f)) >= 0
da180f
+	    && (tb->fmt == MNT_FMT_GUESS ||
da180f
+		tb->fmt == MNT_FMT_MOUNTINFO ||
da180f
+		tb->fmt == MNT_FMT_MTAB)
da180f
+	    && is_procfs_fd(fd)
da180f
+	    && (memf = mnt_get_procfs_memstream(fd, &membuf))) {
da180f
+
da180f
+		rc = __table_parse_stream(tb, memf, filename);
da180f
+		fclose(memf);
da180f
+		free(membuf);
da180f
+	} else
da180f
+		rc = __table_parse_stream(tb, f, filename);
da180f
+
da180f
+	return rc;
da180f
+}
da180f
+
da180f
 /**
da180f
  * mnt_table_parse_file:
da180f
  * @tb: tab pointer
da180f
@@ -700,18 +726,49 @@ err:
da180f
 int mnt_table_parse_file(struct libmnt_table *tb, const char *filename)
da180f
 {
da180f
 	FILE *f;
da180f
-	int rc;
da180f
+	int rc, fd = -1;
da180f
 
da180f
 	if (!filename || !tb)
da180f
 		return -EINVAL;
da180f
 
da180f
-	f = fopen(filename, "r" UL_CLOEXECSTR);
da180f
+	/*
da180f
+	 * Try to use read()+poll() to realiably read all
da180f
+	 * /proc/#/{mount,mountinfo} file to memory
da180f
+	 */
da180f
+	if (tb->fmt != MNT_FMT_SWAPS
da180f
+	    && strncmp(filename, "/proc/", 6) == 0) {
da180f
+
da180f
+		FILE *memf;
da180f
+		char *membuf = NULL;
da180f
+
da180f
+		fd = open(filename, O_RDONLY|O_CLOEXEC);
da180f
+		if (fd < 0) {
da180f
+			rc = -errno;
da180f
+			goto done;
da180f
+		}
da180f
+		memf = mnt_get_procfs_memstream(fd, &membuf);
da180f
+		if (memf) {
da180f
+			rc = __table_parse_stream(tb, memf, filename);
da180f
+
da180f
+			fclose(memf);
da180f
+			free(membuf);
da180f
+			close(fd);
da180f
+			goto done;
da180f
+		}
da180f
+		/* else fallback to fopen/fdopen() */
da180f
+	}
da180f
+
da180f
+	if (fd >= 0)
da180f
+		f = fdopen(fd, "r" UL_CLOEXECSTR);
da180f
+	else
da180f
+		f = fopen(filename, "r" UL_CLOEXECSTR);
da180f
+
da180f
 	if (f) {
da180f
-		rc = mnt_table_parse_stream(tb, f, filename);
da180f
+		rc = __table_parse_stream(tb, f, filename);
da180f
 		fclose(f);
da180f
 	} else
da180f
 		rc = -errno;
da180f
-
da180f
+done:
da180f
 	DBG(TAB, ul_debugobj(tb, "parsing done [filename=%s, rc=%d]", filename, rc));
da180f
 	return rc;
da180f
 }
da180f
@@ -768,7 +825,7 @@ static int __mnt_table_parse_dir(struct libmnt_table *tb, const char *dirname)
da180f
 
da180f
 		f = fopen_at(dd, d->d_name, O_RDONLY|O_CLOEXEC, "r" UL_CLOEXECSTR);
da180f
 		if (f) {
da180f
-			mnt_table_parse_stream(tb, f, d->d_name);
da180f
+			__table_parse_stream(tb, f, d->d_name);
da180f
 			fclose(f);
da180f
 		}
da180f
 	}
da180f
@@ -809,7 +866,7 @@ static int __mnt_table_parse_dir(struct libmnt_table *tb, const char *dirname)
da180f
 		f = fopen_at(dirfd(dir), d->d_name,
da180f
 				O_RDONLY|O_CLOEXEC, "r" UL_CLOEXECSTR);
da180f
 		if (f) {
da180f
-			mnt_table_parse_stream(tb, f, d->d_name);
da180f
+			__table_parse_stream(tb, f, d->d_name);
da180f
 			fclose(f);
da180f
 		}
da180f
 	}
da180f
diff --git a/libmount/src/utils.c b/libmount/src/utils.c
da180f
index c36187c07..f7d85d124 100644
da180f
--- a/libmount/src/utils.c
da180f
+++ b/libmount/src/utils.c
da180f
@@ -14,6 +14,7 @@
da180f
 #include <fcntl.h>
da180f
 #include <pwd.h>
da180f
 #include <grp.h>
da180f
+#include <poll.h>
da180f
 #include <blkid.h>
da180f
 
da180f
 #include "strutils.h"
da180f
@@ -408,6 +409,12 @@ const char *mnt_statfs_get_fstype(struct statfs *vfs)
da180f
 	return NULL;
da180f
 }
da180f
 
da180f
+int is_procfs_fd(int fd)
da180f
+{
da180f
+	struct statfs sfs;
da180f
+
da180f
+	return fstatfs(fd, &sfs) == 0 && sfs.f_type == STATFS_PROC_MAGIC;
da180f
+}
da180f
 
da180f
 /**
da180f
  * mnt_match_fstype:
da180f
@@ -1117,8 +1124,158 @@ done:
da180f
 	return 1;
da180f
 }
da180f
 
da180f
+#if defined(HAVE_FMEMOPEN) || defined(TEST_PROGRAM)
da180f
+
da180f
+/*
da180f
+ * This function tries to minimize possible races when we read
da180f
+ * /proc/#/{mountinfo,mount} files.
da180f
+ *
da180f
+ * The idea is to minimize number of read()s and check by poll() that during
da180f
+ * the read the mount table has not been modified. If yes, than re-read it
da180f
+ * (with some limitations to avoid never ending loop).
da180f
+ *
da180f
+ * Returns: <0 error, 0 success, 1 too many attempts
da180f
+ */
da180f
+static int read_procfs_file(int fd, char **buf, size_t *bufsiz)
da180f
+{
da180f
+	size_t bufmax = 0;
da180f
+	int rc = 0, tries = 0, ninters = 0;
da180f
+	char *bufptr = NULL;;
da180f
+
da180f
+	assert(buf);
da180f
+	assert(bufsiz);
da180f
+
da180f
+	*bufsiz = 0;
da180f
+	*buf = NULL;
da180f
+
da180f
+	do {
da180f
+		ssize_t ret;
da180f
+
da180f
+		if (!bufptr || bufmax == *bufsiz) {
da180f
+			char *tmp;
da180f
+
da180f
+			bufmax = bufmax ? bufmax * 2 : (16 * 1024);
da180f
+			tmp = realloc(*buf, bufmax);
da180f
+			if (!tmp)
da180f
+				break;
da180f
+			*buf = tmp;
da180f
+			bufptr = tmp + *bufsiz;
da180f
+		}
da180f
+
da180f
+		errno = 0;
da180f
+		ret = read(fd, bufptr, bufmax - *bufsiz);
da180f
+
da180f
+		if (ret < 0) {
da180f
+			/* error */
da180f
+			if ((errno == EAGAIN || errno == EINTR) && (ninters++ < 5)) {
da180f
+				xusleep(200000);
da180f
+				continue;
da180f
+			}
da180f
+			break;
da180f
+
da180f
+		} else if (ret > 0) {
da180f
+			/* success -- verify no event during read */
da180f
+			struct pollfd fds[] = {
da180f
+				{ .fd = fd, .events = POLLPRI }
da180f
+			};
da180f
+
da180f
+			rc = poll(fds, 1, 0);
da180f
+			if (rc < 0)
da180f
+				break;		/* poll() error */
da180f
+			if (rc > 0) {
da180f
+				/* event -- read all again */
da180f
+				if (lseek(fd, 0, SEEK_SET) != 0)
da180f
+					break;
da180f
+				*bufsiz = 0;
da180f
+				bufptr = *buf;
da180f
+				tries++;
da180f
+
da180f
+				if (tries > 10)
da180f
+					/* busy system? -- wait */
da180f
+					xusleep(10000);
da180f
+				continue;
da180f
+			}
da180f
+
da180f
+			/* successful read() without active poll() */
da180f
+			(*bufsiz) += (size_t) ret;
da180f
+			bufptr += ret;
da180f
+			tries = ninters = 0;
da180f
+		} else {
da180f
+			/* end-of-file */
da180f
+			goto success;
da180f
+		}
da180f
+	} while (tries <= 100);
da180f
+
da180f
+	rc = errno ? -errno : 1;
da180f
+	free(*buf);
da180f
+	return rc;
da180f
+
da180f
+success:
da180f
+	return 0;
da180f
+}
da180f
+
da180f
+/*
da180f
+ * Create FILE stream for data from read_procfs_file()
da180f
+ */
da180f
+FILE *mnt_get_procfs_memstream(int fd, char **membuf)
da180f
+{
da180f
+	FILE *memf;
da180f
+	size_t sz = 0;
da180f
+	off_t cur;
da180f
+
da180f
+	/* in case of error, rewind to the original position */
da180f
+	cur = lseek(fd, 0, SEEK_CUR);
da180f
+
da180f
+	if (read_procfs_file(fd, membuf, &sz) == 0
da180f
+	    && sz > 0
da180f
+	    && (memf = fmemopen(*membuf, sz, "r")))
da180f
+		return memf;
da180f
+
da180f
+	/* error */
da180f
+	lseek(fd, cur, SEEK_SET);
da180f
+	return NULL;
da180f
+}
da180f
+#else
da180f
+FILE *mnt_get_procfs_memstream(int fd __attribute((__unused__)),
da180f
+		               char **membuf __attribute((__unused__)))
da180f
+{
da180f
+	return NULL;
da180f
+}
da180f
+#endif /* HAVE_FMEMOPEN */
da180f
+
da180f
 
da180f
 #ifdef TEST_PROGRAM
da180f
+static int test_proc_read(struct libmnt_test *ts, int argc, char *argv[])
da180f
+{
da180f
+	char *buf = NULL;
da180f
+	char *filename = argv[1];
da180f
+	size_t bufsiz = 0;
da180f
+	int rc = 0, fd = open(filename, O_RDONLY);
da180f
+
da180f
+	if (fd <= 0) {
da180f
+		warn("%s: cannot open", filename);
da180f
+		return -errno;
da180f
+	}
da180f
+
da180f
+	rc = read_procfs_file(fd, &buf, &bufsiz);
da180f
+	close(fd);
da180f
+
da180f
+	switch (rc) {
da180f
+	case 0:
da180f
+		fwrite(buf, 1, bufsiz, stdout);
da180f
+		free(buf);
da180f
+		break;
da180f
+	case 1:
da180f
+		warnx("too many attempts");
da180f
+		break;
da180f
+	default:
da180f
+		warn("%s: cannot read", filename);
da180f
+		break;
da180f
+	}
da180f
+
da180f
+	return rc;
da180f
+}
da180f
+
da180f
 static int test_match_fstype(struct libmnt_test *ts, int argc, char *argv[])
da180f
 {
da180f
 	char *type = argv[1];
da180f
@@ -1300,6 +1457,7 @@ int main(int argc, char *argv[])
da180f
 	{ "--guess-root",    test_guess_root,      "[<maj:min>]" },
da180f
 	{ "--mkdir",         test_mkdir,           "<path>" },
da180f
 	{ "--statfs-type",   test_statfs_type,     "<path>" },
da180f
+	{ "--read-procfs",   test_proc_read,       "<path>" },
da180f
 
da180f
 	{ NULL }
da180f
 	};
da180f
-- 
da180f
2.21.0
da180f