Blob Blame History Raw
From 80c7e812dffa734599aadde93cb8e30b34f0983d Mon Sep 17 00:00:00 2001
From: Joe Lawrence <joe.lawrence@redhat.com>
Date: Mon, 21 Mar 2022 15:45:03 -0400
Subject: [KPATCH CVE-2021-4083] fget: kpatch fixes for CVE-2021-4083

Kernels:
3.10.0-1160.24.1.el7
3.10.0-1160.25.1.el7
3.10.0-1160.31.1.el7
3.10.0-1160.36.2.el7
3.10.0-1160.41.1.el7
3.10.0-1160.42.2.el7
3.10.0-1160.45.1.el7
3.10.0-1160.49.1.el7
3.10.0-1160.53.1.el7
3.10.0-1160.59.1.el7

Changes since last build:
arches: x86_64 ppc64le
file.o: changed function: SyS_dup
file.o: changed function: dup_fd
file.o: changed function: fget
file.o: changed function: fget_light
file.o: changed function: fget_raw
file.o: changed function: fget_raw_light
file.o: changed function: put_files_struct
file.o: new function: __fget
file.o: new function: __fget_light
---------------------------

Kpatch-MR: https://gitlab.com/redhat/prdsc/rhel/src/kpatch/rhel-7/-/merge_requests/34
Approved-by: Yannick Cote (@ycote1)
Modifications:
- include/linux/rcupdate.h, kernel/rcupdate.c: leave exported
  rcu_my_thread_group_empty() intact
- fs/file.c: use fput() instead of fputs_many() since we skipped commit
  ("fs: add fget_many() and fput_many()")
- fs/file.c: use fcheck_files() instead of files_lookup_fd_raw() since
  we are skipping subsequent commit ("fget: clarify and improve
  __fget_files() implementation") that provides it.
- Set __attribute__((optimize("-fno-optimize-sibling-calls"))) for
  fget() and fget_raw() on ppc64le

commit c2207a235113315ad696b06eb96ccd36d1f5fdeb
Author: Miklos Szeredi <mszeredi@redhat.com>
Date:   Fri Jan 21 10:22:29 2022 +0100

    introduce __fcheck_files() to fix rcu_dereference_check_fdtable(), kill rcu_my_thread_group_empty()

    Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2032478
    Upstream status: Linus
    Testing: xfstests
    CVE: CVE-2021-4083
    Conflicts:
            - context difference due to backport of later patch
            - target file difference due to missing backport of rcu source code
              move

    commit a8d4b8345e0ee48b732126d980efaf0dc373e2b0
    Author: Oleg Nesterov <oleg@redhat.com>
    Date:   Sat Jan 11 19:19:32 2014 +0100

        introduce __fcheck_files() to fix rcu_dereference_check_fdtable(), kill rcu_my_thread_group_empty()

        rcu_dereference_check_fdtable() looks very wrong,

        1. rcu_my_thread_group_empty() was added by 844b9a8707f1 "vfs: fix
           RCU-lockdep false positive due to /proc" but it doesn't really
           fix the problem. A CLONE_THREAD (without CLONE_FILES) task can
           hit the same race with get_files_struct().

           And otoh rcu_my_thread_group_empty() can suppress the correct
           warning if the caller is the CLONE_FILES (without CLONE_THREAD)
           task.

        2. files->count == 1 check is not really right too. Even if this
           files_struct is not shared it is not safe to access it lockless
           unless the caller is the owner.

           Otoh, this check is sub-optimal. files->count == 0 always means
           it is safe to use it lockless even if files != current->files,
           but put_files_struct() has to take rcu_read_lock(). See the next
           patch.

        This patch removes the buggy checks and turns fcheck_files() into
        __fcheck_files() which uses rcu_dereference_raw(), the "unshared"
        callers, fget_light() and fget_raw_light(), can use it to avoid
        the warning from RCU-lockdep.

        fcheck_files() is trivially reimplemented as rcu_lockdep_assert()
        plus __fcheck_files().

        Signed-off-by: Oleg Nesterov <oleg@redhat.com>
        Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

    Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>

commit ec06bac02991edcfdeb148ab2fe7f3e2d7d3ceaa
Author: Miklos Szeredi <mszeredi@redhat.com>
Date:   Fri Jan 21 10:22:30 2022 +0100

    fs: factor out common code in fget() and fget_raw()

    Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2032478
    Upstream status: Linus
    Testing: xfstests
    CVE: CVE-2021-4083
    Conflicts:
            - difference due to backport of later patch

    commit 1deb46e2562561255c34075825fd00f22a858bb3
    Author: Oleg Nesterov <oleg@redhat.com>
    Date:   Mon Jan 13 16:48:19 2014 +0100

        fs: factor out common code in fget() and fget_raw()

        Apart from FMODE_PATH check fget() and fget_raw() are identical,
        shift the code into the new simple helper, __fget(fd, mask). Saves
        160 bytes.

        Signed-off-by: Oleg Nesterov <oleg@redhat.com>
        Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

    Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>

commit ac43fab520f6836e2a7d3d20dd64d6328233ccbe
Author: Miklos Szeredi <mszeredi@redhat.com>
Date:   Fri Jan 21 10:22:30 2022 +0100

    fs: factor out common code in fget_light() and fget_raw_light()

    Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2032478
    Upstream status: Linus
    Testing: xfstests
    CVE: CVE-2021-4083

    commit ad46183445043b562856c60b74db664668fb364b
    Author: Oleg Nesterov <oleg@redhat.com>
    Date:   Mon Jan 13 16:48:40 2014 +0100

        fs: factor out common code in fget_light() and fget_raw_light()

        Apart from FMODE_PATH check fget_light() and fget_raw_light() are
        identical, shift the code into the new helper, __fget_light(fd, mask).
        Saves 208 bytes.

        Signed-off-by: Oleg Nesterov <oleg@redhat.com>
        Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

    Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>

commit 9e24c8894f5df488a336f0c848f15a7d2f78d163
Author: Miklos Szeredi <mszeredi@redhat.com>
Date:   Fri Jan 21 10:22:30 2022 +0100

    fs: __fget_light() can use __fget() in slow path

    Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2032478
    Upstream status: Linus
    Testing: xfstests
    CVE: CVE-2021-4083

    commit e6ff9a9fa4e05c1c03dec63cdc6a87d6dea02755
    Author: Oleg Nesterov <oleg@redhat.com>
    Date:   Mon Jan 13 16:49:06 2014 +0100

        fs: __fget_light() can use __fget() in slow path

        The slow path in __fget_light() can use __fget() to avoid the
        code duplication. Saves 232 bytes.

        Signed-off-by: Oleg Nesterov <oleg@redhat.com>
        Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

    Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>

commit d63fb584ae2d7d9a1620e23e59072cb6929f3833
Author: Miklos Szeredi <mszeredi@redhat.com>
Date:   Fri Jan 21 10:22:30 2022 +0100

    fs/file.c: __fget() and dup2() atomicity rules

    Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2032478
    Upstream status: Linus
    Testing: xfstests
    CVE: CVE-2021-4083

    commit 5ba97d2832f87943c43bb69cb1ef86dbc59df5bc
    Author: Eric Dumazet <edumazet@google.com>
    Date:   Mon Jun 29 17:10:30 2015 +0200

        fs/file.c: __fget() and dup2() atomicity rules

        __fget() does lockless fetch of pointer from the descriptor
        table, attempts to grab a reference and treats "it was already
        zero" as "it's already gone from the table, we just hadn't
        seen the store, let's fail".  Unfortunately, that breaks the
        atomicity of dup2() - __fget() might see the old pointer,
        notice that it's been already dropped and treat that as
        "it's closed".  What we should be getting is either the
        old file or new one, depending whether we come before or after
        dup2().

        Dmitry had following test failing sometimes :

        int fd;
        void *Thread(void *x) {
          char buf;
          int n = read(fd, &buf, 1);
          if (n != 1)
            exit(printf("read failed: n=%d errno=%d\n", n, errno));
          return 0;
        }

        int main()
        {
          fd = open("/dev/urandom", O_RDONLY);
          int fd2 = open("/dev/urandom", O_RDONLY);
          if (fd == -1 || fd2 == -1)
            exit(printf("open failed\n"));
          pthread_t th;
          pthread_create(&th, 0, Thread, 0);
          if (dup2(fd2, fd) == -1)
            exit(printf("dup2 failed\n"));
          pthread_join(th, 0);
          if (close(fd) == -1)
            exit(printf("close failed\n"));
          if (close(fd2) == -1)
            exit(printf("close failed\n"));
          printf("DONE\n");
          return 0;
        }

        Signed-off-by: Eric Dumazet <edumazet@google.com>
        Reported-by: Dmitry Vyukov <dvyukov@google.com>
        Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

    Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>

commit bc04a10c9303dcd9a6305a0452361537257fa0c1
Author: Miklos Szeredi <mszeredi@redhat.com>
Date:   Fri Jan 21 10:22:31 2022 +0100

    fget: check that the fd still exists after getting a ref to it

    Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2032478
    Upstream status: Linus
    Testing: xfstests
    CVE: CVE-2021-4083

    commit 054aa8d439b9185d4f5eb9a90282d1ce74772969
    Author: Linus Torvalds <torvalds@linux-foundation.org>
    Date:   Wed Dec 1 10:06:14 2021 -0800

        fget: check that the fd still exists after getting a ref to it

        Jann Horn points out that there is another possible race wrt Unix domain
        socket garbage collection, somewhat reminiscent of the one fixed in
        commit cbcf01128d0a ("af_unix: fix garbage collect vs MSG_PEEK").

        See the extended comment about the garbage collection requirements added
        to unix_peek_fds() by that commit for details.

        The race comes from how we can locklessly look up a file descriptor just
        as it is in the process of being closed, and with the right artificial
        timing (Jann added a few strategic 'mdelay(500)' calls to do that), the
        Unix domain socket garbage collector could see the reference count
        decrement of the close() happen before fget() took its reference to the
        file and the file was attached onto a new file descriptor.

        This is all (intentionally) correct on the 'struct file *' side, with
        RCU lookups and lockless reference counting very much part of the
        design.  Getting that reference count out of order isn't a problem per
        se.

        But the garbage collector can get confused by seeing this situation of
        having seen a file not having any remaining external references and then
        seeing it being attached to an fd.

        In commit cbcf01128d0a ("af_unix: fix garbage collect vs MSG_PEEK") the
        fix was to serialize the file descriptor install with the garbage
        collector by taking and releasing the unix_gc_lock.

        That's not really an option here, but since this all happens when we are
        in the process of looking up a file descriptor, we can instead simply
        just re-check that the file hasn't been closed in the meantime, and just
        re-do the lookup if we raced with a concurrent close() of the same file
        descriptor.

        Reported-and-tested-by: Jann Horn <jannh@google.com>
        Acked-by: Miklos Szeredi <mszeredi@redhat.com>
        Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

    Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>

Signed-off-by: Joe Lawrence <joe.lawrence@redhat.com>
---
 fs/file.c               | 86 +++++++++++++++--------------------------
 include/linux/fdtable.h | 35 ++++++++++-------
 2 files changed, 53 insertions(+), 68 deletions(-)

diff --git a/fs/file.c b/fs/file.c
index 44bd634b636a..564d60bf0fda 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -718,42 +718,43 @@ void do_close_on_exec(struct files_struct *files)
 	spin_unlock(&files->file_lock);
 }
 
-struct file *fget(unsigned int fd)
+static struct file *__fget(unsigned int fd, fmode_t mask)
 {
-	struct file *file;
 	struct files_struct *files = current->files;
+	struct file *file;
 
 	rcu_read_lock();
+loop:
 	file = fcheck_files(files, fd);
 	if (file) {
-		/* File object ref couldn't be taken */
-		if (file->f_mode & FMODE_PATH || !get_file_rcu(file))
+		/* File object ref couldn't be taken.
+		 * dup2() atomicity guarantee is the reason
+		 * we loop to catch the new file (or NULL pointer)
+		 */
+		if (file->f_mode & mask)
 			file = NULL;
+		else if (!get_file_rcu(file))
+			goto loop;
+		else if (fcheck_files(files, fd) != file) {
+			fput(file);
+			goto loop;
+		}
 	}
 	rcu_read_unlock();
 
 	return file;
 }
 
+__attribute__((optimize("-fno-optimize-sibling-calls"))) struct file *fget(unsigned int fd)
+{
+	return __fget(fd, FMODE_PATH);
+}
 EXPORT_SYMBOL(fget);
 
-struct file *fget_raw(unsigned int fd)
+__attribute__((optimize("-fno-optimize-sibling-calls"))) struct file *fget_raw(unsigned int fd)
 {
-	struct file *file;
-	struct files_struct *files = current->files;
-
-	rcu_read_lock();
-	file = fcheck_files(files, fd);
-	if (file) {
-		/* File object ref couldn't be taken */
-		if (!atomic_long_inc_not_zero(&file->f_count))
-			file = NULL;
-	}
-	rcu_read_unlock();
-
-	return file;
+	return __fget(fd, 0);
 }
-
 EXPORT_SYMBOL(fget_raw);
 
 /*
@@ -772,56 +773,33 @@ EXPORT_SYMBOL(fget_raw);
  * The fput_needed flag returned by fget_light should be passed to the
  * corresponding fput_light.
  */
-struct file *fget_light(unsigned int fd, int *fput_needed)
+struct file *__fget_light(unsigned int fd, fmode_t mask, int *fput_needed)
 {
-	struct file *file;
 	struct files_struct *files = current->files;
+	struct file *file;
 
 	*fput_needed = 0;
 	if (atomic_read(&files->count) == 1) {
-		file = fcheck_files(files, fd);
-		if (file && (file->f_mode & FMODE_PATH))
+		file = __fcheck_files(files, fd);
+		if (file && (file->f_mode & mask))
 			file = NULL;
 	} else {
-		rcu_read_lock();
-		file = fcheck_files(files, fd);
-		if (file) {
-			if (!(file->f_mode & FMODE_PATH) &&
-			    atomic_long_inc_not_zero(&file->f_count))
-				*fput_needed = 1;
-			else
-				/* Didn't get the reference, someone's freed */
-				file = NULL;
-		}
-		rcu_read_unlock();
+		file = __fget(fd, mask);
+		if (file)
+			*fput_needed = 1;
 	}
 
 	return file;
 }
+struct file *fget_light(unsigned int fd, int *fput_needed)
+{
+	return __fget_light(fd, FMODE_PATH, fput_needed);
+}
 EXPORT_SYMBOL(fget_light);
 
 struct file *fget_raw_light(unsigned int fd, int *fput_needed)
 {
-	struct file *file;
-	struct files_struct *files = current->files;
-
-	*fput_needed = 0;
-	if (atomic_read(&files->count) == 1) {
-		file = fcheck_files(files, fd);
-	} else {
-		rcu_read_lock();
-		file = fcheck_files(files, fd);
-		if (file) {
-			if (atomic_long_inc_not_zero(&file->f_count))
-				*fput_needed = 1;
-			else
-				/* Didn't get the reference, someone's freed */
-				file = NULL;
-		}
-		rcu_read_unlock();
-	}
-
-	return file;
+	return __fget_light(fd, 0, fput_needed);
 }
 
 void set_close_on_exec(unsigned int fd, int flag)
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index 88d74ca9418f..95bcca7c1a0f 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -70,29 +70,36 @@ struct files_struct {
 	RH_KABI_EXTEND(wait_queue_head_t resize_wait)
 };
 
-#define rcu_dereference_check_fdtable(files, fdtfd) \
-	(rcu_dereference_check((fdtfd), \
-			       lockdep_is_held(&(files)->file_lock) || \
-			       atomic_read(&(files)->count) == 1 || \
-			       rcu_my_thread_group_empty()))
-
-#define files_fdtable(files) \
-		(rcu_dereference_check_fdtable((files), (files)->fdt))
-
 struct file_operations;
 struct vfsmount;
 struct dentry;
 
-static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd)
+#define rcu_dereference_check_fdtable(files, fdtfd) \
+	rcu_dereference_check((fdtfd), lockdep_is_held(&(files)->file_lock))
+
+#define files_fdtable(files) \
+	rcu_dereference_check_fdtable((files), (files)->fdt)
+
+/*
+ * The caller must ensure that fd table isn't shared or hold rcu or file lock
+ */
+static inline struct file *__fcheck_files(struct files_struct *files, unsigned int fd)
 {
-	struct file * file = NULL;
-	struct fdtable *fdt = files_fdtable(files);
+	struct fdtable *fdt = rcu_dereference_raw(files->fdt);
 
 	if (fd < fdt->max_fds) {
 		fd = array_index_nospec(fd, fdt->max_fds);
-		file = rcu_dereference_check_fdtable(files, fdt->fd[fd]);
+		return rcu_dereference_raw(fdt->fd[fd]);
 	}
-	return file;
+	return NULL;
+}
+
+static inline struct file *fcheck_files(struct files_struct *files, unsigned int fd)
+{
+	rcu_lockdep_assert(rcu_read_lock_held() ||
+			   lockdep_is_held(&files->file_lock),
+			   "suspicious rcu_dereference_check() usage");
+	return __fcheck_files(files, fd);
 }
 
 /*
-- 
2.26.3