Blame SOURCES/CVE-2021-4083.patch

3283a7
From 80c7e812dffa734599aadde93cb8e30b34f0983d Mon Sep 17 00:00:00 2001
3283a7
From: Joe Lawrence <joe.lawrence@redhat.com>
3283a7
Date: Mon, 21 Mar 2022 15:45:03 -0400
3283a7
Subject: [KPATCH CVE-2021-4083] fget: kpatch fixes for CVE-2021-4083
3283a7
3283a7
Kernels:
3283a7
3.10.0-1160.24.1.el7
3283a7
3.10.0-1160.25.1.el7
3283a7
3.10.0-1160.31.1.el7
3283a7
3.10.0-1160.36.2.el7
3283a7
3.10.0-1160.41.1.el7
3283a7
3.10.0-1160.42.2.el7
3283a7
3.10.0-1160.45.1.el7
3283a7
3.10.0-1160.49.1.el7
3283a7
3.10.0-1160.53.1.el7
3283a7
3.10.0-1160.59.1.el7
3283a7
3283a7
Changes since last build:
3283a7
arches: x86_64 ppc64le
3283a7
file.o: changed function: SyS_dup
3283a7
file.o: changed function: dup_fd
3283a7
file.o: changed function: fget
3283a7
file.o: changed function: fget_light
3283a7
file.o: changed function: fget_raw
3283a7
file.o: changed function: fget_raw_light
3283a7
file.o: changed function: put_files_struct
3283a7
file.o: new function: __fget
3283a7
file.o: new function: __fget_light
3283a7
---------------------------
3283a7
3283a7
Kpatch-MR: https://gitlab.com/redhat/prdsc/rhel/src/kpatch/rhel-7/-/merge_requests/34
3283a7
Approved-by: Yannick Cote (@ycote1)
3283a7
Modifications:
3283a7
- include/linux/rcupdate.h, kernel/rcupdate.c: leave exported
3283a7
  rcu_my_thread_group_empty() intact
3283a7
- fs/file.c: use fput() instead of fput_many() since we skipped commit
3283a7
  ("fs: add fget_many() and fput_many()")
3283a7
- fs/file.c: use fcheck_files() instead of files_lookup_fd_raw() since
3283a7
  we are skipping subsequent commit ("fget: clarify and improve
3283a7
  __fget_files() implementation") that provides it.
3283a7
- Set __attribute__((optimize("-fno-optimize-sibling-calls"))) for
3283a7
  fget() and fget_raw() on ppc64le
3283a7
3283a7
commit c2207a235113315ad696b06eb96ccd36d1f5fdeb
3283a7
Author: Miklos Szeredi <mszeredi@redhat.com>
3283a7
Date:   Fri Jan 21 10:22:29 2022 +0100
3283a7
3283a7
    introduce __fcheck_files() to fix rcu_dereference_check_fdtable(), kill rcu_my_thread_group_empty()
3283a7
3283a7
    Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2032478
3283a7
    Upstream status: Linus
3283a7
    Testing: xfstests
3283a7
    CVE: CVE-2021-4083
3283a7
    Conflicts:
3283a7
            - context difference due to backport of later patch
3283a7
            - target file difference due to missing backport of rcu source code
3283a7
              move
3283a7
3283a7
    commit a8d4b8345e0ee48b732126d980efaf0dc373e2b0
3283a7
    Author: Oleg Nesterov <oleg@redhat.com>
3283a7
    Date:   Sat Jan 11 19:19:32 2014 +0100
3283a7
3283a7
        introduce __fcheck_files() to fix rcu_dereference_check_fdtable(), kill rcu_my_thread_group_empty()
3283a7
3283a7
        rcu_dereference_check_fdtable() looks very wrong,
3283a7
3283a7
        1. rcu_my_thread_group_empty() was added by 844b9a8707f1 "vfs: fix
3283a7
           RCU-lockdep false positive due to /proc" but it doesn't really
3283a7
           fix the problem. A CLONE_THREAD (without CLONE_FILES) task can
3283a7
           hit the same race with get_files_struct().
3283a7
3283a7
           And otoh rcu_my_thread_group_empty() can suppress the correct
3283a7
           warning if the caller is the CLONE_FILES (without CLONE_THREAD)
3283a7
           task.
3283a7
3283a7
        2. files->count == 1 check is not really right too. Even if this
3283a7
           files_struct is not shared it is not safe to access it lockless
3283a7
           unless the caller is the owner.
3283a7
3283a7
           Otoh, this check is sub-optimal. files->count == 0 always means
3283a7
           it is safe to use it lockless even if files != current->files,
3283a7
           but put_files_struct() has to take rcu_read_lock(). See the next
3283a7
           patch.
3283a7
3283a7
        This patch removes the buggy checks and turns fcheck_files() into
3283a7
        __fcheck_files() which uses rcu_dereference_raw(), the "unshared"
3283a7
        callers, fget_light() and fget_raw_light(), can use it to avoid
3283a7
        the warning from RCU-lockdep.
3283a7
3283a7
        fcheck_files() is trivially reimplemented as rcu_lockdep_assert()
3283a7
        plus __fcheck_files().
3283a7
3283a7
        Signed-off-by: Oleg Nesterov <oleg@redhat.com>
3283a7
        Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
3283a7
3283a7
    Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
3283a7
3283a7
commit ec06bac02991edcfdeb148ab2fe7f3e2d7d3ceaa
3283a7
Author: Miklos Szeredi <mszeredi@redhat.com>
3283a7
Date:   Fri Jan 21 10:22:30 2022 +0100
3283a7
3283a7
    fs: factor out common code in fget() and fget_raw()
3283a7
3283a7
    Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2032478
3283a7
    Upstream status: Linus
3283a7
    Testing: xfstests
3283a7
    CVE: CVE-2021-4083
3283a7
    Conflicts:
3283a7
            - difference due to backport of later patch
3283a7
3283a7
    commit 1deb46e2562561255c34075825fd00f22a858bb3
3283a7
    Author: Oleg Nesterov <oleg@redhat.com>
3283a7
    Date:   Mon Jan 13 16:48:19 2014 +0100
3283a7
3283a7
        fs: factor out common code in fget() and fget_raw()
3283a7
3283a7
        Apart from FMODE_PATH check fget() and fget_raw() are identical,
3283a7
        shift the code into the new simple helper, __fget(fd, mask). Saves
3283a7
        160 bytes.
3283a7
3283a7
        Signed-off-by: Oleg Nesterov <oleg@redhat.com>
3283a7
        Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
3283a7
3283a7
    Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
3283a7
3283a7
commit ac43fab520f6836e2a7d3d20dd64d6328233ccbe
3283a7
Author: Miklos Szeredi <mszeredi@redhat.com>
3283a7
Date:   Fri Jan 21 10:22:30 2022 +0100
3283a7
3283a7
    fs: factor out common code in fget_light() and fget_raw_light()
3283a7
3283a7
    Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2032478
3283a7
    Upstream status: Linus
3283a7
    Testing: xfstests
3283a7
    CVE: CVE-2021-4083
3283a7
3283a7
    commit ad46183445043b562856c60b74db664668fb364b
3283a7
    Author: Oleg Nesterov <oleg@redhat.com>
3283a7
    Date:   Mon Jan 13 16:48:40 2014 +0100
3283a7
3283a7
        fs: factor out common code in fget_light() and fget_raw_light()
3283a7
3283a7
        Apart from FMODE_PATH check fget_light() and fget_raw_light() are
3283a7
        identical, shift the code into the new helper, __fget_light(fd, mask).
3283a7
        Saves 208 bytes.
3283a7
3283a7
        Signed-off-by: Oleg Nesterov <oleg@redhat.com>
3283a7
        Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
3283a7
3283a7
    Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
3283a7
3283a7
commit 9e24c8894f5df488a336f0c848f15a7d2f78d163
3283a7
Author: Miklos Szeredi <mszeredi@redhat.com>
3283a7
Date:   Fri Jan 21 10:22:30 2022 +0100
3283a7
3283a7
    fs: __fget_light() can use __fget() in slow path
3283a7
3283a7
    Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2032478
3283a7
    Upstream status: Linus
3283a7
    Testing: xfstests
3283a7
    CVE: CVE-2021-4083
3283a7
3283a7
    commit e6ff9a9fa4e05c1c03dec63cdc6a87d6dea02755
3283a7
    Author: Oleg Nesterov <oleg@redhat.com>
3283a7
    Date:   Mon Jan 13 16:49:06 2014 +0100
3283a7
3283a7
        fs: __fget_light() can use __fget() in slow path
3283a7
3283a7
        The slow path in __fget_light() can use __fget() to avoid the
3283a7
        code duplication. Saves 232 bytes.
3283a7
3283a7
        Signed-off-by: Oleg Nesterov <oleg@redhat.com>
3283a7
        Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
3283a7
3283a7
    Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
3283a7
3283a7
commit d63fb584ae2d7d9a1620e23e59072cb6929f3833
3283a7
Author: Miklos Szeredi <mszeredi@redhat.com>
3283a7
Date:   Fri Jan 21 10:22:30 2022 +0100
3283a7
3283a7
    fs/file.c: __fget() and dup2() atomicity rules
3283a7
3283a7
    Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2032478
3283a7
    Upstream status: Linus
3283a7
    Testing: xfstests
3283a7
    CVE: CVE-2021-4083
3283a7
3283a7
    commit 5ba97d2832f87943c43bb69cb1ef86dbc59df5bc
3283a7
    Author: Eric Dumazet <edumazet@google.com>
3283a7
    Date:   Mon Jun 29 17:10:30 2015 +0200
3283a7
3283a7
        fs/file.c: __fget() and dup2() atomicity rules
3283a7
3283a7
        __fget() does lockless fetch of pointer from the descriptor
3283a7
        table, attempts to grab a reference and treats "it was already
3283a7
        zero" as "it's already gone from the table, we just hadn't
3283a7
        seen the store, let's fail".  Unfortunately, that breaks the
3283a7
        atomicity of dup2() - __fget() might see the old pointer,
3283a7
        notice that it's been already dropped and treat that as
3283a7
        "it's closed".  What we should be getting is either the
3283a7
        old file or new one, depending whether we come before or after
3283a7
        dup2().
3283a7
3283a7
        Dmitry had following test failing sometimes :
3283a7
3283a7
        int fd;
3283a7
        void *Thread(void *x) {
3283a7
          char buf;
3283a7
          int n = read(fd, &buf, 1);
3283a7
          if (n != 1)
3283a7
            exit(printf("read failed: n=%d errno=%d\n", n, errno));
3283a7
          return 0;
3283a7
        }
3283a7
3283a7
        int main()
3283a7
        {
3283a7
          fd = open("/dev/urandom", O_RDONLY);
3283a7
          int fd2 = open("/dev/urandom", O_RDONLY);
3283a7
          if (fd == -1 || fd2 == -1)
3283a7
            exit(printf("open failed\n"));
3283a7
          pthread_t th;
3283a7
          pthread_create(&th, 0, Thread, 0);
3283a7
          if (dup2(fd2, fd) == -1)
3283a7
            exit(printf("dup2 failed\n"));
3283a7
          pthread_join(th, 0);
3283a7
          if (close(fd) == -1)
3283a7
            exit(printf("close failed\n"));
3283a7
          if (close(fd2) == -1)
3283a7
            exit(printf("close failed\n"));
3283a7
          printf("DONE\n");
3283a7
          return 0;
3283a7
        }
3283a7
3283a7
        Signed-off-by: Eric Dumazet <edumazet@google.com>
3283a7
        Reported-by: Dmitry Vyukov <dvyukov@google.com>
3283a7
        Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
3283a7
3283a7
    Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
3283a7
3283a7
commit bc04a10c9303dcd9a6305a0452361537257fa0c1
3283a7
Author: Miklos Szeredi <mszeredi@redhat.com>
3283a7
Date:   Fri Jan 21 10:22:31 2022 +0100
3283a7
3283a7
    fget: check that the fd still exists after getting a ref to it
3283a7
3283a7
    Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2032478
3283a7
    Upstream status: Linus
3283a7
    Testing: xfstests
3283a7
    CVE: CVE-2021-4083
3283a7
3283a7
    commit 054aa8d439b9185d4f5eb9a90282d1ce74772969
3283a7
    Author: Linus Torvalds <torvalds@linux-foundation.org>
3283a7
    Date:   Wed Dec 1 10:06:14 2021 -0800
3283a7
3283a7
        fget: check that the fd still exists after getting a ref to it
3283a7
3283a7
        Jann Horn points out that there is another possible race wrt Unix domain
3283a7
        socket garbage collection, somewhat reminiscent of the one fixed in
3283a7
        commit cbcf01128d0a ("af_unix: fix garbage collect vs MSG_PEEK").
3283a7
3283a7
        See the extended comment about the garbage collection requirements added
3283a7
        to unix_peek_fds() by that commit for details.
3283a7
3283a7
        The race comes from how we can locklessly look up a file descriptor just
3283a7
        as it is in the process of being closed, and with the right artificial
3283a7
        timing (Jann added a few strategic 'mdelay(500)' calls to do that), the
3283a7
        Unix domain socket garbage collector could see the reference count
3283a7
        decrement of the close() happen before fget() took its reference to the
3283a7
        file and the file was attached onto a new file descriptor.
3283a7
3283a7
        This is all (intentionally) correct on the 'struct file *' side, with
3283a7
        RCU lookups and lockless reference counting very much part of the
3283a7
        design.  Getting that reference count out of order isn't a problem per
3283a7
        se.
3283a7
3283a7
        But the garbage collector can get confused by seeing this situation of
3283a7
        having seen a file not having any remaining external references and then
3283a7
        seeing it being attached to an fd.
3283a7
3283a7
        In commit cbcf01128d0a ("af_unix: fix garbage collect vs MSG_PEEK") the
3283a7
        fix was to serialize the file descriptor install with the garbage
3283a7
        collector by taking and releasing the unix_gc_lock.
3283a7
3283a7
        That's not really an option here, but since this all happens when we are
3283a7
        in the process of looking up a file descriptor, we can instead simply
3283a7
        just re-check that the file hasn't been closed in the meantime, and just
3283a7
        re-do the lookup if we raced with a concurrent close() of the same file
3283a7
        descriptor.
3283a7
3283a7
        Reported-and-tested-by: Jann Horn <jannh@google.com>
3283a7
        Acked-by: Miklos Szeredi <mszeredi@redhat.com>
3283a7
        Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
3283a7
3283a7
    Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
3283a7
3283a7
Signed-off-by: Joe Lawrence <joe.lawrence@redhat.com>
3283a7
---
3283a7
 fs/file.c               | 86 +++++++++++++++--------------------------
3283a7
 include/linux/fdtable.h | 35 ++++++++++-------
3283a7
 2 files changed, 53 insertions(+), 68 deletions(-)
3283a7
3283a7
diff --git a/fs/file.c b/fs/file.c
3283a7
index 44bd634b636a..564d60bf0fda 100644
3283a7
--- a/fs/file.c
3283a7
+++ b/fs/file.c
3283a7
@@ -718,42 +718,43 @@ void do_close_on_exec(struct files_struct *files)
3283a7
 	spin_unlock(&files->file_lock);
3283a7
 }
3283a7
 
3283a7
-struct file *fget(unsigned int fd)
3283a7
+static struct file *__fget(unsigned int fd, fmode_t mask)
3283a7
 {
3283a7
-	struct file *file;
3283a7
 	struct files_struct *files = current->files;
3283a7
+	struct file *file;
3283a7
 
3283a7
 	rcu_read_lock();
3283a7
+loop:
3283a7
 	file = fcheck_files(files, fd);
3283a7
 	if (file) {
3283a7
-		/* File object ref couldn't be taken */
3283a7
-		if (file->f_mode & FMODE_PATH || !get_file_rcu(file))
3283a7
+		/* File object ref couldn't be taken.
3283a7
+		 * dup2() atomicity guarantee is the reason
3283a7
+		 * we loop to catch the new file (or NULL pointer)
3283a7
+		 */
3283a7
+		if (file->f_mode & mask)
3283a7
 			file = NULL;
3283a7
+		else if (!get_file_rcu(file))
3283a7
+			goto loop;
3283a7
+		else if (fcheck_files(files, fd) != file) {
3283a7
+			fput(file);
3283a7
+			goto loop;
3283a7
+		}
3283a7
 	}
3283a7
 	rcu_read_unlock();
3283a7
 
3283a7
 	return file;
3283a7
 }
3283a7
 
3283a7
+__attribute__((optimize("-fno-optimize-sibling-calls"))) struct file *fget(unsigned int fd)
3283a7
+{
3283a7
+	return __fget(fd, FMODE_PATH);
3283a7
+}
3283a7
 EXPORT_SYMBOL(fget);
3283a7
 
3283a7
-struct file *fget_raw(unsigned int fd)
3283a7
+__attribute__((optimize("-fno-optimize-sibling-calls"))) struct file *fget_raw(unsigned int fd)
3283a7
 {
3283a7
-	struct file *file;
3283a7
-	struct files_struct *files = current->files;
3283a7
-
3283a7
-	rcu_read_lock();
3283a7
-	file = fcheck_files(files, fd);
3283a7
-	if (file) {
3283a7
-		/* File object ref couldn't be taken */
3283a7
-		if (!atomic_long_inc_not_zero(&file->f_count))
3283a7
-			file = NULL;
3283a7
-	}
3283a7
-	rcu_read_unlock();
3283a7
-
3283a7
-	return file;
3283a7
+	return __fget(fd, 0);
3283a7
 }
3283a7
-
3283a7
 EXPORT_SYMBOL(fget_raw);
3283a7
 
3283a7
 /*
3283a7
@@ -772,56 +773,33 @@ EXPORT_SYMBOL(fget_raw);
3283a7
  * The fput_needed flag returned by fget_light should be passed to the
3283a7
  * corresponding fput_light.
3283a7
  */
3283a7
-struct file *fget_light(unsigned int fd, int *fput_needed)
3283a7
+struct file *__fget_light(unsigned int fd, fmode_t mask, int *fput_needed)
3283a7
 {
3283a7
-	struct file *file;
3283a7
 	struct files_struct *files = current->files;
3283a7
+	struct file *file;
3283a7
 
3283a7
 	*fput_needed = 0;
3283a7
 	if (atomic_read(&files->count) == 1) {
3283a7
-		file = fcheck_files(files, fd);
3283a7
-		if (file && (file->f_mode & FMODE_PATH))
3283a7
+		file = __fcheck_files(files, fd);
3283a7
+		if (file && (file->f_mode & mask))
3283a7
 			file = NULL;
3283a7
 	} else {
3283a7
-		rcu_read_lock();
3283a7
-		file = fcheck_files(files, fd);
3283a7
-		if (file) {
3283a7
-			if (!(file->f_mode & FMODE_PATH) &&
3283a7
-			    atomic_long_inc_not_zero(&file->f_count))
3283a7
-				*fput_needed = 1;
3283a7
-			else
3283a7
-				/* Didn't get the reference, someone's freed */
3283a7
-				file = NULL;
3283a7
-		}
3283a7
-		rcu_read_unlock();
3283a7
+		file = __fget(fd, mask);
3283a7
+		if (file)
3283a7
+			*fput_needed = 1;
3283a7
 	}
3283a7
 
3283a7
 	return file;
3283a7
 }
3283a7
+struct file *fget_light(unsigned int fd, int *fput_needed)
3283a7
+{
3283a7
+	return __fget_light(fd, FMODE_PATH, fput_needed);
3283a7
+}
3283a7
 EXPORT_SYMBOL(fget_light);
3283a7
 
3283a7
 struct file *fget_raw_light(unsigned int fd, int *fput_needed)
3283a7
 {
3283a7
-	struct file *file;
3283a7
-	struct files_struct *files = current->files;
3283a7
-
3283a7
-	*fput_needed = 0;
3283a7
-	if (atomic_read(&files->count) == 1) {
3283a7
-		file = fcheck_files(files, fd);
3283a7
-	} else {
3283a7
-		rcu_read_lock();
3283a7
-		file = fcheck_files(files, fd);
3283a7
-		if (file) {
3283a7
-			if (atomic_long_inc_not_zero(&file->f_count))
3283a7
-				*fput_needed = 1;
3283a7
-			else
3283a7
-				/* Didn't get the reference, someone's freed */
3283a7
-				file = NULL;
3283a7
-		}
3283a7
-		rcu_read_unlock();
3283a7
-	}
3283a7
-
3283a7
-	return file;
3283a7
+	return __fget_light(fd, 0, fput_needed);
3283a7
 }
3283a7
 
3283a7
 void set_close_on_exec(unsigned int fd, int flag)
3283a7
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
3283a7
index 88d74ca9418f..95bcca7c1a0f 100644
3283a7
--- a/include/linux/fdtable.h
3283a7
+++ b/include/linux/fdtable.h
3283a7
@@ -70,29 +70,36 @@ struct files_struct {
3283a7
 	RH_KABI_EXTEND(wait_queue_head_t resize_wait)
3283a7
 };
3283a7
 
3283a7
-#define rcu_dereference_check_fdtable(files, fdtfd) \
3283a7
-	(rcu_dereference_check((fdtfd), \
3283a7
-			       lockdep_is_held(&(files)->file_lock) || \
3283a7
-			       atomic_read(&(files)->count) == 1 || \
3283a7
-			       rcu_my_thread_group_empty()))
3283a7
-
3283a7
-#define files_fdtable(files) \
3283a7
-		(rcu_dereference_check_fdtable((files), (files)->fdt))
3283a7
-
3283a7
 struct file_operations;
3283a7
 struct vfsmount;
3283a7
 struct dentry;
3283a7
 
3283a7
-static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd)
3283a7
+#define rcu_dereference_check_fdtable(files, fdtfd) \
3283a7
+	rcu_dereference_check((fdtfd), lockdep_is_held(&(files)->file_lock))
3283a7
+
3283a7
+#define files_fdtable(files) \
3283a7
+	rcu_dereference_check_fdtable((files), (files)->fdt)
3283a7
+
3283a7
+/*
3283a7
+ * The caller must ensure that fd table isn't shared or hold rcu or file lock
3283a7
+ */
3283a7
+static inline struct file *__fcheck_files(struct files_struct *files, unsigned int fd)
3283a7
 {
3283a7
-	struct file * file = NULL;
3283a7
-	struct fdtable *fdt = files_fdtable(files);
3283a7
+	struct fdtable *fdt = rcu_dereference_raw(files->fdt);
3283a7
 
3283a7
 	if (fd < fdt->max_fds) {
3283a7
 		fd = array_index_nospec(fd, fdt->max_fds);
3283a7
-		file = rcu_dereference_check_fdtable(files, fdt->fd[fd]);
3283a7
+		return rcu_dereference_raw(fdt->fd[fd]);
3283a7
 	}
3283a7
-	return file;
3283a7
+	return NULL;
3283a7
+}
3283a7
+
3283a7
+static inline struct file *fcheck_files(struct files_struct *files, unsigned int fd)
3283a7
+{
3283a7
+	rcu_lockdep_assert(rcu_read_lock_held() ||
3283a7
+			   lockdep_is_held(&files->file_lock),
3283a7
+			   "suspicious rcu_dereference_check() usage");
3283a7
+	return __fcheck_files(files, fd);
3283a7
 }
3283a7
 
3283a7
 /*
3283a7
-- 
3283a7
2.26.3
3283a7
3283a7