From 0cab45913735bc9ba513da395b2ef5ce8cc41339 Mon Sep 17 00:00:00 2001 From: Michael Vogt Date: Oct 24 2024 07:16:13 +0000 Subject: spec: cherry pick openat2 qemu-user support from upstream 9651cea This commit adds support for the openat2 syscall to qemu-user. It is done via cherry picking upstream 9651cea and adding an extra commit with a bunch of `#ifdef TARGET_NR_openat2` so that this commit compiles on the `cris-linux-user` target which does not have this syscall. Cris is removed in upstream qemu after v9.1.0 so the ifdefs were not needed there but are needed here until cris is also removed from the RPM. --- diff --git a/0001-linux-user-add-openat2-support-in-linux-user.patch b/0001-linux-user-add-openat2-support-in-linux-user.patch new file mode 100644 index 0000000..ea4760a --- /dev/null +++ b/0001-linux-user-add-openat2-support-in-linux-user.patch @@ -0,0 +1,228 @@ +From 9651cead2f1bb34b9b72f9c2c5dc81baea2b082e Mon Sep 17 00:00:00 2001 +From: Michael Vogt +Date: Tue, 1 Oct 2024 17:14:53 +0200 +Subject: [PATCH] linux-user: add openat2 support in linux-user + +This commit adds support for the `openat2()` syscall in the +`linux-user` userspace emulator. + +It is implemented by extracting a new helper `maybe_do_fake_open()` +out of the existing `do_guest_openat()` and sharing that with the +new `do_guest_openat2()`. Unfortunately we cannot just make +do_guest_openat2() a superset of do_guest_openat() because the +openat2() syscall is stricter with the argument checking and +will return an error for invalid flags or mode combinations (which +open()/openat() will ignore). + +The implementation is similar to SYSCALL_DEFINE(openat2), i.e. +a new `copy_struct_from_user()` is used that works the same +as the kernel's version to support backwards-compatibility +for struct syscall arguments. + +Instead of including openat2.h we create a copy of `open_how` +as `open_how_ver0` to ensure that if the structure grows we +can log a LOG_UNIMP warning. 
+ +Note that in this commit using openat2() for a "faked" file in +/proc will honor the "resolve" flags for +RESOLVE_NO_{MAGIC,SYM}LINKS for path based access to /proc/self/exe +(which is the only magic link we support for faked files). +Note it will not catch special access via e.g. dirfd. This is not +great but it seems similar to the existing behavior when openat() +is called with a dirfd to "/proc". Here too the fake file lookup +may not catch the special file because no dirfd is used to +determine if the path is in /proc. + +Signed-off-by: Michael Vogt +Buglink: https://github.com/osbuild/bootc-image-builder/issues/619 +Reviewed-by: Laurent Vivier +Message-ID: <1c2c8c9db3731ed4c6fd9b10c63637c3e4caf8f5.1727795334.git.mvogt@redhat.com> +Signed-off-by: Richard Henderson +--- + linux-user/syscall.c | 105 +++++++++++++++++++++++++++++++++++++- + linux-user/syscall_defs.h | 13 +++++ + 2 files changed, 116 insertions(+), 2 deletions(-) + +diff --git a/linux-user/syscall.c b/linux-user/syscall.c +index a666986189..2febc3bc3f 100644 +--- a/linux-user/syscall.c ++++ b/linux-user/syscall.c +@@ -602,6 +602,34 @@ static int check_zeroed_user(abi_long addr, size_t ksize, size_t usize) + return 1; + } + ++/* ++ * Copies a target struct to a host struct, in a way that guarantees ++ * backwards-compatibility for struct syscall arguments. ++ * ++ * Similar to kernels uaccess.h:copy_struct_from_user() ++ */ ++static int ++copy_struct_from_user(void *dst, size_t ksize, abi_ptr src, size_t usize) ++{ ++ size_t size = MIN(ksize, usize); ++ size_t rest = MAX(ksize, usize) - size; ++ ++ /* Deal with trailing bytes. */ ++ if (usize < ksize) { ++ memset(dst + size, 0, rest); ++ } else if (usize > ksize) { ++ int ret = check_zeroed_user(src, ksize, usize); ++ if (ret <= 0) { ++ return ret ?: -TARGET_E2BIG; ++ } ++ } ++ /* Copy the interoperable parts of the struct. 
*/ ++ if (copy_from_user(dst, src, size)) { ++ return -TARGET_EFAULT; ++ } ++ return 0; ++} ++ + #define safe_syscall0(type, name) \ + static type safe_##name(void) \ + { \ +@@ -653,6 +681,15 @@ safe_syscall3(ssize_t, read, int, fd, void *, buff, size_t, count) + safe_syscall3(ssize_t, write, int, fd, const void *, buff, size_t, count) + safe_syscall4(int, openat, int, dirfd, const char *, pathname, \ + int, flags, mode_t, mode) ++ ++struct open_how_ver0 { ++ __u64 flags; ++ __u64 mode; ++ __u64 resolve; ++}; ++safe_syscall4(int, openat2, int, dirfd, const char *, pathname, \ ++ const struct open_how_ver0 *, how, size_t, size) ++ + #if defined(TARGET_NR_wait4) || defined(TARGET_NR_waitpid) + safe_syscall4(pid_t, wait4, pid_t, pid, int *, status, int, options, \ + struct rusage *, rusage) +@@ -8332,8 +8369,9 @@ static int open_net_route(CPUArchState *cpu_env, int fd) + } + #endif + +-int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *fname, +- int flags, mode_t mode, bool safe) ++static int maybe_do_fake_open(CPUArchState *cpu_env, int dirfd, ++ const char *fname, int flags, mode_t mode, ++ int openat2_resolve, bool safe) + { + g_autofree char *proc_name = NULL; + const char *pathname; +@@ -8370,6 +8408,12 @@ int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *fname, + } + + if (is_proc_myself(pathname, "exe")) { ++ /* Honor openat2 resolve flags */ ++ if ((openat2_resolve & RESOLVE_NO_MAGICLINKS) || ++ (openat2_resolve & RESOLVE_NO_SYMLINKS)) { ++ errno = ELOOP; ++ return -1; ++ } + if (safe) { + return safe_openat(dirfd, exec_path, flags, mode); + } else { +@@ -8416,6 +8460,17 @@ int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *fname, + return fd; + } + ++ return -2; ++} ++ ++int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *pathname, ++ int flags, mode_t mode, bool safe) ++{ ++ int fd = maybe_do_fake_open(cpu_env, dirfd, pathname, flags, mode, 0, safe); ++ if (fd > -2) { ++ return fd; ++ } ++ + if 
(safe) { + return safe_openat(dirfd, path(pathname), flags, mode); + } else { +@@ -8423,6 +8478,49 @@ int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *fname, + } + } + ++ ++static int do_openat2(CPUArchState *cpu_env, abi_long dirfd, ++ abi_ptr guest_pathname, abi_ptr guest_open_how, ++ abi_ulong guest_size) ++{ ++ struct open_how_ver0 how = {0}; ++ char *pathname; ++ int ret; ++ ++ if (guest_size < sizeof(struct target_open_how_ver0)) { ++ return -TARGET_EINVAL; ++ } ++ ret = copy_struct_from_user(&how, sizeof(how), guest_open_how, guest_size); ++ if (ret) { ++ if (ret == -TARGET_E2BIG) { ++ qemu_log_mask(LOG_UNIMP, ++ "Unimplemented openat2 open_how size: " ++ TARGET_ABI_FMT_lu "\n", guest_size); ++ } ++ return ret; ++ } ++ pathname = lock_user_string(guest_pathname); ++ if (!pathname) { ++ return -TARGET_EFAULT; ++ } ++ ++ how.flags = target_to_host_bitmask(tswap64(how.flags), fcntl_flags_tbl); ++ how.mode = tswap64(how.mode); ++ how.resolve = tswap64(how.resolve); ++ int fd = maybe_do_fake_open(cpu_env, dirfd, pathname, how.flags, how.mode, ++ how.resolve, true); ++ if (fd > -2) { ++ ret = get_errno(fd); ++ } else { ++ ret = get_errno(safe_openat2(dirfd, pathname, &how, ++ sizeof(struct open_how_ver0))); ++ } ++ ++ fd_trans_unregister(ret); ++ unlock_user(pathname, guest_pathname, 0); ++ return ret; ++} ++ + ssize_t do_guest_readlink(const char *pathname, char *buf, size_t bufsiz) + { + ssize_t ret; +@@ -9195,6 +9293,9 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1, + fd_trans_unregister(ret); + unlock_user(p, arg2, 0); + return ret; ++ case TARGET_NR_openat2: ++ ret = do_openat2(cpu_env, arg1, arg2, arg3, arg4); ++ return ret; + #if defined(TARGET_NR_name_to_handle_at) && defined(CONFIG_OPEN_BY_HANDLE) + case TARGET_NR_name_to_handle_at: + ret = do_name_to_handle_at(arg1, arg2, arg3, arg4, arg5); +diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h +index e08d088740..de5091c977 100644 +--- 
a/linux-user/syscall_defs.h ++++ b/linux-user/syscall_defs.h +@@ -2748,4 +2748,17 @@ struct target_sched_param { + abi_int sched_priority; + }; + ++/* from kernel's include/uapi/linux/openat2.h */ ++struct target_open_how_ver0 { ++ abi_ullong flags; ++ abi_ullong mode; ++ abi_ullong resolve; ++}; ++#ifndef RESOLVE_NO_MAGICLINKS ++#define RESOLVE_NO_MAGICLINKS 0x02 ++#endif ++#ifndef RESOLVE_NO_SYMLINKS ++#define RESOLVE_NO_SYMLINKS 0x04 ++#endif ++ + #endif +-- +2.47.0 + diff --git a/0001-linux-user-guard-openat2-with-if-defined-TARGET_NR_o.patch b/0001-linux-user-guard-openat2-with-if-defined-TARGET_NR_o.patch new file mode 100644 index 0000000..83d159e --- /dev/null +++ b/0001-linux-user-guard-openat2-with-if-defined-TARGET_NR_o.patch @@ -0,0 +1,85 @@ +From b5aa46fc7bb03877bbea711903e19ad4e27e8259 Mon Sep 17 00:00:00 2001 +From: Michael Vogt +Date: Wed, 23 Oct 2024 09:50:56 +0200 +Subject: [PATCH] linux-user: guard openat2 with `#if + defined(TARGET_NR_openat2)` + +This commit adds a bunch of `#ifdef` around the openat2 support. +We need this to build the `cris-linux-user` target which is still +present in this version but got dropped from upstream in commit +44e4075bf4 but is still present in v9.1.0. + +This patch can be dropped once cris is also removed from the +package. 
+--- + linux-user/syscall.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/linux-user/syscall.c b/linux-user/syscall.c +index 85d61db546..22e5ad3c5f 100644 +--- a/linux-user/syscall.c ++++ b/linux-user/syscall.c +@@ -608,6 +608,7 @@ static int check_zeroed_user(abi_long addr, size_t ksize, size_t usize) + * + * Similar to kernels uaccess.h:copy_struct_from_user() + */ ++#if defined(TARGET_NR_openat2) + static int + copy_struct_from_user(void *dst, size_t ksize, abi_ptr src, size_t usize) + { +@@ -629,6 +630,7 @@ copy_struct_from_user(void *dst, size_t ksize, abi_ptr src, size_t usize) + } + return 0; + } ++#endif + + #define safe_syscall0(type, name) \ + static type safe_##name(void) \ +@@ -682,6 +684,7 @@ safe_syscall3(ssize_t, write, int, fd, const void *, buff, size_t, count) + safe_syscall4(int, openat, int, dirfd, const char *, pathname, \ + int, flags, mode_t, mode) + ++#if defined(TARGET_NR_openat2) + struct open_how_ver0 { + __u64 flags; + __u64 mode; +@@ -689,6 +692,7 @@ struct open_how_ver0 { + }; + safe_syscall4(int, openat2, int, dirfd, const char *, pathname, \ + const struct open_how_ver0 *, how, size_t, size) ++#endif + + #if defined(TARGET_NR_wait4) || defined(TARGET_NR_waitpid) + safe_syscall4(pid_t, wait4, pid_t, pid, int *, status, int, options, \ +@@ -8480,7 +8484,7 @@ int do_guest_openat(CPUArchState *cpu_env, int dirfd, const char *pathname, + } + } + +- ++#if defined(TARGET_NR_openat2) + static int do_openat2(CPUArchState *cpu_env, abi_long dirfd, + abi_ptr guest_pathname, abi_ptr guest_open_how, + abi_ulong guest_size) +@@ -8522,6 +8526,7 @@ static int do_openat2(CPUArchState *cpu_env, abi_long dirfd, + unlock_user(pathname, guest_pathname, 0); + return ret; + } ++#endif + + ssize_t do_guest_readlink(const char *pathname, char *buf, size_t bufsiz) + { +@@ -9295,9 +9300,11 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1, + fd_trans_unregister(ret); + unlock_user(p, arg2, 0); + 
return ret; ++#if defined(TARGET_NR_openat2) + case TARGET_NR_openat2: + ret = do_openat2(cpu_env, arg1, arg2, arg3, arg4); + return ret; ++#endif + #if defined(TARGET_NR_name_to_handle_at) && defined(CONFIG_OPEN_BY_HANDLE) + case TARGET_NR_name_to_handle_at: + ret = do_name_to_handle_at(arg1, arg2, arg3, arg4, arg5); +-- +2.47.0 + diff --git a/qemu.spec b/qemu.spec index e5ee8c3..6db65fe 100644 --- a/qemu.spec +++ b/qemu.spec @@ -423,6 +423,12 @@ Patch: 0001-Disable-9p-local-tests-that-fail-on-copr-aarch64.patch # Fix compat with new glibc (not upstream yet) Patch: schedattr.patch +# Openat2 support (upstream commit 9651cea) +Patch: 0001-linux-user-add-openat2-support-in-linux-user.patch +# linux-user-cris support for openat2, can be removed once "cris" is +# removed (after v9.1.0) +Patch: 0001-linux-user-guard-openat2-with-if-defined-TARGET_NR_o.patch + BuildRequires: gnupg2 BuildRequires: meson >= %{meson_version} BuildRequires: bison