diff -up util-linux-2.23.2/include/pathnames.h.kzak util-linux-2.23.2/include/pathnames.h
--- util-linux-2.23.2/include/pathnames.h.kzak 2015-06-26 10:00:19.111877564 +0200
+++ util-linux-2.23.2/include/pathnames.h 2015-06-26 10:00:51.623630869 +0200
@@ -85,6 +85,10 @@
#define _PATH_PROC_LOCKS "/proc/locks"
#define _PATH_PROC_CDROMINFO "/proc/sys/dev/cdrom/info"
+#define _PATH_PROC_UIDMAP "/proc/self/uid_map"
+#define _PATH_PROC_GIDMAP "/proc/self/gid_map"
+#define _PATH_PROC_SETGROUPS "/proc/self/setgroups"
+
#define _PATH_PROC_ATTR_CURRENT "/proc/self/attr/current"
#define _PATH_PROC_ATTR_EXEC "/proc/self/attr/exec"
#define _PATH_PROC_CAPLASTCAP "/proc/sys/kernel/cap_last_cap"
diff -up util-linux-2.23.2/sys-utils/Makemodule.am.kzak util-linux-2.23.2/sys-utils/Makemodule.am
diff -up util-linux-2.23.2/sys-utils/nsenter.1.kzak util-linux-2.23.2/sys-utils/nsenter.1
--- util-linux-2.23.2/sys-utils/nsenter.1.kzak 2015-06-26 09:58:39.468633643 +0200
+++ util-linux-2.23.2/sys-utils/nsenter.1 2015-06-26 09:58:51.672541041 +0200
@@ -1,44 +1,45 @@
-.TH NSENTER 1 "January 2013" "util-linux" "User Commands"
+.TH NSENTER 1 "June 2013" "util-linux" "User Commands"
.SH NAME
nsenter \- run program with namespaces of other processes
.SH SYNOPSIS
.B nsenter
-.RI [ options ]
-.RI [ program ]
-.RI [ arguments ]
+[options]
+.RI [ program
+.RI [ arguments ]]
.SH DESCRIPTION
Enters the namespaces of one or more other processes and then executes the specified
program. Enterable namespaces are:
.TP
.B mount namespace
-mounting and unmounting filesystems will not affect rest of the system
+Mounting and unmounting filesystems will not affect the rest of the system
.RB ( CLONE_\:NEWNS
-flag), except for filesystems which are explicitly marked as shared (by mount
---make-\:shared). See /proc\:/self\:/mountinfo for the shared flag.
+flag), except for filesystems which are explicitly marked as shared (with
+\fBmount --make-\:shared\fP; see \fI/proc\:/self\:/mountinfo\fP for the
+\fBshared\fP flag).
.TP
.B UTS namespace
-setting hostname, domainname will not affect rest of the system
+Setting hostname or domainname will not affect the rest of the system.
.RB ( CLONE_\:NEWUTS
-flag).
+flag)
.TP
.B IPC namespace
-process will have independent namespace for System V message queues, semaphore
-sets and shared memory segments
+The process will have an independent namespace for System V message queues,
+semaphore sets and shared memory segments.
.RB ( CLONE_\:NEWIPC
-flag).
+flag)
.TP
.B network namespace
-process will have independent IPv4 and IPv6 stacks, IP routing tables, firewall
-rules, the
+The process will have independent IPv4 and IPv6 stacks, IP routing tables,
+firewall rules, the
.I /proc\:/net
and
.I /sys\:/class\:/net
-directory trees, sockets etc.
+directory trees, sockets, etc.
.RB ( CLONE_\:NEWNET
-flag).
+flag)
.TP
.B PID namespace
-children will have a set of PID to process mappings separate from the
+Children will have a set of PID to process mappings separate from the
.B nsenter
process
.RB ( CLONE_\:NEWPID
@@ -46,18 +47,18 @@ flag).
.B nsenter
will fork by default if changing the PID namespace, so that the new program
and its children share the same PID namespace and are visible to each other.
-If \-\-no\-fork is used, the new program will be exec'ed without forking.
-.PP
-See the
-.BR clone (2)
-for exact semantics of the flags.
+If \fB\-\-no\-fork\fP is used, the new program will be exec'ed without forking.
.TP
-If program is not given, run ``${SHELL}'' (default: /bin\:/sh).
+.B user namespace
+The process will have a distinct set of UIDs, GIDs and capabilities.
+.RB ( CLONE_\:NEWUSER
+flag)
+.TP
+See \fBclone\fP(2) for the exact semantics of the flags.
+.TP
+If \fIprogram\fP is not given, then ``${SHELL}'' is run (default: /bin\:/sh).
.SH OPTIONS
-Argument with square brakets, such as [\fIfile\fR], means optional argument.
-Command line syntax to specify optional argument \-\-mount=/path\:/to\:/file.
-Please notice the equals sign.
.TP
\fB\-t\fR, \fB\-\-target\fR \fIpid\fP
Specify a target process to get contexts from. The paths to the contexts
@@ -83,6 +84,9 @@ the network namespace
/proc/\fIpid\fR/ns/pid
the PID namespace
.TP
+/proc/\fIpid\fR/ns/user
+the user namespace
+.TP
/proc/\fIpid\fR/root
the root directory
.TP
@@ -91,51 +95,71 @@ the working directory respectively
.PD
.RE
.TP
-\fB\-m\fR, \fB\-\-mount\fR [\fIfile\fR]
-Enter the mount namespace. If no file is specified enter the mount namespace
-of the target process. If file is specified enter the mount namespace
+\fB\-m\fR, \fB\-\-mount\fR[=\fIfile\fR]
+Enter the mount namespace. If no file is specified, enter the mount namespace
+of the target process. If file is specified, enter the mount namespace
specified by file.
.TP
-\fB\-u\fR, \fB\-\-uts\fR [\fIfile\fR]
-Enter the UTS namespace. If no file is specified enter the UTS namespace of
-the target process. If file is specified enter the UTS namespace specified by
+\fB\-u\fR, \fB\-\-uts\fR[=\fIfile\fR]
+Enter the UTS namespace. If no file is specified, enter the UTS namespace of
+the target process. If file is specified, enter the UTS namespace specified by
file.
.TP
-\fB\-i\fR, \fB\-\-ipc\fR [\fIfile\fR]
-Enter the IPC namespace. If no file is specified enter the IPC namespace of
-the target process. If file is specified enter the IPC namespace specified by
+\fB\-i\fR, \fB\-\-ipc\fR[=\fIfile\fR]
+Enter the IPC namespace. If no file is specified, enter the IPC namespace of
+the target process. If file is specified, enter the IPC namespace specified by
file.
.TP
-\fB\-n\fR, \fB\-\-net\fR [\fIfile\fR]
-Enter the network namespace. If no file is specified enter the network
-namespace of the target process. If file is specified enter the network
+\fB\-n\fR, \fB\-\-net\fR[=\fIfile\fR]
+Enter the network namespace. If no file is specified, enter the network
+namespace of the target process. If file is specified, enter the network
namespace specified by file.
.TP
-\fB\-p\fR, \fB\-\-pid\fR [\fIfile\fR]
-Enter the PID namespace. If no file is specified enter the PID namespace of
-the target process. If file is specified enter the PID namespace specified by
+\fB\-p\fR, \fB\-\-pid\fR[=\fIfile\fR]
+Enter the PID namespace. If no file is specified, enter the PID namespace of
+the target process. If file is specified, enter the PID namespace specified by
file.
.TP
-\fB\-r\fR, \fB\-\-root\fR [\fIdirectory\fR]
-Set the root directory. If no directory is specified set the root directory to
-the root directory of the target process. If directory is specified set the
+\fB\-U\fR, \fB\-\-user\fR[=\fIfile\fR]
+Enter the user namespace. If no file is specified, enter the user namespace of
+the target process. If file is specified, enter the user namespace specified by
+file. See also the \fB\-\-setuid\fR and \fB\-\-setgid\fR options.
+.TP
+\fB\-G\fR, \fB\-\-setgid\fR \fIgid\fR
+Set the group ID which will be used in the entered namespace and drop
+supplementary groups.
+.BR nsenter (1)
+always sets GID for user namespaces, the default is 0.
+.TP
+\fB\-S\fR, \fB\-\-setuid\fR \fIuid\fR
+Set the user ID which will be used in the entered namespace.
+.BR nsenter (1)
+always sets UID for user namespaces, the default is 0.
+.TP
+\fB\-\-preserve\-credentials\fR
+Don't modify UID and GID when enter user namespace. The default is to
+drops supplementary groups and sets GID and UID to 0.
+.TP
+\fB\-r\fR, \fB\-\-root\fR[=\fIdirectory\fR]
+Set the root directory. If no directory is specified, set the root directory to
+the root directory of the target process. If directory is specified, set the
root directory to the specified directory.
.TP
-\fB\-w\fR, \fB\-\-wd\fR [\fIdirectory\fR]
-Set the working directory. If no directory is specified set the working
+\fB\-w\fR, \fB\-\-wd\fR[=\fIdirectory\fR]
+Set the working directory. If no directory is specified, set the working
directory to the working directory of the target process. If directory is
-specified set the working directory to the specified directory.
+specified, set the working directory to the specified directory.
.TP
-\fB\-F\fR, \fB\-\-no-fork\fR
-Do not fork before exec'ing the specified program. By default when entering a
-pid namespace enter calls fork before calling exec so that the children will be
-in the newly entered pid namespace.
+\fB\-F\fR, \fB\-\-no\-fork\fR
+Do not fork before exec'ing the specified program. By default, when entering a
+PID namespace, \fBnsenter\fP calls \fBfork\fP before calling \fBexec\fP so that
+any children will also be in the newly entered PID namespace.
.TP
\fB\-V\fR, \fB\-\-version\fR
Display version information and exit.
.TP
\fB\-h\fR, \fB\-\-help\fR
-Print a help message.
+Display help text and exit.
.SH SEE ALSO
.BR setns (2),
.BR clone (2)
diff -up util-linux-2.23.2/sys-utils/nsenter.c.kzak util-linux-2.23.2/sys-utils/nsenter.c
--- util-linux-2.23.2/sys-utils/nsenter.c.kzak 2015-06-26 09:58:39.468633643 +0200
+++ util-linux-2.23.2/sys-utils/nsenter.c 2015-06-26 09:58:51.673541033 +0200
@@ -28,6 +28,7 @@
#include <assert.h>
#include <sys/types.h>
#include <sys/wait.h>
+#include <grp.h>
#include "strutils.h"
#include "nls.h"
@@ -42,7 +43,12 @@ static struct namespace_file {
int fd;
} namespace_files[] = {
/* Careful the order is significant in this array.
+ *
+ * The user namespace comes first, so that it is entered
+ * first. This gives an unprivileged user the potential to
+ * enter the other namespaces.
*/
+ { .nstype = CLONE_NEWUSER, .name = "ns/user", .fd = -1 },
{ .nstype = CLONE_NEWIPC, .name = "ns/ipc", .fd = -1 },
{ .nstype = CLONE_NEWUTS, .name = "ns/uts", .fd = -1 },
{ .nstype = CLONE_NEWNET, .name = "ns/net", .fd = -1 },
@@ -56,18 +62,25 @@ static void usage(int status)
FILE *out = status == EXIT_SUCCESS ? stdout : stderr;
fputs(USAGE_HEADER, out);
- fprintf(out, _(" %s [options] <program> [args...]\n"),
+ fprintf(out, _(" %s [options] <program> [<argument>...]\n"),
program_invocation_short_name);
+ fputs(USAGE_SEPARATOR, out);
+ fputs(_("Run a program with namespaces of other processes.\n"), out);
+
fputs(USAGE_OPTIONS, out);
fputs(_(" -t, --target <pid> target process to get namespaces from\n"), out);
- fputs(_(" -m, --mount [=<file>] enter mount namespace\n"), out);
- fputs(_(" -u, --uts [=<file>] enter UTS namespace (hostname etc)\n"), out);
- fputs(_(" -i, --ipc [=<file>] enter System V IPC namespace\n"), out);
- fputs(_(" -n, --net [=<file>] enter network namespace\n"), out);
- fputs(_(" -p, --pid [=<file>] enter pid namespace\n"), out);
- fputs(_(" -r, --root [=<dir>] set the root directory\n"), out);
- fputs(_(" -w, --wd [=<dir>] set the working directory\n"), out);
+ fputs(_(" -m, --mount[=<file>] enter mount namespace\n"), out);
+ fputs(_(" -u, --uts[=<file>] enter UTS namespace (hostname etc)\n"), out);
+ fputs(_(" -i, --ipc[=<file>] enter System V IPC namespace\n"), out);
+ fputs(_(" -n, --net[=<file>] enter network namespace\n"), out);
+ fputs(_(" -p, --pid[=<file>] enter pid namespace\n"), out);
+ fputs(_(" -U, --user[=<file>] enter user namespace\n"), out);
+ fputs(_(" -S, --setuid <uid> set uid in entered namespace\n"), out);
+ fputs(_(" -G, --setgid <gid> set gid in entered namespace\n"), out);
+ fputs(_(" --preserve-credentials do not touch uids or gids\n"), out);
+ fputs(_(" -r, --root[=<dir>] set the root directory\n"), out);
+ fputs(_(" -w, --wd[=<dir>] set the working directory\n"), out);
fputs(_(" -F, --no-fork do not fork before exec'ing <program>\n"), out);
fputs(USAGE_SEPARATOR, out);
@@ -153,6 +166,9 @@ static void continue_as_child(void)
int main(int argc, char *argv[])
{
+ enum {
+ OPT_PRESERVE_CRED = CHAR_MAX + 1
+ };
static const struct option longopts[] = {
{ "help", no_argument, NULL, 'h' },
{ "version", no_argument, NULL, 'V'},
@@ -162,24 +178,30 @@ int main(int argc, char *argv[])
{ "ipc", optional_argument, NULL, 'i' },
{ "net", optional_argument, NULL, 'n' },
{ "pid", optional_argument, NULL, 'p' },
+ { "user", optional_argument, NULL, 'U' },
+ { "setuid", required_argument, NULL, 'S' },
+ { "setgid", required_argument, NULL, 'G' },
{ "root", optional_argument, NULL, 'r' },
{ "wd", optional_argument, NULL, 'w' },
{ "no-fork", no_argument, NULL, 'F' },
+ { "preserve-credentials", no_argument, NULL, OPT_PRESERVE_CRED },
{ NULL, 0, NULL, 0 }
};
struct namespace_file *nsfile;
- int c, namespaces = 0;
- bool do_rd = false, do_wd = false;
+ int c, namespaces = 0, setgroups_nerrs = 0, preserve_cred = 0;
+ bool do_rd = false, do_wd = false, force_uid = false, force_gid = false;
int do_fork = -1; /* unknown yet */
+ uid_t uid = 0;
+ gid_t gid = 0;
- setlocale(LC_MESSAGES, "");
+ setlocale(LC_ALL, "");
bindtextdomain(PACKAGE, LOCALEDIR);
textdomain(PACKAGE);
atexit(close_stdout);
while ((c =
- getopt_long(argc, argv, "hVt:m::u::i::n::p::r::w::F",
+ getopt_long(argc, argv, "+hVt:m::u::i::n::p::U::S:G:r::w::F",
longopts, NULL)) != -1) {
switch (c) {
case 'h':
@@ -221,6 +243,20 @@ int main(int argc, char *argv[])
else
namespaces |= CLONE_NEWPID;
break;
+ case 'U':
+ if (optarg)
+ open_namespace_fd(CLONE_NEWUSER, optarg);
+ else
+ namespaces |= CLONE_NEWUSER;
+ break;
+ case 'S':
+ uid = strtoul_or_err(optarg, _("failed to parse uid"));
+ force_uid = true;
+ break;
+ case 'G':
+ gid = strtoul_or_err(optarg, _("failed to parse gid"));
+ force_gid = true;
+ break;
case 'F':
do_fork = 0;
break;
@@ -236,6 +272,9 @@ int main(int argc, char *argv[])
else
do_wd = true;
break;
+ case OPT_PRESERVE_CRED:
+ preserve_cred = 1;
+ break;
default:
usage(EXIT_FAILURE);
}
@@ -253,6 +292,26 @@ int main(int argc, char *argv[])
open_target_fd(&wd_fd, "cwd", NULL);
/*
+ * Update namespaces variable to contain all requested namespaces
+ */
+ for (nsfile = namespace_files; nsfile->nstype; nsfile++) {
+ if (nsfile->fd < 0)
+ continue;
+ namespaces |= nsfile->nstype;
+ }
+
+ /* for user namespaces we always set UID and GID (default is 0)
+ * and clear root's groups if --preserve-credentials is no specified */
+ if ((namespaces & CLONE_NEWUSER) && !preserve_cred) {
+ force_uid = true, force_gid = true;
+
+ /* We call setgroups() before and after we enter user namespace,
+ * let's complain only if both fail */
+ if (setgroups(0, NULL) != 0)
+ setgroups_nerrs++;
+ }
+
+ /*
* Now that we know which namespaces we want to enter, enter them.
*/
for (nsfile = namespace_files; nsfile->nstype; nsfile++) {
@@ -302,6 +361,15 @@ int main(int argc, char *argv[])
if (do_fork == 1)
continue_as_child();
+ if (force_uid || force_gid) {
+ if (force_gid && setgroups(0, NULL) != 0 && setgroups_nerrs) /* drop supplementary groups */
+ err(EXIT_FAILURE, _("setgroups failed"));
+ if (force_gid && setgid(gid) < 0) /* change GID */
+ err(EXIT_FAILURE, _("setgid failed"));
+ if (force_uid && setuid(uid) < 0) /* change UID */
+ err(EXIT_FAILURE, _("setuid failed"));
+ }
+
if (optind < argc) {
execvp(argv[optind], argv + optind);
err(EXIT_FAILURE, _("failed to execute %s"), argv[optind]);
diff -up util-linux-2.23.2/sys-utils/unshare.1.kzak util-linux-2.23.2/sys-utils/unshare.1
--- util-linux-2.23.2/sys-utils/unshare.1.kzak 2015-06-26 09:58:39.484633521 +0200
+++ util-linux-2.23.2/sys-utils/unshare.1 2015-06-26 09:58:51.673541033 +0200
@@ -1,28 +1,27 @@
-.\" Process this file with
-.\" groff -man -Tascii lscpu.1
-.\"
-.TH UNSHARE 1 "July 2013" "util-linux" "User Commands"
+.TH UNSHARE 1 "July 2014" "util-linux" "User Commands"
.SH NAME
unshare \- run program with some namespaces unshared from parent
.SH SYNOPSIS
.B unshare
-.RI [ options ]
+[options]
.I program
.RI [ arguments ]
.SH DESCRIPTION
Unshares the indicated namespaces from the parent process and then executes
-the specified program. The namespaces to be unshared are indicated via
+the specified \fIprogram\fR. The namespaces to be unshared are indicated via
options. Unshareable namespaces are:
.TP
.BR "mount namespace"
Mounting and unmounting filesystems will not affect the rest of the system
(\fBCLONE_NEWNS\fP flag), except for filesystems which are explicitly marked as
-shared (with \fBmount --make-shared\fP; see \fI/proc/self/mountinfo\fP for the
-\fBshared\fP flags).
-
-It's recommended to use \fBmount --make-rprivate\fP or \fBmount --make-rslave\fP
-after \fBunshare --mount\fP to make sure that mountpoints in the new namespace
-are really unshared from parental namespace.
+shared (with \fBmount --make-shared\fP; see \fI/proc/self/mountinfo\fP or
+\fBfindmnt -o+PROPAGATION\fP for the \fBshared\fP flags).
+.sp
+.B unshare
+automatically sets propagation to \fBprivate\fP
+in the new mount namespace to make sure that the new namespace is really
+unshared. This feature is possible to disable by option \fB\-\-propagation unchanged\fP.
+Note that \fBprivate\fP is the kernel default.
.TP
.BR "UTS namespace"
Setting hostname or domainname will not affect the rest of the system.
@@ -40,13 +39,14 @@ sockets, etc. (\fBCLONE_NEWNET\fP flag)
.BR "pid namespace"
Children will have a distinct set of PID to process mappings from their parent.
(\fBCLONE_NEWPID\fP flag)
+.TP
+.BR "user namespace"
+The process will have a distinct set of UIDs, GIDs and capabilities.
+(\fBCLONE_NEWUSER\fP flag)
.PP
See \fBclone\fR(2) for the exact semantics of the flags.
.SH OPTIONS
.TP
-.BR \-h , " \-\-help"
-Display help text and exit.
-.TP
.BR \-i , " \-\-ipc"
Unshare the IPC namespace.
.TP
@@ -63,16 +63,68 @@ See also the \fB--fork\fP and \fB--mount
.BR \-u , " \-\-uts"
Unshare the UTS namespace.
.TP
+.BR \-U , " \-\-user"
+Unshare the user namespace.
+.TP
.BR \-f , " \-\-fork"
Fork the specified \fIprogram\fR as a child process of \fBunshare\fR rather than
running it directly. This is useful when creating a new pid namespace.
.TP
-.BR \-\-mount-proc "[=\fImountpoint\fP]"
-Just before running the program, mount the proc filesystem at the \fImountpoint\fP
+.BR \-\-mount\-proc "[=\fImountpoint\fP]"
+Just before running the program, mount the proc filesystem at \fImountpoint\fP
(default is /proc). This is useful when creating a new pid namespace. It also
implies creating a new mount namespace since the /proc mount would otherwise
-mess up existing programs on the system. The new proc filesystem is explicitly
+mess up existing programs on the system. The new proc filesystem is explicitly
mounted as private (by MS_PRIVATE|MS_REC).
+.TP
+.BR \-r , " \-\-map\-root\-user"
+Run the program only after the current effective user and group IDs have been mapped to
+the superuser UID and GID in the newly created user namespace. This makes it possible to
+conveniently gain capabilities needed to manage various aspects of the newly created
+namespaces (such as configuring interfaces in the network namespace or mounting filesystems in
+the mount namespace) even when run unprivileged. As a mere convenience feature, it does not support
+more sophisticated use cases, such as mapping multiple ranges of UIDs and GIDs.
+This option implies --setgroups=deny.
+.TP
+.BR "\-\-propagation \fIprivate|shared|slave|unchanged\fP"
+Recursively sets mount propagation flag in the new mount namespace. The default
+is to set the propagation to \fIprivate\fP, this feature is possible to disable
+by \fIunchanged\fP argument. The options is silently ignored when mount namespace (\fB\-\-mount\fP)
+is not requested.
+.TP
+.BR "\-\-setgroups \fIallow|deny\fP"
+Allow or deny
+.BR setgroups (2)
+syscall in user namespaces.
+
+.BR setgroups(2)
+is only callable with CAP_SETGID and CAP_SETGID in a user
+namespace (since Linux 3.19) does not give you permission to call setgroups(2)
+until after GID map has been set. The GID map is writable by root when
+.BR setgroups(2)
+is enabled and GID map becomes writable by unprivileged processes when
+.BR setgroups(2)
+is permanently disabled.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH EXAMPLES
+.TP
+.B # unshare --fork --pid --mount-proc readlink /proc/self
+.TQ
+1
+.br
+Establish a PID namespace, ensure we're PID 1 in it against newly mounted
+procfs instance.
+.TP
+.B $ unshare --map-root-user --user sh -c whoami
+.TQ
+root
+.br
+Establish a user namespace as an unprivileged user with a root user within it.
.SH SEE ALSO
.BR unshare (2),
.BR clone (2),
diff -up util-linux-2.23.2/sys-utils/unshare.c.kzak util-linux-2.23.2/sys-utils/unshare.c
--- util-linux-2.23.2/sys-utils/unshare.c.kzak 2015-06-26 09:58:39.484633521 +0200
+++ util-linux-2.23.2/sys-utils/unshare.c 2015-06-26 09:58:51.673541033 +0200
@@ -32,19 +32,117 @@
#include "nls.h"
#include "c.h"
+#include "closestream.h"
#include "namespace.h"
#include "exec_shell.h"
#include "xalloc.h"
#include "pathnames.h"
+#include "all-io.h"
+/* 'private' is kernel default */
+#define UNSHARE_PROPAGATION_DEFAULT (MS_REC | MS_PRIVATE)
+
+enum {
+ SETGROUPS_NONE = -1,
+ SETGROUPS_DENY = 0,
+ SETGROUPS_ALLOW = 1,
+};
+
+static const char *setgroups_strings[] =
+{
+ [SETGROUPS_DENY] = "deny",
+ [SETGROUPS_ALLOW] = "allow"
+};
+
+static int setgroups_str2id(const char *str)
+{
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(setgroups_strings); i++)
+ if (strcmp(str, setgroups_strings[i]) == 0)
+ return i;
+
+ errx(EXIT_FAILURE, _("unsupported --setgroups argument '%s'"), str);
+}
+
+static void setgroups_control(int action)
+{
+ const char *file = _PATH_PROC_SETGROUPS;
+ const char *cmd;
+ int fd;
+
+ if (action < 0 || (size_t) action >= ARRAY_SIZE(setgroups_strings))
+ return;
+ cmd = setgroups_strings[action];
+
+ fd = open(file, O_WRONLY);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ return;
+ err(EXIT_FAILURE, _("cannot open %s"), file);
+ }
+
+ if (write_all(fd, cmd, strlen(cmd)))
+ err(EXIT_FAILURE, _("write failed %s"), file);
+ close(fd);
+}
+
+static void map_id(const char *file, uint32_t from, uint32_t to)
+{
+ char *buf;
+ int fd;
+
+ fd = open(file, O_WRONLY);
+ if (fd < 0)
+ err(EXIT_FAILURE, _("cannot open %s"), file);
+
+ xasprintf(&buf, "%u %u 1", from, to);
+ if (write_all(fd, buf, strlen(buf)))
+ err(EXIT_FAILURE, _("write failed %s"), file);
+ free(buf);
+ close(fd);
+}
+
+static unsigned long parse_propagation(const char *str)
+{
+ size_t i;
+ static const struct prop_opts {
+ const char *name;
+ unsigned long flag;
+ } opts[] = {
+ { "slave", MS_REC | MS_SLAVE },
+ { "private", MS_REC | MS_PRIVATE },
+ { "shared", MS_REC | MS_SHARED },
+ { "unchanged", 0 }
+ };
+
+ for (i = 0; i < ARRAY_SIZE(opts); i++) {
+ if (strcmp(opts[i].name, str) == 0)
+ return opts[i].flag;
+ }
+
+ errx(EXIT_FAILURE, _("unsupported propagation mode: %s"), str);
+}
+
+static void set_propagation(unsigned long flags)
+{
+ if (flags == 0)
+ return;
+
+ if (mount("none", "/", NULL, flags, NULL) != 0)
+ err(EXIT_FAILURE, _("cannot change root filesystem propagation"));
+}
static void usage(int status)
{
FILE *out = status == EXIT_SUCCESS ? stdout : stderr;
fputs(USAGE_HEADER, out);
- fprintf(out,
- _(" %s [options] <program> [args...]\n"), program_invocation_short_name);
+ fprintf(out, _(" %s [options] <program> [<argument>...]\n"),
+ program_invocation_short_name);
+
+ fputs(USAGE_SEPARATOR, out);
+ fputs(_("Run a program with some namespaces unshared from the parent.\n"), out);
fputs(USAGE_OPTIONS, out);
fputs(_(" -m, --mount unshare mounts namespace\n"), out);
@@ -52,8 +150,13 @@ static void usage(int status)
fputs(_(" -i, --ipc unshare System V IPC namespace\n"), out);
fputs(_(" -n, --net unshare network namespace\n"), out);
fputs(_(" -p, --pid unshare pid namespace\n"), out);
+ fputs(_(" -U, --user unshare user namespace\n"), out);
fputs(_(" -f, --fork fork before launching <program>\n"), out);
fputs(_(" --mount-proc[=<dir>] mount proc filesystem first (implies --mount)\n"), out);
+ fputs(_(" -r, --map-root-user map current user to root (implies --user)\n"), out);
+ fputs(_(" --propagation <slave|shared|private|unchanged>\n"
+ " modify mount propagation in mount namespace\n"), out);
+ fputs(_(" -s, --setgroups allow|deny control the setgroups syscall in user namespaces\n"), out);
fputs(USAGE_SEPARATOR, out);
fputs(USAGE_HELP, out);
@@ -66,7 +169,9 @@ static void usage(int status)
int main(int argc, char *argv[])
{
enum {
- OPT_MOUNTPROC = CHAR_MAX + 1
+ OPT_MOUNTPROC = CHAR_MAX + 1,
+ OPT_PROPAGATION,
+ OPT_SETGROUPS
};
static const struct option longopts[] = {
{ "help", no_argument, 0, 'h' },
@@ -76,20 +181,29 @@ int main(int argc, char *argv[])
{ "ipc", no_argument, 0, 'i' },
{ "net", no_argument, 0, 'n' },
{ "pid", no_argument, 0, 'p' },
+ { "user", no_argument, 0, 'U' },
{ "fork", no_argument, 0, 'f' },
{ "mount-proc", optional_argument, 0, OPT_MOUNTPROC },
+ { "map-root-user", no_argument, 0, 'r' },
+ { "propagation", required_argument, 0, OPT_PROPAGATION },
+ { "setgroups", required_argument, 0, OPT_SETGROUPS },
{ NULL, 0, 0, 0 }
};
+ int setgrpcmd = SETGROUPS_NONE;
int unshare_flags = 0;
- int c, forkit = 0;
+ int c, forkit = 0, maproot = 0;
const char *procmnt = NULL;
+ unsigned long propagation = UNSHARE_PROPAGATION_DEFAULT;
+ uid_t real_euid = geteuid();
+ gid_t real_egid = getegid();;
setlocale(LC_ALL, "");
bindtextdomain(PACKAGE, LOCALEDIR);
textdomain(PACKAGE);
+ atexit(close_stdout);
- while ((c = getopt_long(argc, argv, "+fhVmuinp", longopts, NULL)) != -1) {
+ while ((c = getopt_long(argc, argv, "+fhVmuinpUr", longopts, NULL)) != -1) {
switch (c) {
case 'f':
forkit = 1;
@@ -114,10 +228,23 @@ int main(int argc, char *argv[])
case 'p':
unshare_flags |= CLONE_NEWPID;
break;
+ case 'U':
+ unshare_flags |= CLONE_NEWUSER;
+ break;
case OPT_MOUNTPROC:
unshare_flags |= CLONE_NEWNS;
procmnt = optarg ? optarg : "/proc";
break;
+ case 'r':
+ unshare_flags |= CLONE_NEWUSER;
+ maproot = 1;
+ break;
+ case OPT_SETGROUPS:
+ setgrpcmd = setgroups_str2id(optarg);
+ break;
+ case OPT_PROPAGATION:
+ propagation = parse_propagation(optarg);
+ break;
default:
usage(EXIT_FAILURE);
}
@@ -146,6 +273,25 @@ int main(int argc, char *argv[])
}
}
+ if (maproot) {
+ if (setgrpcmd == SETGROUPS_ALLOW)
+ errx(EXIT_FAILURE, _("options --setgroups=allow and "
+ "--map-root-user are mutually exclusive"));
+
+ /* since Linux 3.19 unprivileged writing of /proc/self/gid_map
+ * has s been disabled unless /proc/self/setgroups is written
+ * first to permanently disable the ability to call setgroups
+ * in that user namespace. */
+ setgroups_control(SETGROUPS_DENY);
+ map_id(_PATH_PROC_UIDMAP, 0, real_euid);
+ map_id(_PATH_PROC_GIDMAP, 0, real_egid);
+
+ } else if (setgrpcmd != SETGROUPS_NONE)
+ setgroups_control(setgrpcmd);
+
+ if ((unshare_flags & CLONE_NEWNS) && propagation)
+ set_propagation(propagation);
+
if (procmnt &&
(mount("none", procmnt, NULL, MS_PRIVATE|MS_REC, NULL) != 0 ||
mount("proc", procmnt, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL) != 0))