d2bdca
diff -up util-linux-2.23.2/include/pathnames.h.kzak util-linux-2.23.2/include/pathnames.h
d2bdca
--- util-linux-2.23.2/include/pathnames.h.kzak	2015-06-26 10:00:19.111877564 +0200
d2bdca
+++ util-linux-2.23.2/include/pathnames.h	2015-06-26 10:00:51.623630869 +0200
d2bdca
@@ -85,6 +85,10 @@
d2bdca
 #define _PATH_PROC_LOCKS        "/proc/locks"
d2bdca
 #define _PATH_PROC_CDROMINFO	"/proc/sys/dev/cdrom/info"
d2bdca
 
d2bdca
+#define _PATH_PROC_UIDMAP	"/proc/self/uid_map"
d2bdca
+#define _PATH_PROC_GIDMAP	"/proc/self/gid_map"
d2bdca
+#define _PATH_PROC_SETGROUPS	"/proc/self/setgroups"
d2bdca
+
d2bdca
 #define _PATH_PROC_ATTR_CURRENT	"/proc/self/attr/current"
d2bdca
 #define _PATH_PROC_ATTR_EXEC	"/proc/self/attr/exec"
d2bdca
 #define _PATH_PROC_CAPLASTCAP	"/proc/sys/kernel/cap_last_cap"
d2bdca
diff -up util-linux-2.23.2/sys-utils/Makemodule.am.kzak util-linux-2.23.2/sys-utils/Makemodule.am
d2bdca
diff -up util-linux-2.23.2/sys-utils/nsenter.1.kzak util-linux-2.23.2/sys-utils/nsenter.1
d2bdca
--- util-linux-2.23.2/sys-utils/nsenter.1.kzak	2015-06-26 09:58:39.468633643 +0200
d2bdca
+++ util-linux-2.23.2/sys-utils/nsenter.1	2015-06-26 09:58:51.672541041 +0200
d2bdca
@@ -1,44 +1,45 @@
d2bdca
-.TH NSENTER 1 "January 2013" "util-linux" "User Commands"
d2bdca
+.TH NSENTER 1 "June 2013" "util-linux" "User Commands"
d2bdca
 .SH NAME
d2bdca
 nsenter \- run program with namespaces of other processes
d2bdca
 .SH SYNOPSIS
d2bdca
 .B nsenter
d2bdca
-.RI [ options ]
d2bdca
-.RI [ program ]
d2bdca
-.RI [ arguments ]
d2bdca
+[options]
d2bdca
+.RI [ program
d2bdca
+.RI [ arguments ]]
d2bdca
 .SH DESCRIPTION
d2bdca
 Enters the namespaces of one or more other processes and then executes the specified
d2bdca
 program.  Enterable namespaces are:
d2bdca
 .TP
d2bdca
 .B mount namespace
d2bdca
-mounting and unmounting filesystems will not affect rest of the system
d2bdca
+Mounting and unmounting filesystems will not affect the rest of the system
d2bdca
 .RB ( CLONE_\:NEWNS
d2bdca
-flag), except for filesystems which are explicitly marked as shared (by mount
d2bdca
---make-\:shared).  See /proc\:/self\:/mountinfo for the shared flag.
d2bdca
+flag), except for filesystems which are explicitly marked as shared (with
d2bdca
+\fBmount --make-\:shared\fP; see \fI/proc\:/self\:/mountinfo\fP for the
d2bdca
+\fBshared\fP flag).
d2bdca
 .TP
d2bdca
 .B UTS namespace
d2bdca
-setting hostname, domainname will not affect rest of the system
d2bdca
+Setting hostname or domainname will not affect the rest of the system.
d2bdca
 .RB ( CLONE_\:NEWUTS
d2bdca
-flag).
d2bdca
+flag)
d2bdca
 .TP
d2bdca
 .B IPC namespace
d2bdca
-process will have independent namespace for System V message queues, semaphore
d2bdca
-sets and shared memory segments
d2bdca
+The process will have an independent namespace for System V message queues,
d2bdca
+semaphore sets and shared memory segments.
d2bdca
 .RB ( CLONE_\:NEWIPC
d2bdca
-flag).
d2bdca
+flag)
d2bdca
 .TP
d2bdca
 .B network namespace
d2bdca
-process will have independent IPv4 and IPv6 stacks, IP routing tables, firewall
d2bdca
-rules, the
d2bdca
+The process will have independent IPv4 and IPv6 stacks, IP routing tables,
d2bdca
+firewall rules, the
d2bdca
 .I /proc\:/net
d2bdca
 and
d2bdca
 .I /sys\:/class\:/net
d2bdca
-directory trees, sockets etc.
d2bdca
+directory trees, sockets, etc.
d2bdca
 .RB ( CLONE_\:NEWNET
d2bdca
-flag).
d2bdca
+flag)
d2bdca
 .TP
d2bdca
 .B PID namespace
d2bdca
-children will have a set of PID to process mappings separate from the
d2bdca
+Children will have a set of PID to process mappings separate from the
d2bdca
 .B nsenter
d2bdca
 process
d2bdca
 .RB ( CLONE_\:NEWPID
d2bdca
@@ -46,18 +47,18 @@ flag).
d2bdca
 .B nsenter
d2bdca
 will fork by default if changing the PID namespace, so that the new program
d2bdca
 and its children share the same PID namespace and are visible to each other.
d2bdca
-If \-\-no\-fork is used, the new program will be exec'ed without forking.
d2bdca
-.PP
d2bdca
-See the
d2bdca
-.BR clone (2)
d2bdca
-for exact semantics of the flags.
d2bdca
+If \fB\-\-no\-fork\fP is used, the new program will be exec'ed without forking.
d2bdca
 .TP
d2bdca
-If program is not given, run ``${SHELL}'' (default: /bin\:/sh).
d2bdca
+.B user namespace
d2bdca
+The process will have a distinct set of UIDs, GIDs and capabilities.
d2bdca
+.RB ( CLONE_\:NEWUSER
d2bdca
+flag)
d2bdca
+.TP
d2bdca
+See \fBclone\fP(2) for the exact semantics of the flags.
d2bdca
+.TP
d2bdca
+If \fIprogram\fP is not given, then ``${SHELL}'' is run (default: /bin\:/sh).
d2bdca
 
d2bdca
 .SH OPTIONS
d2bdca
-Argument with square brakets, such as [\fIfile\fR], means optional argument.
d2bdca
-Command line syntax to specify optional argument \-\-mount=/path\:/to\:/file.
d2bdca
-Please notice the equals sign.
d2bdca
 .TP
d2bdca
 \fB\-t\fR, \fB\-\-target\fR \fIpid\fP
d2bdca
 Specify a target process to get contexts from.  The paths to the contexts
d2bdca
@@ -83,6 +84,9 @@ the network namespace
d2bdca
 /proc/\fIpid\fR/ns/pid
d2bdca
 the PID namespace
d2bdca
 .TP
d2bdca
+/proc/\fIpid\fR/ns/user
d2bdca
+the user namespace
d2bdca
+.TP
d2bdca
 /proc/\fIpid\fR/root
d2bdca
 the root directory
d2bdca
 .TP
d2bdca
@@ -91,51 +95,71 @@ the working directory respectively
d2bdca
 .PD
d2bdca
 .RE
d2bdca
 .TP
d2bdca
-\fB\-m\fR, \fB\-\-mount\fR [\fIfile\fR]
d2bdca
-Enter the mount namespace.  If no file is specified enter the mount namespace
d2bdca
-of the target process.  If file is specified enter the mount namespace
d2bdca
+\fB\-m\fR, \fB\-\-mount\fR[=\fIfile\fR]
d2bdca
+Enter the mount namespace.  If no file is specified, enter the mount namespace
d2bdca
+of the target process.  If file is specified, enter the mount namespace
d2bdca
 specified by file.
d2bdca
 .TP
d2bdca
-\fB\-u\fR, \fB\-\-uts\fR [\fIfile\fR]
d2bdca
-Enter the UTS namespace.  If no file is specified enter the UTS namespace of
d2bdca
-the target process.  If file is specified enter the UTS namespace specified by
d2bdca
+\fB\-u\fR, \fB\-\-uts\fR[=\fIfile\fR]
d2bdca
+Enter the UTS namespace.  If no file is specified, enter the UTS namespace of
d2bdca
+the target process.  If file is specified, enter the UTS namespace specified by
d2bdca
 file.
d2bdca
 .TP
d2bdca
-\fB\-i\fR, \fB\-\-ipc\fR [\fIfile\fR]
d2bdca
-Enter the IPC namespace.  If no file is specified enter the IPC namespace of
d2bdca
-the target process.  If file is specified enter the IPC namespace specified by
d2bdca
+\fB\-i\fR, \fB\-\-ipc\fR[=\fIfile\fR]
d2bdca
+Enter the IPC namespace.  If no file is specified, enter the IPC namespace of
d2bdca
+the target process.  If file is specified, enter the IPC namespace specified by
d2bdca
 file.
d2bdca
 .TP
d2bdca
-\fB\-n\fR, \fB\-\-net\fR [\fIfile\fR]
d2bdca
-Enter the network namespace.  If no file is specified enter the network
d2bdca
-namespace of the target process.  If file is specified enter the network
d2bdca
+\fB\-n\fR, \fB\-\-net\fR[=\fIfile\fR]
d2bdca
+Enter the network namespace.  If no file is specified, enter the network
d2bdca
+namespace of the target process.  If file is specified, enter the network
d2bdca
 namespace specified by file.
d2bdca
 .TP
d2bdca
-\fB\-p\fR, \fB\-\-pid\fR [\fIfile\fR]
d2bdca
-Enter the PID namespace.  If no file is specified enter the PID namespace of
d2bdca
-the target process.  If file is specified enter the PID namespace specified by
d2bdca
+\fB\-p\fR, \fB\-\-pid\fR[=\fIfile\fR]
d2bdca
+Enter the PID namespace.  If no file is specified, enter the PID namespace of
d2bdca
+the target process.  If file is specified, enter the PID namespace specified by
d2bdca
 file.
d2bdca
 .TP
d2bdca
-\fB\-r\fR, \fB\-\-root\fR [\fIdirectory\fR]
d2bdca
-Set the root directory.  If no directory is specified set the root directory to
d2bdca
-the root directory of the target process.  If directory is specified set the
d2bdca
+\fB\-U\fR, \fB\-\-user\fR[=\fIfile\fR]
d2bdca
+Enter the user namespace.  If no file is specified, enter the user namespace of
d2bdca
+the target process.  If file is specified, enter the user namespace specified by
d2bdca
+file.  See also the \fB\-\-setuid\fR and \fB\-\-setgid\fR options.
d2bdca
+.TP
d2bdca
+\fB\-G\fR, \fB\-\-setgid\fR \fIgid\fR
d2bdca
+Set the group ID which will be used in the entered namespace and drop
d2bdca
+supplementary groups.
d2bdca
+.BR nsenter (1)
d2bdca
+always sets GID for user namespaces, the default is 0.
d2bdca
+.TP
d2bdca
+\fB\-S\fR, \fB\-\-setuid\fR \fIuid\fR
d2bdca
+Set the user ID which will be used in the entered namespace.
d2bdca
+.BR nsenter (1)
d2bdca
+always sets UID for user namespaces, the default is 0.
d2bdca
+.TP
d2bdca
+\fB\-\-preserve\-credentials\fR
d2bdca
+Don't modify UID and GID when entering a user namespace. The default is to
d2bdca
+drop supplementary groups and set GID and UID to 0.
d2bdca
+.TP
d2bdca
+\fB\-r\fR, \fB\-\-root\fR[=\fIdirectory\fR]
d2bdca
+Set the root directory.  If no directory is specified, set the root directory to
d2bdca
+the root directory of the target process.  If directory is specified, set the
d2bdca
 root directory to the specified directory.
d2bdca
 .TP
d2bdca
-\fB\-w\fR, \fB\-\-wd\fR [\fIdirectory\fR]
d2bdca
-Set the working directory.  If no directory is specified set the working
d2bdca
+\fB\-w\fR, \fB\-\-wd\fR[=\fIdirectory\fR]
d2bdca
+Set the working directory.  If no directory is specified, set the working
d2bdca
 directory to the working directory of the target process.  If directory is
d2bdca
-specified set the working directory to the specified directory.
d2bdca
+specified, set the working directory to the specified directory.
d2bdca
 .TP
d2bdca
-\fB\-F\fR, \fB\-\-no-fork\fR
d2bdca
-Do not fork before exec'ing the specified program.  By default when entering a
d2bdca
-pid namespace enter calls fork before calling exec so that the children will be
d2bdca
-in the newly entered pid namespace.
d2bdca
+\fB\-F\fR, \fB\-\-no\-fork\fR
d2bdca
+Do not fork before exec'ing the specified program.  By default, when entering a
d2bdca
+PID namespace, \fBnsenter\fP calls \fBfork\fP before calling \fBexec\fP so that
d2bdca
+any children will also be in the newly entered PID namespace.
d2bdca
 .TP
d2bdca
 \fB\-V\fR, \fB\-\-version\fR
d2bdca
 Display version information and exit.
d2bdca
 .TP
d2bdca
 \fB\-h\fR, \fB\-\-help\fR
d2bdca
-Print a help message.
d2bdca
+Display help text and exit.
d2bdca
 .SH SEE ALSO
d2bdca
 .BR setns (2),
d2bdca
 .BR clone (2)
d2bdca
diff -up util-linux-2.23.2/sys-utils/nsenter.c.kzak util-linux-2.23.2/sys-utils/nsenter.c
d2bdca
--- util-linux-2.23.2/sys-utils/nsenter.c.kzak	2015-06-26 09:58:39.468633643 +0200
d2bdca
+++ util-linux-2.23.2/sys-utils/nsenter.c	2015-06-26 09:58:51.673541033 +0200
d2bdca
@@ -28,6 +28,7 @@
d2bdca
 #include <assert.h>
d2bdca
 #include <sys/types.h>
d2bdca
 #include <sys/wait.h>
d2bdca
+#include <grp.h>
d2bdca
 
d2bdca
 #include "strutils.h"
d2bdca
 #include "nls.h"
d2bdca
@@ -42,7 +43,12 @@ static struct namespace_file {
d2bdca
 	int fd;
d2bdca
 } namespace_files[] = {
d2bdca
 	/* Careful the order is significant in this array.
d2bdca
+	 *
d2bdca
+	 * The user namespace comes first, so that it is entered
d2bdca
+	 * first.  This gives an unprivileged user the potential to
d2bdca
+	 * enter the other namespaces.
d2bdca
 	 */
d2bdca
+	{ .nstype = CLONE_NEWUSER, .name = "ns/user", .fd = -1 },
d2bdca
 	{ .nstype = CLONE_NEWIPC,  .name = "ns/ipc",  .fd = -1 },
d2bdca
 	{ .nstype = CLONE_NEWUTS,  .name = "ns/uts",  .fd = -1 },
d2bdca
 	{ .nstype = CLONE_NEWNET,  .name = "ns/net",  .fd = -1 },
d2bdca
@@ -56,18 +62,25 @@ static void usage(int status)
d2bdca
 	FILE *out = status == EXIT_SUCCESS ? stdout : stderr;
d2bdca
 
d2bdca
 	fputs(USAGE_HEADER, out);
d2bdca
-	fprintf(out, _(" %s [options] <program> [args...]\n"),
d2bdca
+	fprintf(out, _(" %s [options] <program> [<argument>...]\n"),
d2bdca
 		program_invocation_short_name);
d2bdca
 
d2bdca
+	fputs(USAGE_SEPARATOR, out);
d2bdca
+	fputs(_("Run a program with namespaces of other processes.\n"), out);
d2bdca
+
d2bdca
 	fputs(USAGE_OPTIONS, out);
d2bdca
 	fputs(_(" -t, --target <pid>     target process to get namespaces from\n"), out);
d2bdca
-	fputs(_(" -m, --mount [=<file>]  enter mount namespace\n"), out);
d2bdca
-	fputs(_(" -u, --uts   [=<file>]  enter UTS namespace (hostname etc)\n"), out);
d2bdca
-	fputs(_(" -i, --ipc   [=<file>]  enter System V IPC namespace\n"), out);
d2bdca
-	fputs(_(" -n, --net   [=<file>]  enter network namespace\n"), out);
d2bdca
-	fputs(_(" -p, --pid   [=<file>]  enter pid namespace\n"), out);
d2bdca
-	fputs(_(" -r, --root  [=<dir>]   set the root directory\n"), out);
d2bdca
-	fputs(_(" -w, --wd    [=<dir>]   set the working directory\n"), out);
d2bdca
+	fputs(_(" -m, --mount[=<file>]   enter mount namespace\n"), out);
d2bdca
+	fputs(_(" -u, --uts[=<file>]     enter UTS namespace (hostname etc)\n"), out);
d2bdca
+	fputs(_(" -i, --ipc[=<file>]     enter System V IPC namespace\n"), out);
d2bdca
+	fputs(_(" -n, --net[=<file>]     enter network namespace\n"), out);
d2bdca
+	fputs(_(" -p, --pid[=<file>]     enter pid namespace\n"), out);
d2bdca
+	fputs(_(" -U, --user[=<file>]    enter user namespace\n"), out);
d2bdca
+	fputs(_(" -S, --setuid <uid>     set uid in entered namespace\n"), out);
d2bdca
+	fputs(_(" -G, --setgid <gid>     set gid in entered namespace\n"), out);
d2bdca
+	fputs(_("     --preserve-credentials do not touch uids or gids\n"), out);
d2bdca
+	fputs(_(" -r, --root[=<dir>]     set the root directory\n"), out);
d2bdca
+	fputs(_(" -w, --wd[=<dir>]       set the working directory\n"), out);
d2bdca
 	fputs(_(" -F, --no-fork          do not fork before exec'ing <program>\n"), out);
d2bdca
 
d2bdca
 	fputs(USAGE_SEPARATOR, out);
d2bdca
@@ -153,6 +166,9 @@ static void continue_as_child(void)
d2bdca
 
d2bdca
 int main(int argc, char *argv[])
d2bdca
 {
d2bdca
+	enum {
d2bdca
+		OPT_PRESERVE_CRED = CHAR_MAX + 1
d2bdca
+	};
d2bdca
 	static const struct option longopts[] = {
d2bdca
 		{ "help", no_argument, NULL, 'h' },
d2bdca
 		{ "version", no_argument, NULL, 'V'},
d2bdca
@@ -162,24 +178,30 @@ int main(int argc, char *argv[])
d2bdca
 		{ "ipc", optional_argument, NULL, 'i' },
d2bdca
 		{ "net", optional_argument, NULL, 'n' },
d2bdca
 		{ "pid", optional_argument, NULL, 'p' },
d2bdca
+		{ "user", optional_argument, NULL, 'U' },
d2bdca
+		{ "setuid", required_argument, NULL, 'S' },
d2bdca
+		{ "setgid", required_argument, NULL, 'G' },
d2bdca
 		{ "root", optional_argument, NULL, 'r' },
d2bdca
 		{ "wd", optional_argument, NULL, 'w' },
d2bdca
 		{ "no-fork", no_argument, NULL, 'F' },
d2bdca
+		{ "preserve-credentials", no_argument, NULL, OPT_PRESERVE_CRED },
d2bdca
 		{ NULL, 0, NULL, 0 }
d2bdca
 	};
d2bdca
 
d2bdca
 	struct namespace_file *nsfile;
d2bdca
-	int c, namespaces = 0;
d2bdca
-	bool do_rd = false, do_wd = false;
d2bdca
+	int c, namespaces = 0, setgroups_nerrs = 0, preserve_cred = 0;
d2bdca
+	bool do_rd = false, do_wd = false, force_uid = false, force_gid = false;
d2bdca
 	int do_fork = -1; /* unknown yet */
d2bdca
+	uid_t uid = 0;
d2bdca
+	gid_t gid = 0;
d2bdca
 
d2bdca
-	setlocale(LC_MESSAGES, "");
d2bdca
+	setlocale(LC_ALL, "");
d2bdca
 	bindtextdomain(PACKAGE, LOCALEDIR);
d2bdca
 	textdomain(PACKAGE);
d2bdca
 	atexit(close_stdout);
d2bdca
 
d2bdca
 	while ((c =
d2bdca
-		getopt_long(argc, argv, "hVt:m::u::i::n::p::r::w::F",
d2bdca
+		getopt_long(argc, argv, "+hVt:m::u::i::n::p::U::S:G:r::w::F",
d2bdca
 			    longopts, NULL)) != -1) {
d2bdca
 		switch (c) {
d2bdca
 		case 'h':
d2bdca
@@ -221,6 +243,20 @@ int main(int argc, char *argv[])
d2bdca
 			else
d2bdca
 				namespaces |= CLONE_NEWPID;
d2bdca
 			break;
d2bdca
+		case 'U':
d2bdca
+			if (optarg)
d2bdca
+				open_namespace_fd(CLONE_NEWUSER, optarg);
d2bdca
+			else
d2bdca
+				namespaces |= CLONE_NEWUSER;
d2bdca
+			break;
d2bdca
+		case 'S':
d2bdca
+			uid = strtoul_or_err(optarg, _("failed to parse uid"));
d2bdca
+			force_uid = true;
d2bdca
+			break;
d2bdca
+		case 'G':
d2bdca
+			gid = strtoul_or_err(optarg, _("failed to parse gid"));
d2bdca
+			force_gid = true;
d2bdca
+			break;
d2bdca
 		case 'F':
d2bdca
 			do_fork = 0;
d2bdca
 			break;
d2bdca
@@ -236,6 +272,9 @@ int main(int argc, char *argv[])
d2bdca
 			else
d2bdca
 				do_wd = true;
d2bdca
 			break;
d2bdca
+		case OPT_PRESERVE_CRED:
d2bdca
+			preserve_cred = 1;
d2bdca
+			break;
d2bdca
 		default:
d2bdca
 			usage(EXIT_FAILURE);
d2bdca
 		}
d2bdca
@@ -253,6 +292,26 @@ int main(int argc, char *argv[])
d2bdca
 		open_target_fd(&wd_fd, "cwd", NULL);
d2bdca
 
d2bdca
 	/*
d2bdca
+	 * Update namespaces variable to contain all requested namespaces
d2bdca
+	 */
d2bdca
+	for (nsfile = namespace_files; nsfile->nstype; nsfile++) {
d2bdca
+		if (nsfile->fd < 0)
d2bdca
+			continue;
d2bdca
+		namespaces |= nsfile->nstype;
d2bdca
+	}
d2bdca
+
d2bdca
+	/* for user namespaces we always set UID and GID (default is 0)
d2bdca
+	 * and clear root's groups if --preserve-credentials is not specified */
d2bdca
+	if ((namespaces & CLONE_NEWUSER) && !preserve_cred) {
d2bdca
+		force_uid = true, force_gid = true;
d2bdca
+
d2bdca
+		/* We call setgroups() before and after we enter user namespace,
d2bdca
+		 * let's complain only if both fail */
d2bdca
+		if (setgroups(0, NULL) != 0)
d2bdca
+			setgroups_nerrs++;
d2bdca
+	}
d2bdca
+
d2bdca
+	/*
d2bdca
 	 * Now that we know which namespaces we want to enter, enter them.
d2bdca
 	 */
d2bdca
 	for (nsfile = namespace_files; nsfile->nstype; nsfile++) {
d2bdca
@@ -302,6 +361,15 @@ int main(int argc, char *argv[])
d2bdca
 	if (do_fork == 1)
d2bdca
 		continue_as_child();
d2bdca
 
d2bdca
+	if (force_uid || force_gid) {
d2bdca
+		if (force_gid && setgroups(0, NULL) != 0 && setgroups_nerrs)	/* drop supplementary groups */
d2bdca
+			err(EXIT_FAILURE, _("setgroups failed"));
d2bdca
+		if (force_gid && setgid(gid) < 0)		/* change GID */
d2bdca
+			err(EXIT_FAILURE, _("setgid failed"));
d2bdca
+		if (force_uid && setuid(uid) < 0)		/* change UID */
d2bdca
+			err(EXIT_FAILURE, _("setuid failed"));
d2bdca
+	}
d2bdca
+
d2bdca
 	if (optind < argc) {
d2bdca
 		execvp(argv[optind], argv + optind);
d2bdca
 		err(EXIT_FAILURE, _("failed to execute %s"), argv[optind]);
d2bdca
diff -up util-linux-2.23.2/sys-utils/unshare.1.kzak util-linux-2.23.2/sys-utils/unshare.1
d2bdca
--- util-linux-2.23.2/sys-utils/unshare.1.kzak	2015-06-26 09:58:39.484633521 +0200
d2bdca
+++ util-linux-2.23.2/sys-utils/unshare.1	2015-06-26 09:58:51.673541033 +0200
d2bdca
@@ -1,28 +1,27 @@
d2bdca
-.\" Process this file with
d2bdca
-.\" groff -man -Tascii lscpu.1
d2bdca
-.\"
d2bdca
-.TH UNSHARE 1 "July 2013" "util-linux" "User Commands"
d2bdca
+.TH UNSHARE 1 "July 2014" "util-linux" "User Commands"
d2bdca
 .SH NAME
d2bdca
 unshare \- run program with some namespaces unshared from parent
d2bdca
 .SH SYNOPSIS
d2bdca
 .B unshare
d2bdca
-.RI [ options ]
d2bdca
+[options]
d2bdca
 .I program
d2bdca
 .RI [ arguments ]
d2bdca
 .SH DESCRIPTION
d2bdca
 Unshares the indicated namespaces from the parent process and then executes
d2bdca
-the specified program.  The namespaces to be unshared are indicated via
d2bdca
+the specified \fIprogram\fR.  The namespaces to be unshared are indicated via
d2bdca
 options.  Unshareable namespaces are:
d2bdca
 .TP
d2bdca
 .BR "mount namespace"
d2bdca
 Mounting and unmounting filesystems will not affect the rest of the system
d2bdca
 (\fBCLONE_NEWNS\fP flag), except for filesystems which are explicitly marked as
d2bdca
-shared (with \fBmount --make-shared\fP; see \fI/proc/self/mountinfo\fP for the
d2bdca
-\fBshared\fP flags).
d2bdca
-
d2bdca
-It's recommended to use \fBmount --make-rprivate\fP or \fBmount --make-rslave\fP
d2bdca
-after \fBunshare --mount\fP to make sure that mountpoints in the new namespace
d2bdca
-are really unshared from parental namespace.
d2bdca
+shared (with \fBmount --make-shared\fP; see \fI/proc/self/mountinfo\fP or
d2bdca
+\fBfindmnt -o+PROPAGATION\fP for the \fBshared\fP flags).
d2bdca
+.sp
d2bdca
+.B unshare
d2bdca
+automatically sets propagation to \fBprivate\fP
d2bdca
+in the new mount namespace to make sure that the new namespace is really
d2bdca
+unshared. This feature can be disabled with the option \fB\-\-propagation unchanged\fP.
d2bdca
+Note that \fBprivate\fP is the kernel default.
d2bdca
 .TP
d2bdca
 .BR "UTS namespace"
d2bdca
 Setting hostname or domainname will not affect the rest of the system.
d2bdca
@@ -40,13 +39,14 @@ sockets, etc.  (\fBCLONE_NEWNET\fP flag)
d2bdca
 .BR "pid namespace"
d2bdca
 Children will have a distinct set of PID to process mappings from their parent.
d2bdca
 (\fBCLONE_NEWPID\fP flag)
d2bdca
+.TP
d2bdca
+.BR "user namespace"
d2bdca
+The process will have a distinct set of UIDs, GIDs and capabilities.
d2bdca
+(\fBCLONE_NEWUSER\fP flag)
d2bdca
 .PP
d2bdca
 See \fBclone\fR(2) for the exact semantics of the flags.
d2bdca
 .SH OPTIONS
d2bdca
 .TP
d2bdca
-.BR \-h , " \-\-help"
d2bdca
-Display help text and exit.
d2bdca
-.TP
d2bdca
 .BR \-i , " \-\-ipc"
d2bdca
 Unshare the IPC namespace.
d2bdca
 .TP
d2bdca
@@ -63,16 +63,68 @@ See also the \fB--fork\fP and \fB--mount
d2bdca
 .BR \-u , " \-\-uts"
d2bdca
 Unshare the UTS namespace.
d2bdca
 .TP
d2bdca
+.BR \-U , " \-\-user"
d2bdca
+Unshare the user namespace.
d2bdca
+.TP
d2bdca
 .BR \-f , " \-\-fork"
d2bdca
 Fork the specified \fIprogram\fR as a child process of \fBunshare\fR rather than
d2bdca
 running it directly.  This is useful when creating a new pid namespace.
d2bdca
 .TP
d2bdca
-.BR \-\-mount-proc "[=\fImountpoint\fP]"
d2bdca
-Just before running the program, mount the proc filesystem at the \fImountpoint\fP
d2bdca
+.BR \-\-mount\-proc "[=\fImountpoint\fP]"
d2bdca
+Just before running the program, mount the proc filesystem at \fImountpoint\fP
d2bdca
 (default is /proc).  This is useful when creating a new pid namespace.  It also
d2bdca
 implies creating a new mount namespace since the /proc mount would otherwise
d2bdca
-mess up existing programs on the system. The new proc filesystem is explicitly
d2bdca
+mess up existing programs on the system.  The new proc filesystem is explicitly
d2bdca
 mounted as private (by MS_PRIVATE|MS_REC).
d2bdca
+.TP
d2bdca
+.BR \-r , " \-\-map\-root\-user"
d2bdca
+Run the program only after the current effective user and group IDs have been mapped to
d2bdca
+the superuser UID and GID in the newly created user namespace.  This makes it possible to
d2bdca
+conveniently gain capabilities needed to manage various aspects of the newly created
d2bdca
+namespaces (such as configuring interfaces in the network namespace or mounting filesystems in
d2bdca
+the mount namespace) even when run unprivileged.  As a mere convenience feature, it does not support
d2bdca
+more sophisticated use cases, such as mapping multiple ranges of UIDs and GIDs.
d2bdca
+This option implies \fB\-\-setgroups=deny\fP.
d2bdca
+.TP
d2bdca
+.BR "\-\-propagation \fIprivate|shared|slave|unchanged\fP"
d2bdca
+Recursively sets mount propagation flag in the new mount namespace. The default
d2bdca
+is to set the propagation to \fIprivate\fP; this feature can be disabled
d2bdca
+by the \fIunchanged\fP argument. The option is silently ignored when a mount namespace (\fB\-\-mount\fP)
d2bdca
+is not requested.
d2bdca
+.TP
d2bdca
+.BR "\-\-setgroups \fIallow|deny\fP"
d2bdca
+Allow or deny
d2bdca
+.BR setgroups (2)
d2bdca
+syscall in user namespaces.
d2bdca
+
d2bdca
+.BR setgroups (2)
d2bdca
+is only callable with CAP_SETGID, and CAP_SETGID in a user
d2bdca
+namespace (since Linux 3.19) does not give you permission to call setgroups(2)
d2bdca
+until after GID map has been set. The GID map is writable by root when
d2bdca
+.BR setgroups (2)
d2bdca
+is enabled and GID map becomes writable by unprivileged processes when
d2bdca
+.BR setgroups (2)
d2bdca
+is permanently disabled.
d2bdca
+.TP
d2bdca
+.BR \-V , " \-\-version"
d2bdca
+Display version information and exit.
d2bdca
+.TP
d2bdca
+.BR \-h , " \-\-help"
d2bdca
+Display help text and exit.
d2bdca
+.SH EXAMPLES
d2bdca
+.TP
d2bdca
+.B # unshare --fork --pid --mount-proc readlink /proc/self
d2bdca
+.TQ
d2bdca
+1
d2bdca
+.br
d2bdca
+Establish a PID namespace, ensure we're PID 1 in it against newly mounted
d2bdca
+procfs instance.
d2bdca
+.TP
d2bdca
+.B $ unshare --map-root-user --user sh -c whoami
d2bdca
+.TQ
d2bdca
+root
d2bdca
+.br
d2bdca
+Establish a user namespace as an unprivileged user with a root user within it.
d2bdca
 .SH SEE ALSO
d2bdca
 .BR unshare (2),
d2bdca
 .BR clone (2),
d2bdca
diff -up util-linux-2.23.2/sys-utils/unshare.c.kzak util-linux-2.23.2/sys-utils/unshare.c
d2bdca
--- util-linux-2.23.2/sys-utils/unshare.c.kzak	2015-06-26 09:58:39.484633521 +0200
d2bdca
+++ util-linux-2.23.2/sys-utils/unshare.c	2015-06-26 09:58:51.673541033 +0200
d2bdca
@@ -32,19 +32,117 @@
d2bdca
 
d2bdca
 #include "nls.h"
d2bdca
 #include "c.h"
d2bdca
+#include "closestream.h"
d2bdca
 #include "namespace.h"
d2bdca
 #include "exec_shell.h"
d2bdca
 #include "xalloc.h"
d2bdca
 #include "pathnames.h"
d2bdca
+#include "all-io.h"
d2bdca
 
d2bdca
+/* 'private' is kernel default */
d2bdca
+#define UNSHARE_PROPAGATION_DEFAULT	(MS_REC | MS_PRIVATE)
d2bdca
+
d2bdca
+enum {
d2bdca
+	SETGROUPS_NONE = -1,
d2bdca
+	SETGROUPS_DENY = 0,
d2bdca
+	SETGROUPS_ALLOW = 1,
d2bdca
+};
d2bdca
+
d2bdca
+static const char *setgroups_strings[] =
d2bdca
+{
d2bdca
+	[SETGROUPS_DENY] = "deny",
d2bdca
+	[SETGROUPS_ALLOW] = "allow"
d2bdca
+};
d2bdca
+
d2bdca
+static int setgroups_str2id(const char *str)
d2bdca
+{
d2bdca
+	size_t i;
d2bdca
+
d2bdca
+	for (i = 0; i < ARRAY_SIZE(setgroups_strings); i++)
d2bdca
+		if (strcmp(str, setgroups_strings[i]) == 0)
d2bdca
+			return i;
d2bdca
+
d2bdca
+	errx(EXIT_FAILURE, _("unsupported --setgroups argument '%s'"), str);
d2bdca
+}
d2bdca
+
d2bdca
+static void setgroups_control(int action)
d2bdca
+{
d2bdca
+	const char *file = _PATH_PROC_SETGROUPS;
d2bdca
+	const char *cmd;
d2bdca
+	int fd;
d2bdca
+
d2bdca
+	if (action < 0 || (size_t) action >= ARRAY_SIZE(setgroups_strings))
d2bdca
+		return;
d2bdca
+	cmd = setgroups_strings[action];
d2bdca
+
d2bdca
+	fd = open(file, O_WRONLY);
d2bdca
+	if (fd < 0) {
d2bdca
+		if (errno == ENOENT)
d2bdca
+			return;
d2bdca
+		 err(EXIT_FAILURE, _("cannot open %s"), file);
d2bdca
+	}
d2bdca
+
d2bdca
+	if (write_all(fd, cmd, strlen(cmd)))
d2bdca
+		err(EXIT_FAILURE, _("write failed %s"), file);
d2bdca
+	close(fd);
d2bdca
+}
d2bdca
+
d2bdca
+static void map_id(const char *file, uint32_t from, uint32_t to)
d2bdca
+{
d2bdca
+	char *buf;
d2bdca
+	int fd;
d2bdca
+
d2bdca
+	fd = open(file, O_WRONLY);
d2bdca
+	if (fd < 0)
d2bdca
+		 err(EXIT_FAILURE, _("cannot open %s"), file);
d2bdca
+
d2bdca
+	xasprintf(&buf, "%u %u 1", from, to);
d2bdca
+	if (write_all(fd, buf, strlen(buf)))
d2bdca
+		err(EXIT_FAILURE, _("write failed %s"), file);
d2bdca
+	free(buf);
d2bdca
+	close(fd);
d2bdca
+}
d2bdca
+
d2bdca
+static unsigned long parse_propagation(const char *str)
d2bdca
+{
d2bdca
+	size_t i;
d2bdca
+	static const struct prop_opts {
d2bdca
+		const char *name;
d2bdca
+		unsigned long flag;
d2bdca
+	} opts[] = {
d2bdca
+		{ "slave",	MS_REC | MS_SLAVE },
d2bdca
+		{ "private",	MS_REC | MS_PRIVATE },
d2bdca
+		{ "shared",     MS_REC | MS_SHARED },
d2bdca
+		{ "unchanged",        0 }
d2bdca
+	};
d2bdca
+
d2bdca
+	for (i = 0; i < ARRAY_SIZE(opts); i++) {
d2bdca
+		if (strcmp(opts[i].name, str) == 0)
d2bdca
+			return opts[i].flag;
d2bdca
+	}
d2bdca
+
d2bdca
+	errx(EXIT_FAILURE, _("unsupported propagation mode: %s"), str);
d2bdca
+}
d2bdca
+
d2bdca
+static void set_propagation(unsigned long flags)
d2bdca
+{
d2bdca
+	if (flags == 0)
d2bdca
+		return;
d2bdca
+
d2bdca
+	if (mount("none", "/", NULL, flags, NULL) != 0)
d2bdca
+		err(EXIT_FAILURE, _("cannot change root filesystem propagation"));
d2bdca
+}
d2bdca
 
d2bdca
 static void usage(int status)
d2bdca
 {
d2bdca
 	FILE *out = status == EXIT_SUCCESS ? stdout : stderr;
d2bdca
 
d2bdca
 	fputs(USAGE_HEADER, out);
d2bdca
-	fprintf(out,
d2bdca
-	      _(" %s [options] <program> [args...]\n"),	program_invocation_short_name);
d2bdca
+	fprintf(out, _(" %s [options] <program> [<argument>...]\n"),
d2bdca
+		program_invocation_short_name);
d2bdca
+
d2bdca
+	fputs(USAGE_SEPARATOR, out);
d2bdca
+	fputs(_("Run a program with some namespaces unshared from the parent.\n"), out);
d2bdca
 
d2bdca
 	fputs(USAGE_OPTIONS, out);
d2bdca
 	fputs(_(" -m, --mount               unshare mounts namespace\n"), out);
d2bdca
@@ -52,8 +150,13 @@ static void usage(int status)
d2bdca
 	fputs(_(" -i, --ipc                 unshare System V IPC namespace\n"), out);
d2bdca
 	fputs(_(" -n, --net                 unshare network namespace\n"), out);
d2bdca
 	fputs(_(" -p, --pid                 unshare pid namespace\n"), out);
d2bdca
+	fputs(_(" -U, --user                unshare user namespace\n"), out);
d2bdca
 	fputs(_(" -f, --fork                fork before launching <program>\n"), out);
d2bdca
 	fputs(_("     --mount-proc[=<dir>]  mount proc filesystem first (implies --mount)\n"), out);
d2bdca
+	fputs(_(" -r, --map-root-user       map current user to root (implies --user)\n"), out);
d2bdca
+	fputs(_("     --propagation <slave|shared|private|unchanged>\n"
d2bdca
+	        "                           modify mount propagation in mount namespace\n"), out);
d2bdca
+	fputs(_(" -s, --setgroups allow|deny  control the setgroups syscall in user namespaces\n"), out);
d2bdca
 
d2bdca
 	fputs(USAGE_SEPARATOR, out);
d2bdca
 	fputs(USAGE_HELP, out);
d2bdca
@@ -66,7 +169,9 @@ static void usage(int status)
d2bdca
 int main(int argc, char *argv[])
d2bdca
 {
d2bdca
 	enum {
d2bdca
-		OPT_MOUNTPROC = CHAR_MAX + 1
d2bdca
+		OPT_MOUNTPROC = CHAR_MAX + 1,
d2bdca
+		OPT_PROPAGATION,
d2bdca
+		OPT_SETGROUPS
d2bdca
 	};
d2bdca
 	static const struct option longopts[] = {
d2bdca
 		{ "help", no_argument, 0, 'h' },
d2bdca
@@ -76,20 +181,29 @@ int main(int argc, char *argv[])
d2bdca
 		{ "ipc", no_argument, 0, 'i' },
d2bdca
 		{ "net", no_argument, 0, 'n' },
d2bdca
 		{ "pid", no_argument, 0, 'p' },
d2bdca
+		{ "user", no_argument, 0, 'U' },
d2bdca
 		{ "fork", no_argument, 0, 'f' },
d2bdca
 		{ "mount-proc", optional_argument, 0, OPT_MOUNTPROC },
d2bdca
+		{ "map-root-user", no_argument, 0, 'r' },
d2bdca
+		{ "propagation", required_argument, 0, OPT_PROPAGATION },
d2bdca
+		{ "setgroups", required_argument, 0, OPT_SETGROUPS },
d2bdca
 		{ NULL, 0, 0, 0 }
d2bdca
 	};
d2bdca
 
d2bdca
+	int setgrpcmd = SETGROUPS_NONE;
d2bdca
 	int unshare_flags = 0;
d2bdca
-	int c, forkit = 0;
d2bdca
+	int c, forkit = 0, maproot = 0;
d2bdca
 	const char *procmnt = NULL;
d2bdca
+	unsigned long propagation = UNSHARE_PROPAGATION_DEFAULT;
d2bdca
+	uid_t real_euid = geteuid();
d2bdca
+	gid_t real_egid = getegid();;
d2bdca
 
d2bdca
 	setlocale(LC_ALL, "");
d2bdca
 	bindtextdomain(PACKAGE, LOCALEDIR);
d2bdca
 	textdomain(PACKAGE);
d2bdca
+	atexit(close_stdout);
d2bdca
 
d2bdca
-	while ((c = getopt_long(argc, argv, "+fhVmuinp", longopts, NULL)) != -1) {
d2bdca
+	while ((c = getopt_long(argc, argv, "+fhVmuinpUr", longopts, NULL)) != -1) {
d2bdca
 		switch (c) {
d2bdca
 		case 'f':
d2bdca
 			forkit = 1;
d2bdca
@@ -114,10 +228,23 @@ int main(int argc, char *argv[])
d2bdca
 		case 'p':
d2bdca
 			unshare_flags |= CLONE_NEWPID;
d2bdca
 			break;
d2bdca
+		case 'U':
d2bdca
+			unshare_flags |= CLONE_NEWUSER;
d2bdca
+			break;
d2bdca
 		case OPT_MOUNTPROC:
d2bdca
 			unshare_flags |= CLONE_NEWNS;
d2bdca
 			procmnt = optarg ? optarg : "/proc";
d2bdca
 			break;
d2bdca
+		case 'r':
d2bdca
+			unshare_flags |= CLONE_NEWUSER;
d2bdca
+			maproot = 1;
d2bdca
+			break;
d2bdca
+		case OPT_SETGROUPS:
d2bdca
+			setgrpcmd = setgroups_str2id(optarg);
d2bdca
+			break;
d2bdca
+		case OPT_PROPAGATION:
d2bdca
+			propagation = parse_propagation(optarg);
d2bdca
+			break;
d2bdca
 		default:
d2bdca
 			usage(EXIT_FAILURE);
d2bdca
 		}
d2bdca
@@ -146,6 +273,25 @@ int main(int argc, char *argv[])
d2bdca
 		}
d2bdca
 	}
d2bdca
 
d2bdca
+	if (maproot) {
d2bdca
+		if (setgrpcmd == SETGROUPS_ALLOW)
d2bdca
+			errx(EXIT_FAILURE, _("options --setgroups=allow and "
d2bdca
+					"--map-root-user are mutually exclusive"));
d2bdca
+
d2bdca
+		/* since Linux 3.19 unprivileged writing of /proc/self/gid_map
d2bdca
+		 * has been disabled unless /proc/self/setgroups is written
d2bdca
+		 * first to permanently disable the ability to call setgroups
d2bdca
+		 * in that user namespace. */
d2bdca
+		setgroups_control(SETGROUPS_DENY);
d2bdca
+		map_id(_PATH_PROC_UIDMAP, 0, real_euid);
d2bdca
+		map_id(_PATH_PROC_GIDMAP, 0, real_egid);
d2bdca
+
d2bdca
+	} else if (setgrpcmd != SETGROUPS_NONE)
d2bdca
+		setgroups_control(setgrpcmd);
d2bdca
+
d2bdca
+	if ((unshare_flags & CLONE_NEWNS) && propagation)
d2bdca
+		set_propagation(propagation);
d2bdca
+
d2bdca
 	if (procmnt &&
d2bdca
 	    (mount("none", procmnt, NULL, MS_PRIVATE|MS_REC, NULL) != 0 ||
d2bdca
 	     mount("proc", procmnt, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL) != 0))