|
|
5a015b |
From fb510f4e3dc6c13696bce6d3a79b8cea9b03b044 Mon Sep 17 00:00:00 2001
|
|
|
5a015b |
From: =?UTF-8?q?Nikola=20Forr=C3=B3?= <nforro@redhat.com>
|
|
|
5a015b |
Date: Mon, 22 May 2017 14:51:53 +0200
|
|
|
5a015b |
Subject: [PATCH 1/2] clone.2: document features related to namespaces
|
|
|
5a015b |
|
|
|
5a015b |
---
|
|
|
5a015b |
man-pages/man2/____clone.2 | 524 ++++++++++++++++++++++++++++-----------------
|
|
|
5a015b |
man-pages/man2/clone.2 | 524 ++++++++++++++++++++++++++++-----------------
|
|
|
5a015b |
2 files changed, 658 insertions(+), 390 deletions(-)
|
|
|
5a015b |
|
|
|
5a015b |
diff --git a/man-pages/man2/____clone.2 b/man-pages/man2/____clone.2
|
|
|
5a015b |
index 56d03cf..edf0994 100644
|
|
|
5a015b |
--- a/man-pages/man2/____clone.2
|
|
|
5a015b |
+++ b/man-pages/man2/____clone.2
|
|
|
5a015b |
@@ -39,50 +39,23 @@
|
|
|
5a015b |
.\" 2008-11-19, mtk, document CLONE_NEWIPC
|
|
|
5a015b |
.\" 2008-11-19, Jens Axboe, mtk, document CLONE_IO
|
|
|
5a015b |
.\"
|
|
|
5a015b |
-.\" FIXME Document CLONE_NEWUSER, which is new in 2.6.23
|
|
|
5a015b |
-.\" (also supported for unshare()?)
|
|
|
5a015b |
-.\"
|
|
|
5a015b |
-.TH CLONE 2 2013-04-16 "Linux" "Linux Programmer's Manual"
|
|
|
5a015b |
+.TH CLONE 2 2016-12-12 "Linux" "Linux Programmer's Manual"
|
|
|
5a015b |
.SH NAME
|
|
|
5a015b |
clone, __clone2 \- create a child process
|
|
|
5a015b |
.SH SYNOPSIS
|
|
|
5a015b |
.nf
|
|
|
5a015b |
/* Prototype for the glibc wrapper function */
|
|
|
5a015b |
|
|
|
5a015b |
+.B #define _GNU_SOURCE
|
|
|
5a015b |
.B #include <sched.h>
|
|
|
5a015b |
|
|
|
5a015b |
.BI "int clone(int (*" "fn" ")(void *), void *" child_stack ,
|
|
|
5a015b |
.BI " int " flags ", void *" "arg" ", ... "
|
|
|
5a015b |
-.BI " /* pid_t *" ptid ", struct user_desc *" tls \
|
|
|
5a015b |
+.BI " /* pid_t *" ptid ", void *" newtls \
|
|
|
5a015b |
", pid_t *" ctid " */ );"
|
|
|
5a015b |
|
|
|
5a015b |
-/* Prototype for the raw system call */
|
|
|
5a015b |
-
|
|
|
5a015b |
-.BI "long clone(unsigned long " flags ", void *" child_stack ,
|
|
|
5a015b |
-.BI " void *" ptid ", void *" ctid ,
|
|
|
5a015b |
-.BI " struct pt_regs *" regs );
|
|
|
5a015b |
+/* For the prototype of the raw system call, see NOTES */
|
|
|
5a015b |
.fi
|
|
|
5a015b |
-.sp
|
|
|
5a015b |
-.in -4n
|
|
|
5a015b |
-Feature Test Macro Requirements for glibc wrapper function (see
|
|
|
5a015b |
-.BR feature_test_macros (7)):
|
|
|
5a015b |
-.in
|
|
|
5a015b |
-.sp
|
|
|
5a015b |
-.BR clone ():
|
|
|
5a015b |
-.ad l
|
|
|
5a015b |
-.RS 4
|
|
|
5a015b |
-.PD 0
|
|
|
5a015b |
-.TP 4
|
|
|
5a015b |
-Since glibc 2.14:
|
|
|
5a015b |
-_GNU_SOURCE
|
|
|
5a015b |
-.TP 4
|
|
|
5a015b |
-.\" FIXME See http://sources.redhat.com/bugzilla/show_bug.cgi?id=4749
|
|
|
5a015b |
-Before glibc 2.14:
|
|
|
5a015b |
-_BSD_SOURCE || _SVID_SOURCE
|
|
|
5a015b |
- /* _GNU_SOURCE also suffices */
|
|
|
5a015b |
-.PD
|
|
|
5a015b |
-.RE
|
|
|
5a015b |
-.ad b
|
|
|
5a015b |
.SH DESCRIPTION
|
|
|
5a015b |
.BR clone ()
|
|
|
5a015b |
creates a new process, in a manner similar to
|
|
|
5a015b |
@@ -107,7 +80,7 @@ But see the description of
|
|
|
5a015b |
.B CLONE_PARENT
|
|
|
5a015b |
below.)
|
|
|
5a015b |
|
|
|
5a015b |
-The main use of
|
|
|
5a015b |
+One use of
|
|
|
5a015b |
.BR clone ()
|
|
|
5a015b |
is to implement threads: multiple threads of control in a program that
|
|
|
5a015b |
run concurrently in a shared memory space.
|
|
|
5a015b |
@@ -180,7 +153,7 @@ in order to specify what is shared between the calling process
|
|
|
5a015b |
and the child process:
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.BR CLONE_CHILD_CLEARTID " (since Linux 2.5.49)"
|
|
|
5a015b |
-Erase child thread ID at location
|
|
|
5a015b |
+Clear (zero) the child thread ID at the location
|
|
|
5a015b |
.I ctid
|
|
|
5a015b |
in child memory when the child exits, and do a wakeup on the futex
|
|
|
5a015b |
at that address.
|
|
|
5a015b |
@@ -190,9 +163,12 @@ system call.
|
|
|
5a015b |
This is used by threading libraries.
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.BR CLONE_CHILD_SETTID " (since Linux 2.5.49)"
|
|
|
5a015b |
-Store child thread ID at location
|
|
|
5a015b |
+Store the child thread ID at the location
|
|
|
5a015b |
.I ctid
|
|
|
5a015b |
-in child memory.
|
|
|
5a015b |
+in the child's memory.
|
|
|
5a015b |
+The store operation completes before
|
|
|
5a015b |
+.BR clone ()
|
|
|
5a015b |
+returns control to user space.
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.BR CLONE_FILES " (since Linux 2.0)"
|
|
|
5a015b |
If
|
|
|
5a015b |
@@ -206,27 +182,31 @@ or changes its associated flags (using the
|
|
|
5a015b |
.BR fcntl (2)
|
|
|
5a015b |
.B F_SETFD
|
|
|
5a015b |
operation), the other process is also affected.
|
|
|
5a015b |
+If a process sharing a file descriptor table calls
|
|
|
5a015b |
+.BR execve (2),
|
|
|
5a015b |
+its file descriptor table is duplicated (unshared).
|
|
|
5a015b |
|
|
|
5a015b |
If
|
|
|
5a015b |
.B CLONE_FILES
|
|
|
5a015b |
is not set, the child process inherits a copy of all file descriptors
|
|
|
5a015b |
opened in the calling process at the time of
|
|
|
5a015b |
.BR clone ().
|
|
|
5a015b |
-(The duplicated file descriptors in the child refer to the
|
|
|
5a015b |
-same open file descriptions (see
|
|
|
5a015b |
-.BR open (2))
|
|
|
5a015b |
-as the corresponding file descriptors in the calling process.)
|
|
|
5a015b |
Subsequent operations that open or close file descriptors,
|
|
|
5a015b |
or change file descriptor flags,
|
|
|
5a015b |
performed by either the calling
|
|
|
5a015b |
process or the child process do not affect the other process.
|
|
|
5a015b |
+Note, however,
|
|
|
5a015b |
+that the duplicated file descriptors in the child refer to the same open file
|
|
|
5a015b |
+descriptions as the corresponding file descriptors in the calling process,
|
|
|
5a015b |
+and thus share file offsets and file status flags (see
|
|
|
5a015b |
+.BR open (2)).
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.BR CLONE_FS " (since Linux 2.0)"
|
|
|
5a015b |
If
|
|
|
5a015b |
.B CLONE_FS
|
|
|
5a015b |
-is set, the caller and the child process share the same file system
|
|
|
5a015b |
+is set, the caller and the child process share the same filesystem
|
|
|
5a015b |
information.
|
|
|
5a015b |
-This includes the root of the file system, the current
|
|
|
5a015b |
+This includes the root of the filesystem, the current
|
|
|
5a015b |
working directory, and the umask.
|
|
|
5a015b |
Any call to
|
|
|
5a015b |
.BR chroot (2),
|
|
|
5a015b |
@@ -238,7 +218,7 @@ other process.
|
|
|
5a015b |
|
|
|
5a015b |
If
|
|
|
5a015b |
.B CLONE_FS
|
|
|
5a015b |
-is not set, the child process works on a copy of the file system
|
|
|
5a015b |
+is not set, the child process works on a copy of the filesystem
|
|
|
5a015b |
information of the calling process at the time of the
|
|
|
5a015b |
.BR clone ()
|
|
|
5a015b |
call.
|
|
|
5a015b |
@@ -258,7 +238,7 @@ If this flag is not set, then (as with
|
|
|
5a015b |
the new process has its own I/O context.
|
|
|
5a015b |
|
|
|
5a015b |
.\" The following based on text from Jens Axboe
|
|
|
5a015b |
-The I/O context is the I/O scope of the disk scheduler (i.e,
|
|
|
5a015b |
+The I/O context is the I/O scope of the disk scheduler (i.e.,
|
|
|
5a015b |
what the I/O scheduler uses to model scheduling of a process's I/O).
|
|
|
5a015b |
If processes share the same I/O context,
|
|
|
5a015b |
they are treated as one by the I/O scheduler.
|
|
|
5a015b |
@@ -288,7 +268,7 @@ the process is created in the same IPC namespace as
|
|
|
5a015b |
the calling process.
|
|
|
5a015b |
This flag is intended for the implementation of containers.
|
|
|
5a015b |
|
|
|
5a015b |
-An IPC namespace provides an isolated view of System V IPC objects (see
|
|
|
5a015b |
+An IPC namespace provides an isolated view of System\ V IPC objects (see
|
|
|
5a015b |
.BR svipc (7))
|
|
|
5a015b |
and (since Linux 2.6.30)
|
|
|
5a015b |
.\" commit 7eafd7c74c3f2e67c27621b987b28397110d643f
|
|
|
5a015b |
@@ -308,17 +288,17 @@ When an IPC namespace is destroyed
|
|
|
5a015b |
(i.e., when the last process that is a member of the namespace terminates),
|
|
|
5a015b |
all IPC objects in the namespace are automatically destroyed.
|
|
|
5a015b |
|
|
|
5a015b |
-Use of this flag requires: a kernel configured with the
|
|
|
5a015b |
-.B CONFIG_SYSVIPC
|
|
|
5a015b |
-and
|
|
|
5a015b |
-.B CONFIG_IPC_NS
|
|
|
5a015b |
-options and that the process be privileged
|
|
|
5a015b |
-.RB ( CAP_SYS_ADMIN ).
|
|
|
5a015b |
+Only a privileged process
|
|
|
5a015b |
+.RB ( CAP_SYS_ADMIN )
|
|
|
5a015b |
+can employ
|
|
|
5a015b |
+.BR CLONE_NEWIPC .
|
|
|
5a015b |
This flag can't be specified in conjunction with
|
|
|
5a015b |
.BR CLONE_SYSVSEM .
|
|
|
5a015b |
+
|
|
|
5a015b |
+For further information on IPC namespaces, see
|
|
|
5a015b |
+.BR namespaces (7).
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.BR CLONE_NEWNET " (since Linux 2.6.24)"
|
|
|
5a015b |
-.\" FIXME Check when the implementation was completed
|
|
|
5a015b |
(The implementation of this flag was completed only
|
|
|
5a015b |
by about kernel version 2.6.29.)
|
|
|
5a015b |
|
|
|
5a015b |
@@ -326,7 +306,7 @@ If
|
|
|
5a015b |
.B CLONE_NEWNET
|
|
|
5a015b |
is set, then create the process in a new network namespace.
|
|
|
5a015b |
If this flag is not set, then (as with
|
|
|
5a015b |
-.BR fork (2)),
|
|
|
5a015b |
+.BR fork (2))
|
|
|
5a015b |
the process is created in the same network namespace as
|
|
|
5a015b |
the calling process.
|
|
|
5a015b |
This flag is intended for the implementation of containers.
|
|
|
5a015b |
@@ -341,7 +321,7 @@ directory trees, sockets, etc.).
|
|
|
5a015b |
A physical network device can live in exactly one
|
|
|
5a015b |
network namespace.
|
|
|
5a015b |
A virtual network device ("veth") pair provides a pipe-like abstraction
|
|
|
5a015b |
-.\" FIXME Add pointer to veth(4) page when it is eventually completed
|
|
|
5a015b |
+.\" FIXME . Add pointer to veth(4) page when it is eventually completed
|
|
|
5a015b |
that can be used to create tunnels between network namespaces,
|
|
|
5a015b |
and can be used to create a bridge to a physical network device
|
|
|
5a015b |
in another namespace.
|
|
|
5a015b |
@@ -350,54 +330,41 @@ When a network namespace is freed
|
|
|
5a015b |
(i.e., when the last process in the namespace terminates),
|
|
|
5a015b |
its physical network devices are moved back to the
|
|
|
5a015b |
initial network namespace (not to the parent of the process).
|
|
|
5a015b |
+For further information on network namespaces, see
|
|
|
5a015b |
+.BR namespaces (7).
|
|
|
5a015b |
|
|
|
5a015b |
-Use of this flag requires: a kernel configured with the
|
|
|
5a015b |
-.B CONFIG_NET_NS
|
|
|
5a015b |
-option and that the process be privileged
|
|
|
5a015b |
-.RB ( CAP_SYS_ADMIN ).
|
|
|
5a015b |
+Only a privileged process
|
|
|
5a015b |
+.RB ( CAP_SYS_ADMIN )
|
|
|
5a015b |
+can employ
|
|
|
5a015b |
+.BR CLONE_NEWNET .
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.BR CLONE_NEWNS " (since Linux 2.4.19)"
|
|
|
5a015b |
-Start the child in a new mount namespace.
|
|
|
5a015b |
-
|
|
|
5a015b |
-Every process lives in a mount namespace.
|
|
|
5a015b |
-The
|
|
|
5a015b |
-.I namespace
|
|
|
5a015b |
-of a process is the data (the set of mounts) describing the file hierarchy
|
|
|
5a015b |
-as seen by that process.
|
|
|
5a015b |
-After a
|
|
|
5a015b |
-.BR fork (2)
|
|
|
5a015b |
-or
|
|
|
5a015b |
-.BR clone ()
|
|
|
5a015b |
-where the
|
|
|
5a015b |
-.B CLONE_NEWNS
|
|
|
5a015b |
-flag is not set, the child lives in the same mount
|
|
|
5a015b |
-namespace as the parent.
|
|
|
5a015b |
-The system calls
|
|
|
5a015b |
-.BR mount (2)
|
|
|
5a015b |
-and
|
|
|
5a015b |
-.BR umount (2)
|
|
|
5a015b |
-change the mount namespace of the calling process, and hence affect
|
|
|
5a015b |
-all processes that live in the same namespace, but do not affect
|
|
|
5a015b |
-processes in a different mount namespace.
|
|
|
5a015b |
-
|
|
|
5a015b |
-After a
|
|
|
5a015b |
-.BR clone ()
|
|
|
5a015b |
-where the
|
|
|
5a015b |
+If
|
|
|
5a015b |
.B CLONE_NEWNS
|
|
|
5a015b |
-flag is set, the cloned child is started in a new mount namespace,
|
|
|
5a015b |
+is set, the cloned child is started in a new mount namespace,
|
|
|
5a015b |
initialized with a copy of the namespace of the parent.
|
|
|
5a015b |
-
|
|
|
5a015b |
-Only a privileged process (one having the \fBCAP_SYS_ADMIN\fP capability)
|
|
|
5a015b |
-may specify the
|
|
|
5a015b |
+If
|
|
|
5a015b |
.B CLONE_NEWNS
|
|
|
5a015b |
-flag.
|
|
|
5a015b |
+is not set, the child lives in the same mount
|
|
|
5a015b |
+namespace as the parent.
|
|
|
5a015b |
+
|
|
|
5a015b |
+Only a privileged process
|
|
|
5a015b |
+.RB ( CAP_SYS_ADMIN )
|
|
|
5a015b |
+can employ
|
|
|
5a015b |
+.BR CLONE_NEWNS .
|
|
|
5a015b |
It is not permitted to specify both
|
|
|
5a015b |
.B CLONE_NEWNS
|
|
|
5a015b |
and
|
|
|
5a015b |
.B CLONE_FS
|
|
|
5a015b |
+.\" See https://lwn.net/Articles/543273/
|
|
|
5a015b |
in the same
|
|
|
5a015b |
.BR clone ()
|
|
|
5a015b |
call.
|
|
|
5a015b |
+
|
|
|
5a015b |
+For further information on mount namespaces, see
|
|
|
5a015b |
+.BR namespaces (7)
|
|
|
5a015b |
+and
|
|
|
5a015b |
+.BR mount_namespaces (7).
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.BR CLONE_NEWPID " (since Linux 2.6.24)"
|
|
|
5a015b |
.\" This explanation draws a lot of details from
|
|
|
5a015b |
@@ -411,73 +378,74 @@ If
|
|
|
5a015b |
.B CLONE_NEWPID
|
|
|
5a015b |
is set, then create the process in a new PID namespace.
|
|
|
5a015b |
If this flag is not set, then (as with
|
|
|
5a015b |
-.BR fork (2)),
|
|
|
5a015b |
+.BR fork (2))
|
|
|
5a015b |
the process is created in the same PID namespace as
|
|
|
5a015b |
the calling process.
|
|
|
5a015b |
This flag is intended for the implementation of containers.
|
|
|
5a015b |
|
|
|
5a015b |
-A PID namespace provides an isolated environment for PIDs:
|
|
|
5a015b |
-PIDs in a new namespace start at 1,
|
|
|
5a015b |
-somewhat like a standalone system, and calls to
|
|
|
5a015b |
-.BR fork (2),
|
|
|
5a015b |
-.BR vfork (2),
|
|
|
5a015b |
+For further information on PID namespaces, see
|
|
|
5a015b |
+.BR namespaces (7)
|
|
|
5a015b |
+and
|
|
|
5a015b |
+.BR pid_namespaces (7).
|
|
|
5a015b |
+
|
|
|
5a015b |
+Only a privileged process
|
|
|
5a015b |
+.RB ( CAP_SYS_ADMIN )
|
|
|
5a015b |
+can employ
|
|
|
5a015b |
+.BR CLONE_NEWPID .
|
|
|
5a015b |
+This flag can't be specified in conjunction with
|
|
|
5a015b |
+.BR CLONE_THREAD
|
|
|
5a015b |
or
|
|
|
5a015b |
+.BR CLONE_PARENT .
|
|
|
5a015b |
+.TP
|
|
|
5a015b |
+.BR CLONE_NEWUSER
|
|
|
5a015b |
+(This flag first became meaningful for
|
|
|
5a015b |
.BR clone ()
|
|
|
5a015b |
-will produce processes with PIDs that are unique within the namespace.
|
|
|
5a015b |
+in Linux 2.6.23,
|
|
|
5a015b |
+the current
|
|
|
5a015b |
+.BR clone ()
|
|
|
5a015b |
+semantics were merged in Linux 3.5,
|
|
|
5a015b |
+and the final pieces to make the user namespaces completely usable were
|
|
|
5a015b |
+merged in Linux 3.8.)
|
|
|
5a015b |
|
|
|
5a015b |
-The first process created in a new namespace
|
|
|
5a015b |
-(i.e., the process created using the
|
|
|
5a015b |
-.BR CLONE_NEWPID
|
|
|
5a015b |
-flag) has the PID 1, and is the "init" process for the namespace.
|
|
|
5a015b |
-Children that are orphaned within the namespace will be reparented
|
|
|
5a015b |
-to this process rather than
|
|
|
5a015b |
-.BR init (8).
|
|
|
5a015b |
-Unlike the traditional
|
|
|
5a015b |
-.B init
|
|
|
5a015b |
-process, the "init" process of a PID namespace can terminate,
|
|
|
5a015b |
-and if it does, all of the processes in the namespace are terminated.
|
|
|
5a015b |
-
|
|
|
5a015b |
-PID namespaces form a hierarchy.
|
|
|
5a015b |
-When a new PID namespace is created,
|
|
|
5a015b |
-the processes in that namespace are visible
|
|
|
5a015b |
-in the PID namespace of the process that created the new namespace;
|
|
|
5a015b |
-analogously, if the parent PID namespace is itself
|
|
|
5a015b |
-the child of another PID namespace,
|
|
|
5a015b |
-then processes in the child and parent PID namespaces will both be
|
|
|
5a015b |
-visible in the grandparent PID namespace.
|
|
|
5a015b |
-Conversely, the processes in the "child" PID namespace do not see
|
|
|
5a015b |
-the processes in the parent namespace.
|
|
|
5a015b |
-The existence of a namespace hierarchy means that each process
|
|
|
5a015b |
-may now have multiple PIDs:
|
|
|
5a015b |
-one for each namespace in which it is visible;
|
|
|
5a015b |
-each of these PIDs is unique within the corresponding namespace.
|
|
|
5a015b |
-(A call to
|
|
|
5a015b |
-.BR getpid (2)
|
|
|
5a015b |
-always returns the PID associated with the namespace in which
|
|
|
5a015b |
-the process lives.)
|
|
|
5a015b |
-
|
|
|
5a015b |
-After creating the new namespace,
|
|
|
5a015b |
-it is useful for the child to change its root directory
|
|
|
5a015b |
-and mount a new procfs instance at
|
|
|
5a015b |
-.I /proc
|
|
|
5a015b |
-so that tools such as
|
|
|
5a015b |
-.BR ps (1)
|
|
|
5a015b |
-work correctly.
|
|
|
5a015b |
-.\" mount -t proc proc /proc
|
|
|
5a015b |
-(If
|
|
|
5a015b |
-.BR CLONE_NEWNS
|
|
|
5a015b |
-is also included in
|
|
|
5a015b |
-.IR flags ,
|
|
|
5a015b |
-then it isn't necessary to change the root directory:
|
|
|
5a015b |
-a new procfs instance can be mounted directly over
|
|
|
5a015b |
-.IR /proc .)
|
|
|
5a015b |
+If
|
|
|
5a015b |
+.B CLONE_NEWUSER
|
|
|
5a015b |
+is set, then create the process in a new user namespace.
|
|
|
5a015b |
+If this flag is not set, then (as with
|
|
|
5a015b |
+.BR fork (2))
|
|
|
5a015b |
+the process is created in the same user namespace as the calling process.
|
|
|
5a015b |
+
|
|
|
5a015b |
+For further information on user namespaces, see
|
|
|
5a015b |
+.BR namespaces (7)
|
|
|
5a015b |
+and
|
|
|
5a015b |
+.BR user_namespaces (7).
|
|
|
5a015b |
+
|
|
|
5a015b |
+Before Linux 3.8, use of
|
|
|
5a015b |
+.BR CLONE_NEWUSER
|
|
|
5a015b |
+required that the caller have three capabilities:
|
|
|
5a015b |
+.BR CAP_SYS_ADMIN ,
|
|
|
5a015b |
+.BR CAP_SETUID ,
|
|
|
5a015b |
+and
|
|
|
5a015b |
+.BR CAP_SETGID .
|
|
|
5a015b |
+.\" Before Linux 2.6.29, it appears that only CAP_SYS_ADMIN was needed
|
|
|
5a015b |
+Starting with Linux 3.8,
|
|
|
5a015b |
+no privileges are needed to create a user namespace.
|
|
|
5a015b |
|
|
|
5a015b |
-Use of this flag requires: a kernel configured with the
|
|
|
5a015b |
-.B CONFIG_PID_NS
|
|
|
5a015b |
-option and that the process be privileged
|
|
|
5a015b |
-.RB ( CAP_SYS_ADMIN ).
|
|
|
5a015b |
This flag can't be specified in conjunction with
|
|
|
5a015b |
-.BR CLONE_THREAD .
|
|
|
5a015b |
+.BR CLONE_THREAD
|
|
|
5a015b |
+or
|
|
|
5a015b |
+.BR CLONE_PARENT .
|
|
|
5a015b |
+For security reasons,
|
|
|
5a015b |
+.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
|
|
|
5a015b |
+.\" https://lwn.net/Articles/543273/
|
|
|
5a015b |
+.\" The fix actually went into 3.9 and into 3.8.3. However, user namespaces
|
|
|
5a015b |
+.\" were, for practical purposes, unusable in earlier 3.8.x because of the
|
|
|
5a015b |
+.\" various filesystems that didn't support userns.
|
|
|
5a015b |
+.BR CLONE_NEWUSER
|
|
|
5a015b |
+cannot be specified in conjunction with
|
|
|
5a015b |
+.BR CLONE_FS .
|
|
|
5a015b |
+
|
|
|
5a015b |
+For further information on user namespaces, see
|
|
|
5a015b |
+.BR user_namespaces (7).
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.BR CLONE_NEWUTS " (since Linux 2.6.19)"
|
|
|
5a015b |
If
|
|
|
5a015b |
@@ -486,27 +454,29 @@ is set, then create the process in a new UTS namespace,
|
|
|
5a015b |
whose identifiers are initialized by duplicating the identifiers
|
|
|
5a015b |
from the UTS namespace of the calling process.
|
|
|
5a015b |
If this flag is not set, then (as with
|
|
|
5a015b |
-.BR fork (2)),
|
|
|
5a015b |
+.BR fork (2))
|
|
|
5a015b |
the process is created in the same UTS namespace as
|
|
|
5a015b |
the calling process.
|
|
|
5a015b |
This flag is intended for the implementation of containers.
|
|
|
5a015b |
|
|
|
5a015b |
A UTS namespace is the set of identifiers returned by
|
|
|
5a015b |
.BR uname (2);
|
|
|
5a015b |
-among these, the domain name and the host name can be modified by
|
|
|
5a015b |
+among these, the domain name and the hostname can be modified by
|
|
|
5a015b |
.BR setdomainname (2)
|
|
|
5a015b |
and
|
|
|
5a015b |
-.BR
|
|
|
5a015b |
.BR sethostname (2),
|
|
|
5a015b |
respectively.
|
|
|
5a015b |
Changes made to the identifiers in a UTS namespace
|
|
|
5a015b |
are visible to all other processes in the same namespace,
|
|
|
5a015b |
but are not visible to processes in other UTS namespaces.
|
|
|
5a015b |
|
|
|
5a015b |
-Use of this flag requires: a kernel configured with the
|
|
|
5a015b |
-.B CONFIG_UTS_NS
|
|
|
5a015b |
-option and that the process be privileged
|
|
|
5a015b |
-.RB ( CAP_SYS_ADMIN ).
|
|
|
5a015b |
+Only a privileged process
|
|
|
5a015b |
+.RB ( CAP_SYS_ADMIN )
|
|
|
5a015b |
+can employ
|
|
|
5a015b |
+.BR CLONE_NEWUTS .
|
|
|
5a015b |
+
|
|
|
5a015b |
+For further information on UTS namespaces, see
|
|
|
5a015b |
+.BR namespaces (7).
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.BR CLONE_PARENT " (since Linux 2.3.12)"
|
|
|
5a015b |
If
|
|
|
5a015b |
@@ -530,12 +500,15 @@ is set, then the parent of the calling process, rather than the
|
|
|
5a015b |
calling process itself, will be signaled.
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.BR CLONE_PARENT_SETTID " (since Linux 2.5.49)"
|
|
|
5a015b |
-Store child thread ID at location
|
|
|
5a015b |
+Store the child thread ID at the location
|
|
|
5a015b |
.I ptid
|
|
|
5a015b |
-in parent and child memory.
|
|
|
5a015b |
+in the parent's memory.
|
|
|
5a015b |
(In Linux 2.5.32-2.5.48 there was a flag
|
|
|
5a015b |
.B CLONE_SETTID
|
|
|
5a015b |
that did this.)
|
|
|
5a015b |
+The store operation completes before
|
|
|
5a015b |
+.BR clone ()
|
|
|
5a015b |
+returns control to user space.
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.BR CLONE_PID " (obsolete)"
|
|
|
5a015b |
If
|
|
|
5a015b |
@@ -547,6 +520,7 @@ of not much use.
|
|
|
5a015b |
Since 2.3.21 this flag can be
|
|
|
5a015b |
specified only by the system boot process (PID 0).
|
|
|
5a015b |
It disappeared in Linux 2.5.16.
|
|
|
5a015b |
+Since then, the kernel silently ignores it without error.
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.BR CLONE_PTRACE " (since Linux 2.2)"
|
|
|
5a015b |
If
|
|
|
5a015b |
@@ -556,11 +530,25 @@ then trace the child also (see
|
|
|
5a015b |
.BR ptrace (2)).
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.BR CLONE_SETTLS " (since Linux 2.5.32)"
|
|
|
5a015b |
-The
|
|
|
5a015b |
+The TLS (Thread Local Storage) descriptor is set to
|
|
|
5a015b |
+.IR newtls .
|
|
|
5a015b |
+
|
|
|
5a015b |
+The interpretation of
|
|
|
5a015b |
.I newtls
|
|
|
5a015b |
-argument is the new TLS (Thread Local Storage) descriptor.
|
|
|
5a015b |
+and the resulting effect is architecture dependent.
|
|
|
5a015b |
+On x86,
|
|
|
5a015b |
+.I newtls
|
|
|
5a015b |
+is interpreted as a
|
|
|
5a015b |
+.IR "struct user_desc *"
|
|
|
5a015b |
(See
|
|
|
5a015b |
-.BR set_thread_area (2).)
|
|
|
5a015b |
+.BR set_thread_area (2)).
|
|
|
5a015b |
+On x86_64 it is the new value to be set for the %fs base register
|
|
|
5a015b |
+(see the
|
|
|
5a015b |
+.I ARCH_SET_FS
|
|
|
5a015b |
+argument to
|
|
|
5a015b |
+.BR arch_prctl (2)).
|
|
|
5a015b |
+On architectures with a dedicated TLS register, it is the new value
|
|
|
5a015b |
+of that register.
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.BR CLONE_SIGHAND " (since Linux 2.0)"
|
|
|
5a015b |
If
|
|
|
5a015b |
@@ -612,16 +600,26 @@ from Linux 2.6.25 onward,
|
|
|
5a015b |
and was
|
|
|
5a015b |
.I removed
|
|
|
5a015b |
altogether in Linux 2.6.38.
|
|
|
5a015b |
+Since then, the kernel silently ignores it without error.
|
|
|
5a015b |
.\" glibc 2.8 removed this defn from bits/sched.h
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.BR CLONE_SYSVSEM " (since Linux 2.5.10)"
|
|
|
5a015b |
If
|
|
|
5a015b |
.B CLONE_SYSVSEM
|
|
|
5a015b |
is set, then the child and the calling process share
|
|
|
5a015b |
-a single list of System V semaphore undo values (see
|
|
|
5a015b |
+a single list of System\ V semaphore adjustment
|
|
|
5a015b |
+.RI ( semadj )
|
|
|
5a015b |
+values (see
|
|
|
5a015b |
.BR semop (2)).
|
|
|
5a015b |
-If this flag is not set, then the child has a separate undo list,
|
|
|
5a015b |
-which is initially empty.
|
|
|
5a015b |
+In this case, the shared list accumulates
|
|
|
5a015b |
+.I semadj
|
|
|
5a015b |
+values across all processes sharing the list,
|
|
|
5a015b |
+and semaphore adjustments are performed only when the last process
|
|
|
5a015b |
+that is sharing the list terminates (or ceases sharing the list using
|
|
|
5a015b |
+.BR unshare (2)).
|
|
|
5a015b |
+If this flag is not set, then the child has a separate
|
|
|
5a015b |
+.I semadj
|
|
|
5a015b |
+list that is initially empty.
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.BR CLONE_THREAD " (since Linux 2.4.0-test8)"
|
|
|
5a015b |
If
|
|
|
5a015b |
@@ -703,7 +701,12 @@ must also include
|
|
|
5a015b |
.B CLONE_SIGHAND
|
|
|
5a015b |
if
|
|
|
5a015b |
.B CLONE_THREAD
|
|
|
5a015b |
-is specified.
|
|
|
5a015b |
+is specified
|
|
|
5a015b |
+(and note that, since Linux 2.6.0-test6,
|
|
|
5a015b |
+.BR CLONE_SIGHAND
|
|
|
5a015b |
+also requires
|
|
|
5a015b |
+.BR CLONE_VM
|
|
|
5a015b |
+to be included).
|
|
|
5a015b |
|
|
|
5a015b |
Signals may be sent to a thread group as a whole (i.e., a TGID) using
|
|
|
5a015b |
.BR kill (2),
|
|
|
5a015b |
@@ -761,7 +764,7 @@ or
|
|
|
5a015b |
|
|
|
5a015b |
If
|
|
|
5a015b |
.B CLONE_VFORK
|
|
|
5a015b |
-is not set then both the calling process and the child are schedulable
|
|
|
5a015b |
+is not set, then both the calling process and the child are schedulable
|
|
|
5a015b |
after the call, and an application should not rely on execution occurring
|
|
|
5a015b |
in any particular order.
|
|
|
5a015b |
.TP
|
|
|
5a015b |
@@ -786,7 +789,7 @@ space of the calling process at the time of
|
|
|
5a015b |
Memory writes or file mappings/unmappings performed by one of the
|
|
|
5a015b |
processes do not affect the other, as with
|
|
|
5a015b |
.BR fork (2).
|
|
|
5a015b |
-.SS The raw system call interface
|
|
|
5a015b |
+.SS C library/kernel differences
|
|
|
5a015b |
The raw
|
|
|
5a015b |
.BR clone ()
|
|
|
5a015b |
system call corresponds more closely to
|
|
|
5a015b |
@@ -801,16 +804,58 @@ arguments of the
|
|
|
5a015b |
.BR clone ()
|
|
|
5a015b |
wrapper function are omitted.
|
|
|
5a015b |
Furthermore, the argument order changes.
|
|
|
5a015b |
-The raw system call interface on x86 and many other architectures is roughly:
|
|
|
5a015b |
+In addition, there are variations across architectures.
|
|
|
5a015b |
+
|
|
|
5a015b |
+The raw system call interface on x86-64 and some other architectures
|
|
|
5a015b |
+(including sh, tile, and alpha) is roughly:
|
|
|
5a015b |
+
|
|
|
5a015b |
.in +4
|
|
|
5a015b |
.nf
|
|
|
5a015b |
+.BI "long clone(unsigned long " flags ", void *" child_stack ,
|
|
|
5a015b |
+.BI " int *" ptid ", int *" ctid ,
|
|
|
5a015b |
+.BI " unsigned long " newtls );
|
|
|
5a015b |
+.fi
|
|
|
5a015b |
+.in
|
|
|
5a015b |
+
|
|
|
5a015b |
+On x86-32, and several other common architectures
|
|
|
5a015b |
+(including score, ARM, ARM 64, PA-RISC, arc, Power PC, xtensa,
|
|
|
5a015b |
+and MIPS),
|
|
|
5a015b |
+.\" CONFIG_CLONE_BACKWARDS
|
|
|
5a015b |
+the order of the last two arguments is reversed:
|
|
|
5a015b |
|
|
|
5a015b |
+.in +4
|
|
|
5a015b |
+.nf
|
|
|
5a015b |
.BI "long clone(unsigned long " flags ", void *" child_stack ,
|
|
|
5a015b |
-.BI " void *" ptid ", void *" ctid ,
|
|
|
5a015b |
-.BI " struct pt_regs *" regs );
|
|
|
5a015b |
+.BI " int *" ptid ", unsigned long " newtls ,
|
|
|
5a015b |
+.BI " int *" ctid );
|
|
|
5a015b |
+.fi
|
|
|
5a015b |
+.in
|
|
|
5a015b |
+
|
|
|
5a015b |
+On the cris and s390 architectures,
|
|
|
5a015b |
+.\" CONFIG_CLONE_BACKWARDS2
|
|
|
5a015b |
+the order of the first two arguments is reversed:
|
|
|
5a015b |
|
|
|
5a015b |
+.in +4
|
|
|
5a015b |
+.nf
|
|
|
5a015b |
+.BI "long clone(void *" child_stack ", unsigned long " flags ,
|
|
|
5a015b |
+.BI " int *" ptid ", int *" ctid ,
|
|
|
5a015b |
+.BI " unsigned long " newtls );
|
|
|
5a015b |
+.fi
|
|
|
5a015b |
+.in
|
|
|
5a015b |
+
|
|
|
5a015b |
+On the microblaze architecture,
|
|
|
5a015b |
+.\" CONFIG_CLONE_BACKWARDS3
|
|
|
5a015b |
+an additional argument is supplied:
|
|
|
5a015b |
+
|
|
|
5a015b |
+.in +4
|
|
|
5a015b |
+.nf
|
|
|
5a015b |
+.BI "long clone(unsigned long " flags ", void *" child_stack ,
|
|
|
5a015b |
+.BI " int " stack_size , "\fR /* Size of stack */"
|
|
|
5a015b |
+.BI " int *" ptid ", int *" ctid ,
|
|
|
5a015b |
+.BI " unsigned long " newtls );
|
|
|
5a015b |
.fi
|
|
|
5a015b |
.in
|
|
|
5a015b |
+
|
|
|
5a015b |
Another difference for the raw system call is that the
|
|
|
5a015b |
.I child_stack
|
|
|
5a015b |
argument may be zero, in which case copy-on-write semantics ensure that the
|
|
|
5a015b |
@@ -819,17 +864,13 @@ the stack.
|
|
|
5a015b |
In this case, for correct operation, the
|
|
|
5a015b |
.B CLONE_VM
|
|
|
5a015b |
option should not be specified.
|
|
|
5a015b |
-
|
|
|
5a015b |
-For some architectures, the order of the arguments for the system call
|
|
|
5a015b |
-differs from that shown above.
|
|
|
5a015b |
-On the score, microblaze, ARM, ARM 64, PA-RISC, arc, Power PC, xtensa,
|
|
|
5a015b |
-and MIPS architectures,
|
|
|
5a015b |
-the order of the fourth and fifth arguments is reversed.
|
|
|
5a015b |
-On the cris and s390 architectures,
|
|
|
5a015b |
-the order of the first and second arguments is reversed.
|
|
|
5a015b |
+.\"
|
|
|
5a015b |
.SS blackfin, m68k, and sparc
|
|
|
5a015b |
+.\" Mike Frysinger noted in a 2013 mail:
|
|
|
5a015b |
+.\" these arches don't define __ARCH_WANT_SYS_CLONE:
|
|
|
5a015b |
+.\" blackfin ia64 m68k sparc
|
|
|
5a015b |
The argument-passing conventions on
|
|
|
5a015b |
-blackfin, m68k, and sparc are different from descriptions above.
|
|
|
5a015b |
+blackfin, m68k, and sparc are different from the descriptions above.
|
|
|
5a015b |
For details, see the kernel (and glibc) source.
|
|
|
5a015b |
.SS ia64
|
|
|
5a015b |
On ia64, a different interface is used:
|
|
|
5a015b |
@@ -883,7 +924,8 @@ will be set appropriately.
|
|
|
5a015b |
.SH ERRORS
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.B EAGAIN
|
|
|
5a015b |
-Too many processes are already running.
|
|
|
5a015b |
+Too many processes are already running; see
|
|
|
5a015b |
+.BR fork (2).
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.B EINVAL
|
|
|
5a015b |
.B CLONE_SIGHAND
|
|
|
5a015b |
@@ -908,6 +950,7 @@ was not.
|
|
|
5a015b |
.\" (Since Linux 2.6.0-test6.)
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.B EINVAL
|
|
|
5a015b |
+.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
|
|
|
5a015b |
Both
|
|
|
5a015b |
.B CLONE_FS
|
|
|
5a015b |
and
|
|
|
5a015b |
@@ -915,6 +958,14 @@ and
|
|
|
5a015b |
were specified in
|
|
|
5a015b |
.IR flags .
|
|
|
5a015b |
.TP
|
|
|
5a015b |
+.BR EINVAL " (since Linux 3.9)"
|
|
|
5a015b |
+Both
|
|
|
5a015b |
+.B CLONE_NEWUSER
|
|
|
5a015b |
+and
|
|
|
5a015b |
+.B CLONE_FS
|
|
|
5a015b |
+were specified in
|
|
|
5a015b |
+.IR flags .
|
|
|
5a015b |
+.TP
|
|
|
5a015b |
.B EINVAL
|
|
|
5a015b |
Both
|
|
|
5a015b |
.B CLONE_NEWIPC
|
|
|
5a015b |
@@ -924,18 +975,25 @@ were specified in
|
|
|
5a015b |
.IR flags .
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.B EINVAL
|
|
|
5a015b |
-Both
|
|
|
5a015b |
+One (or both) of
|
|
|
5a015b |
.BR CLONE_NEWPID
|
|
|
5a015b |
-and
|
|
|
5a015b |
+or
|
|
|
5a015b |
+.BR CLONE_NEWUSER
|
|
|
5a015b |
+and one (or both) of
|
|
|
5a015b |
.BR CLONE_THREAD
|
|
|
5a015b |
+or
|
|
|
5a015b |
+.BR CLONE_PARENT
|
|
|
5a015b |
were specified in
|
|
|
5a015b |
.IR flags .
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.B EINVAL
|
|
|
5a015b |
-Returned by
|
|
|
5a015b |
+Returned by the glibc
|
|
|
5a015b |
.BR clone ()
|
|
|
5a015b |
-when a zero value is specified for
|
|
|
5a015b |
-.IR child_stack .
|
|
|
5a015b |
+wrapper function when
|
|
|
5a015b |
+.IR fn
|
|
|
5a015b |
+or
|
|
|
5a015b |
+.IR child_stack
|
|
|
5a015b |
+is specified as NULL.
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.B EINVAL
|
|
|
5a015b |
.BR CLONE_NEWIPC
|
|
|
5a015b |
@@ -971,11 +1029,48 @@ but the kernel was not configured with the
|
|
|
5a015b |
.B CONFIG_UTS
|
|
|
5a015b |
option.
|
|
|
5a015b |
.TP
|
|
|
5a015b |
+.B EINVAL
|
|
|
5a015b |
+.I child_stack
|
|
|
5a015b |
+is not aligned to a suitable boundary for this architecture.
|
|
|
5a015b |
+For example, on aarch64,
|
|
|
5a015b |
+.I child_stack
|
|
|
5a015b |
+must be a multiple of 16.
|
|
|
5a015b |
+.TP
|
|
|
5a015b |
.B ENOMEM
|
|
|
5a015b |
Cannot allocate sufficient memory to allocate a task structure for the
|
|
|
5a015b |
child, or to copy those parts of the caller's context that need to be
|
|
|
5a015b |
copied.
|
|
|
5a015b |
.TP
|
|
|
5a015b |
+.BR ENOSPC " (since Linux 3.7)"
|
|
|
5a015b |
+.\" commit f2302505775fd13ba93f034206f1e2a587017929
|
|
|
5a015b |
+.B CLONE_NEWPID
|
|
|
5a015b |
+was specified in \fIflags\fP,
|
|
|
5a015b |
+but the limit on the nesting depth of PID namespaces
|
|
|
5a015b |
+would have been exceeded; see
|
|
|
5a015b |
+.BR pid_namespaces (7).
|
|
|
5a015b |
+.TP
|
|
|
5a015b |
+.BR ENOSPC " (since Linux 4.9; beforehand " EUSERS )
|
|
|
5a015b |
+.B CLONE_NEWUSER
|
|
|
5a015b |
+was specified in
|
|
|
5a015b |
+.IR flags ,
|
|
|
5a015b |
+and the call would cause the limit on the number of
|
|
|
5a015b |
+nested user namespaces to be exceeded.
|
|
|
5a015b |
+See
|
|
|
5a015b |
+.BR user_namespaces (7).
|
|
|
5a015b |
+
|
|
|
5a015b |
+From Linux 3.11 to Linux 4.8, the error diagnosed in this case was
|
|
|
5a015b |
+.BR EUSERS .
|
|
|
5a015b |
+.TP
|
|
|
5a015b |
+.BR ENOSPC " (since Linux 4.9)"
|
|
|
5a015b |
+One of the values in
|
|
|
5a015b |
+.I flags
|
|
|
5a015b |
+specified the creation of a new user namespace,
|
|
|
5a015b |
+but doing so would have caused the limit defined by the corresponding file in
|
|
|
5a015b |
+.IR /proc/sys/user
|
|
|
5a015b |
+to be exceeded.
|
|
|
5a015b |
+For further details, see
|
|
|
5a015b |
+.BR namespaces (7).
|
|
|
5a015b |
+.TP
|
|
|
5a015b |
.B EPERM
|
|
|
5a015b |
.BR CLONE_NEWIPC ,
|
|
|
5a015b |
.BR CLONE_NEWNET ,
|
|
|
5a015b |
@@ -989,22 +1084,62 @@ was specified by an unprivileged process (process without \fBCAP_SYS_ADMIN\fP).
|
|
|
5a015b |
.B CLONE_PID
|
|
|
5a015b |
was specified by a process other than process 0.
|
|
|
5a015b |
.TP
|
|
|
5a015b |
+.B EPERM
|
|
|
5a015b |
+.BR CLONE_NEWUSER
|
|
|
5a015b |
+was specified in
|
|
|
5a015b |
+.IR flags ,
|
|
|
5a015b |
+but either the effective user ID or the effective group ID of the caller
|
|
|
5a015b |
+does not have a mapping in the parent namespace (see
|
|
|
5a015b |
+.BR user_namespaces (7)).
|
|
|
5a015b |
+.TP
|
|
|
5a015b |
+.BR EPERM " (since Linux 3.9)"
|
|
|
5a015b |
+.\" commit 3151527ee007b73a0ebd296010f1c0454a919c7d
|
|
|
5a015b |
+.B CLONE_NEWUSER
|
|
|
5a015b |
+was specified in
|
|
|
5a015b |
+.I flags
|
|
|
5a015b |
+and the caller is in a chroot environment
|
|
|
5a015b |
+.\" FIXME What is the rationale for this restriction?
|
|
|
5a015b |
+(i.e., the caller's root directory does not match the root directory
|
|
|
5a015b |
+of the mount namespace in which it resides).
|
|
|
5a015b |
+.TP
|
|
|
5a015b |
.BR ERESTARTNOINTR " (since Linux 2.6.17)"
|
|
|
5a015b |
+.\" commit 4a2c7a7837da1b91468e50426066d988050e4d56
|
|
|
5a015b |
System call was interrupted by a signal and will be restarted.
|
|
|
5a015b |
(This can be seen only during a trace.)
|
|
|
5a015b |
-.SH VERSIONS
|
|
|
5a015b |
-There is no entry for
|
|
|
5a015b |
-.BR clone ()
|
|
|
5a015b |
-in libc5.
|
|
|
5a015b |
-glibc2 provides
|
|
|
5a015b |
-.BR clone ()
|
|
|
5a015b |
-as described in this manual page.
|
|
|
5a015b |
+.TP
|
|
|
5a015b |
+.BR EUSERS " (Linux 3.11 to Linux 4.8)"
|
|
|
5a015b |
+.B CLONE_NEWUSER
|
|
|
5a015b |
+was specified in
|
|
|
5a015b |
+.IR flags ,
|
|
|
5a015b |
+and the limit on the number of nested user namespaces would be exceeded.
|
|
|
5a015b |
+See the discussion of the
|
|
|
5a015b |
+.BR ENOSPC
|
|
|
5a015b |
+error above.
|
|
|
5a015b |
+.\" .SH VERSIONS
|
|
|
5a015b |
+.\" There is no entry for
|
|
|
5a015b |
+.\" .BR clone ()
|
|
|
5a015b |
+.\" in libc5.
|
|
|
5a015b |
+.\" glibc2 provides
|
|
|
5a015b |
+.\" .BR clone ()
|
|
|
5a015b |
+.\" as described in this manual page.
|
|
|
5a015b |
.SH CONFORMING TO
|
|
|
5a015b |
.BR clone ()
|
|
|
5a015b |
is Linux-specific and should not be used in programs
|
|
|
5a015b |
intended to be portable.
|
|
|
5a015b |
.SH NOTES
|
|
|
5a015b |
-In the kernel 2.4.x series,
|
|
|
5a015b |
+The
|
|
|
5a015b |
+.BR kcmp (2)
|
|
|
5a015b |
+system call can be used to test whether two processes share various
|
|
|
5a015b |
+resources such as a file descriptor table,
|
|
|
5a015b |
+System V semaphore undo operations, or a virtual address space.
|
|
|
5a015b |
+
|
|
|
5a015b |
+
|
|
|
5a015b |
+Handlers registered using
|
|
|
5a015b |
+.BR pthread_atfork (3)
|
|
|
5a015b |
+are not executed during a call to
|
|
|
5a015b |
+.BR clone ().
|
|
|
5a015b |
+
|
|
|
5a015b |
+In the Linux 2.4.x series,
|
|
|
5a015b |
.B CLONE_THREAD
|
|
|
5a015b |
generally does not make the parent of the new thread the same
|
|
|
5a015b |
as the parent of the calling process.
|
|
|
5a015b |
@@ -1012,14 +1147,13 @@ However, for kernel versions 2.4.7 to 2.4.18 the
|
|
|
5a015b |
.B CLONE_THREAD
|
|
|
5a015b |
flag implied the
|
|
|
5a015b |
.B CLONE_PARENT
|
|
|
5a015b |
-flag (as in kernel 2.6).
|
|
|
5a015b |
+flag (as in Linux 2.6.0 and later).
|
|
|
5a015b |
|
|
|
5a015b |
For a while there was
|
|
|
5a015b |
.B CLONE_DETACHED
|
|
|
5a015b |
(introduced in 2.5.32):
|
|
|
5a015b |
parent wants no child-exit signal.
|
|
|
5a015b |
-In 2.6.2 the need to give this
|
|
|
5a015b |
-together with
|
|
|
5a015b |
+In Linux 2.6.2, the need to give this flag together with
|
|
|
5a015b |
.B CLONE_THREAD
|
|
|
5a015b |
disappeared.
|
|
|
5a015b |
This flag is still defined, but has no effect.
|
|
|
5a015b |
@@ -1088,7 +1222,6 @@ To get the truth, it may be necessary to use code such as the following:
|
|
|
5a015b |
.\" https://bugzilla.redhat.com/show_bug.cgi?id=417521
|
|
|
5a015b |
.\" http://sourceware.org/bugzilla/show_bug.cgi?id=6910
|
|
|
5a015b |
.SH EXAMPLE
|
|
|
5a015b |
-.SS Create a child that executes in a separate UTS namespace
|
|
|
5a015b |
The following program demonstrates the use of
|
|
|
5a015b |
.BR clone ()
|
|
|
5a015b |
to create a child process that executes in a separate UTS namespace.
|
|
|
5a015b |
@@ -1098,7 +1231,7 @@ making it possible to see that the hostname
|
|
|
5a015b |
differs in the UTS namespaces of the parent and child.
|
|
|
5a015b |
For an example of the use of this program, see
|
|
|
5a015b |
.BR setns (2).
|
|
|
5a015b |
-
|
|
|
5a015b |
+.SS Program source
|
|
|
5a015b |
.nf
|
|
|
5a015b |
#define _GNU_SOURCE
|
|
|
5a015b |
#include <sys/wait.h>
|
|
|
5a015b |
@@ -1198,6 +1331,7 @@ main(int argc, char *argv[])
|
|
|
5a015b |
.BR unshare (2),
|
|
|
5a015b |
.BR wait (2),
|
|
|
5a015b |
.BR capabilities (7),
|
|
|
5a015b |
+.BR namespaces (7),
|
|
|
5a015b |
.BR pthreads (7)
|
|
|
5a015b |
.SH COLOPHON
|
|
|
5a015b |
This page is part of release 3.53 of the Linux
|
|
|
5a015b |
diff --git a/man-pages/man2/clone.2 b/man-pages/man2/clone.2
|
|
|
5a015b |
index d9ffe3e..d053b0e 100644
|
|
|
5a015b |
--- a/man-pages/man2/clone.2
|
|
|
5a015b |
+++ b/man-pages/man2/clone.2
|
|
|
5a015b |
@@ -39,50 +39,23 @@
|
|
|
5a015b |
.\" 2008-11-19, mtk, document CLONE_NEWIPC
|
|
|
5a015b |
.\" 2008-11-19, Jens Axboe, mtk, document CLONE_IO
|
|
|
5a015b |
.\"
|
|
|
5a015b |
-.\" FIXME Document CLONE_NEWUSER, which is new in 2.6.23
|
|
|
5a015b |
-.\" (also supported for unshare()?)
|
|
|
5a015b |
-.\"
|
|
|
5a015b |
-.TH CLONE 2 2013-04-16 "Linux" "Linux Programmer's Manual"
|
|
|
5a015b |
+.TH CLONE 2 2016-12-12 "Linux" "Linux Programmer's Manual"
|
|
|
5a015b |
.SH NAME
|
|
|
5a015b |
clone, __clone2 \- create a child process
|
|
|
5a015b |
.SH SYNOPSIS
|
|
|
5a015b |
.nf
|
|
|
5a015b |
/* Prototype for the glibc wrapper function */
|
|
|
5a015b |
|
|
|
5a015b |
+.B #define _GNU_SOURCE
|
|
|
5a015b |
.B #include <sched.h>
|
|
|
5a015b |
|
|
|
5a015b |
.BI "int clone(int (*" "fn" ")(void *), void *" child_stack ,
|
|
|
5a015b |
.BI " int " flags ", void *" "arg" ", ... "
|
|
|
5a015b |
-.BI " /* pid_t *" ptid ", struct user_desc *" tls \
|
|
|
5a015b |
+.BI " /* pid_t *" ptid ", void *" newtls \
|
|
|
5a015b |
", pid_t *" ctid " */ );"
|
|
|
5a015b |
|
|
|
5a015b |
-/* Prototype for the raw system call */
|
|
|
5a015b |
-
|
|
|
5a015b |
-.BI "long clone(unsigned long " flags ", void *" child_stack ,
|
|
|
5a015b |
-.BI " void *" ptid ", void *" ctid ,
|
|
|
5a015b |
-.BI " struct pt_regs *" regs );
|
|
|
5a015b |
+/* For the prototype of the raw system call, see NOTES */
|
|
|
5a015b |
.fi
|
|
|
5a015b |
-.sp
|
|
|
5a015b |
-.in -4n
|
|
|
5a015b |
-Feature Test Macro Requirements for glibc wrapper function (see
|
|
|
5a015b |
-.BR feature_test_macros (7)):
|
|
|
5a015b |
-.in
|
|
|
5a015b |
-.sp
|
|
|
5a015b |
-.BR clone ():
|
|
|
5a015b |
-.ad l
|
|
|
5a015b |
-.RS 4
|
|
|
5a015b |
-.PD 0
|
|
|
5a015b |
-.TP 4
|
|
|
5a015b |
-Since glibc 2.14:
|
|
|
5a015b |
-_GNU_SOURCE
|
|
|
5a015b |
-.TP 4
|
|
|
5a015b |
-.\" FIXME See http://sources.redhat.com/bugzilla/show_bug.cgi?id=4749
|
|
|
5a015b |
-Before glibc 2.14:
|
|
|
5a015b |
-_BSD_SOURCE || _SVID_SOURCE
|
|
|
5a015b |
- /* _GNU_SOURCE also suffices */
|
|
|
5a015b |
-.PD
|
|
|
5a015b |
-.RE
|
|
|
5a015b |
-.ad b
|
|
|
5a015b |
.SH DESCRIPTION
|
|
|
5a015b |
.BR clone ()
|
|
|
5a015b |
creates a new process, in a manner similar to
|
|
|
5a015b |
@@ -107,7 +80,7 @@ But see the description of
|
|
|
5a015b |
.B CLONE_PARENT
|
|
|
5a015b |
below.)
|
|
|
5a015b |
|
|
|
5a015b |
-The main use of
|
|
|
5a015b |
+One use of
|
|
|
5a015b |
.BR clone ()
|
|
|
5a015b |
is to implement threads: multiple threads of control in a program that
|
|
|
5a015b |
run concurrently in a shared memory space.
|
|
|
5a015b |
@@ -180,7 +153,7 @@ in order to specify what is shared between the calling process
|
|
|
5a015b |
and the child process:
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.BR CLONE_CHILD_CLEARTID " (since Linux 2.5.49)"
|
|
|
5a015b |
-Erase child thread ID at location
|
|
|
5a015b |
+Clear (zero) the child thread ID at the location
|
|
|
5a015b |
.I ctid
|
|
|
5a015b |
in child memory when the child exits, and do a wakeup on the futex
|
|
|
5a015b |
at that address.
|
|
|
5a015b |
@@ -190,9 +163,12 @@ system call.
|
|
|
5a015b |
This is used by threading libraries.
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.BR CLONE_CHILD_SETTID " (since Linux 2.5.49)"
|
|
|
5a015b |
-Store child thread ID at location
|
|
|
5a015b |
+Store the child thread ID at the location
|
|
|
5a015b |
.I ctid
|
|
|
5a015b |
-in child memory.
|
|
|
5a015b |
+in the child's memory.
|
|
|
5a015b |
+The store operation completes before
|
|
|
5a015b |
+.BR clone ()
|
|
|
5a015b |
+returns control to user space.
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.BR CLONE_FILES " (since Linux 2.0)"
|
|
|
5a015b |
If
|
|
|
5a015b |
@@ -206,27 +182,31 @@ or changes its associated flags (using the
|
|
|
5a015b |
.BR fcntl (2)
|
|
|
5a015b |
.B F_SETFD
|
|
|
5a015b |
operation), the other process is also affected.
|
|
|
5a015b |
+If a process sharing a file descriptor table calls
|
|
|
5a015b |
+.BR execve (2),
|
|
|
5a015b |
+its file descriptor table is duplicated (unshared).
|
|
|
5a015b |
|
|
|
5a015b |
If
|
|
|
5a015b |
.B CLONE_FILES
|
|
|
5a015b |
is not set, the child process inherits a copy of all file descriptors
|
|
|
5a015b |
opened in the calling process at the time of
|
|
|
5a015b |
.BR clone ().
|
|
|
5a015b |
-(The duplicated file descriptors in the child refer to the
|
|
|
5a015b |
-same open file descriptions (see
|
|
|
5a015b |
-.BR open (2))
|
|
|
5a015b |
-as the corresponding file descriptors in the calling process.)
|
|
|
5a015b |
Subsequent operations that open or close file descriptors,
|
|
|
5a015b |
or change file descriptor flags,
|
|
|
5a015b |
performed by either the calling
|
|
|
5a015b |
process or the child process do not affect the other process.
|
|
|
5a015b |
+Note, however,
|
|
|
5a015b |
+that the duplicated file descriptors in the child refer to the same open file
|
|
|
5a015b |
+descriptions as the corresponding file descriptors in the calling process,
|
|
|
5a015b |
+and thus share file offsets and file status flags (see
|
|
|
5a015b |
+.BR open (2)).
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.BR CLONE_FS " (since Linux 2.0)"
|
|
|
5a015b |
If
|
|
|
5a015b |
.B CLONE_FS
|
|
|
5a015b |
-is set, the caller and the child process share the same file system
|
|
|
5a015b |
+is set, the caller and the child process share the same filesystem
|
|
|
5a015b |
information.
|
|
|
5a015b |
-This includes the root of the file system, the current
|
|
|
5a015b |
+This includes the root of the filesystem, the current
|
|
|
5a015b |
working directory, and the umask.
|
|
|
5a015b |
Any call to
|
|
|
5a015b |
.BR chroot (2),
|
|
|
5a015b |
@@ -238,7 +218,7 @@ other process.
|
|
|
5a015b |
|
|
|
5a015b |
If
|
|
|
5a015b |
.B CLONE_FS
|
|
|
5a015b |
-is not set, the child process works on a copy of the file system
|
|
|
5a015b |
+is not set, the child process works on a copy of the filesystem
|
|
|
5a015b |
information of the calling process at the time of the
|
|
|
5a015b |
.BR clone ()
|
|
|
5a015b |
call.
|
|
|
5a015b |
@@ -258,7 +238,7 @@ If this flag is not set, then (as with
|
|
|
5a015b |
the new process has its own I/O context.
|
|
|
5a015b |
|
|
|
5a015b |
.\" The following based on text from Jens Axboe
|
|
|
5a015b |
-The I/O context is the I/O scope of the disk scheduler (i.e,
|
|
|
5a015b |
+The I/O context is the I/O scope of the disk scheduler (i.e.,
|
|
|
5a015b |
what the I/O scheduler uses to model scheduling of a process's I/O).
|
|
|
5a015b |
If processes share the same I/O context,
|
|
|
5a015b |
they are treated as one by the I/O scheduler.
|
|
|
5a015b |
@@ -288,7 +268,7 @@ the process is created in the same IPC namespace as
|
|
|
5a015b |
the calling process.
|
|
|
5a015b |
This flag is intended for the implementation of containers.
|
|
|
5a015b |
|
|
|
5a015b |
-An IPC namespace provides an isolated view of System V IPC objects (see
|
|
|
5a015b |
+An IPC namespace provides an isolated view of System\ V IPC objects (see
|
|
|
5a015b |
.BR svipc (7))
|
|
|
5a015b |
and (since Linux 2.6.30)
|
|
|
5a015b |
.\" commit 7eafd7c74c3f2e67c27621b987b28397110d643f
|
|
|
5a015b |
@@ -308,17 +288,17 @@ When an IPC namespace is destroyed
|
|
|
5a015b |
(i.e., when the last process that is a member of the namespace terminates),
|
|
|
5a015b |
all IPC objects in the namespace are automatically destroyed.
|
|
|
5a015b |
|
|
|
5a015b |
-Use of this flag requires: a kernel configured with the
|
|
|
5a015b |
-.B CONFIG_SYSVIPC
|
|
|
5a015b |
-and
|
|
|
5a015b |
-.B CONFIG_IPC_NS
|
|
|
5a015b |
-options and that the process be privileged
|
|
|
5a015b |
-.RB ( CAP_SYS_ADMIN ).
|
|
|
5a015b |
+Only a privileged process
|
|
|
5a015b |
+.RB ( CAP_SYS_ADMIN )
|
|
|
5a015b |
+can employ
|
|
|
5a015b |
+.BR CLONE_NEWIPC .
|
|
|
5a015b |
This flag can't be specified in conjunction with
|
|
|
5a015b |
.BR CLONE_SYSVSEM .
|
|
|
5a015b |
+
|
|
|
5a015b |
+For further information on IPC namespaces, see
|
|
|
5a015b |
+.BR namespaces (7).
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.BR CLONE_NEWNET " (since Linux 2.6.24)"
|
|
|
5a015b |
-.\" FIXME Check when the implementation was completed
|
|
|
5a015b |
(The implementation of this flag was completed only
|
|
|
5a015b |
by about kernel version 2.6.29.)
|
|
|
5a015b |
|
|
|
5a015b |
@@ -326,7 +306,7 @@ If
|
|
|
5a015b |
.B CLONE_NEWNET
|
|
|
5a015b |
is set, then create the process in a new network namespace.
|
|
|
5a015b |
If this flag is not set, then (as with
|
|
|
5a015b |
-.BR fork (2)),
|
|
|
5a015b |
+.BR fork (2))
|
|
|
5a015b |
the process is created in the same network namespace as
|
|
|
5a015b |
the calling process.
|
|
|
5a015b |
This flag is intended for the implementation of containers.
|
|
|
5a015b |
@@ -341,7 +321,7 @@ directory trees, sockets, etc.).
|
|
|
5a015b |
A physical network device can live in exactly one
|
|
|
5a015b |
network namespace.
|
|
|
5a015b |
A virtual network device ("veth") pair provides a pipe-like abstraction
|
|
|
5a015b |
-.\" FIXME Add pointer to veth(4) page when it is eventually completed
|
|
|
5a015b |
+.\" FIXME . Add pointer to veth(4) page when it is eventually completed
|
|
|
5a015b |
that can be used to create tunnels between network namespaces,
|
|
|
5a015b |
and can be used to create a bridge to a physical network device
|
|
|
5a015b |
in another namespace.
|
|
|
5a015b |
@@ -350,54 +330,41 @@ When a network namespace is freed
|
|
|
5a015b |
(i.e., when the last process in the namespace terminates),
|
|
|
5a015b |
its physical network devices are moved back to the
|
|
|
5a015b |
initial network namespace (not to the parent of the process).
|
|
|
5a015b |
+For further information on network namespaces, see
|
|
|
5a015b |
+.BR namespaces (7).
|
|
|
5a015b |
|
|
|
5a015b |
-Use of this flag requires: a kernel configured with the
|
|
|
5a015b |
-.B CONFIG_NET_NS
|
|
|
5a015b |
-option and that the process be privileged
|
|
|
5a015b |
-.RB ( CAP_SYS_ADMIN ).
|
|
|
5a015b |
+Only a privileged process
|
|
|
5a015b |
+.RB ( CAP_SYS_ADMIN )
|
|
|
5a015b |
+can employ
|
|
|
5a015b |
+.BR CLONE_NEWNET .
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.BR CLONE_NEWNS " (since Linux 2.4.19)"
|
|
|
5a015b |
-Start the child in a new mount namespace.
|
|
|
5a015b |
-
|
|
|
5a015b |
-Every process lives in a mount namespace.
|
|
|
5a015b |
-The
|
|
|
5a015b |
-.I namespace
|
|
|
5a015b |
-of a process is the data (the set of mounts) describing the file hierarchy
|
|
|
5a015b |
-as seen by that process.
|
|
|
5a015b |
-After a
|
|
|
5a015b |
-.BR fork (2)
|
|
|
5a015b |
-or
|
|
|
5a015b |
-.BR clone ()
|
|
|
5a015b |
-where the
|
|
|
5a015b |
-.B CLONE_NEWNS
|
|
|
5a015b |
-flag is not set, the child lives in the same mount
|
|
|
5a015b |
-namespace as the parent.
|
|
|
5a015b |
-The system calls
|
|
|
5a015b |
-.BR mount (2)
|
|
|
5a015b |
-and
|
|
|
5a015b |
-.BR umount (2)
|
|
|
5a015b |
-change the mount namespace of the calling process, and hence affect
|
|
|
5a015b |
-all processes that live in the same namespace, but do not affect
|
|
|
5a015b |
-processes in a different mount namespace.
|
|
|
5a015b |
-
|
|
|
5a015b |
-After a
|
|
|
5a015b |
-.BR clone ()
|
|
|
5a015b |
-where the
|
|
|
5a015b |
+If
|
|
|
5a015b |
.B CLONE_NEWNS
|
|
|
5a015b |
-flag is set, the cloned child is started in a new mount namespace,
|
|
|
5a015b |
+is set, the cloned child is started in a new mount namespace,
|
|
|
5a015b |
initialized with a copy of the namespace of the parent.
|
|
|
5a015b |
-
|
|
|
5a015b |
-Only a privileged process (one having the \fBCAP_SYS_ADMIN\fP capability)
|
|
|
5a015b |
-may specify the
|
|
|
5a015b |
+If
|
|
|
5a015b |
.B CLONE_NEWNS
|
|
|
5a015b |
-flag.
|
|
|
5a015b |
+is not set, the child lives in the same mount
|
|
|
5a015b |
+namespace as the parent.
|
|
|
5a015b |
+
|
|
|
5a015b |
+Only a privileged process
|
|
|
5a015b |
+.RB ( CAP_SYS_ADMIN )
|
|
|
5a015b |
+can employ
|
|
|
5a015b |
+.BR CLONE_NEWNS .
|
|
|
5a015b |
It is not permitted to specify both
|
|
|
5a015b |
.B CLONE_NEWNS
|
|
|
5a015b |
and
|
|
|
5a015b |
.B CLONE_FS
|
|
|
5a015b |
+.\" See https://lwn.net/Articles/543273/
|
|
|
5a015b |
in the same
|
|
|
5a015b |
.BR clone ()
|
|
|
5a015b |
call.
|
|
|
5a015b |
+
|
|
|
5a015b |
+For further information on mount namespaces, see
|
|
|
5a015b |
+.BR namespaces (7)
|
|
|
5a015b |
+and
|
|
|
5a015b |
+.BR mount_namespaces (7).
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.BR CLONE_NEWPID " (since Linux 2.6.24)"
|
|
|
5a015b |
.\" This explanation draws a lot of details from
|
|
|
5a015b |
@@ -411,73 +378,74 @@ If
|
|
|
5a015b |
.B CLONE_NEWPID
|
|
|
5a015b |
is set, then create the process in a new PID namespace.
|
|
|
5a015b |
If this flag is not set, then (as with
|
|
|
5a015b |
-.BR fork (2)),
|
|
|
5a015b |
+.BR fork (2))
|
|
|
5a015b |
the process is created in the same PID namespace as
|
|
|
5a015b |
the calling process.
|
|
|
5a015b |
This flag is intended for the implementation of containers.
|
|
|
5a015b |
|
|
|
5a015b |
-A PID namespace provides an isolated environment for PIDs:
|
|
|
5a015b |
-PIDs in a new namespace start at 1,
|
|
|
5a015b |
-somewhat like a standalone system, and calls to
|
|
|
5a015b |
-.BR fork (2),
|
|
|
5a015b |
-.BR vfork (2),
|
|
|
5a015b |
+For further information on PID namespaces, see
|
|
|
5a015b |
+.BR namespaces (7)
|
|
|
5a015b |
+and
|
|
|
5a015b |
+.BR pid_namespaces (7).
|
|
|
5a015b |
+
|
|
|
5a015b |
+Only a privileged process
|
|
|
5a015b |
+.RB ( CAP_SYS_ADMIN )
|
|
|
5a015b |
+can employ
|
|
|
5a015b |
+.BR CLONE_NEWPID .
|
|
|
5a015b |
+This flag can't be specified in conjunction with
|
|
|
5a015b |
+.BR CLONE_THREAD
|
|
|
5a015b |
or
|
|
|
5a015b |
+.BR CLONE_PARENT .
|
|
|
5a015b |
+.TP
|
|
|
5a015b |
+.BR CLONE_NEWUSER
|
|
|
5a015b |
+(This flag first became meaningful for
|
|
|
5a015b |
.BR clone ()
|
|
|
5a015b |
-will produce processes with PIDs that are unique within the namespace.
|
|
|
5a015b |
+in Linux 2.6.23,
|
|
|
5a015b |
+the current
|
|
|
5a015b |
+.BR clone ()
|
|
|
5a015b |
+semantics were merged in Linux 3.5,
|
|
|
5a015b |
+and the final pieces to make the user namespaces completely usable were
|
|
|
5a015b |
+merged in Linux 3.8.)
|
|
|
5a015b |
|
|
|
5a015b |
-The first process created in a new namespace
|
|
|
5a015b |
-(i.e., the process created using the
|
|
|
5a015b |
-.BR CLONE_NEWPID
|
|
|
5a015b |
-flag) has the PID 1, and is the "init" process for the namespace.
|
|
|
5a015b |
-Children that are orphaned within the namespace will be reparented
|
|
|
5a015b |
-to this process rather than
|
|
|
5a015b |
-.BR init (8).
|
|
|
5a015b |
-Unlike the traditional
|
|
|
5a015b |
-.B init
|
|
|
5a015b |
-process, the "init" process of a PID namespace can terminate,
|
|
|
5a015b |
-and if it does, all of the processes in the namespace are terminated.
|
|
|
5a015b |
-
|
|
|
5a015b |
-PID namespaces form a hierarchy.
|
|
|
5a015b |
-When a new PID namespace is created,
|
|
|
5a015b |
-the processes in that namespace are visible
|
|
|
5a015b |
-in the PID namespace of the process that created the new namespace;
|
|
|
5a015b |
-analogously, if the parent PID namespace is itself
|
|
|
5a015b |
-the child of another PID namespace,
|
|
|
5a015b |
-then processes in the child and parent PID namespaces will both be
|
|
|
5a015b |
-visible in the grandparent PID namespace.
|
|
|
5a015b |
-Conversely, the processes in the "child" PID namespace do not see
|
|
|
5a015b |
-the processes in the parent namespace.
|
|
|
5a015b |
-The existence of a namespace hierarchy means that each process
|
|
|
5a015b |
-may now have multiple PIDs:
|
|
|
5a015b |
-one for each namespace in which it is visible;
|
|
|
5a015b |
-each of these PIDs is unique within the corresponding namespace.
|
|
|
5a015b |
-(A call to
|
|
|
5a015b |
-.BR getpid (2)
|
|
|
5a015b |
-always returns the PID associated with the namespace in which
|
|
|
5a015b |
-the process lives.)
|
|
|
5a015b |
-
|
|
|
5a015b |
-After creating the new namespace,
|
|
|
5a015b |
-it is useful for the child to change its root directory
|
|
|
5a015b |
-and mount a new procfs instance at
|
|
|
5a015b |
-.I /proc
|
|
|
5a015b |
-so that tools such as
|
|
|
5a015b |
-.BR ps (1)
|
|
|
5a015b |
-work correctly.
|
|
|
5a015b |
-.\" mount -t proc proc /proc
|
|
|
5a015b |
-(If
|
|
|
5a015b |
-.BR CLONE_NEWNS
|
|
|
5a015b |
-is also included in
|
|
|
5a015b |
-.IR flags ,
|
|
|
5a015b |
-then it isn't necessary to change the root directory:
|
|
|
5a015b |
-a new procfs instance can be mounted directly over
|
|
|
5a015b |
-.IR /proc .)
|
|
|
5a015b |
+If
|
|
|
5a015b |
+.B CLONE_NEWUSER
|
|
|
5a015b |
+is set, then create the process in a new user namespace.
|
|
|
5a015b |
+If this flag is not set, then (as with
|
|
|
5a015b |
+.BR fork (2))
|
|
|
5a015b |
+the process is created in the same user namespace as the calling process.
|
|
|
5a015b |
+
|
|
|
5a015b |
+For further information on user namespaces, see
|
|
|
5a015b |
+.BR namespaces (7)
|
|
|
5a015b |
+and
|
|
|
5a015b |
+.BR user_namespaces (7).
|
|
|
5a015b |
+
|
|
|
5a015b |
+Before Linux 3.8, use of
|
|
|
5a015b |
+.BR CLONE_NEWUSER
|
|
|
5a015b |
+required that the caller have three capabilities:
|
|
|
5a015b |
+.BR CAP_SYS_ADMIN ,
|
|
|
5a015b |
+.BR CAP_SETUID ,
|
|
|
5a015b |
+and
|
|
|
5a015b |
+.BR CAP_SETGID .
|
|
|
5a015b |
+.\" Before Linux 2.6.29, it appears that only CAP_SYS_ADMIN was needed
|
|
|
5a015b |
+Starting with Linux 3.8,
|
|
|
5a015b |
+no privileges are needed to create a user namespace.
|
|
|
5a015b |
|
|
|
5a015b |
-Use of this flag requires: a kernel configured with the
|
|
|
5a015b |
-.B CONFIG_PID_NS
|
|
|
5a015b |
-option and that the process be privileged
|
|
|
5a015b |
-.RB ( CAP_SYS_ADMIN ).
|
|
|
5a015b |
This flag can't be specified in conjunction with
|
|
|
5a015b |
-.BR CLONE_THREAD .
|
|
|
5a015b |
+.BR CLONE_THREAD
|
|
|
5a015b |
+or
|
|
|
5a015b |
+.BR CLONE_PARENT .
|
|
|
5a015b |
+For security reasons,
|
|
|
5a015b |
+.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
|
|
|
5a015b |
+.\" https://lwn.net/Articles/543273/
|
|
|
5a015b |
+.\" The fix actually went into 3.9 and into 3.8.3. However, user namespaces
|
|
|
5a015b |
+.\" were, for practical purposes, unusable in earlier 3.8.x because of the
|
|
|
5a015b |
+.\" various filesystems that didn't support userns.
|
|
|
5a015b |
+.BR CLONE_NEWUSER
|
|
|
5a015b |
+cannot be specified in conjunction with
|
|
|
5a015b |
+.BR CLONE_FS .
|
|
|
5a015b |
+
|
|
|
5a015b |
+For further information on user namespaces, see
|
|
|
5a015b |
+.BR user_namespaces (7).
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.BR CLONE_NEWUTS " (since Linux 2.6.19)"
|
|
|
5a015b |
If
|
|
|
5a015b |
@@ -486,27 +454,29 @@ is set, then create the process in a new UTS namespace,
|
|
|
5a015b |
whose identifiers are initialized by duplicating the identifiers
|
|
|
5a015b |
from the UTS namespace of the calling process.
|
|
|
5a015b |
If this flag is not set, then (as with
|
|
|
5a015b |
-.BR fork (2)),
|
|
|
5a015b |
+.BR fork (2))
|
|
|
5a015b |
the process is created in the same UTS namespace as
|
|
|
5a015b |
the calling process.
|
|
|
5a015b |
This flag is intended for the implementation of containers.
|
|
|
5a015b |
|
|
|
5a015b |
A UTS namespace is the set of identifiers returned by
|
|
|
5a015b |
.BR uname (2);
|
|
|
5a015b |
-among these, the domain name and the host name can be modified by
|
|
|
5a015b |
+among these, the domain name and the hostname can be modified by
|
|
|
5a015b |
.BR setdomainname (2)
|
|
|
5a015b |
and
|
|
|
5a015b |
-.BR
|
|
|
5a015b |
.BR sethostname (2),
|
|
|
5a015b |
respectively.
|
|
|
5a015b |
Changes made to the identifiers in a UTS namespace
|
|
|
5a015b |
are visible to all other processes in the same namespace,
|
|
|
5a015b |
but are not visible to processes in other UTS namespaces.
|
|
|
5a015b |
|
|
|
5a015b |
-Use of this flag requires: a kernel configured with the
|
|
|
5a015b |
-.B CONFIG_UTS_NS
|
|
|
5a015b |
-option and that the process be privileged
|
|
|
5a015b |
-.RB ( CAP_SYS_ADMIN ).
|
|
|
5a015b |
+Only a privileged process
|
|
|
5a015b |
+.RB ( CAP_SYS_ADMIN )
|
|
|
5a015b |
+can employ
|
|
|
5a015b |
+.BR CLONE_NEWUTS .
|
|
|
5a015b |
+
|
|
|
5a015b |
+For further information on UTS namespaces, see
|
|
|
5a015b |
+.BR namespaces (7).
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.BR CLONE_PARENT " (since Linux 2.3.12)"
|
|
|
5a015b |
If
|
|
|
5a015b |
@@ -530,12 +500,15 @@ is set, then the parent of the calling process, rather than the
|
|
|
5a015b |
calling process itself, will be signaled.
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.BR CLONE_PARENT_SETTID " (since Linux 2.5.49)"
|
|
|
5a015b |
-Store child thread ID at location
|
|
|
5a015b |
+Store the child thread ID at the location
|
|
|
5a015b |
.I ptid
|
|
|
5a015b |
-in parent and child memory.
|
|
|
5a015b |
+in the parent's memory.
|
|
|
5a015b |
(In Linux 2.5.32-2.5.48 there was a flag
|
|
|
5a015b |
.B CLONE_SETTID
|
|
|
5a015b |
that did this.)
|
|
|
5a015b |
+The store operation completes before
|
|
|
5a015b |
+.BR clone ()
|
|
|
5a015b |
+returns control to user space.
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.BR CLONE_PID " (obsolete)"
|
|
|
5a015b |
If
|
|
|
5a015b |
@@ -547,6 +520,7 @@ of not much use.
|
|
|
5a015b |
Since 2.3.21 this flag can be
|
|
|
5a015b |
specified only by the system boot process (PID 0).
|
|
|
5a015b |
It disappeared in Linux 2.5.16.
|
|
|
5a015b |
+Since then, the kernel silently ignores it without error.
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.BR CLONE_PTRACE " (since Linux 2.2)"
|
|
|
5a015b |
If
|
|
|
5a015b |
@@ -556,11 +530,25 @@ then trace the child also (see
|
|
|
5a015b |
.BR ptrace (2)).
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.BR CLONE_SETTLS " (since Linux 2.5.32)"
|
|
|
5a015b |
-The
|
|
|
5a015b |
+The TLS (Thread Local Storage) descriptor is set to
|
|
|
5a015b |
+.IR newtls .
|
|
|
5a015b |
+
|
|
|
5a015b |
+The interpretation of
|
|
|
5a015b |
.I newtls
|
|
|
5a015b |
-argument is the new TLS (Thread Local Storage) descriptor.
|
|
|
5a015b |
+and the resulting effect is architecture dependent.
|
|
|
5a015b |
+On x86,
|
|
|
5a015b |
+.I newtls
|
|
|
5a015b |
+is interpreted as a
|
|
|
5a015b |
+.IR "struct user_desc *"
|
|
|
5a015b |
(See
|
|
|
5a015b |
-.BR set_thread_area (2).)
|
|
|
5a015b |
+.BR set_thread_area (2)).
|
|
|
5a015b |
+On x86-64, it is the new value to be set for the %fs base register
|
|
|
5a015b |
+(see the
|
|
|
5a015b |
+.I ARCH_SET_FS
|
|
|
5a015b |
+argument to
|
|
|
5a015b |
+.BR arch_prctl (2)).
|
|
|
5a015b |
+On architectures with a dedicated TLS register, it is the new value
|
|
|
5a015b |
+of that register.
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.BR CLONE_SIGHAND " (since Linux 2.0)"
|
|
|
5a015b |
If
|
|
|
5a015b |
@@ -612,16 +600,26 @@ from Linux 2.6.25 onward,
|
|
|
5a015b |
and was
|
|
|
5a015b |
.I removed
|
|
|
5a015b |
altogether in Linux 2.6.38.
|
|
|
5a015b |
+Since then, the kernel silently ignores it without error.
|
|
|
5a015b |
.\" glibc 2.8 removed this defn from bits/sched.h
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.BR CLONE_SYSVSEM " (since Linux 2.5.10)"
|
|
|
5a015b |
If
|
|
|
5a015b |
.B CLONE_SYSVSEM
|
|
|
5a015b |
is set, then the child and the calling process share
|
|
|
5a015b |
-a single list of System V semaphore undo values (see
|
|
|
5a015b |
+a single list of System V semaphore adjustment
|
|
|
5a015b |
+.RI ( semadj )
|
|
|
5a015b |
+values (see
|
|
|
5a015b |
.BR semop (2)).
|
|
|
5a015b |
-If this flag is not set, then the child has a separate undo list,
|
|
|
5a015b |
-which is initially empty.
|
|
|
5a015b |
+In this case, the shared list accumulates
|
|
|
5a015b |
+.I semadj
|
|
|
5a015b |
+values across all processes sharing the list,
|
|
|
5a015b |
+and semaphore adjustments are performed only when the last process
|
|
|
5a015b |
+that is sharing the list terminates (or ceases sharing the list using
|
|
|
5a015b |
+.BR unshare (2)).
|
|
|
5a015b |
+If this flag is not set, then the child has a separate
|
|
|
5a015b |
+.I semadj
|
|
|
5a015b |
+list that is initially empty.
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.BR CLONE_THREAD " (since Linux 2.4.0-test8)"
|
|
|
5a015b |
If
|
|
|
5a015b |
@@ -703,7 +701,12 @@ must also include
|
|
|
5a015b |
.B CLONE_SIGHAND
|
|
|
5a015b |
if
|
|
|
5a015b |
.B CLONE_THREAD
|
|
|
5a015b |
-is specified.
|
|
|
5a015b |
+is specified
|
|
|
5a015b |
+(and note that, since Linux 2.6.0-test6,
|
|
|
5a015b |
+.BR CLONE_SIGHAND
|
|
|
5a015b |
+also requires
|
|
|
5a015b |
+.BR CLONE_VM
|
|
|
5a015b |
+to be included).
|
|
|
5a015b |
|
|
|
5a015b |
Signals may be sent to a thread group as a whole (i.e., a TGID) using
|
|
|
5a015b |
.BR kill (2),
|
|
|
5a015b |
@@ -761,7 +764,7 @@ or
|
|
|
5a015b |
|
|
|
5a015b |
If
|
|
|
5a015b |
.B CLONE_VFORK
|
|
|
5a015b |
-is not set then both the calling process and the child are schedulable
|
|
|
5a015b |
+is not set, then both the calling process and the child are schedulable
|
|
|
5a015b |
after the call, and an application should not rely on execution occurring
|
|
|
5a015b |
in any particular order.
|
|
|
5a015b |
.TP
|
|
|
5a015b |
@@ -786,7 +789,7 @@ space of the calling process at the time of
|
|
|
5a015b |
Memory writes or file mappings/unmappings performed by one of the
|
|
|
5a015b |
processes do not affect the other, as with
|
|
|
5a015b |
.BR fork (2).
|
|
|
5a015b |
-.SS The raw system call interface
|
|
|
5a015b |
+.SS C library/kernel differences
|
|
|
5a015b |
The raw
|
|
|
5a015b |
.BR clone ()
|
|
|
5a015b |
system call corresponds more closely to
|
|
|
5a015b |
@@ -801,16 +804,58 @@ arguments of the
|
|
|
5a015b |
.BR clone ()
|
|
|
5a015b |
wrapper function are omitted.
|
|
|
5a015b |
Furthermore, the argument order changes.
|
|
|
5a015b |
-The raw system call interface on x86 and many other architectures is roughly:
|
|
|
5a015b |
+In addition, there are variations across architectures.
|
|
|
5a015b |
+
|
|
|
5a015b |
+The raw system call interface on x86-64 and some other architectures
|
|
|
5a015b |
+(including sh, tile, and alpha) is roughly:
|
|
|
5a015b |
+
|
|
|
5a015b |
.in +4
|
|
|
5a015b |
.nf
|
|
|
5a015b |
+.BI "long clone(unsigned long " flags ", void *" child_stack ,
|
|
|
5a015b |
+.BI " int *" ptid ", int *" ctid ,
|
|
|
5a015b |
+.BI " unsigned long " newtls );
|
|
|
5a015b |
+.fi
|
|
|
5a015b |
+.in
|
|
|
5a015b |
+
|
|
|
5a015b |
+On x86-32, and several other common architectures
|
|
|
5a015b |
+(including score, ARM, ARM 64, PA-RISC, arc, Power PC, xtensa,
|
|
|
5a015b |
+and MIPS),
|
|
|
5a015b |
+.\" CONFIG_CLONE_BACKWARDS
|
|
|
5a015b |
+the order of the last two arguments is reversed:
|
|
|
5a015b |
|
|
|
5a015b |
+.in +4
|
|
|
5a015b |
+.nf
|
|
|
5a015b |
.BI "long clone(unsigned long " flags ", void *" child_stack ,
|
|
|
5a015b |
-.BI " void *" ptid ", void *" ctid ,
|
|
|
5a015b |
-.BI " struct pt_regs *" regs );
|
|
|
5a015b |
+.BI " int *" ptid ", unsigned long " newtls ,
|
|
|
5a015b |
+.BI " int *" ctid );
|
|
|
5a015b |
+.fi
|
|
|
5a015b |
+.in
|
|
|
5a015b |
+
|
|
|
5a015b |
+On the cris and s390 architectures,
|
|
|
5a015b |
+.\" CONFIG_CLONE_BACKWARDS2
|
|
|
5a015b |
+the order of the first two arguments is reversed:
|
|
|
5a015b |
|
|
|
5a015b |
+.in +4
|
|
|
5a015b |
+.nf
|
|
|
5a015b |
+.BI "long clone(void *" child_stack ", unsigned long " flags ,
|
|
|
5a015b |
+.BI " int *" ptid ", int *" ctid ,
|
|
|
5a015b |
+.BI " unsigned long " newtls );
|
|
|
5a015b |
+.fi
|
|
|
5a015b |
+.in
|
|
|
5a015b |
+
|
|
|
5a015b |
+On the microblaze architecture,
|
|
|
5a015b |
+.\" CONFIG_CLONE_BACKWARDS3
|
|
|
5a015b |
+an additional argument is supplied:
|
|
|
5a015b |
+
|
|
|
5a015b |
+.in +4
|
|
|
5a015b |
+.nf
|
|
|
5a015b |
+.BI "long clone(unsigned long " flags ", void *" child_stack ,
|
|
|
5a015b |
+.BI " int " stack_size , "\fR /* Size of stack */"
|
|
|
5a015b |
+.BI " int *" ptid ", int *" ctid ,
|
|
|
5a015b |
+.BI " unsigned long " newtls );
|
|
|
5a015b |
.fi
|
|
|
5a015b |
.in
|
|
|
5a015b |
+
|
|
|
5a015b |
Another difference for the raw system call is that the
|
|
|
5a015b |
.I child_stack
|
|
|
5a015b |
argument may be zero, in which case copy-on-write semantics ensure that the
|
|
|
5a015b |
@@ -819,17 +864,13 @@ the stack.
|
|
|
5a015b |
In this case, for correct operation, the
|
|
|
5a015b |
.B CLONE_VM
|
|
|
5a015b |
option should not be specified.
|
|
|
5a015b |
-
|
|
|
5a015b |
-For some architectures, the order of the arguments for the system call
|
|
|
5a015b |
-differs from that shown above.
|
|
|
5a015b |
-On the score, microblaze, ARM, ARM 64, PA-RISC, arc, Power PC, xtensa,
|
|
|
5a015b |
-and MIPS architectures,
|
|
|
5a015b |
-the order of the fourth and fifth arguments is reversed.
|
|
|
5a015b |
-On the cris and s390 architectures,
|
|
|
5a015b |
-the order of the first and second arguments is reversed.
|
|
|
5a015b |
+.\"
|
|
|
5a015b |
.SS blackfin, m68k, and sparc
|
|
|
5a015b |
+.\" Mike Frysinger noted in a 2013 mail:
|
|
|
5a015b |
+.\" these arches don't define __ARCH_WANT_SYS_CLONE:
|
|
|
5a015b |
+.\" blackfin ia64 m68k sparc
|
|
|
5a015b |
The argument-passing conventions on
|
|
|
5a015b |
-blackfin, m68k, and sparc are different from descriptions above.
|
|
|
5a015b |
+blackfin, m68k, and sparc are different from the descriptions above.
|
|
|
5a015b |
For details, see the kernel (and glibc) source.
|
|
|
5a015b |
.SS ia64
|
|
|
5a015b |
On ia64, a different interface is used:
|
|
|
5a015b |
@@ -883,7 +924,8 @@ will be set appropriately.
|
|
|
5a015b |
.SH ERRORS
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.B EAGAIN
|
|
|
5a015b |
-Too many processes are already running.
|
|
|
5a015b |
+Too many processes are already running; see
|
|
|
5a015b |
+.BR fork (2).
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.B EINVAL
|
|
|
5a015b |
.B CLONE_SIGHAND
|
|
|
5a015b |
@@ -908,6 +950,7 @@ was not.
|
|
|
5a015b |
.\" (Since Linux 2.6.0-test6.)
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.B EINVAL
|
|
|
5a015b |
+.\" commit e66eded8309ebf679d3d3c1f5820d1f2ca332c71
|
|
|
5a015b |
Both
|
|
|
5a015b |
.B CLONE_FS
|
|
|
5a015b |
and
|
|
|
5a015b |
@@ -915,6 +958,14 @@ and
|
|
|
5a015b |
were specified in
|
|
|
5a015b |
.IR flags .
|
|
|
5a015b |
.TP
|
|
|
5a015b |
+.BR EINVAL " (since Linux 3.9)"
|
|
|
5a015b |
+Both
|
|
|
5a015b |
+.B CLONE_NEWUSER
|
|
|
5a015b |
+and
|
|
|
5a015b |
+.B CLONE_FS
|
|
|
5a015b |
+were specified in
|
|
|
5a015b |
+.IR flags .
|
|
|
5a015b |
+.TP
|
|
|
5a015b |
.B EINVAL
|
|
|
5a015b |
Both
|
|
|
5a015b |
.B CLONE_NEWIPC
|
|
|
5a015b |
@@ -924,18 +975,25 @@ were specified in
|
|
|
5a015b |
.IR flags .
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.B EINVAL
|
|
|
5a015b |
-Both
|
|
|
5a015b |
+One (or both) of
|
|
|
5a015b |
.BR CLONE_NEWPID
|
|
|
5a015b |
-and
|
|
|
5a015b |
+or
|
|
|
5a015b |
+.BR CLONE_NEWUSER
|
|
|
5a015b |
+and one (or both) of
|
|
|
5a015b |
.BR CLONE_THREAD
|
|
|
5a015b |
+or
|
|
|
5a015b |
+.BR CLONE_PARENT
|
|
|
5a015b |
were specified in
|
|
|
5a015b |
.IR flags .
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.B EINVAL
|
|
|
5a015b |
-Returned by
|
|
|
5a015b |
+Returned by the glibc
|
|
|
5a015b |
.BR clone ()
|
|
|
5a015b |
-when a zero value is specified for
|
|
|
5a015b |
-.IR child_stack .
|
|
|
5a015b |
+wrapper function when
|
|
|
5a015b |
+.IR fn
|
|
|
5a015b |
+or
|
|
|
5a015b |
+.IR child_stack
|
|
|
5a015b |
+is specified as NULL.
|
|
|
5a015b |
.TP
|
|
|
5a015b |
.B EINVAL
|
|
|
5a015b |
.BR CLONE_NEWIPC
|
|
|
5a015b |
@@ -971,11 +1029,48 @@ but the kernel was not configured with the
|
|
|
5a015b |
.B CONFIG_UTS
|
|
|
5a015b |
option.
|
|
|
5a015b |
.TP
|
|
|
5a015b |
+.B EINVAL
|
|
|
5a015b |
+.I child_stack
|
|
|
5a015b |
+is not aligned to a suitable boundary for this architecture.
|
|
|
5a015b |
+For example, on aarch64,
|
|
|
5a015b |
+.I child_stack
|
|
|
5a015b |
+must be a multiple of 16.
|
|
|
5a015b |
+.TP
|
|
|
5a015b |
.B ENOMEM
|
|
|
5a015b |
Cannot allocate sufficient memory to allocate a task structure for the
|
|
|
5a015b |
child, or to copy those parts of the caller's context that need to be
|
|
|
5a015b |
copied.
|
|
|
5a015b |
.TP
|
|
|
5a015b |
+.BR ENOSPC " (since Linux 3.7)"
|
|
|
5a015b |
+.\" commit f2302505775fd13ba93f034206f1e2a587017929
|
|
|
5a015b |
+.B CLONE_NEWPID
|
|
|
5a015b |
+was specified in \fIflags\fP,
|
|
|
5a015b |
+but the limit on the nesting depth of PID namespaces
|
|
|
5a015b |
+would have been exceeded; see
|
|
|
5a015b |
+.BR pid_namespaces (7).
|
|
|
5a015b |
+.TP
|
|
|
5a015b |
+.BR ENOSPC " (since Linux 4.9; beforehand " EUSERS )
|
|
|
5a015b |
+.B CLONE_NEWUSER
|
|
|
5a015b |
+was specified in
|
|
|
5a015b |
+.IR flags ,
|
|
|
5a015b |
+and the call would cause the limit on the number of
|
|
|
5a015b |
+nested user namespaces to be exceeded.
|
|
|
5a015b |
+See
|
|
|
5a015b |
+.BR user_namespaces (7).
|
|
|
5a015b |
+
|
|
|
5a015b |
+From Linux 3.11 to Linux 4.8, the error diagnosed in this case was
|
|
|
5a015b |
+.BR EUSERS .
|
|
|
5a015b |
+.TP
|
|
|
5a015b |
+.BR ENOSPC " (since Linux 4.9)"
|
|
|
5a015b |
+One of the values in
|
|
|
5a015b |
+.I flags
|
|
|
5a015b |
+specified the creation of a new user namespace,
|
|
|
5a015b |
+but doing so would have caused the limit defined by the corresponding file in
|
|
|
5a015b |
+.IR /proc/sys/user
|
|
|
5a015b |
+to be exceeded.
|
|
|
5a015b |
+For further details, see
|
|
|
5a015b |
+.BR namespaces (7).
|
|
|
5a015b |
+.TP
|
|
|
5a015b |
.B EPERM
|
|
|
5a015b |
.BR CLONE_NEWIPC ,
|
|
|
5a015b |
.BR CLONE_NEWNET ,
|
|
|
5a015b |
@@ -989,22 +1084,62 @@ was specified by an unprivileged process (process without \fBCAP_SYS_ADMIN\fP).
|
|
|
5a015b |
.B CLONE_PID
|
|
|
5a015b |
was specified by a process other than process 0.
|
|
|
5a015b |
.TP
|
|
|
5a015b |
+.B EPERM
|
|
|
5a015b |
+.BR CLONE_NEWUSER
|
|
|
5a015b |
+was specified in
|
|
|
5a015b |
+.IR flags ,
|
|
|
5a015b |
+but either the effective user ID or the effective group ID of the caller
|
|
|
5a015b |
+does not have a mapping in the parent namespace (see
|
|
|
5a015b |
+.BR user_namespaces (7)).
|
|
|
5a015b |
+.TP
|
|
|
5a015b |
+.BR EPERM " (since Linux 3.9)"
|
|
|
5a015b |
+.\" commit 3151527ee007b73a0ebd296010f1c0454a919c7d
|
|
|
5a015b |
+.B CLONE_NEWUSER
|
|
|
5a015b |
+was specified in
|
|
|
5a015b |
+.I flags
|
|
|
5a015b |
+and the caller is in a chroot environment
|
|
|
5a015b |
+.\" FIXME What is the rationale for this restriction?
|
|
|
5a015b |
+(i.e., the caller's root directory does not match the root directory
|
|
|
5a015b |
+of the mount namespace in which it resides).
|
|
|
5a015b |
+.TP
|
|
|
5a015b |
.BR ERESTARTNOINTR " (since Linux 2.6.17)"
|
|
|
5a015b |
+.\" commit 4a2c7a7837da1b91468e50426066d988050e4d56
|
|
|
5a015b |
System call was interrupted by a signal and will be restarted.
|
|
|
5a015b |
(This can be seen only during a trace.)
|
|
|
5a015b |
-.SH VERSIONS
|
|
|
5a015b |
-There is no entry for
|
|
|
5a015b |
-.BR clone ()
|
|
|
5a015b |
-in libc5.
|
|
|
5a015b |
-glibc2 provides
|
|
|
5a015b |
-.BR clone ()
|
|
|
5a015b |
-as described in this manual page.
|
|
|
5a015b |
+.TP
|
|
|
5a015b |
+.BR EUSERS " (Linux 3.11 to Linux 4.8)"
|
|
|
5a015b |
+.B CLONE_NEWUSER
|
|
|
5a015b |
+was specified in
|
|
|
5a015b |
+.IR flags ,
|
|
|
5a015b |
+and the limit on the number of nested user namespaces would be exceeded.
|
|
|
5a015b |
+See the discussion of the
|
|
|
5a015b |
+.BR ENOSPC
|
|
|
5a015b |
+error above.
|
|
|
5a015b |
+.\" .SH VERSIONS
|
|
|
5a015b |
+.\" There is no entry for
|
|
|
5a015b |
+.\" .BR clone ()
|
|
|
5a015b |
+.\" in libc5.
|
|
|
5a015b |
+.\" glibc2 provides
|
|
|
5a015b |
+.\" .BR clone ()
|
|
|
5a015b |
+.\" as described in this manual page.
|
|
|
5a015b |
.SH CONFORMING TO
|
|
|
5a015b |
.BR clone ()
|
|
|
5a015b |
is Linux-specific and should not be used in programs
|
|
|
5a015b |
intended to be portable.
|
|
|
5a015b |
.SH NOTES
|
|
|
5a015b |
-In the kernel 2.4.x series,
|
|
|
5a015b |
+The
|
|
|
5a015b |
+.BR kcmp (2)
|
|
|
5a015b |
+system call can be used to test whether two processes share various
|
|
|
5a015b |
+resources such as a file descriptor table,
|
|
|
5a015b |
+System V semaphore undo operations, or a virtual address space.
|
|
|
5a015b |
+
|
|
|
5a015b |
+
|
|
|
5a015b |
+Handlers registered using
|
|
|
5a015b |
+.BR pthread_atfork (3)
|
|
|
5a015b |
+are not executed during a call to
|
|
|
5a015b |
+.BR clone ().
|
|
|
5a015b |
+
|
|
|
5a015b |
+In the Linux 2.4.x series,
|
|
|
5a015b |
.B CLONE_THREAD
|
|
|
5a015b |
generally does not make the parent of the new thread the same
|
|
|
5a015b |
as the parent of the calling process.
|
|
|
5a015b |
@@ -1012,14 +1147,13 @@ However, for kernel versions 2.4.7 to 2.4.18 the
|
|
|
5a015b |
.B CLONE_THREAD
|
|
|
5a015b |
flag implied the
|
|
|
5a015b |
.B CLONE_PARENT
|
|
|
5a015b |
-flag (as in kernel 2.6).
|
|
|
5a015b |
+flag (as in Linux 2.6.0 and later).
|
|
|
5a015b |
|
|
|
5a015b |
For a while there was
|
|
|
5a015b |
.B CLONE_DETACHED
|
|
|
5a015b |
(introduced in 2.5.32):
|
|
|
5a015b |
parent wants no child-exit signal.
|
|
|
5a015b |
-In 2.6.2 the need to give this
|
|
|
5a015b |
-together with
|
|
|
5a015b |
+In Linux 2.6.2, the need to give this flag together with
|
|
|
5a015b |
.B CLONE_THREAD
|
|
|
5a015b |
disappeared.
|
|
|
5a015b |
This flag is still defined, but has no effect.
|
|
|
5a015b |
@@ -1071,7 +1205,6 @@ To get the truth, it may be necessary to use code such as the following:
|
|
|
5a015b |
.\" https://bugzilla.redhat.com/show_bug.cgi?id=417521
|
|
|
5a015b |
.\" http://sourceware.org/bugzilla/show_bug.cgi?id=6910
|
|
|
5a015b |
.SH EXAMPLE
|
|
|
5a015b |
-.SS Create a child that executes in a separate UTS namespace
|
|
|
5a015b |
The following program demonstrates the use of
|
|
|
5a015b |
.BR clone ()
|
|
|
5a015b |
to create a child process that executes in a separate UTS namespace.
|
|
|
5a015b |
@@ -1081,7 +1214,7 @@ making it possible to see that the hostname
|
|
|
5a015b |
differs in the UTS namespaces of the parent and child.
|
|
|
5a015b |
For an example of the use of this program, see
|
|
|
5a015b |
.BR setns (2).
|
|
|
5a015b |
-
|
|
|
5a015b |
+.SS Program source
|
|
|
5a015b |
.nf
|
|
|
5a015b |
#define _GNU_SOURCE
|
|
|
5a015b |
#include <sys/wait.h>
|
|
|
5a015b |
@@ -1181,6 +1314,7 @@ main(int argc, char *argv[])
|
|
|
5a015b |
.BR unshare (2),
|
|
|
5a015b |
.BR wait (2),
|
|
|
5a015b |
.BR capabilities (7),
|
|
|
5a015b |
+.BR namespaces (7),
|
|
|
5a015b |
.BR pthreads (7)
|
|
|
5a015b |
.SH COLOPHON
|
|
|
5a015b |
This page is part of release 3.53 of the Linux
|
|
|
5a015b |
--
|
|
|
5a015b |
2.7.4
|
|
|
5a015b |
|