Blob Blame History Raw
commit 99ea60996425f1baa6bcb07d01323b80129c2141
Author: Pavel Šimerda <psimerda@redhat.com>
Date:   Mon Jun 1 18:56:09 2015 +0200

    backport selected ip-netns features

diff --git a/ip/ipnetns.c b/ip/ipnetns.c
index 794a498..24df167 100644
--- a/ip/ipnetns.c
+++ b/ip/ipnetns.c
@@ -15,180 +15,151 @@
 #include <unistd.h>
 #include <ctype.h>
 
+#include <linux/net_namespace.h>
+
 #include "utils.h"
 #include "ip_common.h"
-
-#define NETNS_RUN_DIR "/var/run/netns"
-#define NETNS_ETC_DIR "/etc/netns"
-
-#ifndef CLONE_NEWNET
-#define CLONE_NEWNET 0x40000000	/* New network namespace (lo, device, names sockets, etc) */
-#endif
-
-#ifndef MNT_DETACH
-#define MNT_DETACH	0x00000002	/* Just detach from the tree */
-#endif /* MNT_DETACH */
-
-/* sys/mount.h may be out too old to have these */
-#ifndef MS_REC
-#define MS_REC		16384
-#endif
-
-#ifndef MS_SLAVE
-#define MS_SLAVE	(1 << 19)
-#endif
-
-#ifndef MS_SHARED
-#define MS_SHARED	(1 << 20)
-#endif
-
-#ifndef HAVE_SETNS
-static int setns(int fd, int nstype)
-{
-#ifdef __NR_setns
-	return syscall(__NR_setns, fd, nstype);
-#else
-	errno = ENOSYS;
-	return -1;
-#endif
-}
-#endif /* HAVE_SETNS */
+#include "namespace.h"
 
 static int usage(void)
 {
 	fprintf(stderr, "Usage: ip netns list\n");
 	fprintf(stderr, "       ip netns add NAME\n");
-	fprintf(stderr, "       ip netns delete NAME\n");
-	fprintf(stderr, "       ip netns identify PID\n");
+	fprintf(stderr, "       ip netns set NAME NETNSID\n");
+	fprintf(stderr, "       ip [-all] netns delete [NAME]\n");
+	fprintf(stderr, "       ip netns identify [PID]\n");
 	fprintf(stderr, "       ip netns pids NAME\n");
-	fprintf(stderr, "       ip netns exec NAME cmd ...\n");
+	fprintf(stderr, "       ip [-all] netns exec [NAME] cmd ...\n");
 	fprintf(stderr, "       ip netns monitor\n");
 	exit(-1);
 }
 
-int get_netns_fd(const char *name)
+static int have_rtnl_getnsid = -1;
+
+static int ipnetns_accept_msg(const struct sockaddr_nl *who,
+			      struct nlmsghdr *n, void *arg)
 {
-	char pathbuf[MAXPATHLEN];
-	const char *path, *ptr;
+	struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(n);
 
-	path = name;
-	ptr = strchr(name, '/');
-	if (!ptr) {
-		snprintf(pathbuf, sizeof(pathbuf), "%s/%s",
-			NETNS_RUN_DIR, name );
-		path = pathbuf;
-	}
-	return open(path, O_RDONLY);
+	if (n->nlmsg_type == NLMSG_ERROR &&
+	    (err->error == -EOPNOTSUPP || err->error == -EINVAL))
+		have_rtnl_getnsid = 0;
+	else
+		have_rtnl_getnsid = 1;
+	return -1;
 }
 
-static int netns_list(int argc, char **argv)
+static int ipnetns_have_nsid(void)
 {
-	struct dirent *entry;
-	DIR *dir;
+	struct {
+		struct nlmsghdr n;
+		struct rtgenmsg g;
+		char            buf[1024];
+	} req;
+	int fd;
 
-	dir = opendir(NETNS_RUN_DIR);
-	if (!dir)
-		return 0;
+	if (have_rtnl_getnsid < 0) {
+		memset(&req, 0, sizeof(req));
+		req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
+		req.n.nlmsg_flags = NLM_F_REQUEST;
+		req.n.nlmsg_type = RTM_GETNSID;
+		req.g.rtgen_family = AF_UNSPEC;
 
-	while ((entry = readdir(dir)) != NULL) {
-		if (strcmp(entry->d_name, ".") == 0)
-			continue;
-		if (strcmp(entry->d_name, "..") == 0)
-			continue;
-		printf("%s\n", entry->d_name);
+		fd = open("/proc/self/ns/net", O_RDONLY);
+		if (fd < 0) {
+			perror("open(\"/proc/self/ns/net\")");
+			exit(1);
+		}
+
+		addattr32(&req.n, 1024, NETNSA_FD, fd);
+
+		if (rtnl_send(&rth, &req.n, req.n.nlmsg_len) < 0) {
+			perror("request send failed");
+			exit(1);
+		}
+		rtnl_listen(&rth, ipnetns_accept_msg, NULL);
+		close(fd);
 	}
-	closedir(dir);
-	return 0;
+
+	return have_rtnl_getnsid;
 }
 
-static void bind_etc(const char *name)
+static int get_netnsid_from_name(const char *name)
+{
+	struct {
+		struct nlmsghdr n;
+		struct rtgenmsg g;
+		char            buf[1024];
+	} req, answer;
+	struct rtattr *tb[NETNSA_MAX + 1];
+	struct rtgenmsg *rthdr;
+	int len, fd;
+
+	memset(&req, 0, sizeof(req));
+	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
+	req.n.nlmsg_flags = NLM_F_REQUEST;
+	req.n.nlmsg_type = RTM_GETNSID;
+	req.g.rtgen_family = AF_UNSPEC;
+
+	fd = netns_get_fd(name);
+	if (fd < 0)
+		return fd;
+
+	addattr32(&req.n, 1024, NETNSA_FD, fd);
+	if (rtnl_talk(&rth, &req.n, 0, 0, &answer.n) < 0) {
+		close(fd);
+		return -2;
+	}
+	close(fd);
+
+	/* Validate message and parse attributes */
+	if (answer.n.nlmsg_type == NLMSG_ERROR)
+		return -1;
+
+	rthdr = NLMSG_DATA(&answer.n);
+	len = answer.n.nlmsg_len - NLMSG_SPACE(sizeof(*rthdr));
+	if (len < 0)
+		return -1;
+
+	parse_rtattr(tb, NETNSA_MAX, NETNS_RTA(rthdr), len);
+
+	if (tb[NETNSA_NSID])
+		return rta_getattr_u32(tb[NETNSA_NSID]);
+
+	return -1;
+}
+
+static int netns_list(int argc, char **argv)
 {
-	char etc_netns_path[MAXPATHLEN];
-	char netns_name[MAXPATHLEN];
-	char etc_name[MAXPATHLEN];
 	struct dirent *entry;
 	DIR *dir;
+	int id;
 
-	snprintf(etc_netns_path, sizeof(etc_netns_path), "%s/%s", NETNS_ETC_DIR, name);
-	dir = opendir(etc_netns_path);
+	dir = opendir(NETNS_RUN_DIR);
 	if (!dir)
-		return;
+		return 0;
 
 	while ((entry = readdir(dir)) != NULL) {
 		if (strcmp(entry->d_name, ".") == 0)
 			continue;
 		if (strcmp(entry->d_name, "..") == 0)
 			continue;
-		snprintf(netns_name, sizeof(netns_name), "%s/%s", etc_netns_path, entry->d_name);
-		snprintf(etc_name, sizeof(etc_name), "/etc/%s", entry->d_name);
-		if (mount(netns_name, etc_name, "none", MS_BIND, NULL) < 0) {
-			fprintf(stderr, "Bind %s -> %s failed: %s\n",
-				netns_name, etc_name, strerror(errno));
+		printf("%s", entry->d_name);
+		if (ipnetns_have_nsid()) {
+			id = get_netnsid_from_name(entry->d_name);
+			if (id >= 0)
+				printf(" (id: %d)", id);
 		}
+		printf("\n");
 	}
 	closedir(dir);
+	return 0;
 }
 
-static int netns_exec(int argc, char **argv)
+static int cmd_exec(const char *cmd, char **argv, bool do_fork)
 {
-	/* Setup the proper environment for apps that are not netns
-	 * aware, and execute a program in that environment.
-	 */
-	const char *name, *cmd;
-	char net_path[MAXPATHLEN];
-	int netns;
-
-	if (argc < 1) {
-		fprintf(stderr, "No netns name specified\n");
-		return -1;
-	}
-	if (argc < 2) {
-		fprintf(stderr, "No command specified\n");
-		return -1;
-	}
-
-	name = argv[0];
-	cmd = argv[1];
-	snprintf(net_path, sizeof(net_path), "%s/%s", NETNS_RUN_DIR, name);
-	netns = open(net_path, O_RDONLY | O_CLOEXEC);
-	if (netns < 0) {
-		fprintf(stderr, "Cannot open network namespace \"%s\": %s\n",
-			name, strerror(errno));
-		return -1;
-	}
-
-	if (setns(netns, CLONE_NEWNET) < 0) {
-		fprintf(stderr, "seting the network namespace \"%s\" failed: %s\n",
-			name, strerror(errno));
-		return -1;
-	}
-
-	if (unshare(CLONE_NEWNS) < 0) {
-		fprintf(stderr, "unshare failed: %s\n", strerror(errno));
-		return -1;
-	}
-	/* Don't let any mounts propogate back to the parent */
-	if (mount("", "/", "none", MS_SLAVE | MS_REC, NULL)) {
-		fprintf(stderr, "\"mount --make-rslave /\" failed: %s\n",
-			strerror(errno));
-		return -1;
-	}
-	/* Mount a version of /sys that describes the network namespace */
-	if (umount2("/sys", MNT_DETACH) < 0) {
-		fprintf(stderr, "umount of /sys failed: %s\n", strerror(errno));
-		return -1;
-	}
-	if (mount(name, "/sys", "sysfs", 0, NULL) < 0) {
-		fprintf(stderr, "mount of /sys failed: %s\n",strerror(errno));
-		return -1;
-	}
-
-	/* Setup bind mounts for config files in /etc */
-	bind_etc(name);
-
 	fflush(stdout);
-
-	if (batch_mode) {
+	if (do_fork) {
 		int status;
 		pid_t pid;
 
@@ -205,20 +176,57 @@ static int netns_exec(int argc, char **argv)
 				exit(1);
 			}
 
-			/* If child failed, propogate status */
-			if (WIFEXITED(status))
-				exit(WEXITSTATUS(status));
+			if (WIFEXITED(status)) {
+				return WEXITSTATUS(status);
+			}
 
-			return 0;
+			exit(1);
 		}
 	}
 
-	if (execvp(cmd, argv + 1)  < 0)
+	if (execvp(cmd, argv)  < 0)
 		fprintf(stderr, "exec of \"%s\" failed: %s\n",
-			cmd, strerror(errno));
+				cmd, strerror(errno));
 	_exit(1);
 }
 
+static int on_netns_exec(char *nsname, void *arg)
+{
+	char **argv = arg;
+	cmd_exec(argv[1], argv + 1, true);
+	return 0;
+}
+
+static int netns_exec(int argc, char **argv)
+{
+	/* Setup the proper environment for apps that are not netns
+	 * aware, and execute a program in that environment.
+	 */
+	const char *cmd;
+
+	if (argc < 1 && !do_all) {
+		fprintf(stderr, "No netns name specified\n");
+		return -1;
+	}
+	if ((argc < 2 && !do_all) || (argc < 1 && do_all)) {
+		fprintf(stderr, "No command specified\n");
+		return -1;
+	}
+
+	if (do_all)
+		return do_each_netns(on_netns_exec, --argv, 1);
+
+	if (netns_switch(argv[0]))
+		return -1;
+
+	/* ip must return the status of the child,
+	 * but do_cmd() will add a minus to this,
+	 * so let's add another one here to cancel it.
+	 */
+	cmd = argv[1];
+	return -cmd_exec(cmd, argv + 1, !!batch_mode);
+}
+
 static int is_pid(const char *str)
 {
 	int ch;
@@ -282,7 +290,7 @@ static int netns_pids(int argc, char **argv)
 	}
 	closedir(dir);
 	return 0;
-	
+
 }
 
 static int netns_identify(int argc, char **argv)
@@ -295,19 +303,17 @@ static int netns_identify(int argc, char **argv)
 	struct dirent *entry;
 
 	if (argc < 1) {
-		fprintf(stderr, "No pid specified\n");
-		return -1;
-	}
-	if (argc > 1) {
+		pidstr = "self";
+	} else if (argc > 1) {
 		fprintf(stderr, "extra arguments specified\n");
 		return -1;
-	}
-	pidstr = argv[0];
-
-	if (!is_pid(pidstr)) {
-		fprintf(stderr, "Specified string '%s' is not a pid\n",
-			pidstr);
-		return -1;
+	} else {
+		pidstr = argv[0];
+		if (!is_pid(pidstr)) {
+			fprintf(stderr, "Specified string '%s' is not a pid\n",
+					pidstr);
+			return -1;
+		}
 	}
 
 	snprintf(net_path, sizeof(net_path), "/proc/%s/ns/net", pidstr);
@@ -355,21 +361,14 @@ static int netns_identify(int argc, char **argv)
 	}
 	closedir(dir);
 	return 0;
-	
+
 }
 
-static int netns_delete(int argc, char **argv)
+static int on_netns_del(char *nsname, void *arg)
 {
-	const char *name;
 	char netns_path[MAXPATHLEN];
 
-	if (argc < 1) {
-		fprintf(stderr, "No netns name specified\n");
-		return -1;
-	}
-
-	name = argv[0];
-	snprintf(netns_path, sizeof(netns_path), "%s/%s", NETNS_RUN_DIR, name);
+	snprintf(netns_path, sizeof(netns_path), "%s/%s", NETNS_RUN_DIR, nsname);
 	umount2(netns_path, MNT_DETACH);
 	if (unlink(netns_path) < 0) {
 		fprintf(stderr, "Cannot remove namespace file \"%s\": %s\n",
@@ -379,6 +378,33 @@ static int netns_delete(int argc, char **argv)
 	return 0;
 }
 
+static int netns_delete(int argc, char **argv)
+{
+	if (argc < 1 && !do_all) {
+		fprintf(stderr, "No netns name specified\n");
+		return -1;
+	}
+
+	if (do_all)
+		return netns_foreach(on_netns_del, NULL);
+
+	return on_netns_del(argv[0], NULL);
+}
+
+static int create_netns_dir(void)
+{
+	/* Create the base netns directory if it doesn't exist */
+	if (mkdir(NETNS_RUN_DIR, S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)) {
+		if (errno != EEXIST) {
+			fprintf(stderr, "mkdir %s failed: %s\n",
+				NETNS_RUN_DIR, strerror(errno));
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
 static int netns_add(int argc, char **argv)
 {
 	/* This function creates a new network namespace and
@@ -402,10 +428,10 @@ static int netns_add(int argc, char **argv)
 
 	snprintf(netns_path, sizeof(netns_path), "%s/%s", NETNS_RUN_DIR, name);
 
-	/* Create the base netns directory if it doesn't exist */
-	mkdir(NETNS_RUN_DIR, S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH);
+	if (create_netns_dir())
+		return -1;
 
-	/* Make it possible for network namespace mounts to propogate between
+	/* Make it possible for network namespace mounts to propagate between
 	 * mount namespaces.  This makes it likely that a unmounting a network
 	 * namespace file in one namespace will unmount the network namespace
 	 * file in all namespaces allowing the network namespace to be freed
@@ -431,7 +457,7 @@ static int netns_add(int argc, char **argv)
 	/* Create the filesystem state */
 	fd = open(netns_path, O_RDONLY|O_CREAT|O_EXCL, 0);
 	if (fd < 0) {
-		fprintf(stderr, "Cannot not create namespace file \"%s\": %s\n",
+		fprintf(stderr, "Cannot create namespace file \"%s\": %s\n",
 			netns_path, strerror(errno));
 		return -1;
 	}
@@ -454,6 +480,61 @@ out_delete:
 	return -1;
 }
 
+static int set_netnsid_from_name(const char *name, int nsid)
+{
+	struct {
+		struct nlmsghdr n;
+		struct rtgenmsg g;
+		char            buf[1024];
+	} req;
+	int fd, err = 0;
+
+	memset(&req, 0, sizeof(req));
+	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
+	req.n.nlmsg_flags = NLM_F_REQUEST;
+	req.n.nlmsg_type = RTM_NEWNSID;
+	req.g.rtgen_family = AF_UNSPEC;
+
+	fd = netns_get_fd(name);
+	if (fd < 0)
+		return fd;
+
+	addattr32(&req.n, 1024, NETNSA_FD, fd);
+	addattr32(&req.n, 1024, NETNSA_NSID, nsid);
+	if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0)
+		err = -2;
+
+	close(fd);
+	return err;
+}
+
+static int netns_set(int argc, char **argv)
+{
+	char netns_path[MAXPATHLEN];
+	const char *name;
+	int netns, nsid;
+
+	if (argc < 1) {
+		fprintf(stderr, "No netns name specified\n");
+		return -1;
+	}
+	if (argc < 2) {
+		fprintf(stderr, "No nsid specified\n");
+		return -1;
+	}
+	name = argv[0];
+	nsid = atoi(argv[1]);
+
+	snprintf(netns_path, sizeof(netns_path), "%s/%s", NETNS_RUN_DIR, name);
+	netns = open(netns_path, O_RDONLY | O_CLOEXEC);
+	if (netns < 0) {
+		fprintf(stderr, "Cannot open network namespace \"%s\": %s\n",
+			name, strerror(errno));
+		return -1;
+	}
+
+	return set_netnsid_from_name(name, nsid);
+}
 
 static int netns_monitor(int argc, char **argv)
 {
@@ -466,6 +547,10 @@ static int netns_monitor(int argc, char **argv)
 			strerror(errno));
 		return -1;
 	}
+
+	if (create_netns_dir())
+		return -1;
+
 	if (inotify_add_watch(fd, NETNS_RUN_DIR, IN_CREATE | IN_DELETE) < 0) {
 		fprintf(stderr, "inotify_add_watch failed: %s\n",
 			strerror(errno));
@@ -505,6 +590,9 @@ int do_netns(int argc, char **argv)
 	if (matches(*argv, "add") == 0)
 		return netns_add(argc-1, argv+1);
 
+	if (matches(*argv, "set") == 0)
+		return netns_set(argc-1, argv+1);
+
 	if (matches(*argv, "delete") == 0)
 		return netns_delete(argc-1, argv+1);
 
diff --git a/man/man8/ip-netns.8 b/man/man8/ip-netns.8
index 6aa6e93..80a4ad1 100644
--- a/man/man8/ip-netns.8
+++ b/man/man8/ip-netns.8
@@ -16,20 +16,28 @@ ip-netns \- process network namespace management
 .BR "ip netns" " { " list " } "
 
 .ti -8
-.BR "ip netns" " { " add " | " delete " } "
+.B ip netns add
 .I NETNSNAME
 
 .ti -8
+.B ip [-all] netns del
+.RI "[ " NETNSNAME " ]"
+
+.ti -8
+.BR "ip netns" " { " set " } "
+.I NETNSNAME NETNSID
+
+.ti -8
 .BR "ip netns identify"
-.I PID
+.RI "[ " PID " ]"
 
 .ti -8
 .BR "ip netns pids"
 .I NETNSNAME
 
 .ti -8
-.BR "ip netns exec "
-.I NETNSNAME command ...
+.BR "ip [-all] netns exec "
+.RI "[ " NETNSNAME " ] " command ...
 
 .ti -8
 .BR "ip netns monitor"
@@ -38,12 +46,15 @@ ip-netns \- process network namespace management
 A network namespace is logically another copy of the network stack,
 with its own routes, firewall rules, and network devices.
 
+By default a process inherits its network namespace from its parent. Initially all
+the processes share the same default network namespace from the init process.
+
 By convention a named network namespace is an object at
 .BR "/var/run/netns/" NAME
-that can be opened.  The file descriptor resulting from opening
+that can be opened. The file descriptor resulting from opening
 .BR "/var/run/netns/" NAME
-refers to the specified network namespace.  Holding that file
-descriptor open keeps the network namespace alive.  The file
+refers to the specified network namespace. Holding that file
+descriptor open keeps the network namespace alive. The file
 descriptor can be used with the
 .B setns(2)
 system call to change the network namespace associated with a task.
@@ -76,19 +87,64 @@ If NAME is available in /var/run/netns/ this command creates a new
 network namespace and assigns NAME.
 
 .TP
-.B ip netns delete NAME - delete the name of a network namespace
+.B ip [-all] netns delete [ NAME ] - delete the name of network namespace(s)
 .sp
 If NAME is present in /var/run/netns it is umounted and the mount
-point is removed.  If this is the last user of the network namespace the
-network namespace will be freed, otherwise the network namespace
-persists until it has no more users.  ip netns delete may fail if
-the mount point is in use in another mount namespace.
+point is removed. If this is the last user of the network namespace the
+network namespace will be freed and all physical devices will be moved to the
+default one, otherwise the network namespace persists until it has no more
+users. ip netns delete may fail if the mount point is in use in another mount
+namespace.
+
+If
+.B -all
+option is specified, all the network namespace names will be removed.
+
+It is possible to lose a physical device if it was moved to a netns and
+that netns was then deleted while a process was still running in it:
+
+.RS 10
+$ ip netns add net0
+.RE
+.RS 10
+$ ip link set dev eth0 netns net0
+.RE
+.RS 10
+$ ip netns exec net0 SOME_PROCESS_IN_BACKGROUND
+.RE
+.RS 10
+$ ip netns del net0
+.RE
+
+.RS
+and eth0 will appear in the default netns only after SOME_PROCESS_IN_BACKGROUND
+exits or is killed. To prevent this, the processes running in net0
+should be killed before deleting the netns:
+
+.RE
+.RS 10
+$ ip netns pids net0 | xargs kill
+.RE
+.RS 10
+$ ip netns del net0
+.RE
 
 .TP
-.B ip netns identify PID - Report network namespaces names for process
+.B ip netns set NAME NETNSID - assign an id to a peer network namespace
+.sp
+This command assigns an id to a peer network namespace. This id is valid
+only in the current network namespace.
+This id will be used by the kernel in some netlink messages. If no id is
+assigned when the kernel needs it, it will be automatically assigned by
+the kernel.
+Once it is assigned, it's not possible to change it.
+
+.TP
+.B ip netns identify [PID] - Report network namespaces names for process
 .sp
 This command walks through /var/run/netns and finds all the network
-namespace names for network namespace of the specified process.
+namespace names for the network namespace of the specified process. If PID is
+not specified, the current process is used.
 
 .TP
 .B ip netns pids NAME - Report processes in the named network namespace
@@ -97,15 +153,25 @@ This command walks through proc and finds all of the process who have
 the named network namespace as their primary network namespace.
 
 .TP
-.B ip netns exec NAME cmd ... - Run cmd in the named network namespace
+.B ip [-all] netns exec [ NAME ] cmd ... - Run cmd in the named network namespace
 .sp
 This command allows applications that are network namespace unaware
 to be run in something other than the default network namespace with
 all of the configuration for the specified network namespace appearing
-in the customary global locations.  A network namespace and bind mounts
+in the customary global locations. A network namespace and bind mounts
 are used to move files from their network namespace specific location
 to their default locations without affecting other processes.
 
+If
+.B -all
+option is specified, then
+.B cmd
+is executed synchronously in each named network namespace, even if
+.B cmd
+fails in some of them. The network namespace name is printed for each
+.B cmd
+execution.
+
 .TP
 .B ip netns monitor - Report as network namespace names are added and deleted
 .sp
--- iproute2-3.10.0/man/man8/ip.8.orig	2015-07-08 19:15:58.468148060 +0200
+++ iproute2-3.10.0/man/man8/ip.8	2015-07-08 19:16:38.078147665 +0200
@@ -132,7 +132,7 @@
 host addresses.
 
 .TP
-.BR "\-n" , " \-net" , " \-netns " <NETNS>
+.BR "\-n" , " \-netns " <NETNS>
 switches
 .B ip
 to the specified network namespace