aboutsummaryrefslogtreecommitdiffstats
path: root/man7/user_namespaces.7
diff options
context:
space:
mode:
authorMichael Kerrisk <mtk.manpages@gmail.com>2013-03-01 08:52:14 +0100
committerMichael Kerrisk <mtk.manpages@gmail.com>2014-09-13 20:16:00 -0700
commit8d36d80cc307dcb2fba779bfec8881b64b216593 (patch)
treee6f6dfce1f67414596135b018559af4d46b37555 /man7/user_namespaces.7
parentdf23ae04d67ca492dec454d702c69c2fe7607f71 (diff)
downloadman-pages-8d36d80cc307dcb2fba779bfec8881b64b216593.tar.gz
user_namespaces.7: Add an example program
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
Diffstat (limited to 'man7/user_namespaces.7')
-rw-r--r--man7/user_namespaces.7334
1 files changed, 333 insertions, 1 deletions
diff --git a/man7/user_namespaces.7 b/man7/user_namespaces.7
index 25c51228d6..c2e08ef2e1 100644
--- a/man7/user_namespaces.7
+++ b/man7/user_namespaces.7
@@ -396,6 +396,337 @@ because of their potential to confuse set-user-ID-root applications.
In general, it becomes safe to allow the root user in a user namespace to
use those features because it is impossible, while in a user namespace,
to gain more privilege than the root user of a user namespace has.
+.SH EXAMPLE
+The program below is designed to allow experimenting with
+user namespaces, as well as other types of namespaces.
+It creates namespaces as specified by command-line options and then executes
+a command inside those namespaces.
+The comments and
+.I usage()
+function inside the program provide a full explanation of the program.
+The following shell session demonstrates its use:
+
+.in +4n
+.nf
+$ \fBuname \-rs\fP # Need Linux 3.8 or later
+Linux 3.8.0
+$ \fBid \-u\fP # Running as unprivileged user
+1000
+$ \fBid \-g\fP
+1000
+.fi
+.in
+
+Now start a shell in new user
+.RI ( \-U ),
+mount
+.RI ( \-m ),
+and PID
+.RI ( \-p )
+namespaces, with user ID
+.RI ( \-M )
+and group ID
+.RI ( \-G )
+1000 mapped to 0 inside the user namespace:
+
+.in +4n
+.nf
+$ \fB./userns_child_exec \-p \-m \-U \-M \(aq0 1000 1\(aq \-G \(aq0 1000 1\(aq bash\fP
+.fi
+.in
+
+The shell has PID 1, because it is the first process in the new
+PID namespace:
+
+.in +4n
+.nf
+bash$ \fBecho $$\fP
+1
+.fi
+.in
+
+Inside the user namespace, the shell has user and group ID 0,
+and a full set of permitted and effective capabilities:
+
+.in +4n
+.nf
+bash$ \fBcat /proc/$$/status | egrep \(aq^[UG]id\(aq\fP
+Uid: 0 0 0 0
+Gid: 0 0 0 0
+bash$ \fBcat /proc/$$/status | egrep \(aq^Cap(Prm|Inh|Eff)\(aq\fP
+CapInh: 0000000000000000
+CapPrm: 0000001fffffffff
+CapEff: 0000001fffffffff
+.fi
+.in
+
+Mounting a new
+.I /proc
+file system and listing all of the processes visible
+in the new PID namespace shows that the shell can't see
+any processes outside the PID namespace:
+
+.in +4n
+.nf
+bash$ \fBmount \-t proc proc /proc\fP
+bash$ \fBps ax\fP
+ PID TTY STAT TIME COMMAND
+ 1 pts/3 S 0:00 bash
+ 22 pts/3 R+ 0:00 ps ax
+.fi
+.in
+.SS Program source
+\&
+.nf
+/* userns_child_exec.c
+
+ Licensed under GNU General Public License v2 or later
+
+ Create a child process that executes a shell command in new
+ namespace(s); allow UID and GID mappings to be specified when
+ creating a user namespace.
+*/
+#define _GNU_SOURCE
+#include <sched.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/wait.h>
+#include <signal.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include <errno.h>
+
+/* errExit(msg): report the failure recorded in \(aqerrno\(aq through
+   perror(), prefixed with \(aqmsg\(aq, then terminate the calling process
+   with EXIT_FAILURE. The do/while (0) wrapper makes the expansion
+   behave as a single statement. */
+
+#define errExit(msg)            \\
+    do {                        \\
+        perror(msg);            \\
+        exit(EXIT_FAILURE);     \\
+    } while (0)
+
+/* Arguments handed by main() to the start function of the cloned child */
+
+struct child_args {
+ char **argv; /* Command to be executed by child, with args */
+ int pipe_fd[2]; /* Pipe used to synchronize parent and child */
+};
+
+/* Nonzero when the \-v option was given; enables progress messages in main() */
+
+static int verbose;
+
+/* Print a usage message for the program (invoked as \(aqpname\(aq) on
+   standard error and terminate with EXIT_FAILURE. Does not return. */
+
+static void
+usage(char *pname)
+{
+    /* Body of the help text; each entry is written with one leading
+       space in front of it */
+
+    static const char *const help[] = {
+        "\-i New IPC namespace\\n",
+        "\-m New mount namespace\\n",
+        "\-n New network namespace\\n",
+        "\-p New PID namespace\\n",
+        "\-u New UTS namespace\\n",
+        "\-U New user namespace\\n",
+        "\-M uid_map Specify UID map for user namespace\\n",
+        "\-G gid_map Specify GID map for user namespace\\n",
+        "\-z Map user\(aqs UID and GID to 0 in user namespace\\n",
+        " (equivalent to: \-M \(aq0 <uid> 1\(aq \-G \(aq0 <gid> 1\(aq)\\n",
+        "\-v Display verbose messages\\n",
+        "\\n",
+        "If \-z, \-M, or \-G is specified, \-U is required.\\n",
+        "It is not permitted to specify both \-z and either \-M or \-G.\\n",
+        "\\n",
+        "Map strings for \-M and \-G consist of records of the form:\\n",
+        "\\n",
+        " ID\-inside\-ns ID\-outside\-ns len\\n",
+        "\\n",
+        "A map string can contain multiple records, separated"
+        " by commas;\\n",
+        "the commas are replaced by newlines before writing"
+        " to map files.\\n",
+    };
+    size_t k;
+
+    fprintf(stderr, "Usage: %s [options] cmd [arg...]\\n\\n", pname);
+    fputs("Create a child process that executes a shell "
+          "command in a new user namespace,\\n"
+          "and possibly also other new namespace(s).\\n\\n", stderr);
+    fputs("Options can be:\\n\\n", stderr);
+
+    for (k = 0; k < sizeof(help) / sizeof(help[0]); k++)
+        fprintf(stderr, " %s", help[k]);
+
+    exit(EXIT_FAILURE);
+}
+
+/* Update the mapping file \(aqmap_file\(aq, with the value provided in
+ \(aqmapping\(aq, a string that defines a UID or GID mapping. A UID or
+ GID mapping consists of one or more newline\-delimited records
+ of the form:
+
+ ID\-inside\-ns ID\-outside\-ns length
+
+ Requiring the user to supply a string that contains newlines is
+ of course inconvenient for command\-line use. Thus, we permit the
+ use of commas to delimit records in this string, and replace them
+ with newlines before writing the string to the file. */
+
+/* Write \(aqmapping\(aq to the file \(aqmap_file\(aq. The string \(aqmapping\(aq is
+   modified in place (each comma becomes a newline). Failures are
+   reported on stderr but are not fatal: the function simply returns
+   and the caller carries on. */
+
+static void
+update_map(char *mapping, char *map_file)
+{
+    int fd;
+    size_t j, map_len;          /* Index into, and length of, \(aqmapping\(aq */
+    ssize_t nwritten;           /* Result of write() */
+
+    /* Replace commas in mapping string with newlines */
+
+    map_len = strlen(mapping);
+    for (j = 0; j < map_len; j++)
+        if (mapping[j] == \(aq,\(aq)
+            mapping[j] = \(aq\\n\(aq;
+
+    fd = open(map_file, O_RDWR);
+    if (fd == \-1) {
+        fprintf(stderr, "ERROR: open %s: %s\\n", map_file, strerror(errno));
+        return;
+    }
+
+    /* Keep the result of write() in a signed variable, so that the
+       error return (\-1, errno set) can be told apart from a short
+       write, for which errno carries no information */
+
+    nwritten = write(fd, mapping, map_len);
+    if (nwritten == \-1)
+        fprintf(stderr, "ERROR: write %s: %s\\n", map_file, strerror(errno));
+    else if ((size_t) nwritten != map_len)
+        fprintf(stderr, "ERROR: write %s: partial write\\n", map_file);
+
+    close(fd);
+}
+
+/* Start function for the cloned child. \(aqarg\(aq points to a
+   \(aqstruct child_args\(aq. Blocks until the parent closes the write end
+   of the synchronization pipe, then executes the requested command.
+   Does not return: either execvp() replaces the process image or
+   the process terminates with EXIT_FAILURE. */
+
+static int
+childFunc(void *arg)
+{
+    struct child_args *args = (struct child_args *) arg;
+    char ch;
+
+    /* Wait until the parent has updated the UID and GID mappings.
+       See the comment in main(). We wait for end of file on a
+       pipe that will be closed by the parent process once it has
+       updated the mappings. */
+
+    close(args\->pipe_fd[1]);    /* Close our descriptor for the write
+                                   end of the pipe so that we see EOF
+                                   when parent closes its descriptor */
+    if (read(args\->pipe_fd[0], &ch, 1) != 0) {
+        fprintf(stderr,
+                "Failure in child: read from pipe returned != 0\\n");
+        exit(EXIT_FAILURE);
+    }
+
+    /* The pipe has served its purpose; close the read end so that the
+       descriptor is not inherited by the executed command */
+
+    close(args\->pipe_fd[0]);
+
+    /* Execute a shell command */
+
+    printf("About to exec %s\\n", args\->argv[0]);
+    execvp(args\->argv[0], args\->argv);
+    errExit("execvp");
+}
+
+/* Size, in bytes, of the stack area given to the cloned child */
+
+#define STACK_SIZE (1024 * 1024)
+
+/* main() passes the address just past the end of this array
+   (child_stack + STACK_SIZE) to clone() */
+
+static char child_stack[STACK_SIZE]; /* Space for child\(aqs stack */
+
+/* Write the string \(aqstr\(aq to the /proc/PID/setgroups file of the
+   process \(aqchild_pid\(aq. Since Linux 3.19, an unprivileged process
+   must write "deny" to this file before it is permitted to update
+   the gid_map file. On older kernels the file does not exist and no
+   such restriction applies, so ENOENT from open() is silently
+   accepted; any other failure is reported on stderr but is not
+   fatal. */
+
+static void
+proc_setgroups_write(pid_t child_pid, const char *str)
+{
+    char setgroups_path[PATH_MAX];
+    int fd;
+
+    snprintf(setgroups_path, PATH_MAX, "/proc/%ld/setgroups",
+            (long) child_pid);
+
+    fd = open(setgroups_path, O_RDWR);
+    if (fd == \-1) {
+        if (errno != ENOENT)
+            fprintf(stderr, "ERROR: open %s: %s\\n", setgroups_path,
+                    strerror(errno));
+        return;
+    }
+
+    if (write(fd, str, strlen(str)) == \-1)
+        fprintf(stderr, "ERROR: write %s: %s\\n", setgroups_path,
+                strerror(errno));
+
+    close(fd);
+}
+
+/* Program entry point.
+
+   Parses the command\-line options, creates a child in the requested
+   new namespace(s) with clone(), writes the UID and GID maps of the
+   child (if requested), releases the child so that it can execute
+   the command, and then waits for the child to terminate.
+
+   Exits with EXIT_SUCCESS once the child has been reaped (the exit
+   status of the child is not examined), or with EXIT_FAILURE on a
+   usage error or if pipe(), clone(), or waitpid() fails. */
+
+int
+main(int argc, char *argv[])
+{
+    enum { MAP_BUF_SIZE = 100 };    /* Room for one "0 <id> 1" record */
+    int flags, opt, map_zero;
+    pid_t child_pid;
+    struct child_args args;
+    char *uid_map, *gid_map;
+    char map_buf[MAP_BUF_SIZE];
+    char map_path[PATH_MAX];
+
+    /* Parse command\-line options. The initial \(aq+\(aq character in
+       the final getopt() argument prevents GNU\-style permutation
+       of command\-line options. That\(aqs useful, since sometimes
+       the \(aqcommand\(aq to be executed by this program itself
+       has command\-line options. We don\(aqt want getopt() to treat
+       those as options to this program. */
+
+    flags = 0;
+    verbose = 0;
+    gid_map = NULL;
+    uid_map = NULL;
+    map_zero = 0;
+    while ((opt = getopt(argc, argv, "+imnpuUM:G:zv")) != \-1) {
+        switch (opt) {
+        case \(aqi\(aq: flags |= CLONE_NEWIPC; break;
+        case \(aqm\(aq: flags |= CLONE_NEWNS; break;
+        case \(aqn\(aq: flags |= CLONE_NEWNET; break;
+        case \(aqp\(aq: flags |= CLONE_NEWPID; break;
+        case \(aqu\(aq: flags |= CLONE_NEWUTS; break;
+        case \(aqv\(aq: verbose = 1; break;
+        case \(aqz\(aq: map_zero = 1; break;
+        case \(aqM\(aq: uid_map = optarg; break;
+        case \(aqG\(aq: gid_map = optarg; break;
+        case \(aqU\(aq: flags |= CLONE_NEWUSER; break;
+        default: usage(argv[0]);
+        }
+    }
+
+    /* \-M, \-G, or \-z without \-U is nonsensical, and \-z cannot be
+       combined with \-M or \-G */
+
+    if (((uid_map != NULL || gid_map != NULL || map_zero) &&
+                !(flags & CLONE_NEWUSER)) ||
+            (map_zero && (uid_map != NULL || gid_map != NULL)))
+        usage(argv[0]);
+
+    /* A command to execute is mandatory; without this check the child
+       would call execvp() with a NULL file name */
+
+    if (optind >= argc)
+        usage(argv[0]);
+
+    args.argv = &argv[optind];
+
+    /* We use a pipe to synchronize the parent and child, in order to
+       ensure that the parent sets the UID and GID maps before the child
+       calls execve(). This ensures that the child maintains its
+       capabilities during the execve() in the common case where we
+       want to map the child\(aqs effective user ID to 0 in the new user
+       namespace. Without this synchronization, the child would lose
+       its capabilities if it performed an execve() with nonzero
+       user IDs (see the capabilities(7) man page for details of the
+       transformation of a process\(aqs capabilities during execve()). */
+
+    if (pipe(args.pipe_fd) == \-1)
+        errExit("pipe");
+
+    /* Create the child in new namespace(s). The stack grows downward,
+       so clone() is given the address just past the end of the
+       stack area. */
+
+    child_pid = clone(childFunc, child_stack + STACK_SIZE,
+                      flags | SIGCHLD, &args);
+    if (child_pid == \-1)
+        errExit("clone");
+
+    /* Parent falls through to here */
+
+    if (verbose)
+        printf("%s: PID of child created by clone() is %ld\\n",
+                argv[0], (long) child_pid);
+
+    /* Update the UID and GID maps in the child */
+
+    if (uid_map != NULL || map_zero) {
+        snprintf(map_path, PATH_MAX, "/proc/%ld/uid_map",
+                (long) child_pid);
+        if (map_zero) {
+            snprintf(map_buf, MAP_BUF_SIZE, "0 %ld 1", (long) getuid());
+            uid_map = map_buf;
+        }
+        update_map(uid_map, map_path);
+    }
+    if (gid_map != NULL || map_zero) {
+
+        /* Needed on Linux 3.19 and later before an unprivileged
+           process may write gid_map; harmless on older kernels */
+
+        proc_setgroups_write(child_pid, "deny");
+
+        snprintf(map_path, PATH_MAX, "/proc/%ld/gid_map",
+                (long) child_pid);
+        if (map_zero) {
+            snprintf(map_buf, MAP_BUF_SIZE, "0 %ld 1", (long) getgid());
+            gid_map = map_buf;
+        }
+        update_map(gid_map, map_path);
+    }
+
+    /* Close the write end of the pipe, to signal to the child that we
+       have updated the UID and GID maps */
+
+    close(args.pipe_fd[1]);
+
+    if (waitpid(child_pid, NULL, 0) == \-1)      /* Wait for child */
+        errExit("waitpid");
+
+    if (verbose)
+        printf("%s: terminating\\n", argv[0]);
+
+    exit(EXIT_SUCCESS);
+}
+.fi
.SH SEE ALSO
.BR unshare (1),
.BR clone (2),
@@ -404,4 +735,5 @@ to gain more privilege than the root user of a user namespace has.
.BR proc (5),
.BR credentials (7),
.BR capabilities (7)
-.BR namespaces (7)
+.BR namespaces (7),
+.BR pid_namespaces (7)