diff options
| author | Michael Kerrisk <mtk.manpages@gmail.com> | 2013-03-01 08:52:14 +0100 |
|---|---|---|
| committer | Michael Kerrisk <mtk.manpages@gmail.com> | 2014-09-13 20:16:00 -0700 |
| commit | 8d36d80cc307dcb2fba779bfec8881b64b216593 (patch) | |
| tree | e6f6dfce1f67414596135b018559af4d46b37555 /man7/user_namespaces.7 | |
| parent | df23ae04d67ca492dec454d702c69c2fe7607f71 (diff) | |
| download | man-pages-8d36d80cc307dcb2fba779bfec8881b64b216593.tar.gz | |
user_namespaces.7: Add an example program
Signed-off-by: Michael Kerrisk <mtk.manpages@gmail.com>
Diffstat (limited to 'man7/user_namespaces.7')
| -rw-r--r-- | man7/user_namespaces.7 | 334 |
1 files changed, 333 insertions, 1 deletions
diff --git a/man7/user_namespaces.7 b/man7/user_namespaces.7 index 25c51228d6..c2e08ef2e1 100644 --- a/man7/user_namespaces.7 +++ b/man7/user_namespaces.7 @@ -396,6 +396,337 @@ because of their potential to confuse set-user-ID-root applications. In general, it becomes safe to allow the root user in a user namespace to use those features because it is impossible, while in a user namespace, to gain more privilege than the root user of a user namespace has. +.SH EXAMPLE +The program below is designed to allow experimenting with +user namespaces, as well as other types of namespaces. +It creates namespaces as specified by command-line options and then executes +a command inside those namespaces. +The comments and +.I usage() +function inside the program provide a full explanation of the program. +The following shell session demonstrates its use: + +.in +4n +.nf +$ \fBuname -rs\fP # Need Linux 3.8 or later +Linux 3.8.0 +$ \fBid -u\fP # Running as unprivileged user +1000 +$ \fBid -g\fP +1000 +.fi +.in + +Now start a shell in new user +.RI ( \-U ), +mount +.RI ( \-m ), +and PID +.RI ( \-p ) +namespaces, with user ID +.RI ( \-M ) +and group ID 1000 +.RI ( \-G ) +mapped to 0 inside the user namespace: + +.in +4n +.nf +$ \fB./userns_child_exec -p -m -U -M '0 1000 1' -G '0 1000 1' bash\fP +.fi +.in + +The shell has PID 1, because it is the first process in the new +PID namespace: + +.in +4n +.nf +bash$ \fBecho $$\fP +1 +.fi +.in + +Inside the user namespace, the shell has user and group ID 0, +and a full set of permitted and effective capabilities: + +.in +4n +.nf +bash$ \fBcat /proc/$$/status | egrep '^[UG]id'\fP +Uid: 0 0 0 0 +Gid: 0 0 0 0 +bash$ \fBcat /proc/$$/status | egrep '^Cap(Prm|Inh|Eff)'\fP +CapInh: 0000000000000000 +CapPrm: 0000001fffffffff +CapEff: 0000001fffffffff +.fi +.in + +Mounting a new +.I /proc +file system and listing all of the processes visible +in the new PID namespace shows that the shell can't see +any processes outside the PID namespace: + +.in +4n 
+.nf +bash$ \fBmount -t proc proc /proc\fP +bash$ \fBps ax\fP + PID TTY STAT TIME COMMAND + 1 pts/3 S 0:00 bash + 22 pts/3 R+ 0:00 ps ax +.fi +.in +.SS Program source +\& +.nf +/* userns_child_exec.c + + Licensed under GNU General Public License v2 or later + + Create a child process that executes a shell command in new + namespace(s); allow UID and GID mappings to be specified when + creating a user namespace. +*/ +#define _GNU_SOURCE +#include <sched.h> +#include <unistd.h> +#include <stdlib.h> +#include <sys/wait.h> +#include <signal.h> +#include <fcntl.h> +#include <stdio.h> +#include <string.h> +#include <limits.h> +#include <errno.h> + +/* A simple error\-handling function: print an error message based + on the value in \(aqerrno\(aq and terminate the calling process */ + +#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \\ + } while (0) + +struct child_args { + char **argv; /* Command to be executed by child, with args */ + int pipe_fd[2]; /* Pipe used to synchronize parent and child */ +}; + +static int verbose; + +static void +usage(char *pname) +{ + fprintf(stderr, "Usage: %s [options] cmd [arg...]\\n\\n", pname); + fprintf(stderr, "Create a child process that executes a shell " + "command in a new user namespace,\\n" + "and possibly also other new namespace(s).\\n\\n"); + fprintf(stderr, "Options can be:\\n\\n"); +#define fpe(str) fprintf(stderr, " %s", str); + fpe("\-i New IPC namespace\\n"); + fpe("\-m New mount namespace\\n"); + fpe("\-n New network namespace\\n"); + fpe("\-p New PID namespace\\n"); + fpe("\-u New UTS namespace\\n"); + fpe("\-U New user namespace\\n"); + fpe("\-M uid_map Specify UID map for user namespace\\n"); + fpe("\-G gid_map Specify GID map for user namespace\\n"); + fpe("\-z Map user\(aqs UID and GID to 0 in user namespace\\n"); + fpe(" (equivalent to: \-M \(aq0 <uid> 1\(aq \-G \(aq0 <gid> 1\(aq)\\n"); + fpe("\-v Display verbose messages\\n"); + fpe("\\n"); + fpe("If \-z, \-M, or \-G is specified, \-U is 
required.\\n"); + fpe("It is not permitted to specify both \-z and either \-M or \-G.\\n"); + fpe("\\n"); + fpe("Map strings for \-M and \-G consist of records of the form:\\n"); + fpe("\\n"); + fpe(" ID\-inside\-ns ID\-outside\-ns len\\n"); + fpe("\\n"); + fpe("A map string can contain multiple records, separated" + " by commas;\\n"); + fpe("the commas are replaced by newlines before writing" + " to map files.\\n"); + + exit(EXIT_FAILURE); +} + +/* Update the mapping file \(aqmap_file\(aq, with the value provided in + \(aqmapping\(aq, a string that defines a UID or GID mapping. A UID or + GID mapping consists of one or more newline\-delimited records + of the form: + + ID\-inside\-ns ID\-outside\-ns length + + Requiring the user to supply a string that contains newlines is + of course inconvenient for command\-line use. Thus, we permit the + use of commas to delimit records in this string, and replace them + with newlines before writing the string to the file. */ + +static void +update_map(char *mapping, char *map_file) +{ + int fd, j; + size_t map_len; /* Length of \(aqmapping\(aq */ + + /* Replace commas in mapping string with newlines */ + + map_len = strlen(mapping); + for (j = 0; j < map_len; j++) + if (mapping[j] == \(aq,\(aq) + mapping[j] = \(aq\\n\(aq; + + fd = open(map_file, O_RDWR); + if (fd == \-1) { + fprintf(stderr, "ERROR: open %s: %s\\n", map_file, strerror(errno)); + return; + //exit(EXIT_FAILURE); + } + + if (write(fd, mapping, map_len) != map_len) { + fprintf(stderr, "ERROR: write %s: %s\\n", map_file, strerror(errno)); + //exit(EXIT_FAILURE); + } + + close(fd); +} + +static int /* Start function for cloned child */ +childFunc(void *arg) +{ + struct child_args *args = (struct child_args *) arg; + char ch; + + /* Wait until the parent has updated the UID and GID mappings. + See the comment in main(). We wait for end of file on a + pipe that will be closed by the parent process once it has + updated the mappings. 
*/ + + close(args\->pipe_fd[1]); /* Close our descriptor for the write + end of the pipe so that we see EOF + when parent closes its descriptor */ + if (read(args\->pipe_fd[0], &ch, 1) != 0) { + fprintf(stderr, + "Failure in child: read from pipe returned != 0\\n"); + exit(EXIT_FAILURE); + } + + /* Execute a shell command */ + + printf("About to exec %s\\n", args\->argv[0]); + execvp(args\->argv[0], args\->argv); + errExit("execvp"); +} + +#define STACK_SIZE (1024 * 1024) + +static char child_stack[STACK_SIZE]; /* Space for child\(aqs stack */ + +int +main(int argc, char *argv[]) +{ + int flags, opt, map_zero; + pid_t child_pid; + struct child_args args; + char *uid_map, *gid_map; + const int MAP_BUF_SIZE = 100; + char map_buf[MAP_BUF_SIZE]; + char map_path[PATH_MAX]; + + /* Parse command\-line options. The initial \(aq+\(aq character in + the final getopt() argument prevents GNU\-style permutation + of command\-line options. That\(aqs useful, since sometimes + the \(aqcommand\(aq to be executed by this program itself + has command\-line options. We don\(aqt want getopt() to treat + those as options to this program. 
*/ + + flags = 0; + verbose = 0; + gid_map = NULL; + uid_map = NULL; + map_zero = 0; + while ((opt = getopt(argc, argv, "+imnpuUM:G:zv")) != \-1) { + switch (opt) { + case \(aqi\(aq: flags |= CLONE_NEWIPC; break; + case \(aqm\(aq: flags |= CLONE_NEWNS; break; + case \(aqn\(aq: flags |= CLONE_NEWNET; break; + case \(aqp\(aq: flags |= CLONE_NEWPID; break; + case \(aqu\(aq: flags |= CLONE_NEWUTS; break; + case \(aqv\(aq: verbose = 1; break; + case \(aqz\(aq: map_zero = 1; break; + case \(aqM\(aq: uid_map = optarg; break; + case \(aqG\(aq: gid_map = optarg; break; + case \(aqU\(aq: flags |= CLONE_NEWUSER; break; + default: usage(argv[0]); + } + } + + /* \-M, \-G, or \-z without \-U is nonsensical, as is combining \-z + with \-M or \-G */ + + if (((uid_map != NULL || gid_map != NULL || map_zero) && + !(flags & CLONE_NEWUSER)) || + (map_zero && (uid_map != NULL || gid_map != NULL))) + usage(argv[0]); + + args.argv = &argv[optind]; + + /* We use a pipe to synchronize the parent and child, in order to + ensure that the parent sets the UID and GID maps before the child + calls execve(). This ensures that the child maintains its + capabilities during the execve() in the common case where we + want to map the child\(aqs effective user ID to 0 in the new user + namespace. Without this synchronization, the child would lose + its capabilities if it performed an execve() with nonzero + user IDs (see the capabilities(7) man page for details of the + transformation of a process\(aqs capabilities during execve()). 
*/ + + if (pipe(args.pipe_fd) == \-1) + errExit("pipe"); + + /* Create the child in new namespace(s) */ + + child_pid = clone(childFunc, child_stack + STACK_SIZE, + flags | SIGCHLD, &args); + if (child_pid == \-1) + errExit("clone"); + + /* Parent falls through to here */ + + if (verbose) + printf("%s: PID of child created by clone() is %ld\\n", + argv[0], (long) child_pid); + + /* Update the UID and GID maps in the child */ + + if (uid_map != NULL || map_zero) { + snprintf(map_path, PATH_MAX, "/proc/%ld/uid_map", + (long) child_pid); + if (map_zero) { + snprintf(map_buf, MAP_BUF_SIZE, "0 %ld 1", (long) getuid()); + uid_map = map_buf; + } + update_map(uid_map, map_path); + } + if (gid_map != NULL || map_zero) { + snprintf(map_path, PATH_MAX, "/proc/%ld/gid_map", + (long) child_pid); + if (map_zero) { + snprintf(map_buf, MAP_BUF_SIZE, "0 %ld 1", (long) getgid()); + gid_map = map_buf; + } + update_map(gid_map, map_path); + } + + /* Close the write end of the pipe, to signal to the child that we + have updated the UID and GID maps */ + + close(args.pipe_fd[1]); + + if (waitpid(child_pid, NULL, 0) == \-1) /* Wait for child */ + errExit("waitpid"); + + if (verbose) + printf("%s: terminating\\n", argv[0]); + + exit(EXIT_SUCCESS); +} +.fi .SH SEE ALSO .BR unshare (1), .BR clone (2), @@ -404,4 +735,5 @@ to gain more privilege than the root user of a user namespace has. .BR proc (5), .BR credentials (7), .BR capabilities (7) -.BR namespaces (7) +.BR namespaces (7), +.BR pid_namespaces (7) |
