These tests verify that the uid/gids in an inode and in ACLs get translated
from/to a user namespace to/from disk correctly.
I had to use getfacl instead of chacl -l because I need numeric uids
to make the output consistent.
A new program, nsexec, was added to facilitate creating/entering a user
namespace for testing. The original source for the program is
https://lwn.net/Articles/539940. I added the -s option to become "root"
in the user namespace.
Tested against btrfs, ext4, and xfs with my proposed user namespace changes.
Signed-off-by: Dwight Engen <dwight.engen@xxxxxxxxxx>
---
.gitignore | 1 +
common/attr | 14 +++
src/Makefile | 2 +-
src/nsexec.c | 239 ++++++++++++++++++++++++++++++++++++++++++++++++++
tests/generic/313 | 107 ++++++++++++++++++++++
tests/generic/313.out | 20 +++++
tests/generic/314 | 102 +++++++++++++++++++++
tests/generic/314.out | 51 +++++++++++
tests/generic/group | 2 +
9 files changed, 537 insertions(+), 1 deletion(-)
create mode 100644 src/nsexec.c
create mode 100755 tests/generic/313
create mode 100644 tests/generic/313.out
create mode 100755 tests/generic/314
create mode 100644 tests/generic/314.out
diff --git a/.gitignore b/.gitignore
index 5aa68c3..fc5050a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -63,6 +63,7 @@
/src/mmapcat
/src/multi_open_unlink
/src/nametest
+/src/nsexec
/src/permname
/src/preallo_rw_pattern_reader
/src/preallo_rw_pattern_writer
diff --git a/common/attr b/common/attr
index e5070bf..4a3ac9e 100644
--- a/common/attr
+++ b/common/attr
@@ -54,6 +54,20 @@ _acl_filter_id()
-e "s/ $acl3 / id3 /"
}
+# Filter for "getfacl -n" output: replace the numeric test ids $acl1,
+# $acl2 and $acl3 with the symbolic names id1, id2 and id3.
+#
+# Each id is matched as a complete field -- followed by ':' in ACL entries
+# ("user:ID:perms", "group:ID:perms") and by end of line in the
+# "# owner: ID" / "# group: ID" header lines -- so an id that is merely a
+# prefix of a longer number (e.g. 100 vs 1000) is left untouched.
+_getfacl_filter_id()
+{
+	sed \
+	    -e "s/user:$acl1:/user:id1:/" \
+	    -e "s/user:$acl2:/user:id2:/" \
+	    -e "s/user:$acl3:/user:id3:/" \
+	    -e "s/group:$acl1:/group:id1:/" \
+	    -e "s/group:$acl2:/group:id2:/" \
+	    -e "s/group:$acl3:/group:id3:/" \
+	    -e "s/: $acl1\$/: id1/" \
+	    -e "s/: $acl2\$/: id2/" \
+	    -e "s/: $acl3\$/: id3/"
+}
+
# filtered ls
#
_acl_ls()
diff --git a/src/Makefile b/src/Makefile
index c18ffc9..4eabdc7 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -18,7 +18,7 @@ LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize
preallo_rw_pattern_reader \
locktest unwritten_mmap bulkstat_unlink_test t_stripealign \
bulkstat_unlink_test_modified t_dir_offset t_futimens t_immutable \
stale_handle pwrite_mmap_blocked t_dir_offset2 seek_sanity_test \
- seek_copy_test t_readdir_1 t_readdir_2 fsync-tester
+ seek_copy_test t_readdir_1 t_readdir_2 fsync-tester nsexec
SUBDIRS =
diff --git a/src/nsexec.c b/src/nsexec.c
new file mode 100644
index 0000000..f033b1a
--- /dev/null
+++ b/src/nsexec.c
@@ -0,0 +1,239 @@
+/* userns_child_exec.c
+
+ Copyright 2013, Michael Kerrisk
+ Licensed under GNU General Public License v2 or later
+
+ Create a child process that executes a shell command in new
+ namespace(s); allow UID and GID mappings to be specified when
+ creating a user namespace.
+*/
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <sched.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/wait.h>
+#include <signal.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include <errno.h>
+
+/* A simple error-handling function: print an error message based
+ on the value in 'errno' and terminate the calling process */
+
+#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \
+ } while (0)
+
+struct child_args {
+ char **argv; /* Command to be executed by child, with arguments */
+ int pipe_fd[2]; /* Pipe used to synchronize parent and child */
+};
+
+static int verbose, setid;
+
+/* Print the command-line help for this program on stderr and terminate
+   the process with EXIT_FAILURE; this function does not return.
+
+   'pname' is the program name shown in the "Usage:" line. */
+
+static void
+usage(char *pname)
+{
+ fprintf(stderr, "Usage: %s [options] cmd [arg...]\n\n", pname);
+ fprintf(stderr, "Create a child process that executes a shell command "
+ "in a new user namespace,\n"
+ "and possibly also other new namespace(s).\n\n");
+ fprintf(stderr, "Options can be:\n\n");
+/* fpe(): write one line of help text to stderr. Note that the macro body
+   already ends in ';', so each use below expands to two statements. */
+#define fpe(str) fprintf(stderr, " %s", str);
+ fpe("-i New IPC namespace\n");
+ fpe("-m New mount namespace\n");
+ fpe("-n New network namespace\n");
+ fpe("-p New PID namespace\n");
+ fpe("-u New UTS namespace\n");
+ fpe("-U New user namespace\n");
+ fpe("-M uid_map Specify UID map for user namespace\n");
+ fpe("-G gid_map Specify GID map for user namespace\n");
+ fpe(" If -M or -G is specified, -U is required\n");
+ fpe("-s Set uid/gid to 0 in the new user namespace\n");
+ fpe("-v Display verbose messages\n");
+ fpe("\n");
+ fpe("Map strings for -M and -G consist of records of the form:\n");
+ fpe("\n");
+ fpe(" ID-inside-ns ID-outside-ns len\n");
+ fpe("\n");
+ fpe("A map string can contain multiple records, separated by commas;\n");
+ fpe("the commas are replaced by newlines before writing to map files.\n");
+
+ /* Every path through usage() is an error path */
+ exit(EXIT_FAILURE);
+}
+
+/* Write a UID or GID map to 'map_file' (main() passes the child's
+   /proc/PID/uid_map or /proc/PID/gid_map path).
+
+   'mapping' holds one or more records of the form:
+
+       ID-inside-ns   ID-outside-ns   length
+
+   The map file wants newline-delimited records, which is inconvenient on
+   a command line, so records may instead be separated by commas; the
+   commas are rewritten to newlines IN PLACE (the caller's string is
+   modified) before the string is written.
+
+   Does not return on failure: an error from open() or write(), or a
+   short write, prints a message on stderr and exits with EXIT_FAILURE. */
+
+static void
+update_map(char *mapping, char *map_file)
+{
+	int fd;
+	size_t j;
+	size_t map_len;		/* Length of 'mapping' */
+	ssize_t written;
+
+	/* Replace commas in mapping string with newlines */
+
+	map_len = strlen(mapping);
+	for (j = 0; j < map_len; j++)
+		if (mapping[j] == ',')
+			mapping[j] = '\n';
+
+	fd = open(map_file, O_RDWR);
+	if (fd == -1) {
+		fprintf(stderr, "open %s: %s\n", map_file, strerror(errno));
+		exit(EXIT_FAILURE);
+	}
+
+	/* The whole map has to go out in one write() (see
+	   user_namespaces(7)), so a short write is fatal too. errno is
+	   only meaningful when write() itself failed, hence the two
+	   separate diagnostics. */
+
+	written = write(fd, mapping, map_len);
+	if (written == -1) {
+		fprintf(stderr, "write %s: %s\n", map_file, strerror(errno));
+		exit(EXIT_FAILURE);
+	}
+	if ((size_t) written != map_len) {
+		fprintf(stderr, "write %s: short write (%zd of %zu bytes)\n",
+			map_file, written, map_len);
+		exit(EXIT_FAILURE);
+	}
+
+	close(fd);
+}
+
+static int				/* Entry point of the cloned child */
+childFunc(void *arg)
+{
+	struct child_args *ca = arg;
+	int sync_fd = ca->pipe_fd[0];
+	char byte;
+
+	/* The parent closes its write end of the pipe once the UID and GID
+	   maps are in place. Drop our own copy of the write end first,
+	   otherwise the read below could never see end of file. */
+
+	close(ca->pipe_fd[1]);
+
+	/* Block until the parent is done; anything other than EOF (a return
+	   value of 0) means the synchronization went wrong */
+
+	if (read(sync_fd, &byte, 1) != 0) {
+		fprintf(stderr, "Failure in child: read from pipe returned != 0\n");
+		exit(EXIT_FAILURE);
+	}
+
+	/* -s: try to become gid 0 and then uid 0. A failure is reported
+	   but the command is still executed. */
+
+	if (setid && setgid(0) < 0)
+		fprintf(stderr, "Failure in child to setgid 0: %s\n", strerror(errno));
+	if (setid && setuid(0) < 0)
+		fprintf(stderr, "Failure in child to setuid 0: %s\n", strerror(errno));
+
+	/* Replace this process with the requested command; execvp() only
+	   returns on failure */
+
+	execvp(ca->argv[0], ca->argv);
+	errExit("execvp");
+}
+
+#define STACK_SIZE (1024 * 1024)
+
+static char child_stack[STACK_SIZE]; /* Space for child's stack */
+
+/* Program entry point.
+
+   Parses the options listed in usage(), clone()s a child into the
+   requested namespace(s), writes the optional UID/GID maps for the child,
+   releases the child by closing the write end of the synchronization
+   pipe, and waits for the child to terminate.
+
+   Exits with EXIT_SUCCESS once the child has been reaped (the child's own
+   exit status is not propagated). A usage error -- including a missing
+   command -- or a failure of pipe(), clone(), a map update or waitpid()
+   exits with EXIT_FAILURE. */
+
+int
+main(int argc, char *argv[])
+{
+	int flags, opt;
+	pid_t child_pid;
+	struct child_args args;
+	char *uid_map, *gid_map;
+	char map_path[PATH_MAX];
+
+	/* Parse command-line options. The initial '+' character in
+	   the final getopt() argument prevents GNU-style permutation
+	   of command-line options. That's useful, since sometimes
+	   the 'command' to be executed by this program itself
+	   has command-line options. We don't want getopt() to treat
+	   those as options to this program. */
+
+	flags = 0;
+	verbose = 0;
+	setid = 0;
+	gid_map = NULL;
+	uid_map = NULL;
+	while ((opt = getopt(argc, argv, "+imnpuUM:G:vs")) != -1) {
+		switch (opt) {
+		case 'i': flags |= CLONE_NEWIPC;	break;
+		case 'm': flags |= CLONE_NEWNS;		break;
+		case 'n': flags |= CLONE_NEWNET;	break;
+		case 'p': flags |= CLONE_NEWPID;	break;
+		case 'u': flags |= CLONE_NEWUTS;	break;
+		case 'v': verbose = 1;			break;
+		case 'M': uid_map = optarg;		break;
+		case 'G': gid_map = optarg;		break;
+		case 'U': flags |= CLONE_NEWUSER;	break;
+		case 's': setid = 1;			break;
+		default:  usage(argv[0]);
+		}
+	}
+
+	/* -M or -G without -U is nonsensical */
+
+	if ((uid_map != NULL || gid_map != NULL) &&
+	    !(flags & CLONE_NEWUSER))
+		usage(argv[0]);
+
+	/* A command is mandatory: without one argv[optind] is NULL and the
+	   child would hand a NULL file name to execvp() */
+
+	if (optind >= argc)
+		usage(argv[0]);
+
+	args.argv = &argv[optind];
+
+	/* We use a pipe to synchronize the parent and child, in order to
+	   ensure that the parent sets the UID and GID maps before the child
+	   calls execve(). This ensures that the child maintains its
+	   capabilities during the execve() in the common case where we
+	   want to map the child's effective user ID to 0 in the new user
+	   namespace. Without this synchronization, the child would lose
+	   its capabilities if it performed an execve() with nonzero
+	   user IDs (see the capabilities(7) man page for details of the
+	   transformation of a process's capabilities during execve()). */
+
+	if (pipe(args.pipe_fd) == -1)
+		errExit("pipe");
+
+	/* Create the child in new namespace(s). The stack pointer handed to
+	   clone() is the END of child_stack. */
+
+	child_pid = clone(childFunc, child_stack + STACK_SIZE,
+			  flags | SIGCHLD, &args);
+	if (child_pid == -1)
+		errExit("clone");
+
+	/* Parent falls through to here */
+
+	if (verbose)
+		printf("%s: PID of child created by clone() is %ld\n",
+		       argv[0], (long) child_pid);
+
+	/* Update the UID and GID maps in the child */
+
+	if (uid_map != NULL) {
+		snprintf(map_path, sizeof(map_path), "/proc/%ld/uid_map",
+			 (long) child_pid);
+		update_map(uid_map, map_path);
+	}
+	if (gid_map != NULL) {
+		snprintf(map_path, sizeof(map_path), "/proc/%ld/gid_map",
+			 (long) child_pid);
+		update_map(gid_map, map_path);
+	}
+
+	/* Close the write end of the pipe, to signal to the child that we
+	   have updated the UID and GID maps */
+
+	close(args.pipe_fd[1]);
+
+	if (waitpid(child_pid, NULL, 0) == -1)	/* Wait for child */
+		errExit("waitpid");
+
+	if (verbose)
+		printf("%s: terminating\n", argv[0]);
+
+	exit(EXIT_SUCCESS);
+}
diff --git a/tests/generic/313 b/tests/generic/313
new file mode 100755
index 0000000..0dd6213
--- /dev/null
+++ b/tests/generic/313
@@ -0,0 +1,107 @@
+#! /bin/bash
+# FS QA Test No. 313
+#
+# Check uid/gid to/from disk with a user namespace. A new file
+# will be created from inside a userns. We check that the uid/gid
+# is correct from both inside the userns and also from init_user_ns.
+# We will then unmount and remount the file system and check the
+# uid/gid from both inside the userns and from init_user_ns to show
+# that the correct uid was flushed and brought back from disk.
+#
+#-----------------------------------------------------------------------
+# Copyright (C) 2013 Oracle, Inc. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+#-----------------------------------------------------------------------
+#
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1 # failure is the default!
+
+_cleanup()
+{
+ cd /
+ umount $SCRATCH_DEV >/dev/null 2>&1
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+. ./common/attr
+
+nsexec=$here/src/nsexec
+lstat64=$here/src/lstat64
+file=$SCRATCH_MNT/file1
+
+# real QA test starts here
+_supported_fs generic
+# only Linux supports user namespace
+_supported_os Linux
+
+[ -x $nsexec ] || _notrun "$nsexec executable not found"
+[ -x $lstat64 ] || _notrun "$lstat64 executable not found"
+
+rm -f $seqres.full
+
+_require_scratch
+_need_to_be_root
+_require_user
+# Numeric uid of $qa_user. "id -u" resolves exactly that account, whereas
+# grepping /etc/passwd also picks up every line that merely contains the
+# name (another user, a home directory, a GECOS field).
+qa_user_id=`id -u $qa_user`
+
+# Replace the qa user's numeric id with "qa_user" and the scratch mount
+# point with the literal string "$SCRATCH_MNT" so that the golden output
+# does not depend on the test host.
+_filter_output()
+{
+	sed \
+	    -e "s/$qa_user_id/qa_user/g" \
+	    -e "s!$SCRATCH_MNT!\$SCRATCH_MNT!"
+}
+
+# Print the first three lines of lstat64 output for $file twice: once as
+# seen from the initial user namespace, and once from inside a new user
+# namespace in which ids 0..999 map to $qa_user_id..$qa_user_id+999.
+_print_numeric_uid()
+{
+	echo "From init_user_ns"
+	$here/src/lstat64 $file |head -3 |_filter_output
+
+	echo "From user_ns"
+	# nsexec and the command it runs must stay on one line; if they are
+	# split, lstat64 runs outside the namespace
+	$nsexec -s -U -M "0 $qa_user_id 1000" -G "0 $qa_user_id 1000" $here/src/lstat64 $file |head -3 |_filter_output
+}
+
+# start from a freshly made and mounted scratch filesystem
+umount $SCRATCH_DEV >/dev/null 2>&1
+echo "*** MKFS ***" >>$seqres.full
+echo "" >>$seqres.full
+_scratch_mkfs >>$seqres.full 2>&1 || _fail "mkfs failed"
+_scratch_mount >>$seqres.full 2>&1 || _fail "mount failed"
+# presumably needed so the unprivileged id that "root" in the userns maps
+# to can create a file in the root of the scratch filesystem
+chmod 777 $SCRATCH_MNT
+
+# create $file as "root" in userns, which is $qa_user in parent namespace
+$nsexec -s -U -M "0 $qa_user_id 1000" -G "0 $qa_user_id 1000" touch $file
+
+# first check: ids as currently seen from both namespaces
+_print_numeric_uid
+
+echo ""
+echo "*** Remounting ***"
+echo ""
+# cycle the mount so the second check reads the ids back from disk
+sync
+umount $SCRATCH_MNT >>$seqres.full 2>&1
+_scratch_mount >>$seqres.full 2>&1 || _fail "mount failed"
+
+# second check: same ids expected after the remount
+_print_numeric_uid
+
+umount $SCRATCH_DEV >/dev/null 2>&1
+# reaching this point means the test ran to completion
+status=0
+exit
diff --git a/tests/generic/313.out b/tests/generic/313.out
new file mode 100644
index 0000000..eab14c4
--- /dev/null
+++ b/tests/generic/313.out
@@ -0,0 +1,20 @@
+QA output created by 313
+From init_user_ns
+ File: "$SCRATCH_MNT/file1"
+ Size: 0 Filetype: Regular File
+ Mode: (0644/-rw-r--r--) Uid: (qa_user) Gid: (qa_user)
+From user_ns
+ File: "$SCRATCH_MNT/file1"
+ Size: 0 Filetype: Regular File
+ Mode: (0644/-rw-r--r--) Uid: (0) Gid: (0)
+
+*** Remounting ***
+
+From init_user_ns
+ File: "$SCRATCH_MNT/file1"
+ Size: 0 Filetype: Regular File
+ Mode: (0644/-rw-r--r--) Uid: (qa_user) Gid: (qa_user)
+From user_ns
+ File: "$SCRATCH_MNT/file1"
+ Size: 0 Filetype: Regular File
+ Mode: (0644/-rw-r--r--) Uid: (0) Gid: (0)
diff --git a/tests/generic/314 b/tests/generic/314
new file mode 100755
index 0000000..40228b8
--- /dev/null
+++ b/tests/generic/314
@@ -0,0 +1,102 @@
+#! /bin/bash
+# FS QA Test No. 314
+#
+# Check get/set ACLs to/from disk with a user namespace. A new file
+# will be created and ACLs set on it from both inside a userns and
+# from init_user_ns. We check that the ACL is correct from both
+# inside the userns and also from init_user_ns. We will then unmount
+# and remount the file system and check the ACL from both inside the
+# userns and from init_user_ns to show that the correct uid/gid in
+# the ACL was flushed and brought back from disk.
+#
+#-----------------------------------------------------------------------
+# Copyright (C) 2013 Oracle, Inc. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+#-----------------------------------------------------------------------
+#
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1 # failure is the default!
+
+_cleanup()
+{
+ cd /
+ umount $SCRATCH_DEV >/dev/null 2>&1
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+. ./common/attr
+
+nsexec=$here/src/nsexec
+file=$SCRATCH_MNT/file1
+
+# real QA test starts here
+_supported_fs generic
+# only Linux supports user namespace
+_supported_os Linux
+
+[ -x $nsexec ] || _notrun "$nsexec executable not found"
+
+rm -f $seqres.full
+
+_require_scratch
+_need_to_be_root
+_acl_setup_ids
+_require_acls
+
+# Dump the ACL of $file with numeric ids, first from the initial user
+# namespace and then from inside a new user namespace whose uid 0 maps to
+# $acl1 and whose gid 0 maps to $acl2.
+#
+# --absolute-names keeps the leading '/' in the "# file:" line; without it
+# getfacl strips the slash and the $SCRATCH_MNT filter below cannot match,
+# which would leave the host's mount point in the golden output.
+_print_getfacls()
+{
+	echo "From init_user_ns"
+	getfacl --absolute-names -n $file 2>/dev/null | _getfacl_filter_id | sed -e "s!$SCRATCH_MNT!\$SCRATCH_MNT!"
+
+	echo "From user_ns"
+	$nsexec -U -M "0 $acl1 1000" -G "0 $acl2 1000" getfacl --absolute-names -n $file 2>/dev/null | _getfacl_filter_id | sed -e "s!$SCRATCH_MNT!\$SCRATCH_MNT!"
+}
+
+umount $SCRATCH_DEV >/dev/null 2>&1
+echo "*** MKFS ***" >>$seqres.full
+echo "" >>$seqres.full
+_scratch_mkfs >>$seqres.full 2>&1 || _fail "mkfs failed"
+_scratch_mount >>$seqres.full 2>&1 || _fail "mount failed"
+
+touch $file
+chown $acl1:$acl1 $file
+
+# set acls from init_user_ns, to be checked from inside the userns
+setfacl -n -m u:$acl2:rw,g:$acl2:r $file
+# set acls from inside userns, to be checked from init_user_ns
+$nsexec -s -U -M "0 $acl1 1000" -G "0 $acl2 1000" setfacl -n -m u:root:rx,g:root:x $file
+
+_print_getfacls
+
+echo "*** Remounting ***"
+echo ""
+# cycle the mount so the second dump reads the ACL back from disk
+sync
+umount $SCRATCH_MNT >>$seqres.full 2>&1
+_scratch_mount >>$seqres.full 2>&1 || _fail "mount failed"
+
+_print_getfacls
+
+umount $SCRATCH_DEV >/dev/null 2>&1
+status=0
+exit
diff --git a/tests/generic/314.out b/tests/generic/314.out
new file mode 100644
index 0000000..b88354c
--- /dev/null
+++ b/tests/generic/314.out
@@ -0,0 +1,51 @@
+QA output created by 314
+From init_user_ns
+# file: $SCRATCH_MNT/file1
+# owner: id1
+# group: id1
+user::rw-
+user:id1:r-x #effective:r--
+user:id2:rw- #effective:r--
+group::r--
+group:id2:--x #effective:---
+mask::r--
+other::r--
+
+From user_ns
+# file: $SCRATCH_MNT/file1
+# owner: 0
+# group: 65534
+user::rw-
+user:0:r-x #effective:r--
+user:1:rw- #effective:r--
+group::r--
+group:0:--x #effective:---
+mask::r--
+other::r--
+
+*** Remounting ***
+
+From init_user_ns
+# file: $SCRATCH_MNT/file1
+# owner: id1
+# group: id1
+user::rw-
+user:id1:r-x #effective:r--
+user:id2:rw- #effective:r--
+group::r--
+group:id2:--x #effective:---
+mask::r--
+other::r--
+
+From user_ns
+# file: $SCRATCH_MNT/file1
+# owner: 0
+# group: 65534
+user::rw-
+user:0:r-x #effective:r--
+user:1:rw- #effective:r--
+group::r--
+group:0:--x #effective:---
+mask::r--
+other::r--
+
diff --git a/tests/generic/group b/tests/generic/group
index bd443c1..ead1cb1 100644
--- a/tests/generic/group
+++ b/tests/generic/group
@@ -115,3 +115,5 @@
310 auto
311 auto metadata log
312 auto quick prealloc enospc
+313 auto metadata quick
+314 acl attr auto quick
--
1.8.1.4
|