mkbox

experiments with linux sandbox stuff
git clone http://frotz.net/git/mkbox.git
Log | Files | Refs | README

mkbox.c (7265B)


      1 /* mkbox.c
      2  *
      3  * Copyright 2014 Brian Swetland <swetland@frotz.net>
      4  * 
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at
      8  *
      9  *     http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  */
     17 
     18 #define _GNU_SOURCE
     19 
     20 #include <stdio.h>
     21 #include <stdlib.h>
     22 #include <unistd.h>
     23 #include <sched.h>
     24 #include <errno.h>
     25 #include <string.h>
     26 #include <sys/stat.h>
     27 #include <sys/types.h>
     28 #include <sys/mount.h>
     29 #include <sys/wait.h>
     30 #include <fcntl.h>
     31 #include <linux/capability.h>
     32 
     33 /* can't find headers for these, but they're in glibc... */
     34 int pivot_root(const char *new_root, const char *put_old);
     35 int capset(cap_user_header_t h, cap_user_data_t d);
     36 int capset(cap_user_header_t h, cap_user_data_t d);
     37 
     38 static int checkreturn(int res, const char *name, int line) {
     39 	if (res >= 0)
     40 		return res;
     41 	fprintf(stderr, "mkbox.c:%d: error: %s() failed: r=%d errno=%d (%s)\n",
     42 		line, name, res, errno, strerror(errno));
     43 	exit(-1);
     44 }
     45 
     46 #define ok(fname, arg...) checkreturn(fname(arg), #fname, __LINE__)
     47 
     48 int dropcaps(void) {
     49 	struct __user_cap_header_struct header;
     50 	struct __user_cap_data_struct data[_LINUX_CAPABILITY_U32S_3];
     51 	header.version = _LINUX_CAPABILITY_VERSION_3;
     52 	header.pid = 0;
     53 	memset(data, 0, sizeof(data));
     54 	return capset(&header, data);
     55 }
     56 
     57 void usage(void) {
     58 	fprintf(stderr,
     59 "usage: mkbox [ options ]* <root>\n"
     60 "\n"
     61 "options: --with-dev      mount /dev at sandbox's /dev\n"
     62 "                         (otherwise only /dev/{null,zero,random})\n"
     63 "         --with-sys      mount /sys at sandbox's /sys\n"
     64 "         --with-proc     mount /proc at sandbox's /proc\n"
     65 "         --with-tmp      mount tmpfs at sandbox's /tmp\n"
     66 "         --data=<path>   mount <path> at sandbox's /data (rw)\n"
     67 "         --init=<path>   exec <path> in sandbox (default: /bin/sh)\n"
     68 "\n"
     69 	);
     70 }
     71 
     72 int main(int argc, char **argv) {
     73 	int newuid = 3333;
     74 	int newgid = 3333;
     75 	int with_sys = 0;
     76 	int with_proc = 0;
     77 	int with_dev = 0;
     78 	int with_tmp = 0;
     79 	char buf[1024];
     80 	int fd;
     81 	const char *sandbox = NULL;
     82 	const char *databox = NULL;
     83 	const char *initbin = "/bin/sh";
     84 	uid_t uid;
     85 	gid_t gid;
     86 	pid_t cpid;
     87 
     88 	argv++;
     89 	argc--;
     90 	while (argc > 0) {
     91 		if (argv[0][0] != '-') break;
     92 		if (!strcmp(argv[0], "--with-sys")) {
     93 			with_sys = 1;
     94 		} else if (!strcmp(argv[0], "--with-proc")) {
     95 			with_proc = 1;
     96 		} else if (!strcmp(argv[0], "--with-dev")) {
     97 			with_dev = 1;
     98 		} else if (!strcmp(argv[0], "--with-tmp")) {
     99 			with_tmp = 1;
    100 		} else if (!strncmp(argv[0], "--init=", 7)) {
    101 			initbin = argv[0] + 7;
    102 		} else if (!strncmp(argv[0], "--data=", 7)) {
    103 			databox = argv[0] + 7;
    104 		} else {
    105 			usage();
    106 			return -1;
    107 		}
    108 		argv++;
    109 		argc--;
    110 	}
    111 	if (argc != 1) {
    112 		usage();
    113 		return -1;
    114 	}
    115 	sandbox = argv[0];
    116 
    117 	uid = getuid();
    118 	gid = getgid();
    119 
    120 	ok(unshare, CLONE_NEWPID|
    121 		CLONE_NEWNS|CLONE_NEWUTS|
    122 		CLONE_NEWIPC|CLONE_NEWUSER);
    123 
    124 	/* ensure that changes to our mount namespace do not "leak" to
    125 	 * outside namespaces (what mount --make-rprivate / does)
    126 	 */
    127 	mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL);
    128 
    129 	/* mount the sandbox on top of itself in our new namespace */
    130 	/* it will become our root filesystem */
    131 	ok(mount, sandbox, sandbox, NULL, MS_BIND|MS_NOSUID, NULL);
    132 
    133 	/* step inside the to-be-root-directory */
    134 	ok(chdir, sandbox);
    135 
    136 	/* setup needed subdirectories */
    137 	rmdir("data");
    138 	rmdir("dev");
    139 	rmdir(".oldroot");
    140 	ok(mkdir, "data", 0755);
    141 	ok(mkdir, "dev", 0755);
    142 	ok(mkdir, ".oldroot", 0755);
    143 
    144 	if (databox) {
    145 		/* mount read-write data volume */
    146 		ok(mount, databox, "data", NULL, MS_BIND|MS_NOSUID|MS_NODEV, NULL);
    147 	}
    148 
    149 	if (with_proc) {
    150 		rmdir(".oldproc");
    151 		rmdir("proc");
    152 		ok(mkdir, ".oldproc", 0755);
    153 		ok(mkdir, "proc", 0755);
    154 		/* we need to hang on to the old proc in order to mount our
    155 		 * new proc later on
    156 		 */
    157 		ok(mount, "/proc", ".oldproc", NULL, MS_BIND|MS_REC, NULL);
    158 	}
    159 	if (with_sys) {
    160 		rmdir("sys");
    161 		ok(mkdir, "sys", 0755);
    162 		ok(mount, "/sys", "sys", NULL, MS_BIND|MS_REC, NULL);
    163 	}
    164 
    165 	if (with_dev) {
    166 		ok(mount, "/dev", "dev", NULL, MS_BIND|MS_REC, NULL);
    167 	} else {
    168 		/* mount a tmpfs for dev */
    169 		ok(mount, "sandbox-dev", "dev", "tmpfs",
    170 			MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_NOATIME,
    171 			"size=64k,nr_inodes=16,mode=755");
    172 
    173 		/* populate bare minimum device nodes */
    174 		/* create bind points */
    175 		ok(mknod, "dev/null", S_IFREG | 0666, 0);
    176 		ok(mknod, "dev/zero", S_IFREG | 0666, 0);
    177 		ok(mknod, "dev/random", S_IFREG | 0666, 0);
    178 		ok(mknod, "dev/urandom", S_IFREG | 0666, 0);
    179 
    180 		/* bind mount the device nodes we want */ 
    181 		ok(mount, "/dev/null", "dev/null", NULL, MS_BIND, NULL);
    182 		ok(mount, "/dev/zero", "dev/zero", NULL, MS_BIND, NULL);
    183 		ok(mount, "/dev/urandom", "dev/random", NULL, MS_BIND, NULL);
    184 		ok(mount, "/dev/urandom", "dev/urandom", NULL, MS_BIND, NULL);
    185 
    186 		/* note: MS_RDONLY does not work when doing the initial bind */
    187 		ok(mount, "dev", "dev", NULL,
    188 			MS_REMOUNT | MS_BIND | MS_NOEXEC |
    189 			MS_NOSUID | MS_NODEV | MS_RDONLY,
    190 			NULL);
    191 	}
    192 	if (with_tmp) {
    193 		rmdir("tmp");
    194 		ok(mkdir, "tmp", 0770);
    195 		ok(mount, "sandbox-tmp", "tmp", "tmpfs",
    196 			MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_NOATIME,
    197 			"size=16m,nr_inodes=4k,mode=770");
    198 	}
    199 
    200 	/* map new UID/GID to outer UID/GID */
    201 	sprintf(buf, "%d %d 1\n", newuid, uid);
    202 	fd = ok(open, "/proc/self/uid_map", O_WRONLY);
    203 	ok(write, fd, buf, strlen(buf));
    204 	ok(close, fd);
    205 
    206 	/* must disallow setgroups() before writing to gid_map on
    207 	 * versions of linux with this feature:
    208 	 */
    209 	if ((fd = open("/proc/self/setgroups", O_WRONLY)) >= 0) {
    210 		ok(write, fd, "deny", 4);
    211 		ok(close, fd);
    212 	}
    213 	sprintf(buf, "%d %d 1\n", newgid, gid);
    214 	fd = ok(open, "/proc/self/gid_map", O_WRONLY);
    215 	ok(write, fd, buf, strlen(buf));
    216 	ok(close, fd);
    217 
    218 	/* initially we're nobody, change to newgid/newuid */
    219 	ok(setresgid, newgid, newgid, newgid);
    220 	ok(setresuid, newuid, newuid, newuid);
    221 
    222 	/* sandbox becomes our new root, detach the old one */
    223 	ok(pivot_root, ".", ".oldroot");
    224 	ok(umount2, ".oldroot", MNT_DETACH);
    225 	ok(rmdir, ".oldroot");
    226 
    227 	/* we must fork to become pid 1 in the new pid namespace */
    228 	cpid = ok(fork);
    229 
    230 	if (cpid == 0) {
    231 		if (getpid() != 1) {
    232 			fprintf(stderr, "mkbox child pid != 1?!\n");
    233 			return -1;
    234 		}
    235 		if (with_proc) {
    236 			ok(mount, "/proc", "/proc", "proc", MS_NOSUID, NULL);
    237 			ok(umount2, "/.oldproc", MNT_DETACH);
    238 			rmdir("/.oldproc");
    239 		}
    240 
    241 		/* remount root to finalize permissions */
    242 		ok(mount, "/", "/", NULL,
    243 			MS_RDONLY|MS_BIND|MS_NOSUID|MS_REMOUNT,
    244 			NULL);
    245 
    246 		/* discard all capability bits */
    247 		ok(dropcaps);
    248 
    249 		ok(execl, initbin, initbin, NULL);
    250 		exit(0);
    251 	}
    252 
    253 	fprintf(stderr, "mkbox: pid=%d, child=%d\n", getpid(), cpid);
    254 	for (;;) {
    255 		int status = 0;
    256 		pid_t pid = wait(&status);
    257 		if (pid < 0) {
    258 			fprintf(stderr, "mkbox: wait() errno=%d\n", errno);
    259 			continue;
    260 		}
    261 		fprintf(stderr, "mkbox: proc %d exited with status %d\n",
    262 			pid, status);
    263 		if (pid == cpid)
    264 			break;
    265 	}
    266 
    267 	fprintf(stderr, "mkbox: exiting\n");
    268 	return 0;
    269 }