mkbox.c (7265B)
1 /* mkbox.c 2 * 3 * Copyright 2014 Brian Swetland <swetland@frotz.net> 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 #define _GNU_SOURCE 19 20 #include <stdio.h> 21 #include <stdlib.h> 22 #include <unistd.h> 23 #include <sched.h> 24 #include <errno.h> 25 #include <string.h> 26 #include <sys/stat.h> 27 #include <sys/types.h> 28 #include <sys/mount.h> 29 #include <sys/wait.h> 30 #include <fcntl.h> 31 #include <linux/capability.h> 32 33 /* can't find headers for these, but they're in glibc... */ 34 int pivot_root(const char *new_root, const char *put_old); 35 int capset(cap_user_header_t h, cap_user_data_t d); 36 int capset(cap_user_header_t h, cap_user_data_t d); 37 38 static int checkreturn(int res, const char *name, int line) { 39 if (res >= 0) 40 return res; 41 fprintf(stderr, "mkbox.c:%d: error: %s() failed: r=%d errno=%d (%s)\n", 42 line, name, res, errno, strerror(errno)); 43 exit(-1); 44 } 45 46 #define ok(fname, arg...) checkreturn(fname(arg), #fname, __LINE__) 47 48 int dropcaps(void) { 49 struct __user_cap_header_struct header; 50 struct __user_cap_data_struct data[_LINUX_CAPABILITY_U32S_3]; 51 header.version = _LINUX_CAPABILITY_VERSION_3; 52 header.pid = 0; 53 memset(data, 0, sizeof(data)); 54 return capset(&header, data); 55 } 56 57 void usage(void) { 58 fprintf(stderr, 59 "usage: mkbox [ options ]* <root>\n" 60 "\n" 61 "options: --with-dev mount /dev at sandbox's /dev\n" 62 " (otherwise only /dev/{null,zero,random})\n" 63 " --with-sys mount /sys at sandbox's /sys\n" 64 " --with-proc mount /proc at sandbox's /proc\n" 65 " --with-tmp mount tmpfs at sandbox's /tmp\n" 66 " --data=<path> mount <path> at sandbox's /data (rw)\n" 67 " --init=<path> exec <path> in sandbox (default: /bin/sh)\n" 68 "\n" 69 ); 70 } 71 72 int main(int argc, char **argv) { 73 int newuid = 3333; 74 int newgid = 3333; 75 int with_sys = 0; 76 int with_proc = 0; 77 int with_dev = 0; 78 int with_tmp = 0; 79 char buf[1024]; 80 int fd; 81 const char *sandbox = NULL; 82 const char *databox = NULL; 83 const char *initbin = "/bin/sh"; 84 uid_t uid; 85 gid_t gid; 86 pid_t cpid; 87 88 argv++; 89 argc--; 90 while (argc > 0) { 91 if (argv[0][0] != '-') break; 92 if (!strcmp(argv[0], "--with-sys")) { 93 with_sys = 1; 94 } else if (!strcmp(argv[0], "--with-proc")) { 95 with_proc = 1; 96 } else if (!strcmp(argv[0], "--with-dev")) { 97 with_dev = 1; 98 } else if (!strcmp(argv[0], "--with-tmp")) { 99 with_tmp = 1; 100 } else if (!strncmp(argv[0], "--init=", 7)) { 101 initbin = argv[0] + 7; 102 } else if (!strncmp(argv[0], "--data=", 7)) { 103 databox = argv[0] + 7; 104 } else { 105 usage(); 106 return -1; 107 } 108 argv++; 109 argc--; 110 } 111 if (argc != 1) { 112 usage(); 113 return -1; 114 } 115 sandbox = argv[0]; 116 117 uid = getuid(); 118 gid = getgid(); 119 120 ok(unshare, CLONE_NEWPID| 121 CLONE_NEWNS|CLONE_NEWUTS| 122 CLONE_NEWIPC|CLONE_NEWUSER); 123 124 /* ensure that changes to our mount namespace do not "leak" to 125 * outside namespaces (what mount --make-rprivate / does) 126 */ 127 mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL); 128 129 /* mount the sandbox on top of itself in our new namespace */ 130 /* it will become our root filesystem */ 131 ok(mount, sandbox, sandbox, NULL, MS_BIND|MS_NOSUID, NULL); 132 133 /* step inside the to-be-root-directory */ 134 ok(chdir, sandbox); 135 136 /* setup needed subdirectories */ 137 rmdir("data"); 138 rmdir("dev"); 139 rmdir(".oldroot"); 140 ok(mkdir, "data", 0755); 141 ok(mkdir, "dev", 0755); 142 ok(mkdir, ".oldroot", 0755); 143 144 if (databox) { 145 /* mount read-write data volume */ 146 ok(mount, databox, "data", NULL, MS_BIND|MS_NOSUID|MS_NODEV, NULL); 147 } 148 149 if (with_proc) { 150 rmdir(".oldproc"); 151 rmdir("proc"); 152 ok(mkdir, ".oldproc", 0755); 153 ok(mkdir, "proc", 0755); 154 /* we need to hang on to the old proc in order to mount our 155 * new proc later on 156 */ 157 ok(mount, "/proc", ".oldproc", NULL, MS_BIND|MS_REC, NULL); 158 } 159 if (with_sys) { 160 rmdir("sys"); 161 ok(mkdir, "sys", 0755); 162 ok(mount, "/sys", "sys", NULL, MS_BIND|MS_REC, NULL); 163 } 164 165 if (with_dev) { 166 ok(mount, "/dev", "dev", NULL, MS_BIND|MS_REC, NULL); 167 } else { 168 /* mount a tmpfs for dev */ 169 ok(mount, "sandbox-dev", "dev", "tmpfs", 170 MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_NOATIME, 171 "size=64k,nr_inodes=16,mode=755"); 172 173 /* populate bare minimum device nodes */ 174 /* create bind points */ 175 ok(mknod, "dev/null", S_IFREG | 0666, 0); 176 ok(mknod, "dev/zero", S_IFREG | 0666, 0); 177 ok(mknod, "dev/random", S_IFREG | 0666, 0); 178 ok(mknod, "dev/urandom", S_IFREG | 0666, 0); 179 180 /* bind mount the device nodes we want */ 181 ok(mount, "/dev/null", "dev/null", NULL, MS_BIND, NULL); 182 ok(mount, "/dev/zero", "dev/zero", NULL, MS_BIND, NULL); 183 ok(mount, "/dev/urandom", "dev/random", NULL, MS_BIND, NULL); 184 ok(mount, "/dev/urandom", "dev/urandom", NULL, MS_BIND, NULL); 185 186 /* note: MS_RDONLY does not work when doing the initial bind */ 187 ok(mount, "dev", "dev", NULL, 188 MS_REMOUNT | MS_BIND | MS_NOEXEC | 189 MS_NOSUID | MS_NODEV | MS_RDONLY, 190 NULL); 191 } 192 if (with_tmp) { 193 rmdir("tmp"); 194 ok(mkdir, "tmp", 0770); 195 ok(mount, "sandbox-tmp", "tmp", "tmpfs", 196 MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_NOATIME, 197 "size=16m,nr_inodes=4k,mode=770"); 198 } 199 200 /* map new UID/GID to outer UID/GID */ 201 sprintf(buf, "%d %d 1\n", newuid, uid); 202 fd = ok(open, "/proc/self/uid_map", O_WRONLY); 203 ok(write, fd, buf, strlen(buf)); 204 ok(close, fd); 205 206 /* must disallow setgroups() before writing to gid_map on 207 * versions of linux with this feature: 208 */ 209 if ((fd = open("/proc/self/setgroups", O_WRONLY)) >= 0) { 210 ok(write, fd, "deny", 4); 211 ok(close, fd); 212 } 213 sprintf(buf, "%d %d 1\n", newgid, gid); 214 fd = ok(open, "/proc/self/gid_map", O_WRONLY); 215 ok(write, fd, buf, strlen(buf)); 216 ok(close, fd); 217 218 /* initially we're nobody, change to newgid/newuid */ 219 ok(setresgid, newgid, newgid, newgid); 220 ok(setresuid, newuid, newuid, newuid); 221 222 /* sandbox becomes our new root, detach the old one */ 223 ok(pivot_root, ".", ".oldroot"); 224 ok(umount2, ".oldroot", MNT_DETACH); 225 ok(rmdir, ".oldroot"); 226 227 /* we must fork to become pid 1 in the new pid namespace */ 228 cpid = ok(fork); 229 230 if (cpid == 0) { 231 if (getpid() != 1) { 232 fprintf(stderr, "mkbox child pid != 1?!\n"); 233 return -1; 234 } 235 if (with_proc) { 236 ok(mount, "/proc", "/proc", "proc", MS_NOSUID, NULL); 237 ok(umount2, "/.oldproc", MNT_DETACH); 238 rmdir("/.oldproc"); 239 } 240 241 /* remount root to finalize permissions */ 242 ok(mount, "/", "/", NULL, 243 MS_RDONLY|MS_BIND|MS_NOSUID|MS_REMOUNT, 244 NULL); 245 246 /* discard all capability bits */ 247 ok(dropcaps); 248 249 ok(execl, initbin, initbin, NULL); 250 exit(0); 251 } 252 253 fprintf(stderr, "mkbox: pid=%d, child=%d\n", getpid(), cpid); 254 for (;;) { 255 int status = 0; 256 pid_t pid = wait(&status); 257 if (pid < 0) { 258 fprintf(stderr, "mkbox: wait() errno=%d\n", errno); 259 continue; 260 } 261 fprintf(stderr, "mkbox: proc %d exited with status %d\n", 262 pid, status); 263 if (pid == cpid) 264 break; 265 } 266 267 fprintf(stderr, "mkbox: exiting\n"); 268 return 0; 269 }