/*
 * Copyright (c) 2019 Alexey Dobriyan <adobriyan@gmail.com>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
/*
 * Fork and exec a tiny 1-page executable which precisely controls its VM.
 * Test /proc/$PID/maps
 * Test /proc/$PID/smaps
 * Test /proc/$PID/smaps_rollup
 * Test /proc/$PID/statm
 *
 * FIXME require CONFIG_TMPFS which can be disabled
 * FIXME test other values from "smaps"
 * FIXME support other archs
 */
| 27 | #undef NDEBUG |
| 28 | #include <assert.h> |
| 29 | #include <errno.h> |
| 30 | #include <sched.h> |
| 31 | #include <signal.h> |
| 32 | #include <stdbool.h> |
| 33 | #include <stdint.h> |
| 34 | #include <stdio.h> |
| 35 | #include <string.h> |
| 36 | #include <stdlib.h> |
| 37 | #include <sys/mount.h> |
| 38 | #include <sys/types.h> |
| 39 | #include <sys/stat.h> |
| 40 | #include <sys/wait.h> |
| 41 | #include <fcntl.h> |
| 42 | #include <unistd.h> |
| 43 | #include <sys/syscall.h> |
| 44 | #include <sys/uio.h> |
| 45 | #include <linux/kdev_t.h> |
| 46 | #include <sys/time.h> |
| 47 | #include <sys/resource.h> |
| 48 | |
/*
 * Issue the execveat system call directly through syscall().
 * All arguments are forwarded unchanged and the raw syscall() result is
 * handed back to the caller.
 */
static inline long sys_execveat(int dirfd, const char *pathname, char **argv, char **envp, int flags)
{
	long rc;

	rc = syscall(SYS_execveat, dirfd, pathname, argv, envp, flags);
	return rc;
}
| 53 | |
/*
 * Move the process into its own mount namespace, mark the whole mount
 * tree private and mount a fresh tmpfs on /tmp.
 *
 * Exits with status 4 when unshare() fails with ENOSYS or EPERM, and with
 * status 1 on any other failure.
 */
static void make_private_tmp(void)
{
	if (unshare(CLONE_NEWNS) == -1) {
		const bool unavailable = (errno == ENOSYS || errno == EPERM);

		exit(unavailable ? 4 : 1);
	}

	/* The second mount() is only attempted when the first one succeeded. */
	if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) == -1 ||
	    mount(NULL, "/tmp", "tmpfs", 0, NULL) == -1) {
		exit(1);
	}
}
| 69 | |
/* PID of the forked test child; stays -1 until main() has forked. */
static pid_t pid = -1;

/*
 * atexit() handler: send SIGTERM to the test child, if one was created.
 * Does nothing while pid is still -1 or when running in the child (pid 0).
 */
static void ate(void)
{
	if (pid <= 0) {
		return;
	}
	kill(pid, SIGTERM);
}
| 77 | |
/*
 * ELF64 file header, declared locally so the test does not depend on
 * <elf.h>. Field order and widths give a 64-byte structure without
 * padding; make_exe() writes it verbatim at offset 0 of the executable.
 */
struct elf64_hdr {
	uint8_t e_ident[16];	/* 0x7f 'E' 'L' 'F', class, data, version, ABI, padding */
	uint16_t e_type, e_machine;
	uint32_t e_version;
	uint64_t e_entry, e_phoff, e_shoff;	/* entry address, program/section header offsets */
	uint32_t e_flags;
	uint16_t e_ehsize, e_phentsize, e_phnum;
	uint16_t e_shentsize, e_shnum, e_shstrndx;
};
| 94 | |
/*
 * ELF64 program header (one segment description), declared locally so
 * the test does not depend on <elf.h>. 56 bytes, no padding; make_exe()
 * writes exactly one of these right after the file header.
 */
struct elf64_phdr {
	uint32_t p_type, p_flags;
	uint64_t p_offset;		/* offset of the segment in the file */
	uint64_t p_vaddr, p_paddr;	/* virtual / physical load address */
	uint64_t p_filesz, p_memsz;	/* bytes in the file / in memory */
	uint64_t p_align;
};
| 105 | |
| 106 | #ifdef __x86_64__ |
#define PAGE_SIZE	4096
#define VADDR		(1UL << 32)	/* load address of the one-page executable */
#define MAPS_OFFSET	73		/* column of the name field in a maps line */

/* Comma-separated little-endian bytes of a 32-bit / 64-bit immediate. */
#define le32_bytes(x) \
	(x)&0xff, ((x)>>8)&0xff, ((x)>>16)&0xff, ((x)>>24)&0xff
#define le64_bytes(x) \
	le32_bytes(x), le32_bytes((x)>>32)

/*
 * Raw x86_64 instruction encodings used to assemble the payload by hand.
 * NOTE: from this point on "syscall" is an object-like macro expanding to
 * the two opcode bytes, so the libc syscall() function must not be
 * referenced below.
 */
#define syscall		0x0f, 0x05
#define mov_rdi(x)	0x48, 0xbf, le64_bytes(x)
#define mov_rsi(x)	0x48, 0xbe, le64_bytes(x)
#define mov_eax(x)	0xb8, le32_bytes(x)

/*
 * Machine code executed by the child: unmap everything above its own
 * page, write one byte to fd 0 to ping the parent, then pause() forever.
 */
static const uint8_t payload[] = {
	/* Casually unmap stack, vDSO and everything else. */
	/* munmap */
	mov_rdi(VADDR + 4096),
	mov_rsi((1ULL << 47) - 4096 - VADDR - 4096),
	mov_eax(11),
	syscall,

	/* Ping parent. */
	/* write(0, &c, 1); */
	0x31, 0xff,					/* xor edi, edi */
	0x48, 0x8d, 0x35, 0x00, 0x00, 0x00, 0x00,	/* lea rsi, [rip] */
	0xba, 0x01, 0x00, 0x00, 0x00,			/* mov edx, 1 */
	mov_eax(1),
	syscall,

	/* 1: pause(); */
	mov_eax(34),
	syscall,

	/* -9: back over "mov eax, 34; syscall; jmp" (5 + 2 + 2 bytes). */
	0xeb, 0xf7,	/* jmp 1b */
};
| 147 | |
| 148 | static int make_exe(const uint8_t *payload, size_t len) |
| 149 | { |
| 150 | struct elf64_hdr h; |
| 151 | struct elf64_phdr ph; |
| 152 | |
| 153 | struct iovec iov[3] = { |
| 154 | {&h, sizeof(struct elf64_hdr)}, |
| 155 | {&ph, sizeof(struct elf64_phdr)}, |
| 156 | {(void *)payload, len}, |
| 157 | }; |
| 158 | int fd, fd1; |
| 159 | char buf[64]; |
| 160 | |
| 161 | memset(&h, 0, sizeof(h)); |
| 162 | h.e_ident[0] = 0x7f; |
| 163 | h.e_ident[1] = 'E'; |
| 164 | h.e_ident[2] = 'L'; |
| 165 | h.e_ident[3] = 'F'; |
| 166 | h.e_ident[4] = 2; |
| 167 | h.e_ident[5] = 1; |
| 168 | h.e_ident[6] = 1; |
| 169 | h.e_ident[7] = 0; |
| 170 | h.e_type = 2; |
| 171 | h.e_machine = 0x3e; |
| 172 | h.e_version = 1; |
| 173 | h.e_entry = VADDR + sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr); |
| 174 | h.e_phoff = sizeof(struct elf64_hdr); |
| 175 | h.e_shoff = 0; |
| 176 | h.e_flags = 0; |
| 177 | h.e_ehsize = sizeof(struct elf64_hdr); |
| 178 | h.e_phentsize = sizeof(struct elf64_phdr); |
| 179 | h.e_phnum = 1; |
| 180 | h.e_shentsize = 0; |
| 181 | h.e_shnum = 0; |
| 182 | h.e_shstrndx = 0; |
| 183 | |
| 184 | memset(&ph, 0, sizeof(ph)); |
| 185 | ph.p_type = 1; |
| 186 | ph.p_flags = (1<<2)|1; |
| 187 | ph.p_offset = 0; |
| 188 | ph.p_vaddr = VADDR; |
| 189 | ph.p_paddr = 0; |
| 190 | ph.p_filesz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len; |
| 191 | ph.p_memsz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len; |
| 192 | ph.p_align = 4096; |
| 193 | |
| 194 | fd = openat(AT_FDCWD, "/tmp", O_WRONLY|O_EXCL|O_TMPFILE, 0700); |
| 195 | if (fd == -1) { |
| 196 | exit(1); |
| 197 | } |
| 198 | |
| 199 | if (writev(fd, iov, 3) != sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len) { |
| 200 | exit(1); |
| 201 | } |
| 202 | |
| 203 | /* Avoid ETXTBSY on exec. */ |
| 204 | snprintf(buf, sizeof(buf), "/proc/self/fd/%u", fd); |
| 205 | fd1 = open(buf, O_RDONLY|O_CLOEXEC); |
| 206 | close(fd); |
| 207 | |
| 208 | return fd1; |
| 209 | } |
| 210 | #endif |
| 211 | |
/* True when the vsyscall page is expected to show up in /proc/$PID/maps. */
static bool g_vsyscall = false;

/*
 * Expected /proc/$PID/maps line for the vsyscall page.
 *
 * The name field of a maps line starts at column 73 (see MAPS_OFFSET,
 * which the first expected line is padded to as well), so the 55
 * characters up to and including the inode number are followed by 18
 * spaces of padding before "[vsyscall]".
 */
static const char str_vsyscall[] =
	"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0"
	"      " "      " "      "	/* 3 x 6 = 18 spaces */
	"[vsyscall]\n";
| 216 | |
| 217 | #ifdef __x86_64__ |
/*
 * SIGSEGV handler installed in the vsyscall probe child: terminate
 * immediately with status 1. None of the handler arguments are used.
 */
static void sigaction_SIGSEGV(int signo, siginfo_t *info, void *uctx)
{
	(void)signo;
	(void)info;
	(void)uctx;
	_exit(1);
}
| 222 | |
| 223 | /* |
| 224 | * vsyscall page can't be unmapped, probe it with memory load. |
| 225 | */ |
| 226 | static void vsyscall(void) |
| 227 | { |
| 228 | pid_t pid; |
| 229 | int wstatus; |
| 230 | |
| 231 | pid = fork(); |
| 232 | if (pid < 0) { |
| 233 | fprintf(stderr, "fork, errno %d\n", errno); |
| 234 | exit(1); |
| 235 | } |
| 236 | if (pid == 0) { |
| 237 | struct rlimit rlim = {0, 0}; |
| 238 | (void)setrlimit(RLIMIT_CORE, &rlim); |
| 239 | |
| 240 | /* Hide "segfault at ffffffffff600000" messages. */ |
| 241 | struct sigaction act; |
| 242 | memset(&act, 0, sizeof(struct sigaction)); |
| 243 | act.sa_flags = SA_SIGINFO; |
| 244 | act.sa_sigaction = sigaction_SIGSEGV; |
| 245 | (void)sigaction(SIGSEGV, &act, NULL); |
| 246 | |
| 247 | *(volatile int *)0xffffffffff600000UL; |
| 248 | exit(0); |
| 249 | } |
| 250 | waitpid(pid, &wstatus, 0); |
| 251 | if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0) { |
| 252 | g_vsyscall = true; |
| 253 | } |
| 254 | } |
| 255 | |
| 256 | int main(void) |
| 257 | { |
| 258 | int pipefd[2]; |
| 259 | int exec_fd; |
| 260 | |
| 261 | vsyscall(); |
| 262 | |
| 263 | atexit(ate); |
| 264 | |
| 265 | make_private_tmp(); |
| 266 | |
| 267 | /* Reserve fd 0 for 1-byte pipe ping from child. */ |
| 268 | close(0); |
| 269 | if (open("/", O_RDONLY|O_DIRECTORY|O_PATH) != 0) { |
| 270 | return 1; |
| 271 | } |
| 272 | |
| 273 | exec_fd = make_exe(payload, sizeof(payload)); |
| 274 | |
| 275 | if (pipe(pipefd) == -1) { |
| 276 | return 1; |
| 277 | } |
| 278 | if (dup2(pipefd[1], 0) != 0) { |
| 279 | return 1; |
| 280 | } |
| 281 | |
| 282 | pid = fork(); |
| 283 | if (pid == -1) { |
| 284 | return 1; |
| 285 | } |
| 286 | if (pid == 0) { |
| 287 | sys_execveat(exec_fd, "", NULL, NULL, AT_EMPTY_PATH); |
| 288 | return 1; |
| 289 | } |
| 290 | |
| 291 | char _; |
| 292 | if (read(pipefd[0], &_, 1) != 1) { |
| 293 | return 1; |
| 294 | } |
| 295 | |
| 296 | struct stat st; |
| 297 | if (fstat(exec_fd, &st) == -1) { |
| 298 | return 1; |
| 299 | } |
| 300 | |
| 301 | /* Generate "head -n1 /proc/$PID/maps" */ |
| 302 | char buf0[256]; |
| 303 | memset(buf0, ' ', sizeof(buf0)); |
| 304 | int len = snprintf(buf0, sizeof(buf0), |
| 305 | "%08lx-%08lx r-xp 00000000 %02lx:%02lx %llu", |
| 306 | VADDR, VADDR + PAGE_SIZE, |
| 307 | MAJOR(st.st_dev), MINOR(st.st_dev), |
| 308 | (unsigned long long)st.st_ino); |
| 309 | buf0[len] = ' '; |
| 310 | snprintf(buf0 + MAPS_OFFSET, sizeof(buf0) - MAPS_OFFSET, |
| 311 | "/tmp/#%llu (deleted)\n", (unsigned long long)st.st_ino); |
| 312 | |
| 313 | /* Test /proc/$PID/maps */ |
| 314 | { |
| 315 | const size_t len = strlen(buf0) + (g_vsyscall ? strlen(str_vsyscall) : 0); |
| 316 | char buf[256]; |
| 317 | ssize_t rv; |
| 318 | int fd; |
| 319 | |
| 320 | snprintf(buf, sizeof(buf), "/proc/%u/maps", pid); |
| 321 | fd = open(buf, O_RDONLY); |
| 322 | if (fd == -1) { |
| 323 | return 1; |
| 324 | } |
| 325 | rv = read(fd, buf, sizeof(buf)); |
| 326 | assert(rv == len); |
| 327 | assert(memcmp(buf, buf0, strlen(buf0)) == 0); |
| 328 | if (g_vsyscall) { |
| 329 | assert(memcmp(buf + strlen(buf0), str_vsyscall, strlen(str_vsyscall)) == 0); |
| 330 | } |
| 331 | } |
| 332 | |
| 333 | /* Test /proc/$PID/smaps */ |
| 334 | { |
| 335 | char buf[4096]; |
| 336 | ssize_t rv; |
| 337 | int fd; |
| 338 | |
| 339 | snprintf(buf, sizeof(buf), "/proc/%u/smaps", pid); |
| 340 | fd = open(buf, O_RDONLY); |
| 341 | if (fd == -1) { |
| 342 | return 1; |
| 343 | } |
| 344 | rv = read(fd, buf, sizeof(buf)); |
| 345 | assert(0 <= rv && rv <= sizeof(buf)); |
| 346 | |
| 347 | assert(rv >= strlen(buf0)); |
| 348 | assert(memcmp(buf, buf0, strlen(buf0)) == 0); |
| 349 | |
| 350 | #define RSS1 "Rss: 4 kB\n" |
| 351 | #define RSS2 "Rss: 0 kB\n" |
| 352 | #define PSS1 "Pss: 4 kB\n" |
| 353 | #define PSS2 "Pss: 0 kB\n" |
| 354 | assert(memmem(buf, rv, RSS1, strlen(RSS1)) || |
| 355 | memmem(buf, rv, RSS2, strlen(RSS2))); |
| 356 | assert(memmem(buf, rv, PSS1, strlen(PSS1)) || |
| 357 | memmem(buf, rv, PSS2, strlen(PSS2))); |
| 358 | |
| 359 | static const char *S[] = { |
| 360 | "Size: 4 kB\n", |
| 361 | "KernelPageSize: 4 kB\n", |
| 362 | "MMUPageSize: 4 kB\n", |
| 363 | "Anonymous: 0 kB\n", |
| 364 | "AnonHugePages: 0 kB\n", |
| 365 | "Shared_Hugetlb: 0 kB\n", |
| 366 | "Private_Hugetlb: 0 kB\n", |
| 367 | "Locked: 0 kB\n", |
| 368 | }; |
| 369 | int i; |
| 370 | |
| 371 | for (i = 0; i < sizeof(S)/sizeof(S[0]); i++) { |
| 372 | assert(memmem(buf, rv, S[i], strlen(S[i]))); |
| 373 | } |
| 374 | |
| 375 | if (g_vsyscall) { |
| 376 | assert(memmem(buf, rv, str_vsyscall, strlen(str_vsyscall))); |
| 377 | } |
| 378 | } |
| 379 | |
| 380 | /* Test /proc/$PID/smaps_rollup */ |
| 381 | { |
| 382 | char bufr[256]; |
| 383 | memset(bufr, ' ', sizeof(bufr)); |
| 384 | len = snprintf(bufr, sizeof(bufr), |
| 385 | "%08lx-%08lx ---p 00000000 00:00 0", |
| 386 | VADDR, VADDR + PAGE_SIZE); |
| 387 | bufr[len] = ' '; |
| 388 | snprintf(bufr + MAPS_OFFSET, sizeof(bufr) - MAPS_OFFSET, |
| 389 | "[rollup]\n"); |
| 390 | |
| 391 | char buf[1024]; |
| 392 | ssize_t rv; |
| 393 | int fd; |
| 394 | |
| 395 | snprintf(buf, sizeof(buf), "/proc/%u/smaps_rollup", pid); |
| 396 | fd = open(buf, O_RDONLY); |
| 397 | if (fd == -1) { |
| 398 | return 1; |
| 399 | } |
| 400 | rv = read(fd, buf, sizeof(buf)); |
| 401 | assert(0 <= rv && rv <= sizeof(buf)); |
| 402 | |
| 403 | assert(rv >= strlen(bufr)); |
| 404 | assert(memcmp(buf, bufr, strlen(bufr)) == 0); |
| 405 | |
| 406 | assert(memmem(buf, rv, RSS1, strlen(RSS1)) || |
| 407 | memmem(buf, rv, RSS2, strlen(RSS2))); |
| 408 | assert(memmem(buf, rv, PSS1, strlen(PSS1)) || |
| 409 | memmem(buf, rv, PSS2, strlen(PSS2))); |
| 410 | |
| 411 | static const char *S[] = { |
| 412 | "Anonymous: 0 kB\n", |
| 413 | "AnonHugePages: 0 kB\n", |
| 414 | "Shared_Hugetlb: 0 kB\n", |
| 415 | "Private_Hugetlb: 0 kB\n", |
| 416 | "Locked: 0 kB\n", |
| 417 | }; |
| 418 | int i; |
| 419 | |
| 420 | for (i = 0; i < sizeof(S)/sizeof(S[0]); i++) { |
| 421 | assert(memmem(buf, rv, S[i], strlen(S[i]))); |
| 422 | } |
| 423 | } |
| 424 | |
| 425 | /* Test /proc/$PID/statm */ |
| 426 | { |
| 427 | char buf[64]; |
| 428 | ssize_t rv; |
| 429 | int fd; |
| 430 | |
| 431 | snprintf(buf, sizeof(buf), "/proc/%u/statm", pid); |
| 432 | fd = open(buf, O_RDONLY); |
| 433 | if (fd == -1) { |
| 434 | return 1; |
| 435 | } |
| 436 | rv = read(fd, buf, sizeof(buf)); |
| 437 | assert(rv == 7 * 2); |
| 438 | |
| 439 | assert(buf[0] == '1'); /* ->total_vm */ |
| 440 | assert(buf[1] == ' '); |
| 441 | assert(buf[2] == '0' || buf[2] == '1'); /* rss */ |
| 442 | assert(buf[3] == ' '); |
| 443 | assert(buf[4] == '0' || buf[2] == '1'); /* file rss */ |
| 444 | assert(buf[5] == ' '); |
| 445 | assert(buf[6] == '1'); /* ELF executable segments */ |
| 446 | assert(buf[7] == ' '); |
| 447 | assert(buf[8] == '0'); |
| 448 | assert(buf[9] == ' '); |
| 449 | assert(buf[10] == '0'); /* ->data_vm + ->stack_vm */ |
| 450 | assert(buf[11] == ' '); |
| 451 | assert(buf[12] == '0'); |
| 452 | assert(buf[13] == '\n'); |
| 453 | } |
| 454 | |
| 455 | return 0; |
| 456 | } |
| 457 | #else |
/*
 * No hand-assembled payload exists for this architecture, so there is
 * nothing to run. Status 4 is the same value make_private_tmp() uses
 * when the environment does not allow the test to proceed.
 */
int main(void)
{
	const int status = 4;

	return status;
}
| 462 | #endif |