Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame^] | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | /* |
| 3 | * Written by Dave Hansen <dave.hansen@intel.com> |
| 4 | */ |
| 5 | |
| 6 | #include <stdlib.h> |
| 7 | #include <sys/types.h> |
| 8 | #include <unistd.h> |
| 9 | #include <stdio.h> |
| 10 | #include <errno.h> |
| 11 | #include <sys/types.h> |
| 12 | #include <sys/stat.h> |
| 13 | #include <unistd.h> |
| 14 | #include <sys/mman.h> |
| 15 | #include <string.h> |
| 16 | #include <fcntl.h> |
| 17 | #include "mpx-debug.h" |
| 18 | #include "mpx-mm.h" |
| 19 | #include "mpx-hw.h" |
| 20 | |
/* Address of the bounds directory being inspected; set by main()/inspect_me(). */
unsigned long bounds_dir_global;

/*
 * Announce the failing call site on both stderr and stdout, then abort().
 * Call it through the mpx_dig_abort() wrapper below so that the file,
 * function and line of the caller are filled in automatically.
 */
static inline void __mpx_dig_abort(const char *file, const char *func, int line)
{
	fprintf(stderr, "MPX dig abort @ %s::%d in %s()\n", file, line, func);
	fprintf(stdout, "MPX dig abort @ %s::%d in %s()\n", file, line, func);
	abort();
}
#define mpx_dig_abort() __mpx_dig_abort(__FILE__, __func__, __LINE__)
| 30 | |
| 31 | /* |
| 32 | * run like this (BDIR finds the probable bounds directory): |
| 33 | * |
| 34 | * BDIR="$(cat /proc/$pid/smaps | grep -B1 2097152 \ |
| 35 | * | head -1 | awk -F- '{print $1}')"; |
| 36 | * ./mpx-dig $pid 0x$BDIR |
| 37 | * |
| 38 | * NOTE: |
| 39 | * assumes that the only 2097152-kb VMA is the bounds dir |
| 40 | */ |
| 41 | |
| 42 | long nr_incore(void *ptr, unsigned long size_bytes) |
| 43 | { |
| 44 | int i; |
| 45 | long ret = 0; |
| 46 | long vec_len = size_bytes / PAGE_SIZE; |
| 47 | unsigned char *vec = malloc(vec_len); |
| 48 | int incore_ret; |
| 49 | |
| 50 | if (!vec) |
| 51 | mpx_dig_abort(); |
| 52 | |
| 53 | incore_ret = mincore(ptr, size_bytes, vec); |
| 54 | if (incore_ret) { |
| 55 | printf("mincore ret: %d\n", incore_ret); |
| 56 | perror("mincore"); |
| 57 | mpx_dig_abort(); |
| 58 | } |
| 59 | for (i = 0; i < vec_len; i++) |
| 60 | ret += vec[i]; |
| 61 | free(vec); |
| 62 | return ret; |
| 63 | } |
| 64 | |
/*
 * Open /proc/<pid>/<file> read-only.
 *
 * Returns the new file descriptor, or the negative value returned by open()
 * after reporting the failing path with perror().  The path is assembled in
 * a static buffer, so concurrent callers would share it.
 */
int open_proc(int pid, char *file)
{
	static char path[100];
	int fd;

	snprintf(path, sizeof(path), "/proc/%d/%s", pid, file);

	fd = open(path, O_RDONLY);
	if (fd >= 0)
		return fd;

	perror(path);
	return fd;
}
| 77 | |
/* One mapping of the inspected address space, covering [start, end). */
struct vaddr_range {
	unsigned long start;	/* first address inside the mapping */
	unsigned long end;	/* first address past the mapping */
};

/* Mappings loaded from /proc/<pid>/maps; the array is grown on demand. */
struct vaddr_range *ranges;
/* Number of elements ranges[] has room for. */
int nr_ranges_allocated;
/* Number of elements of ranges[] that currently hold a mapping. */
int nr_ranges_populated;
/* Index of the range that satisfied the latest lookup, or -1 if none. */
int last_range = -1;
| 86 | |
| 87 | int __pid_load_vaddrs(int pid) |
| 88 | { |
| 89 | int ret = 0; |
| 90 | int proc_maps_fd = open_proc(pid, "maps"); |
| 91 | char linebuf[10000]; |
| 92 | unsigned long start; |
| 93 | unsigned long end; |
| 94 | char rest[1000]; |
| 95 | FILE *f = fdopen(proc_maps_fd, "r"); |
| 96 | |
| 97 | if (!f) |
| 98 | mpx_dig_abort(); |
| 99 | nr_ranges_populated = 0; |
| 100 | while (!feof(f)) { |
| 101 | char *readret = fgets(linebuf, sizeof(linebuf), f); |
| 102 | int parsed; |
| 103 | |
| 104 | if (readret == NULL) { |
| 105 | if (feof(f)) |
| 106 | break; |
| 107 | mpx_dig_abort(); |
| 108 | } |
| 109 | |
| 110 | parsed = sscanf(linebuf, "%lx-%lx%s", &start, &end, rest); |
| 111 | if (parsed != 3) |
| 112 | mpx_dig_abort(); |
| 113 | |
| 114 | dprintf4("result[%d]: %lx-%lx<->%s\n", parsed, start, end, rest); |
| 115 | if (nr_ranges_populated >= nr_ranges_allocated) { |
| 116 | ret = -E2BIG; |
| 117 | break; |
| 118 | } |
| 119 | ranges[nr_ranges_populated].start = start; |
| 120 | ranges[nr_ranges_populated].end = end; |
| 121 | nr_ranges_populated++; |
| 122 | } |
| 123 | last_range = -1; |
| 124 | fclose(f); |
| 125 | close(proc_maps_fd); |
| 126 | return ret; |
| 127 | } |
| 128 | |
| 129 | int pid_load_vaddrs(int pid) |
| 130 | { |
| 131 | int ret; |
| 132 | |
| 133 | dprintf2("%s(%d)\n", __func__, pid); |
| 134 | if (!ranges) { |
| 135 | nr_ranges_allocated = 4; |
| 136 | ranges = malloc(nr_ranges_allocated * sizeof(ranges[0])); |
| 137 | dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__, pid, |
| 138 | nr_ranges_allocated, ranges); |
| 139 | assert(ranges != NULL); |
| 140 | } |
| 141 | do { |
| 142 | ret = __pid_load_vaddrs(pid); |
| 143 | if (!ret) |
| 144 | break; |
| 145 | if (ret == -E2BIG) { |
| 146 | dprintf2("%s(%d) need to realloc\n", __func__, pid); |
| 147 | nr_ranges_allocated *= 2; |
| 148 | ranges = realloc(ranges, |
| 149 | nr_ranges_allocated * sizeof(ranges[0])); |
| 150 | dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__, |
| 151 | pid, nr_ranges_allocated, ranges); |
| 152 | assert(ranges != NULL); |
| 153 | dprintf1("reallocating to hold %d ranges\n", nr_ranges_allocated); |
| 154 | } |
| 155 | } while (1); |
| 156 | |
| 157 | dprintf2("%s(%d) done\n", __func__, pid); |
| 158 | |
| 159 | return ret; |
| 160 | } |
| 161 | |
| 162 | static inline int vaddr_in_range(unsigned long vaddr, struct vaddr_range *r) |
| 163 | { |
| 164 | if (vaddr < r->start) |
| 165 | return 0; |
| 166 | if (vaddr >= r->end) |
| 167 | return 0; |
| 168 | return 1; |
| 169 | } |
| 170 | |
| 171 | static inline int vaddr_mapped_by_range(unsigned long vaddr) |
| 172 | { |
| 173 | int i; |
| 174 | |
| 175 | if (last_range > 0 && vaddr_in_range(vaddr, &ranges[last_range])) |
| 176 | return 1; |
| 177 | |
| 178 | for (i = 0; i < nr_ranges_populated; i++) { |
| 179 | struct vaddr_range *r = &ranges[i]; |
| 180 | |
| 181 | if (vaddr_in_range(vaddr, r)) |
| 182 | continue; |
| 183 | last_range = i; |
| 184 | return 1; |
| 185 | } |
| 186 | return 0; |
| 187 | } |
| 188 | |
/* Size in bytes of one bounds table entry: four unsigned longs. */
const int bt_entry_size_bytes = 4 * sizeof(unsigned long);

/*
 * Produce a pointer through which the bounds table at @table_vaddr can be
 * read.
 *
 * Without MPX_DIG_STANDALONE the address itself is returned.  With it, the
 * whole table is read from descriptor fd at offset @table_vaddr into a
 * static buffer, aborting through mpx_dig_abort() on a failed seek or a
 * short read.
 */
void *read_bounds_table_into_buf(unsigned long table_vaddr)
{
#ifndef MPX_DIG_STANDALONE
	return (void *)table_vaddr;
#else
	static char bt_buf[MPX_BOUNDS_TABLE_SIZE_BYTES];
	off_t seek_ret;
	int read_ret;

	seek_ret = lseek(fd, table_vaddr, SEEK_SET);
	if (seek_ret != table_vaddr)
		mpx_dig_abort();

	read_ret = read(fd, &bt_buf, sizeof(bt_buf));
	if (read_ret != sizeof(bt_buf))
		mpx_dig_abort();
	return &bt_buf;
#endif
}
| 207 | |
| 208 | int dump_table(unsigned long table_vaddr, unsigned long base_controlled_vaddr, |
| 209 | unsigned long bde_vaddr) |
| 210 | { |
| 211 | unsigned long offset_inside_bt; |
| 212 | int nr_entries = 0; |
| 213 | int do_abort = 0; |
| 214 | char *bt_buf; |
| 215 | |
| 216 | dprintf3("%s() base_controlled_vaddr: 0x%012lx bde_vaddr: 0x%012lx\n", |
| 217 | __func__, base_controlled_vaddr, bde_vaddr); |
| 218 | |
| 219 | bt_buf = read_bounds_table_into_buf(table_vaddr); |
| 220 | |
| 221 | dprintf4("%s() read done\n", __func__); |
| 222 | |
| 223 | for (offset_inside_bt = 0; |
| 224 | offset_inside_bt < MPX_BOUNDS_TABLE_SIZE_BYTES; |
| 225 | offset_inside_bt += bt_entry_size_bytes) { |
| 226 | unsigned long bt_entry_index; |
| 227 | unsigned long bt_entry_controls; |
| 228 | unsigned long this_bt_entry_for_vaddr; |
| 229 | unsigned long *bt_entry_buf; |
| 230 | int i; |
| 231 | |
| 232 | dprintf4("%s() offset_inside_bt: 0x%lx of 0x%llx\n", __func__, |
| 233 | offset_inside_bt, MPX_BOUNDS_TABLE_SIZE_BYTES); |
| 234 | bt_entry_buf = (void *)&bt_buf[offset_inside_bt]; |
| 235 | if (!bt_buf) { |
| 236 | printf("null bt_buf\n"); |
| 237 | mpx_dig_abort(); |
| 238 | } |
| 239 | if (!bt_entry_buf) { |
| 240 | printf("null bt_entry_buf\n"); |
| 241 | mpx_dig_abort(); |
| 242 | } |
| 243 | dprintf4("%s() reading *bt_entry_buf @ %p\n", __func__, |
| 244 | bt_entry_buf); |
| 245 | if (!bt_entry_buf[0] && |
| 246 | !bt_entry_buf[1] && |
| 247 | !bt_entry_buf[2] && |
| 248 | !bt_entry_buf[3]) |
| 249 | continue; |
| 250 | |
| 251 | nr_entries++; |
| 252 | |
| 253 | bt_entry_index = offset_inside_bt/bt_entry_size_bytes; |
| 254 | bt_entry_controls = sizeof(void *); |
| 255 | this_bt_entry_for_vaddr = |
| 256 | base_controlled_vaddr + bt_entry_index*bt_entry_controls; |
| 257 | /* |
| 258 | * We sign extend vaddr bits 48->63 which effectively |
| 259 | * creates a hole in the virtual address space. |
| 260 | * This calculation corrects for the hole. |
| 261 | */ |
| 262 | if (this_bt_entry_for_vaddr > 0x00007fffffffffffUL) |
| 263 | this_bt_entry_for_vaddr |= 0xffff800000000000; |
| 264 | |
| 265 | if (!vaddr_mapped_by_range(this_bt_entry_for_vaddr)) { |
| 266 | printf("bt_entry_buf: %p\n", bt_entry_buf); |
| 267 | printf("there is a bte for %lx but no mapping\n", |
| 268 | this_bt_entry_for_vaddr); |
| 269 | printf(" bde vaddr: %016lx\n", bde_vaddr); |
| 270 | printf("base_controlled_vaddr: %016lx\n", base_controlled_vaddr); |
| 271 | printf(" table_vaddr: %016lx\n", table_vaddr); |
| 272 | printf(" entry vaddr: %016lx @ offset %lx\n", |
| 273 | table_vaddr + offset_inside_bt, offset_inside_bt); |
| 274 | do_abort = 1; |
| 275 | mpx_dig_abort(); |
| 276 | } |
| 277 | if (DEBUG_LEVEL < 4) |
| 278 | continue; |
| 279 | |
| 280 | printf("table entry[%lx]: ", offset_inside_bt); |
| 281 | for (i = 0; i < bt_entry_size_bytes; i += sizeof(unsigned long)) |
| 282 | printf("0x%016lx ", bt_entry_buf[i]); |
| 283 | printf("\n"); |
| 284 | } |
| 285 | if (do_abort) |
| 286 | mpx_dig_abort(); |
| 287 | dprintf4("%s() done\n", __func__); |
| 288 | return nr_entries; |
| 289 | } |
| 290 | |
| 291 | int search_bd_buf(char *buf, int len_bytes, unsigned long bd_offset_bytes, |
| 292 | int *nr_populated_bdes) |
| 293 | { |
| 294 | unsigned long i; |
| 295 | int total_entries = 0; |
| 296 | |
| 297 | dprintf3("%s(%p, %x, %lx, ...) buf end: %p\n", __func__, buf, |
| 298 | len_bytes, bd_offset_bytes, buf + len_bytes); |
| 299 | |
| 300 | for (i = 0; i < len_bytes; i += sizeof(unsigned long)) { |
| 301 | unsigned long bd_index = (bd_offset_bytes + i) / sizeof(unsigned long); |
| 302 | unsigned long *bounds_dir_entry_ptr = (unsigned long *)&buf[i]; |
| 303 | unsigned long bounds_dir_entry; |
| 304 | unsigned long bd_for_vaddr; |
| 305 | unsigned long bt_start; |
| 306 | unsigned long bt_tail; |
| 307 | int nr_entries; |
| 308 | |
| 309 | dprintf4("%s() loop i: %ld bounds_dir_entry_ptr: %p\n", __func__, i, |
| 310 | bounds_dir_entry_ptr); |
| 311 | |
| 312 | bounds_dir_entry = *bounds_dir_entry_ptr; |
| 313 | if (!bounds_dir_entry) { |
| 314 | dprintf4("no bounds dir at index 0x%lx / 0x%lx " |
| 315 | "start at offset:%lx %lx\n", bd_index, bd_index, |
| 316 | bd_offset_bytes, i); |
| 317 | continue; |
| 318 | } |
| 319 | dprintf3("found bounds_dir_entry: 0x%lx @ " |
| 320 | "index 0x%lx buf ptr: %p\n", bounds_dir_entry, i, |
| 321 | &buf[i]); |
| 322 | /* mask off the enable bit: */ |
| 323 | bounds_dir_entry &= ~0x1; |
| 324 | (*nr_populated_bdes)++; |
| 325 | dprintf4("nr_populated_bdes: %p\n", nr_populated_bdes); |
| 326 | dprintf4("*nr_populated_bdes: %d\n", *nr_populated_bdes); |
| 327 | |
| 328 | bt_start = bounds_dir_entry; |
| 329 | bt_tail = bounds_dir_entry + MPX_BOUNDS_TABLE_SIZE_BYTES - 1; |
| 330 | if (!vaddr_mapped_by_range(bt_start)) { |
| 331 | printf("bounds directory 0x%lx points to nowhere\n", |
| 332 | bounds_dir_entry); |
| 333 | mpx_dig_abort(); |
| 334 | } |
| 335 | if (!vaddr_mapped_by_range(bt_tail)) { |
| 336 | printf("bounds directory end 0x%lx points to nowhere\n", |
| 337 | bt_tail); |
| 338 | mpx_dig_abort(); |
| 339 | } |
| 340 | /* |
| 341 | * Each bounds directory entry controls 1MB of virtual address |
| 342 | * space. This variable is the virtual address in the process |
| 343 | * of the beginning of the area controlled by this bounds_dir. |
| 344 | */ |
| 345 | bd_for_vaddr = bd_index * (1UL<<20); |
| 346 | |
| 347 | nr_entries = dump_table(bounds_dir_entry, bd_for_vaddr, |
| 348 | bounds_dir_global+bd_offset_bytes+i); |
| 349 | total_entries += nr_entries; |
| 350 | dprintf5("dir entry[%4ld @ %p]: 0x%lx %6d entries " |
| 351 | "total this buf: %7d bd_for_vaddrs: 0x%lx -> 0x%lx\n", |
| 352 | bd_index, buf+i, |
| 353 | bounds_dir_entry, nr_entries, total_entries, |
| 354 | bd_for_vaddr, bd_for_vaddr + (1UL<<20)); |
| 355 | } |
| 356 | dprintf3("%s(%p, %x, %lx, ...) done\n", __func__, buf, len_bytes, |
| 357 | bd_offset_bytes); |
| 358 | return total_entries; |
| 359 | } |
| 360 | |
| 361 | int proc_pid_mem_fd = -1; |
| 362 | |
| 363 | void *fill_bounds_dir_buf_other(long byte_offset_inside_bounds_dir, |
| 364 | long buffer_size_bytes, void *buffer) |
| 365 | { |
| 366 | unsigned long seekto = bounds_dir_global + byte_offset_inside_bounds_dir; |
| 367 | int read_ret; |
| 368 | off_t seek_ret = lseek(proc_pid_mem_fd, seekto, SEEK_SET); |
| 369 | |
| 370 | if (seek_ret != seekto) |
| 371 | mpx_dig_abort(); |
| 372 | |
| 373 | read_ret = read(proc_pid_mem_fd, buffer, buffer_size_bytes); |
| 374 | /* there shouldn't practically be short reads of /proc/$pid/mem */ |
| 375 | if (read_ret != buffer_size_bytes) |
| 376 | mpx_dig_abort(); |
| 377 | |
| 378 | return buffer; |
| 379 | } |
| 380 | void *fill_bounds_dir_buf_self(long byte_offset_inside_bounds_dir, |
| 381 | long buffer_size_bytes, void *buffer) |
| 382 | |
| 383 | { |
| 384 | unsigned char vec[buffer_size_bytes / PAGE_SIZE]; |
| 385 | char *dig_bounds_dir_ptr = |
| 386 | (void *)(bounds_dir_global + byte_offset_inside_bounds_dir); |
| 387 | /* |
| 388 | * use mincore() to quickly find the areas of the bounds directory |
| 389 | * that have memory and thus will be worth scanning. |
| 390 | */ |
| 391 | int incore_ret; |
| 392 | |
| 393 | int incore = 0; |
| 394 | int i; |
| 395 | |
| 396 | dprintf4("%s() dig_bounds_dir_ptr: %p\n", __func__, dig_bounds_dir_ptr); |
| 397 | |
| 398 | incore_ret = mincore(dig_bounds_dir_ptr, buffer_size_bytes, &vec[0]); |
| 399 | if (incore_ret) { |
| 400 | printf("mincore ret: %d\n", incore_ret); |
| 401 | perror("mincore"); |
| 402 | mpx_dig_abort(); |
| 403 | } |
| 404 | for (i = 0; i < sizeof(vec); i++) |
| 405 | incore += vec[i]; |
| 406 | dprintf4("%s() total incore: %d\n", __func__, incore); |
| 407 | if (!incore) |
| 408 | return NULL; |
| 409 | dprintf3("%s() total incore: %d\n", __func__, incore); |
| 410 | return dig_bounds_dir_ptr; |
| 411 | } |
| 412 | |
| 413 | int inspect_pid(int pid) |
| 414 | { |
| 415 | static int dig_nr; |
| 416 | long offset_inside_bounds_dir; |
| 417 | char bounds_dir_buf[sizeof(unsigned long) * (1UL << 15)]; |
| 418 | char *dig_bounds_dir_ptr; |
| 419 | int total_entries = 0; |
| 420 | int nr_populated_bdes = 0; |
| 421 | int inspect_self; |
| 422 | |
| 423 | if (getpid() == pid) { |
| 424 | dprintf4("inspecting self\n"); |
| 425 | inspect_self = 1; |
| 426 | } else { |
| 427 | dprintf4("inspecting pid %d\n", pid); |
| 428 | mpx_dig_abort(); |
| 429 | } |
| 430 | |
| 431 | for (offset_inside_bounds_dir = 0; |
| 432 | offset_inside_bounds_dir < MPX_BOUNDS_TABLE_SIZE_BYTES; |
| 433 | offset_inside_bounds_dir += sizeof(bounds_dir_buf)) { |
| 434 | static int bufs_skipped; |
| 435 | int this_entries; |
| 436 | |
| 437 | if (inspect_self) { |
| 438 | dig_bounds_dir_ptr = |
| 439 | fill_bounds_dir_buf_self(offset_inside_bounds_dir, |
| 440 | sizeof(bounds_dir_buf), |
| 441 | &bounds_dir_buf[0]); |
| 442 | } else { |
| 443 | dig_bounds_dir_ptr = |
| 444 | fill_bounds_dir_buf_other(offset_inside_bounds_dir, |
| 445 | sizeof(bounds_dir_buf), |
| 446 | &bounds_dir_buf[0]); |
| 447 | } |
| 448 | if (!dig_bounds_dir_ptr) { |
| 449 | bufs_skipped++; |
| 450 | continue; |
| 451 | } |
| 452 | this_entries = search_bd_buf(dig_bounds_dir_ptr, |
| 453 | sizeof(bounds_dir_buf), |
| 454 | offset_inside_bounds_dir, |
| 455 | &nr_populated_bdes); |
| 456 | total_entries += this_entries; |
| 457 | } |
| 458 | printf("mpx dig (%3d) complete, SUCCESS (%8d / %4d)\n", ++dig_nr, |
| 459 | total_entries, nr_populated_bdes); |
| 460 | return total_entries + nr_populated_bdes; |
| 461 | } |
| 462 | |
#ifdef MPX_DIG_REMOTE
/*
 * Stand-alone entry point: mpx-dig <pid> <bounds directory address in hex>.
 *
 * Parses both arguments, opens /proc/<pid>/mem and hands over to
 * inspect_pid().  Returns 1 when too few arguments were given and 0 after a
 * completed inspection; malformed arguments or a failed open end in
 * mpx_dig_abort().
 */
int main(int argc, char **argv)
{
	int err;
	int pid;

	printf("mpx-dig starting...\n");
	/* argv[1] and argv[2] are read below; make sure they exist */
	if (argc < 3) {
		fprintf(stderr, "usage: %s <pid> <bounds-dir-hex-address>\n",
			argc > 0 ? argv[0] : "mpx-dig");
		return 1;
	}

	err = sscanf(argv[1], "%d", &pid);
	printf("parsing: '%s', err: %d\n", argv[1], err);
	if (err != 1)
		mpx_dig_abort();

	err = sscanf(argv[2], "%lx", &bounds_dir_global);
	printf("parsing: '%s': %d\n", argv[2], err);
	if (err != 1)
		mpx_dig_abort();

	proc_pid_mem_fd = open_proc(pid, "mem");
	if (proc_pid_mem_fd < 0)
		mpx_dig_abort();

	inspect_pid(pid);
	return 0;
}
#endif
| 490 | |
| 491 | long inspect_me(struct mpx_bounds_dir *bounds_dir) |
| 492 | { |
| 493 | int pid = getpid(); |
| 494 | |
| 495 | pid_load_vaddrs(pid); |
| 496 | bounds_dir_global = (unsigned long)bounds_dir; |
| 497 | dprintf4("enter %s() bounds dir: %p\n", __func__, bounds_dir); |
| 498 | return inspect_pid(pid); |
| 499 | } |