David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 1 | // SPDX-License-Identifier: GPL-2.0-only |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 2 | #include <linux/sched.h> |
| 3 | #include <linux/sched/task.h> |
| 4 | #include <linux/sched/task_stack.h> |
| 5 | #include <linux/interrupt.h> |
| 6 | #include <asm/sections.h> |
| 7 | #include <asm/ptrace.h> |
| 8 | #include <asm/bitops.h> |
| 9 | #include <asm/stacktrace.h> |
| 10 | #include <asm/unwind.h> |
| 11 | |
/*
 * Size in bytes of a frame header: two longs.  The unwinder reads the next
 * frame pointer from the word at bp and the return address from bp + 1.
 */
#define FRAME_HEADER_SIZE (2 * sizeof(long))
| 13 | |
| 14 | unsigned long unwind_get_return_address(struct unwind_state *state) |
| 15 | { |
| 16 | if (unwind_done(state)) |
| 17 | return 0; |
| 18 | |
| 19 | return __kernel_text_address(state->ip) ? state->ip : 0; |
| 20 | } |
| 21 | EXPORT_SYMBOL_GPL(unwind_get_return_address); |
| 22 | |
| 23 | unsigned long *unwind_get_return_address_ptr(struct unwind_state *state) |
| 24 | { |
| 25 | if (unwind_done(state)) |
| 26 | return NULL; |
| 27 | |
| 28 | return state->regs ? &state->regs->ip : state->bp + 1; |
| 29 | } |
| 30 | |
| 31 | static void unwind_dump(struct unwind_state *state) |
| 32 | { |
| 33 | static bool dumped_before = false; |
| 34 | bool prev_zero, zero = false; |
| 35 | unsigned long word, *sp; |
| 36 | struct stack_info stack_info = {0}; |
| 37 | unsigned long visit_mask = 0; |
| 38 | |
| 39 | if (dumped_before) |
| 40 | return; |
| 41 | |
| 42 | dumped_before = true; |
| 43 | |
| 44 | printk_deferred("unwind stack type:%d next_sp:%p mask:0x%lx graph_idx:%d\n", |
| 45 | state->stack_info.type, state->stack_info.next_sp, |
| 46 | state->stack_mask, state->graph_idx); |
| 47 | |
| 48 | for (sp = PTR_ALIGN(state->orig_sp, sizeof(long)); sp; |
| 49 | sp = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { |
| 50 | if (get_stack_info(sp, state->task, &stack_info, &visit_mask)) |
| 51 | break; |
| 52 | |
| 53 | for (; sp < stack_info.end; sp++) { |
| 54 | |
| 55 | word = READ_ONCE_NOCHECK(*sp); |
| 56 | |
| 57 | prev_zero = zero; |
| 58 | zero = word == 0; |
| 59 | |
| 60 | if (zero) { |
| 61 | if (!prev_zero) |
| 62 | printk_deferred("%p: %0*x ...\n", |
| 63 | sp, BITS_PER_LONG/4, 0); |
| 64 | continue; |
| 65 | } |
| 66 | |
| 67 | printk_deferred("%p: %0*lx (%pB)\n", |
| 68 | sp, BITS_PER_LONG/4, word, (void *)word); |
| 69 | } |
| 70 | } |
| 71 | } |
| 72 | |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 73 | static bool in_entry_code(unsigned long ip) |
| 74 | { |
| 75 | char *addr = (char *)ip; |
| 76 | |
| 77 | if (addr >= __entry_text_start && addr < __entry_text_end) |
| 78 | return true; |
| 79 | |
| 80 | if (addr >= __irqentry_text_start && addr < __irqentry_text_end) |
| 81 | return true; |
| 82 | |
| 83 | return false; |
| 84 | } |
| 85 | |
| 86 | static inline unsigned long *last_frame(struct unwind_state *state) |
| 87 | { |
| 88 | return (unsigned long *)task_pt_regs(state->task) - 2; |
| 89 | } |
| 90 | |
| 91 | static bool is_last_frame(struct unwind_state *state) |
| 92 | { |
| 93 | return state->bp == last_frame(state); |
| 94 | } |
| 95 | |
/*
 * Number of words by which the final frame is displaced below last_frame()
 * when the stack has been realigned: 3 on 32-bit, 1 otherwise.
 */
#ifndef CONFIG_X86_32
#define GCC_REALIGN_WORDS 1
#else
#define GCC_REALIGN_WORDS 3
#endif

/* Position of the final frame pointer when the stack was realigned. */
static inline unsigned long *last_aligned_frame(struct unwind_state *state)
{
	unsigned long *final_bp = last_frame(state);

	return final_bp - GCC_REALIGN_WORDS;
}
| 106 | |
| 107 | static bool is_last_aligned_frame(struct unwind_state *state) |
| 108 | { |
| 109 | unsigned long *last_bp = last_frame(state); |
| 110 | unsigned long *aligned_bp = last_aligned_frame(state); |
| 111 | |
| 112 | /* |
| 113 | * GCC can occasionally decide to realign the stack pointer and change |
| 114 | * the offset of the stack frame in the prologue of a function called |
| 115 | * by head/entry code. Examples: |
| 116 | * |
| 117 | * <start_secondary>: |
| 118 | * push %edi |
| 119 | * lea 0x8(%esp),%edi |
| 120 | * and $0xfffffff8,%esp |
| 121 | * pushl -0x4(%edi) |
| 122 | * push %ebp |
| 123 | * mov %esp,%ebp |
| 124 | * |
| 125 | * <x86_64_start_kernel>: |
| 126 | * lea 0x8(%rsp),%r10 |
| 127 | * and $0xfffffffffffffff0,%rsp |
| 128 | * pushq -0x8(%r10) |
| 129 | * push %rbp |
| 130 | * mov %rsp,%rbp |
| 131 | * |
| 132 | * After aligning the stack, it pushes a duplicate copy of the return |
| 133 | * address before pushing the frame pointer. |
| 134 | */ |
| 135 | return (state->bp == aligned_bp && *(aligned_bp + 1) == *(last_bp + 1)); |
| 136 | } |
| 137 | |
| 138 | static bool is_last_ftrace_frame(struct unwind_state *state) |
| 139 | { |
| 140 | unsigned long *last_bp = last_frame(state); |
| 141 | unsigned long *last_ftrace_bp = last_bp - 3; |
| 142 | |
| 143 | /* |
| 144 | * When unwinding from an ftrace handler of a function called by entry |
| 145 | * code, the stack layout of the last frame is: |
| 146 | * |
| 147 | * bp |
| 148 | * parent ret addr |
| 149 | * bp |
| 150 | * function ret addr |
| 151 | * parent ret addr |
| 152 | * pt_regs |
| 153 | * ----------------- |
| 154 | */ |
| 155 | return (state->bp == last_ftrace_bp && |
| 156 | *state->bp == *(state->bp + 2) && |
| 157 | *(state->bp + 1) == *(state->bp + 4)); |
| 158 | } |
| 159 | |
/*
 * True when the current frame is the task's final frame in any of the
 * recognized layouts: plain, realigned, or ftrace.
 */
static bool is_last_task_frame(struct unwind_state *state)
{
	if (is_last_frame(state))
		return true;

	if (is_last_aligned_frame(state))
		return true;

	return is_last_ftrace_frame(state);
}
| 165 | |
/*
 * Check whether a frame pointer value is really an encoded pointer to a
 * pt_regs on the stack (see ENCODE_FRAME_POINTER).  Returns the decoded
 * pt_regs pointer, or NULL if @bp is not encoded.
 */
#ifdef CONFIG_X86_64
static struct pt_regs *decode_frame_pointer(unsigned long *bp)
{
	unsigned long encoded = (unsigned long)bp;

	/* Bit 0 set marks an encoded value; clear it to decode. */
	return (encoded & 0x1) ? (struct pt_regs *)(encoded & ~0x1) : NULL;
}
#else
static struct pt_regs *decode_frame_pointer(unsigned long *bp)
{
	unsigned long encoded = (unsigned long)bp;

	/* Bit 31 clear marks an encoded value; set it to decode. */
	return (encoded & 0x80000000) ?
		NULL : (struct pt_regs *)(encoded | 0x80000000);
}
#endif
| 191 | |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 192 | static bool update_stack_state(struct unwind_state *state, |
| 193 | unsigned long *next_bp) |
| 194 | { |
| 195 | struct stack_info *info = &state->stack_info; |
| 196 | enum stack_type prev_type = info->type; |
| 197 | struct pt_regs *regs; |
| 198 | unsigned long *frame, *prev_frame_end, *addr_p, addr; |
| 199 | size_t len; |
| 200 | |
| 201 | if (state->regs) |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 202 | prev_frame_end = (void *)state->regs + sizeof(*state->regs); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 203 | else |
| 204 | prev_frame_end = (void *)state->bp + FRAME_HEADER_SIZE; |
| 205 | |
| 206 | /* Is the next frame pointer an encoded pointer to pt_regs? */ |
| 207 | regs = decode_frame_pointer(next_bp); |
| 208 | if (regs) { |
| 209 | frame = (unsigned long *)regs; |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 210 | len = sizeof(*regs); |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 211 | state->got_irq = true; |
| 212 | } else { |
| 213 | frame = next_bp; |
| 214 | len = FRAME_HEADER_SIZE; |
| 215 | } |
| 216 | |
| 217 | /* |
| 218 | * If the next bp isn't on the current stack, switch to the next one. |
| 219 | * |
| 220 | * We may have to traverse multiple stacks to deal with the possibility |
| 221 | * that info->next_sp could point to an empty stack and the next bp |
| 222 | * could be on a subsequent stack. |
| 223 | */ |
| 224 | while (!on_stack(info, frame, len)) |
| 225 | if (get_stack_info(info->next_sp, state->task, info, |
| 226 | &state->stack_mask)) |
| 227 | return false; |
| 228 | |
| 229 | /* Make sure it only unwinds up and doesn't overlap the prev frame: */ |
| 230 | if (state->orig_sp && state->stack_info.type == prev_type && |
| 231 | frame < prev_frame_end) |
| 232 | return false; |
| 233 | |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 234 | /* Move state to the next frame: */ |
| 235 | if (regs) { |
| 236 | state->regs = regs; |
| 237 | state->bp = NULL; |
| 238 | } else { |
| 239 | state->bp = next_bp; |
| 240 | state->regs = NULL; |
| 241 | } |
| 242 | |
| 243 | /* Save the return address: */ |
| 244 | if (state->regs && user_mode(state->regs)) |
| 245 | state->ip = 0; |
| 246 | else { |
| 247 | addr_p = unwind_get_return_address_ptr(state); |
| 248 | addr = READ_ONCE_TASK_STACK(state->task, *addr_p); |
| 249 | state->ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, |
| 250 | addr, addr_p); |
| 251 | } |
| 252 | |
| 253 | /* Save the original stack pointer for unwind_dump(): */ |
| 254 | if (!state->orig_sp) |
| 255 | state->orig_sp = frame; |
| 256 | |
| 257 | return true; |
| 258 | } |
| 259 | |
| 260 | bool unwind_next_frame(struct unwind_state *state) |
| 261 | { |
| 262 | struct pt_regs *regs; |
| 263 | unsigned long *next_bp; |
| 264 | |
| 265 | if (unwind_done(state)) |
| 266 | return false; |
| 267 | |
| 268 | /* Have we reached the end? */ |
| 269 | if (state->regs && user_mode(state->regs)) |
| 270 | goto the_end; |
| 271 | |
| 272 | if (is_last_task_frame(state)) { |
| 273 | regs = task_pt_regs(state->task); |
| 274 | |
| 275 | /* |
| 276 | * kthreads (other than the boot CPU's idle thread) have some |
| 277 | * partial regs at the end of their stack which were placed |
| 278 | * there by copy_thread_tls(). But the regs don't have any |
| 279 | * useful information, so we can skip them. |
| 280 | * |
| 281 | * This user_mode() check is slightly broader than a PF_KTHREAD |
| 282 | * check because it also catches the awkward situation where a |
| 283 | * newly forked kthread transitions into a user task by calling |
| 284 | * do_execve(), which eventually clears PF_KTHREAD. |
| 285 | */ |
| 286 | if (!user_mode(regs)) |
| 287 | goto the_end; |
| 288 | |
| 289 | /* |
| 290 | * We're almost at the end, but not quite: there's still the |
| 291 | * syscall regs frame. Entry code doesn't encode the regs |
| 292 | * pointer for syscalls, so we have to set it manually. |
| 293 | */ |
| 294 | state->regs = regs; |
| 295 | state->bp = NULL; |
| 296 | state->ip = 0; |
| 297 | return true; |
| 298 | } |
| 299 | |
| 300 | /* Get the next frame pointer: */ |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 301 | if (state->next_bp) { |
| 302 | next_bp = state->next_bp; |
| 303 | state->next_bp = NULL; |
| 304 | } else if (state->regs) { |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 305 | next_bp = (unsigned long *)state->regs->bp; |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 306 | } else { |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 307 | next_bp = (unsigned long *)READ_ONCE_TASK_STACK(state->task, *state->bp); |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 308 | } |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 309 | |
| 310 | /* Move to the next frame if it's safe: */ |
| 311 | if (!update_stack_state(state, next_bp)) |
| 312 | goto bad_address; |
| 313 | |
| 314 | return true; |
| 315 | |
| 316 | bad_address: |
| 317 | state->error = true; |
| 318 | |
| 319 | /* |
| 320 | * When unwinding a non-current task, the task might actually be |
| 321 | * running on another CPU, in which case it could be modifying its |
| 322 | * stack while we're reading it. This is generally not a problem and |
| 323 | * can be ignored as long as the caller understands that unwinding |
| 324 | * another task will not always succeed. |
| 325 | */ |
| 326 | if (state->task != current) |
| 327 | goto the_end; |
| 328 | |
| 329 | /* |
| 330 | * Don't warn if the unwinder got lost due to an interrupt in entry |
| 331 | * code or in the C handler before the first frame pointer got set up: |
| 332 | */ |
| 333 | if (state->got_irq && in_entry_code(state->ip)) |
| 334 | goto the_end; |
| 335 | if (state->regs && |
| 336 | state->regs->sp >= (unsigned long)last_aligned_frame(state) && |
| 337 | state->regs->sp < (unsigned long)task_pt_regs(state->task)) |
| 338 | goto the_end; |
| 339 | |
| 340 | /* |
| 341 | * There are some known frame pointer issues on 32-bit. Disable |
| 342 | * unwinder warnings on 32-bit until it gets objtool support. |
| 343 | */ |
| 344 | if (IS_ENABLED(CONFIG_X86_32)) |
| 345 | goto the_end; |
| 346 | |
| 347 | if (state->regs) { |
| 348 | printk_deferred_once(KERN_WARNING |
| 349 | "WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n", |
| 350 | state->regs, state->task->comm, |
| 351 | state->task->pid, next_bp); |
| 352 | unwind_dump(state); |
| 353 | } else { |
| 354 | printk_deferred_once(KERN_WARNING |
| 355 | "WARNING: kernel stack frame pointer at %p in %s:%d has bad value %p\n", |
| 356 | state->bp, state->task->comm, |
| 357 | state->task->pid, next_bp); |
| 358 | unwind_dump(state); |
| 359 | } |
| 360 | the_end: |
| 361 | state->stack_info.type = STACK_TYPE_UNKNOWN; |
| 362 | return false; |
| 363 | } |
| 364 | EXPORT_SYMBOL_GPL(unwind_next_frame); |
| 365 | |
| 366 | void __unwind_start(struct unwind_state *state, struct task_struct *task, |
| 367 | struct pt_regs *regs, unsigned long *first_frame) |
| 368 | { |
| 369 | unsigned long *bp; |
| 370 | |
| 371 | memset(state, 0, sizeof(*state)); |
| 372 | state->task = task; |
| 373 | state->got_irq = (regs); |
| 374 | |
| 375 | /* Don't even attempt to start from user mode regs: */ |
| 376 | if (regs && user_mode(regs)) { |
| 377 | state->stack_info.type = STACK_TYPE_UNKNOWN; |
| 378 | return; |
| 379 | } |
| 380 | |
| 381 | bp = get_frame_pointer(task, regs); |
| 382 | |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 383 | /* |
| 384 | * If we crash with IP==0, the last successfully executed instruction |
| 385 | * was probably an indirect function call with a NULL function pointer. |
| 386 | * That means that SP points into the middle of an incomplete frame: |
| 387 | * *SP is a return pointer, and *(SP-sizeof(unsigned long)) is where we |
| 388 | * would have written a frame pointer if we hadn't crashed. |
| 389 | * Pretend that the frame is complete and that BP points to it, but save |
| 390 | * the real BP so that we can use it when looking for the next frame. |
| 391 | */ |
| 392 | if (regs && regs->ip == 0 && (unsigned long *)regs->sp >= first_frame) { |
| 393 | state->next_bp = bp; |
| 394 | bp = ((unsigned long *)regs->sp) - 1; |
| 395 | } |
| 396 | |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 397 | /* Initialize stack info and make sure the frame data is accessible: */ |
| 398 | get_stack_info(bp, state->task, &state->stack_info, |
| 399 | &state->stack_mask); |
| 400 | update_stack_state(state, bp); |
| 401 | |
| 402 | /* |
| 403 | * The caller can provide the address of the first frame directly |
| 404 | * (first_frame) or indirectly (regs->sp) to indicate which stack frame |
| 405 | * to start unwinding at. Skip ahead until we reach it. |
| 406 | */ |
| 407 | while (!unwind_done(state) && |
| 408 | (!on_stack(&state->stack_info, first_frame, sizeof(long)) || |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 409 | (state->next_bp == NULL && state->bp < first_frame))) |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 410 | unwind_next_frame(state); |
| 411 | } |
| 412 | EXPORT_SYMBOL_GPL(__unwind_start); |