// SPDX-License-Identifier: GPL-2.0
| 2 | #include <linux/mm.h> |
| 3 | #include <linux/sched.h> |
| 4 | #include <linux/sched/debug.h> |
| 5 | #include <linux/init_task.h> |
| 6 | #include <linux/fs.h> |
| 7 | |
| 8 | #include <linux/uaccess.h> |
| 9 | #include <asm/processor.h> |
| 10 | #include <asm/desc.h> |
| 11 | #include <asm/traps.h> |
| 12 | |
/*
 * True when x lies strictly between PAGE_OFFSET and PAGE_OFFSET + MAXMEM.
 * Note that x is evaluated twice.
 */
#define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + MAXMEM)

/* Read field x of this CPU's cpu_tss_rw.x86_tss. */
#define TSS(x) this_cpu_read(cpu_tss_rw.x86_tss.x)

/* Defined further down; doublefault_shim() needs the prototype first. */
static void set_df_gdt_entry(unsigned int cpu);
| 18 | |
| 19 | /* |
| 20 | * Called by double_fault with CR0.TS and EFLAGS.NT cleared. The CPU thinks |
| 21 | * we're running the doublefault task. Cannot return. |
| 22 | */ |
| 23 | asmlinkage noinstr void __noreturn doublefault_shim(void) |
| 24 | { |
| 25 | unsigned long cr2; |
| 26 | struct pt_regs regs; |
| 27 | |
| 28 | BUILD_BUG_ON(sizeof(struct doublefault_stack) != PAGE_SIZE); |
| 29 | |
| 30 | cr2 = native_read_cr2(); |
| 31 | |
| 32 | /* Reset back to the normal kernel task. */ |
| 33 | force_reload_TR(); |
| 34 | set_df_gdt_entry(smp_processor_id()); |
| 35 | |
| 36 | trace_hardirqs_off(); |
| 37 | |
| 38 | /* |
| 39 | * Fill in pt_regs. A downside of doing this in C is that the unwinder |
| 40 | * won't see it (no ENCODE_FRAME_POINTER), so a nested stack dump |
| 41 | * won't successfully unwind to the source of the double fault. |
| 42 | * The main dump from exc_double_fault() is fine, though, since it |
| 43 | * uses these regs directly. |
| 44 | * |
| 45 | * If anyone ever cares, this could be moved to asm. |
| 46 | */ |
| 47 | regs.ss = TSS(ss); |
| 48 | regs.__ssh = 0; |
| 49 | regs.sp = TSS(sp); |
| 50 | regs.flags = TSS(flags); |
| 51 | regs.cs = TSS(cs); |
| 52 | /* We won't go through the entry asm, so we can leave __csh as 0. */ |
| 53 | regs.__csh = 0; |
| 54 | regs.ip = TSS(ip); |
| 55 | regs.orig_ax = 0; |
| 56 | regs.gs = TSS(gs); |
| 57 | regs.__gsh = 0; |
| 58 | regs.fs = TSS(fs); |
| 59 | regs.__fsh = 0; |
| 60 | regs.es = TSS(es); |
| 61 | regs.__esh = 0; |
| 62 | regs.ds = TSS(ds); |
| 63 | regs.__dsh = 0; |
| 64 | regs.ax = TSS(ax); |
| 65 | regs.bp = TSS(bp); |
| 66 | regs.di = TSS(di); |
| 67 | regs.si = TSS(si); |
| 68 | regs.dx = TSS(dx); |
| 69 | regs.cx = TSS(cx); |
| 70 | regs.bx = TSS(bx); |
| 71 | |
| 72 | exc_double_fault(®s, 0, cr2); |
| 73 | |
| 74 | /* |
| 75 | * x86_32 does not save the original CR3 anywhere on a task switch. |
| 76 | * This means that, even if we wanted to return, we would need to find |
| 77 | * some way to reconstruct CR3. We could make a credible guess based |
| 78 | * on cpu_tlbstate, but that would be racy and would not account for |
| 79 | * PTI. |
| 80 | * |
| 81 | * Instead, don't bother. We can return through |
| 82 | * rewind_stack_do_exit() instead. |
| 83 | */ |
| 84 | panic("cannot return from double fault\n"); |
| 85 | } |
| 86 | |
| 87 | DEFINE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack) = { |
| 88 | .tss = { |
| 89 | /* |
| 90 | * No sp0 or ss0 -- we never run CPL != 0 with this TSS |
| 91 | * active. sp is filled in later. |
| 92 | */ |
| 93 | .ldt = 0, |
| 94 | .io_bitmap_base = IO_BITMAP_OFFSET_INVALID, |
| 95 | |
| 96 | .ip = (unsigned long) asm_exc_double_fault, |
| 97 | .flags = X86_EFLAGS_FIXED, |
| 98 | .es = __USER_DS, |
| 99 | .cs = __KERNEL_CS, |
| 100 | .ss = __KERNEL_DS, |
| 101 | .ds = __USER_DS, |
| 102 | .fs = __KERNEL_PERCPU, |
| 103 | #ifndef CONFIG_X86_32_LAZY_GS |
| 104 | .gs = __KERNEL_STACK_CANARY, |
| 105 | #endif |
| 106 | |
| 107 | .__cr3 = __pa_nodebug(swapper_pg_dir), |
| 108 | }, |
| 109 | }; |
| 110 | |
| 111 | static void set_df_gdt_entry(unsigned int cpu) |
| 112 | { |
| 113 | /* Set up doublefault TSS pointer in the GDT */ |
| 114 | __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, |
| 115 | &get_cpu_entry_area(cpu)->doublefault_stack.tss); |
| 116 | |
| 117 | } |
| 118 | |
| 119 | void doublefault_init_cpu_tss(void) |
| 120 | { |
| 121 | unsigned int cpu = smp_processor_id(); |
| 122 | struct cpu_entry_area *cea = get_cpu_entry_area(cpu); |
| 123 | |
| 124 | /* |
| 125 | * The linker isn't smart enough to initialize percpu variables that |
| 126 | * point to other places in percpu space. |
| 127 | */ |
| 128 | this_cpu_write(doublefault_stack.tss.sp, |
| 129 | (unsigned long)&cea->doublefault_stack.stack + |
| 130 | sizeof(doublefault_stack.stack)); |
| 131 | |
| 132 | set_df_gdt_entry(cpu); |
| 133 | } |