/*
 * Hibernation support for x86-64
 *
 * Distribute under GPLv2
 *
 * Copyright (c) 2007 Rafael J. Wysocki <rjw@sisk.pl>
 * Copyright (c) 2002 Pavel Machek <pavel@ucw.cz>
 * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
 */

#include <linux/gfp.h>
#include <linux/smp.h>
#include <linux/suspend.h>
#include <linux/scatterlist.h>
#include <linux/kdebug.h>

#include <crypto/hash.h>

#include <asm/e820/api.h>
#include <asm/init.h>
#include <asm/proto.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/mtrr.h>
#include <asm/sections.h>
#include <asm/suspend.h>
#include <asm/tlbflush.h>

/* Defined in hibernate_asm_64.S */
extern asmlinkage __visible int restore_image(void);

/*
 * Address to jump to in the last phase of restore in order to get to the image
 * kernel's text (this value is passed in the image header).
 */
unsigned long restore_jump_address __visible;
unsigned long jump_address_phys;

/*
 * Value of the cr3 register from before the hibernation (this value is passed
 * in the image header).
 */
unsigned long restore_cr3 __visible;

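/*
 * Physical address of the temporary page tables built by
 * set_up_temporary_mappings(); consumed by the restore assembly in
 * hibernate_asm_64.S.
 */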
unsigned long temp_level4_pgt __visible;

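/* Address of the safe page holding the relocated copy of core_restore_code. */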
unsigned long relocated_restore_code __visible;

static int set_up_temporary_text_mapping(pgd_t *pgd)
{
        pmd_t *pmd;
        pud_t *pud;
        p4d_t *p4d = NULL;
        pgprot_t pgtable_prot = __pgprot(_KERNPG_TABLE);
        pgprot_t pmd_text_prot = __pgprot(__PAGE_KERNEL_LARGE_EXEC);

        /* Filter out unsupported __PAGE_KERNEL* bits: */
        pgprot_val(pmd_text_prot) &= __default_kernel_pte_mask;
        pgprot_val(pgtable_prot) &= __default_kernel_pte_mask;

        /*
         * The new mapping only has to cover the page containing the image
         * kernel's entry point (jump_address_phys), because the switch over to
         * it is carried out by relocated code running from a page allocated
         * specifically for this purpose and covered by the identity mapping, so
         * the temporary kernel text mapping is only needed for the final jump.
         * Moreover, in that mapping the virtual address of the image kernel's
         * entry point must be the same as its virtual address in the image
         * kernel (restore_jump_address), so the image kernel's
         * restore_registers() code doesn't find itself in a different area of
         * the virtual address space after switching over to the original page
         * tables used by the image kernel.
         */

        if (pgtable_l5_enabled()) {
                p4d = (p4d_t *)get_safe_page(GFP_ATOMIC);
                if (!p4d)
                        return -ENOMEM;
        }

        pud = (pud_t *)get_safe_page(GFP_ATOMIC);
        if (!pud)
                return -ENOMEM;

        pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
        if (!pmd)
                return -ENOMEM;

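        /*
         * Map the large page containing the image kernel's entry point
         * (jump_address_phys) at the virtual address it expects
         * (restore_jump_address).
         */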
        set_pmd(pmd + pmd_index(restore_jump_address),
                __pmd((jump_address_phys & PMD_MASK) | pgprot_val(pmd_text_prot)));
        set_pud(pud + pud_index(restore_jump_address),
                __pud(__pa(pmd) | pgprot_val(pgtable_prot)));
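        /* With 5-level paging an extra p4d level sits between the pgd and the pud. */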
        if (p4d) {
                p4d_t new_p4d = __p4d(__pa(pud) | pgprot_val(pgtable_prot));
                pgd_t new_pgd = __pgd(__pa(p4d) | pgprot_val(pgtable_prot));

                set_p4d(p4d + p4d_index(restore_jump_address), new_p4d);
                set_pgd(pgd + pgd_index(restore_jump_address), new_pgd);
        } else {
                /* No p4d for 4-level paging: point the pgd to the pud page table */
                pgd_t new_pgd = __pgd(__pa(pud) | pgprot_val(pgtable_prot));

                set_pgd(pgd + pgd_index(restore_jump_address), new_pgd);
        }

        return 0;
}

static void *alloc_pgt_page(void *context)
{
        return (void *)get_safe_page(GFP_ATOMIC);
}

static int set_up_temporary_mappings(void)
{
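        /*
         * With .offset set to __PAGE_OFFSET, kernel_ident_mapping_init() maps
         * each physical range at its direct-mapping address rather than
         * strictly 1:1.
         */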
        struct x86_mapping_info info = {
                .alloc_pgt_page = alloc_pgt_page,
                .page_flag      = __PAGE_KERNEL_LARGE_EXEC,
                .offset         = __PAGE_OFFSET,
        };
        unsigned long mstart, mend;
        pgd_t *pgd;
        int result;
        int i;

        pgd = (pgd_t *)get_safe_page(GFP_ATOMIC);
        if (!pgd)
                return -ENOMEM;

        /* Prepare a temporary mapping for the kernel text */
        result = set_up_temporary_text_mapping(pgd);
        if (result)
                return result;

        /* Set up the direct mapping from scratch */
        for (i = 0; i < nr_pfn_mapped; i++) {
                mstart = pfn_mapped[i].start << PAGE_SHIFT;
                mend = pfn_mapped[i].end << PAGE_SHIFT;

                result = kernel_ident_mapping_init(&info, pgd, mstart, mend);
                if (result)
                        return result;
        }

        temp_level4_pgt = __pa(pgd);
        return 0;
}

static int relocate_restore_code(void)
{
        pgd_t *pgd;
        p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        relocated_restore_code = get_safe_page(GFP_ATOMIC);
        if (!relocated_restore_code)
                return -ENOMEM;

        memcpy((void *)relocated_restore_code, core_restore_code, PAGE_SIZE);

        /* Make the page containing the relocated code executable */
        pgd = (pgd_t *)__va(read_cr3_pa()) +
                pgd_index(relocated_restore_code);
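        /*
         * The page may be mapped by a large page at any level of the
         * hierarchy; clear _PAGE_NX in whichever entry actually maps it.
         */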
        p4d = p4d_offset(pgd, relocated_restore_code);
        if (p4d_large(*p4d)) {
                set_p4d(p4d, __p4d(p4d_val(*p4d) & ~_PAGE_NX));
                goto out;
        }
        pud = pud_offset(p4d, relocated_restore_code);
        if (pud_large(*pud)) {
                set_pud(pud, __pud(pud_val(*pud) & ~_PAGE_NX));
                goto out;
        }
        pmd = pmd_offset(pud, relocated_restore_code);
        if (pmd_large(*pmd)) {
                set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_NX));
                goto out;
        }
        pte = pte_offset_kernel(pmd, relocated_restore_code);
        set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_NX));
out:
        __flush_tlb_all();
        return 0;
}

asmlinkage int swsusp_arch_resume(void)
{
        int error;

        /* We have enough memory, and from now on we cannot recover. */
        error = set_up_temporary_mappings();
        if (error)
                return error;

        error = relocate_restore_code();
        if (error)
                return error;

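        /*
         * On success restore_image() does not return to this kernel: it
         * switches to the temporary page tables, runs the relocated copy of
         * core_restore_code and jumps to the image kernel's entry point at
         * restore_jump_address.
         */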
        restore_image();
        return 0;
}

/*
 * pfn_is_nosave - check whether the given pfn is in the 'nosave' section
 */

int pfn_is_nosave(unsigned long pfn)
{
        unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
        unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
        return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
}

#define MD5_DIGEST_SIZE 16

struct restore_data_record {
        unsigned long jump_address;
        unsigned long jump_address_phys;
        unsigned long cr3;
        unsigned long magic;
        u8 e820_digest[MD5_DIGEST_SIZE];
};

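/*
 * Identifies the layout of struct restore_data_record above; images whose
 * header carries a different magic are rejected on restore (see
 * arch_hibernation_header_restore()).
 */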
#define RESTORE_MAGIC 0x23456789ABCDEF01UL

#if IS_BUILTIN(CONFIG_CRYPTO_MD5)
/**
 * get_e820_md5 - calculate the MD5 digest of a given e820 table
 *
 * @table: the e820 table to be hashed
 * @buf: buffer to store the resulting MD5 digest in
 */
static int get_e820_md5(struct e820_table *table, void *buf)
{
        struct crypto_shash *tfm;
        struct shash_desc *desc;
        int size;
        int ret = 0;

        tfm = crypto_alloc_shash("md5", 0, 0);
        if (IS_ERR(tfm))
                return -ENOMEM;

        desc = kmalloc(sizeof(struct shash_desc) + crypto_shash_descsize(tfm),
                       GFP_KERNEL);
        if (!desc) {
                ret = -ENOMEM;
                goto free_tfm;
        }

        desc->tfm = tfm;
        desc->flags = 0;

        size = offsetof(struct e820_table, entries) +
                sizeof(struct e820_entry) * table->nr_entries;

        if (crypto_shash_digest(desc, (u8 *)table, size, buf))
                ret = -EINVAL;

        kzfree(desc);

free_tfm:
        crypto_free_shash(tfm);
        return ret;
}

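/*
 * A digest of the firmware-provided e820 table is saved in the image header
 * on suspend and compared against the current table on resume, so that
 * restoring on a changed memory map can be detected and refused.
 */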
static void hibernation_e820_save(void *buf)
{
        get_e820_md5(e820_table_firmware, buf);
}

static bool hibernation_e820_mismatch(void *buf)
{
        int ret;
        u8 result[MD5_DIGEST_SIZE];

        memset(result, 0, MD5_DIGEST_SIZE);
        /* If there is no digest in the suspend kernel's header, let it go. */
        if (!memcmp(result, buf, MD5_DIGEST_SIZE))
                return false;

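        /* If the digest cannot be computed now, fail safe: report a mismatch. */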
        ret = get_e820_md5(e820_table_firmware, result);
        if (ret)
                return true;

        return memcmp(result, buf, MD5_DIGEST_SIZE) ? true : false;
}
#else
static void hibernation_e820_save(void *buf)
{
}

static bool hibernation_e820_mismatch(void *buf)
{
        /* If MD5 is not built into the restore kernel, let it go. */
        return false;
}
#endif

/**
 * arch_hibernation_header_save - populate the architecture specific part
 *	of a hibernation image header
 * @addr: address to save the data at
 * @max_size: maximum size of the data that can be stored at @addr
 */
int arch_hibernation_header_save(void *addr, unsigned int max_size)
{
        struct restore_data_record *rdr = addr;

        if (max_size < sizeof(struct restore_data_record))
                return -EOVERFLOW;
        rdr->jump_address = (unsigned long)restore_registers;
        rdr->jump_address_phys = __pa_symbol(restore_registers);

        /*
         * The restore code fixes up CR3 and CR4 in the following sequence:
         *
         * [in hibernation asm]
         * 1. CR3 <= temporary page tables
         * 2. CR4 <= mmu_cr4_features (from the kernel that restores us)
         * 3. CR3 <= rdr->cr3
         * 4. CR4 <= mmu_cr4_features (from us, i.e. the image kernel)
         * [in restore_processor_state()]
         * 5. CR4 <= saved CR4
         * 6. CR3 <= saved CR3
         *
         * Our mmu_cr4_features has CR4.PCIDE=0, and toggling
         * CR4.PCIDE while CR3's PCID bits are nonzero is illegal, so
         * rdr->cr3 needs to point to valid page tables but must not
         * have any of the PCID bits set.
         */
        rdr->cr3 = restore_cr3 & ~CR3_PCID_MASK;

        rdr->magic = RESTORE_MAGIC;

        hibernation_e820_save(rdr->e820_digest);

        return 0;
}

/**
 * arch_hibernation_header_restore - read the architecture specific data
 *	from the hibernation image header
 * @addr: address to read the data from
 */
int arch_hibernation_header_restore(void *addr)
{
        struct restore_data_record *rdr = addr;

        restore_jump_address = rdr->jump_address;
        jump_address_phys = rdr->jump_address_phys;
        restore_cr3 = rdr->cr3;

        if (rdr->magic != RESTORE_MAGIC) {
                pr_crit("Unrecognized hibernate image header format!\n");
                return -EINVAL;
        }

        if (hibernation_e820_mismatch(rdr->e820_digest)) {
                pr_crit("Hibernate inconsistent memory map detected!\n");
                return -ENODEV;
        }

        return 0;
}