// SPDX-License-Identifier: GPL-2.0-only
/*
 * Hibernate support specific to ARM64
 *
 * Derived from work on ARM hibernation support by:
 *
 * Ubuntu project, hibernation support for mach-dove
 * Copyright (C) 2010 Nokia Corporation (Hiroshi Doyu)
 * Copyright (C) 2010 Texas Instruments, Inc. (Teerth Reddy et al.)
 * https://lkml.org/lkml/2010/6/18/4
 * https://lists.linux-foundation.org/pipermail/linux-pm/2010-June/027422.html
 * https://patchwork.kernel.org/patch/96442/
 *
 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
 */
#define pr_fmt(x) "hibernate: " x
#include <linux/cpu.h>
#include <linux/kvm_host.h>
#include <linux/mm.h>
#include <linux/pm.h>
#include <linux/sched.h>
#include <linux/suspend.h>
#include <linux/utsname.h>

#include <asm/barrier.h>
#include <asm/cacheflush.h>
#include <asm/cputype.h>
#include <asm/daifflags.h>
#include <asm/irqflags.h>
#include <asm/kexec.h>
#include <asm/memory.h>
#include <asm/mmu_context.h>
#include <asm/mte.h>
#include <asm/pgalloc.h>
#include <asm/pgtable-hwdef.h>
#include <asm/sections.h>
#include <asm/smp.h>
#include <asm/smp_plat.h>
#include <asm/suspend.h>
#include <asm/sysreg.h>
#include <asm/virt.h>

/*
 * Hibernate core relies on this value being 0 on resume, and marks it
 * __nosavedata assuming it will keep the resume kernel's '0' value. This
 * doesn't happen when either kernel uses KASLR.
 *
 * defined as "__visible int in_suspend __nosavedata" in
 * kernel/power/hibernate.c
 */
extern int in_suspend;

/* Do we need to reset el2? */
#define el2_reset_needed() (is_hyp_mode_available() && !is_kernel_in_hyp_mode())

/* temporary el2 vectors in the __hibernate_exit_text section. */
extern char hibernate_el2_vectors[];

/* hyp-stub vectors, used to restore el2 during resume from hibernate. */
extern char __hyp_stub_vectors[];

/*
 * The logical cpu number we should resume on, initialised to a non-cpu
 * number.
 */
static int sleep_cpu = -EINVAL;

/*
 * Values that may not change over hibernate/resume. We put the build number
 * and date in here so that we guarantee not to resume with a different
 * kernel.
 */
struct arch_hibernate_hdr_invariants {
	char		uts_version[__NEW_UTS_LEN + 1];
};

/* These values need to be known across a hibernate/restore. */
static struct arch_hibernate_hdr {
	struct arch_hibernate_hdr_invariants invariants;

	/* These are needed to find the relocated kernel if built with kaslr */
	phys_addr_t	ttbr1_el1;
	void		(*reenter_kernel)(void);

	/*
	 * We need to know where the __hyp_stub_vectors are after restore to
	 * re-configure el2.
	 */
	phys_addr_t	__hyp_stub_vectors;

	u64		sleep_cpu_mpidr;
} resume_hdr;

static inline void arch_hdr_invariants(struct arch_hibernate_hdr_invariants *i)
{
	memset(i, 0, sizeof(*i));
	memcpy(i->uts_version, init_utsname()->version, sizeof(i->uts_version));
}

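/*
 * Tell the hibernate core which pfns must not be saved: the kernel's
 * __nosave region and any crash kernel memory.
 */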
int pfn_is_nosave(unsigned long pfn)
{
	unsigned long nosave_begin_pfn = sym_to_pfn(&__nosave_begin);
	unsigned long nosave_end_pfn = sym_to_pfn(&__nosave_end - 1);

	return ((pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn)) ||
		crash_is_nosave(pfn);
}

void notrace save_processor_state(void)
{
	WARN_ON(num_online_cpus() != 1);
}

void notrace restore_processor_state(void)
{
}

int arch_hibernation_header_save(void *addr, unsigned int max_size)
{
	struct arch_hibernate_hdr *hdr = addr;

	if (max_size < sizeof(*hdr))
		return -EOVERFLOW;

	arch_hdr_invariants(&hdr->invariants);
	hdr->ttbr1_el1		= __pa_symbol(swapper_pg_dir);
	hdr->reenter_kernel	= _cpu_resume;

	/* We can't use __hyp_get_vectors() because kvm may still be loaded */
	if (el2_reset_needed())
		hdr->__hyp_stub_vectors = __pa_symbol(__hyp_stub_vectors);
	else
		hdr->__hyp_stub_vectors = 0;

	/* Save the mpidr of the cpu we called cpu_suspend() on... */
	if (sleep_cpu < 0) {
		pr_err("Failing to hibernate on an unknown CPU.\n");
		return -ENODEV;
	}
	hdr->sleep_cpu_mpidr = cpu_logical_map(sleep_cpu);
	pr_info("Hibernating on CPU %d [mpidr:0x%llx]\n", sleep_cpu,
		hdr->sleep_cpu_mpidr);

	return 0;
}
EXPORT_SYMBOL(arch_hibernation_header_save);

int arch_hibernation_header_restore(void *addr)
{
	int ret;
	struct arch_hibernate_hdr_invariants invariants;
	struct arch_hibernate_hdr *hdr = addr;

	arch_hdr_invariants(&invariants);
	if (memcmp(&hdr->invariants, &invariants, sizeof(invariants))) {
		pr_crit("Hibernate image not generated by this kernel!\n");
		return -EINVAL;
	}

	sleep_cpu = get_logical_index(hdr->sleep_cpu_mpidr);
	pr_info("Hibernated on CPU %d [mpidr:0x%llx]\n", sleep_cpu,
		hdr->sleep_cpu_mpidr);
	if (sleep_cpu < 0) {
		pr_crit("Hibernated on a CPU not known to this kernel!\n");
		sleep_cpu = -EINVAL;
		return -EINVAL;
	}

	ret = bringup_hibernate_cpu(sleep_cpu);
	if (ret) {
		sleep_cpu = -EINVAL;
		return ret;
	}

	resume_hdr = *hdr;

	return 0;
}
EXPORT_SYMBOL(arch_hibernation_header_restore);

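/*
 * Walk the temporary page tables rooted at trans_pgd, allocating any
 * missing levels from safe pages, and install a mapping of 'page' at
 * 'dst_addr'.
 */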
static int trans_pgd_map_page(pgd_t *trans_pgd, void *page,
			      unsigned long dst_addr,
			      pgprot_t pgprot)
{
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep;

	pgdp = pgd_offset_pgd(trans_pgd, dst_addr);
	if (pgd_none(READ_ONCE(*pgdp))) {
		pudp = (void *)get_safe_page(GFP_ATOMIC);
		if (!pudp)
			return -ENOMEM;
		pgd_populate(&init_mm, pgdp, pudp);
	}

	p4dp = p4d_offset(pgdp, dst_addr);
	if (p4d_none(READ_ONCE(*p4dp))) {
		pudp = (void *)get_safe_page(GFP_ATOMIC);
		if (!pudp)
			return -ENOMEM;
		p4d_populate(&init_mm, p4dp, pudp);
	}

	pudp = pud_offset(p4dp, dst_addr);
	if (pud_none(READ_ONCE(*pudp))) {
		pmdp = (void *)get_safe_page(GFP_ATOMIC);
		if (!pmdp)
			return -ENOMEM;
		pud_populate(&init_mm, pudp, pmdp);
	}

	pmdp = pmd_offset(pudp, dst_addr);
	if (pmd_none(READ_ONCE(*pmdp))) {
		ptep = (void *)get_safe_page(GFP_ATOMIC);
		if (!ptep)
			return -ENOMEM;
		pmd_populate_kernel(&init_mm, pmdp, ptep);
	}

	ptep = pte_offset_kernel(pmdp, dst_addr);
	set_pte(ptep, pfn_pte(virt_to_pfn(page), PAGE_KERNEL_EXEC));

	return 0;
}

/*
 * Copies length bytes, starting at src_start, into a new page, performs
 * cache maintenance, then maps it at the specified low address as
 * executable.
 *
 * This is used by hibernate to copy the code it needs to execute when
 * overwriting the kernel text. This function generates a new set of page
 * tables, which it loads into ttbr0.
 *
 * Length is provided as we probably only want 4K of data, even on a 64K
 * page system.
 */
static int create_safe_exec_page(void *src_start, size_t length,
				 unsigned long dst_addr,
				 phys_addr_t *phys_dst_addr)
{
	void *page = (void *)get_safe_page(GFP_ATOMIC);
	pgd_t *trans_pgd;
	int rc;

	if (!page)
		return -ENOMEM;

	memcpy(page, src_start, length);
	__flush_icache_range((unsigned long)page, (unsigned long)page + length);

	trans_pgd = (void *)get_safe_page(GFP_ATOMIC);
	if (!trans_pgd)
		return -ENOMEM;

	rc = trans_pgd_map_page(trans_pgd, page, dst_addr,
				PAGE_KERNEL_EXEC);
	if (rc)
		return rc;

	/*
	 * Load our new page tables. A strict BBM approach requires that we
	 * ensure that TLBs are free of any entries that may overlap with the
	 * global mappings we are about to install.
	 *
	 * For a real hibernate/resume cycle TTBR0 currently points to a zero
	 * page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI
	 * runtime services), while for a userspace-driven test_resume cycle it
	 * points to userspace page tables (and we must point it at a zero page
	 * ourselves). Elsewhere we only (un)install the idmap with preemption
	 * disabled, so T0SZ should be as required regardless.
	 */
	cpu_set_reserved_ttbr0();
	local_flush_tlb_all();
	write_sysreg(phys_to_ttbr(virt_to_phys(trans_pgd)), ttbr0_el1);
	isb();

	*phys_dst_addr = virt_to_phys(page);

	return 0;
}

#define dcache_clean_range(start, end)	__flush_dcache_area(start, (end - start))

#ifdef CONFIG_ARM64_MTE

static DEFINE_XARRAY(mte_pages);

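/*
 * Stash a copy of the page's MTE tags in freshly allocated tag storage,
 * indexed in the mte_pages xarray by pfn.
 */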
static int save_tags(struct page *page, unsigned long pfn)
{
	void *tag_storage, *ret;

	tag_storage = mte_allocate_tag_storage();
	if (!tag_storage)
		return -ENOMEM;

	mte_save_page_tags(page_address(page), tag_storage);

	ret = xa_store(&mte_pages, pfn, tag_storage, GFP_KERNEL);
	if (WARN(xa_is_err(ret), "Failed to store MTE tags")) {
		mte_free_tag_storage(tag_storage);
		return xa_err(ret);
	} else if (WARN(ret, "swsusp: %s: Duplicate entry", __func__)) {
		mte_free_tag_storage(ret);
	}

	return 0;
}

static void swsusp_mte_free_storage(void)
{
	XA_STATE(xa_state, &mte_pages, 0);
	void *tags;

	xa_lock(&mte_pages);
	xas_for_each(&xa_state, tags, ULONG_MAX) {
		mte_free_tag_storage(tags);
	}
	xa_unlock(&mte_pages);

	xa_destroy(&mte_pages);
}

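/*
 * Walk every populated zone and save the tags of each online, MTE-tagged
 * page before the memory image is written out.
 */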
static int swsusp_mte_save_tags(void)
{
	struct zone *zone;
	unsigned long pfn, max_zone_pfn;
	int ret = 0;
	int n = 0;

	if (!system_supports_mte())
		return 0;

	for_each_populated_zone(zone) {
		max_zone_pfn = zone_end_pfn(zone);
		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) {
			struct page *page = pfn_to_online_page(pfn);

			if (!page)
				continue;

			if (!test_bit(PG_mte_tagged, &page->flags))
				continue;

			ret = save_tags(page, pfn);
			if (ret) {
				swsusp_mte_free_storage();
				goto out;
			}

			n++;
		}
	}
	pr_info("Saved %d MTE pages\n", n);

out:
	return ret;
}

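/*
 * Called on the resume path once memory has been restored: copy the saved
 * tags back into each page and release the temporary tag storage.
 */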
static void swsusp_mte_restore_tags(void)
{
	XA_STATE(xa_state, &mte_pages, 0);
	int n = 0;
	void *tags;

	xa_lock(&mte_pages);
	xas_for_each(&xa_state, tags, ULONG_MAX) {
		unsigned long pfn = xa_state.xa_index;
		struct page *page = pfn_to_online_page(pfn);

		mte_restore_page_tags(page_address(page), tags);

		mte_free_tag_storage(tags);
		n++;
	}
	xa_unlock(&mte_pages);

	pr_info("Restored %d MTE pages\n", n);

	xa_destroy(&mte_pages);
}

#else	/* CONFIG_ARM64_MTE */

static int swsusp_mte_save_tags(void)
{
	return 0;
}

static void swsusp_mte_restore_tags(void)
{
}

#endif	/* CONFIG_ARM64_MTE */

int swsusp_arch_suspend(void)
{
	int ret = 0;
	unsigned long flags;
	struct sleep_stack_data state;

	if (cpus_are_stuck_in_kernel()) {
		pr_err("Can't hibernate: no mechanism to offline secondary CPUs.\n");
		return -EBUSY;
	}

	flags = local_daif_save();

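	/*
	 * __cpu_suspend_enter() returns non-zero on the initial pass, when
	 * the CPU context is being saved; it returns zero when this point
	 * is reached again via cpu_resume after the image has been restored.
	 */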
	if (__cpu_suspend_enter(&state)) {
		/* make the crash dump kernel image visible/saveable */
		crash_prepare_suspend();

		ret = swsusp_mte_save_tags();
		if (ret)
			return ret;

		sleep_cpu = smp_processor_id();
		ret = swsusp_save();
	} else {
		/* Clean kernel core startup/idle code to PoC */
		dcache_clean_range(__mmuoff_data_start, __mmuoff_data_end);
		dcache_clean_range(__idmap_text_start, __idmap_text_end);

		/* Clean kvm setup code to PoC? */
		if (el2_reset_needed()) {
			dcache_clean_range(__hyp_idmap_text_start, __hyp_idmap_text_end);
			dcache_clean_range(__hyp_text_start, __hyp_text_end);
		}

		swsusp_mte_restore_tags();

		/* make the crash dump kernel image protected again */
		crash_post_resume();

		/*
		 * Tell the hibernation core that we've just restored
		 * the memory
		 */
		in_suspend = 0;

		sleep_cpu = -EINVAL;
		__cpu_suspend_exit();

		/*
		 * Just in case the boot kernel did turn the SSBD
		 * mitigation off behind our back, let's set the state
		 * to what we expect it to be.
		 */
		spectre_v4_enable_mitigation(NULL);
	}

	local_daif_restore(flags);

	return ret;
}

static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
{
	pte_t pte = READ_ONCE(*src_ptep);

	if (pte_valid(pte)) {
		/*
		 * Resume will overwrite areas that may be marked
		 * read only (code, rodata). Clear the RDONLY bit from
		 * the temporary mappings we use during restore.
		 */
		set_pte(dst_ptep, pte_mkwrite(pte));
	} else if (debug_pagealloc_enabled() && !pte_none(pte)) {
		/*
		 * debug_pagealloc will have removed the PTE_VALID bit if
		 * the page isn't in use by the resume kernel. It may have
		 * been in use by the original kernel, in which case we need
		 * to put it back in our copy to do the restore.
		 *
		 * Before marking this entry valid, check that the pfn is
		 * valid.
		 */
		BUG_ON(!pfn_valid(pte_pfn(pte)));

		set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte)));
	}
}

static int copy_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long start,
		    unsigned long end)
{
	pte_t *src_ptep;
	pte_t *dst_ptep;
	unsigned long addr = start;

	dst_ptep = (pte_t *)get_safe_page(GFP_ATOMIC);
	if (!dst_ptep)
		return -ENOMEM;
	pmd_populate_kernel(&init_mm, dst_pmdp, dst_ptep);
	dst_ptep = pte_offset_kernel(dst_pmdp, start);

	src_ptep = pte_offset_kernel(src_pmdp, start);
	do {
		_copy_pte(dst_ptep, src_ptep, addr);
	} while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end);

	return 0;
}

static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start,
		    unsigned long end)
{
	pmd_t *src_pmdp;
	pmd_t *dst_pmdp;
	unsigned long next;
	unsigned long addr = start;

	if (pud_none(READ_ONCE(*dst_pudp))) {
		dst_pmdp = (pmd_t *)get_safe_page(GFP_ATOMIC);
		if (!dst_pmdp)
			return -ENOMEM;
		pud_populate(&init_mm, dst_pudp, dst_pmdp);
	}
	dst_pmdp = pmd_offset(dst_pudp, start);

	src_pmdp = pmd_offset(src_pudp, start);
	do {
		pmd_t pmd = READ_ONCE(*src_pmdp);

		next = pmd_addr_end(addr, end);
		if (pmd_none(pmd))
			continue;
		if (pmd_table(pmd)) {
			if (copy_pte(dst_pmdp, src_pmdp, addr, next))
				return -ENOMEM;
		} else {
			set_pmd(dst_pmdp,
				__pmd(pmd_val(pmd) & ~PMD_SECT_RDONLY));
		}
	} while (dst_pmdp++, src_pmdp++, addr = next, addr != end);

	return 0;
}

static int copy_pud(p4d_t *dst_p4dp, p4d_t *src_p4dp, unsigned long start,
		    unsigned long end)
{
	pud_t *dst_pudp;
	pud_t *src_pudp;
	unsigned long next;
	unsigned long addr = start;

	if (p4d_none(READ_ONCE(*dst_p4dp))) {
		dst_pudp = (pud_t *)get_safe_page(GFP_ATOMIC);
		if (!dst_pudp)
			return -ENOMEM;
		p4d_populate(&init_mm, dst_p4dp, dst_pudp);
	}
	dst_pudp = pud_offset(dst_p4dp, start);

	src_pudp = pud_offset(src_p4dp, start);
	do {
		pud_t pud = READ_ONCE(*src_pudp);

		next = pud_addr_end(addr, end);
		if (pud_none(pud))
			continue;
		if (pud_table(pud)) {
			if (copy_pmd(dst_pudp, src_pudp, addr, next))
				return -ENOMEM;
		} else {
			set_pud(dst_pudp,
				__pud(pud_val(pud) & ~PUD_SECT_RDONLY));
		}
	} while (dst_pudp++, src_pudp++, addr = next, addr != end);

	return 0;
}

static int copy_p4d(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start,
		    unsigned long end)
{
	p4d_t *dst_p4dp;
	p4d_t *src_p4dp;
	unsigned long next;
	unsigned long addr = start;

	dst_p4dp = p4d_offset(dst_pgdp, start);
	src_p4dp = p4d_offset(src_pgdp, start);
	do {
		next = p4d_addr_end(addr, end);
		if (p4d_none(READ_ONCE(*src_p4dp)))
			continue;
		if (copy_pud(dst_p4dp, src_p4dp, addr, next))
			return -ENOMEM;
	} while (dst_p4dp++, src_p4dp++, addr = next, addr != end);

	return 0;
}

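/*
 * Copy the kernel page tables covering [start, end) into dst_pgdp,
 * clearing the read-only attributes so the restore can write through
 * the copied mappings.
 */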
static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start,
			    unsigned long end)
{
	unsigned long next;
	unsigned long addr = start;
	pgd_t *src_pgdp = pgd_offset_k(start);

	dst_pgdp = pgd_offset_pgd(dst_pgdp, start);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none(READ_ONCE(*src_pgdp)))
			continue;
		if (copy_p4d(dst_pgdp, src_pgdp, addr, next))
			return -ENOMEM;
	} while (dst_pgdp++, src_pgdp++, addr = next, addr != end);

	return 0;
}

static int trans_pgd_create_copy(pgd_t **dst_pgdp, unsigned long start,
				 unsigned long end)
{
	int rc;
	pgd_t *trans_pgd = (pgd_t *)get_safe_page(GFP_ATOMIC);

	if (!trans_pgd) {
		pr_err("Failed to allocate memory for temporary page tables.\n");
		return -ENOMEM;
	}

	rc = copy_page_tables(trans_pgd, start, end);
	if (!rc)
		*dst_pgdp = trans_pgd;

	return rc;
}

/*
 * Set up, then resume from the hibernate image using swsusp_arch_suspend_exit().
 *
 * Memory allocated by get_safe_page() will be dealt with by the hibernate
 * code; we don't need to free it here.
 */
int swsusp_arch_resume(void)
{
	int rc;
	void *zero_page;
	size_t exit_size;
	pgd_t *tmp_pg_dir;
	phys_addr_t phys_hibernate_exit;
	void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *,
					  void *, phys_addr_t, phys_addr_t);

	/*
	 * Restoring the memory image will overwrite the ttbr1 page tables.
	 * Create a second copy of just the linear map, and use this when
	 * restoring.
	 */
	rc = trans_pgd_create_copy(&tmp_pg_dir, PAGE_OFFSET, PAGE_END);
	if (rc)
		return rc;

	/*
	 * We need a zero page that is zero before & after resume in order
	 * to break-before-make on the ttbr1 page tables.
	 */
	zero_page = (void *)get_safe_page(GFP_ATOMIC);
	if (!zero_page) {
		pr_err("Failed to allocate zero page.\n");
		return -ENOMEM;
	}

	/*
	 * Locate the exit code in the bottom-but-one page, so that *NULL
	 * still has disastrous effects.
	 */
	hibernate_exit = (void *)PAGE_SIZE;
	exit_size = __hibernate_exit_text_end - __hibernate_exit_text_start;
	/*
	 * Copy swsusp_arch_suspend_exit() to a safe page. This will generate
	 * a new set of ttbr0 page tables and load them.
	 */
	rc = create_safe_exec_page(__hibernate_exit_text_start, exit_size,
				   (unsigned long)hibernate_exit,
				   &phys_hibernate_exit);
	if (rc) {
		pr_err("Failed to create safe executable page for hibernate_exit code.\n");
		return rc;
	}

	/*
	 * The hibernate exit text contains a set of el2 vectors that will
	 * be executed at el2 with the mmu off in order to reload hyp-stub.
	 */
	__flush_dcache_area(hibernate_exit, exit_size);

	/*
	 * KASLR will cause the el2 vectors to be in a different location in
	 * the resumed kernel. Load hibernate's temporary copy into el2.
	 *
	 * We can skip this step if we booted at EL1, or are running with VHE.
	 */
	if (el2_reset_needed()) {
		phys_addr_t el2_vectors = phys_hibernate_exit;	/* base */
		el2_vectors += hibernate_el2_vectors -
			       __hibernate_exit_text_start;	/* offset */

		__hyp_set_vectors(el2_vectors);
	}

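	/*
	 * hibernate_exit() runs from the safe page with ttbr0 pointing at
	 * the copied tables; it restores the image and re-enters the image
	 * kernel, so it does not return here.
	 */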
	hibernate_exit(virt_to_phys(tmp_pg_dir), resume_hdr.ttbr1_el1,
		       resume_hdr.reenter_kernel, restore_pblist,
		       resume_hdr.__hyp_stub_vectors, virt_to_phys(zero_page));

	return 0;
}

int hibernate_resume_nonboot_cpu_disable(void)
{
	if (sleep_cpu < 0) {
		pr_err("Failing to resume from hibernate on an unknown CPU.\n");
		return -ENODEV;
	}

	return freeze_secondary_cpus(sleep_cpu);
}