Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | /* |
| 3 | * SPARC64 Huge TLB page support. |
| 4 | * |
| 5 | * Copyright (C) 2002, 2003, 2006 David S. Miller (davem@davemloft.net) |
| 6 | */ |
| 7 | |
| 8 | #include <linux/fs.h> |
| 9 | #include <linux/mm.h> |
| 10 | #include <linux/sched/mm.h> |
| 11 | #include <linux/hugetlb.h> |
| 12 | #include <linux/pagemap.h> |
| 13 | #include <linux/sysctl.h> |
| 14 | |
| 15 | #include <asm/mman.h> |
| 16 | #include <asm/pgalloc.h> |
| 17 | #include <asm/pgtable.h> |
| 18 | #include <asm/tlb.h> |
| 19 | #include <asm/tlbflush.h> |
| 20 | #include <asm/cacheflush.h> |
| 21 | #include <asm/mmu_context.h> |
| 22 | |
| 23 | /* Slightly simplified from the non-hugepage variant because by |
| 24 | * definition we don't have to worry about any page coloring stuff |
| 25 | */ |
| 26 | |
| 27 | static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp, |
| 28 | unsigned long addr, |
| 29 | unsigned long len, |
| 30 | unsigned long pgoff, |
| 31 | unsigned long flags) |
| 32 | { |
| 33 | struct hstate *h = hstate_file(filp); |
| 34 | unsigned long task_size = TASK_SIZE; |
| 35 | struct vm_unmapped_area_info info; |
| 36 | |
| 37 | if (test_thread_flag(TIF_32BIT)) |
| 38 | task_size = STACK_TOP32; |
| 39 | |
| 40 | info.flags = 0; |
| 41 | info.length = len; |
| 42 | info.low_limit = TASK_UNMAPPED_BASE; |
| 43 | info.high_limit = min(task_size, VA_EXCLUDE_START); |
| 44 | info.align_mask = PAGE_MASK & ~huge_page_mask(h); |
| 45 | info.align_offset = 0; |
| 46 | addr = vm_unmapped_area(&info); |
| 47 | |
| 48 | if ((addr & ~PAGE_MASK) && task_size > VA_EXCLUDE_END) { |
| 49 | VM_BUG_ON(addr != -ENOMEM); |
| 50 | info.low_limit = VA_EXCLUDE_END; |
| 51 | info.high_limit = task_size; |
| 52 | addr = vm_unmapped_area(&info); |
| 53 | } |
| 54 | |
| 55 | return addr; |
| 56 | } |
| 57 | |
| 58 | static unsigned long |
| 59 | hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, |
| 60 | const unsigned long len, |
| 61 | const unsigned long pgoff, |
| 62 | const unsigned long flags) |
| 63 | { |
| 64 | struct hstate *h = hstate_file(filp); |
| 65 | struct mm_struct *mm = current->mm; |
| 66 | unsigned long addr = addr0; |
| 67 | struct vm_unmapped_area_info info; |
| 68 | |
| 69 | /* This should only ever run for 32-bit processes. */ |
| 70 | BUG_ON(!test_thread_flag(TIF_32BIT)); |
| 71 | |
| 72 | info.flags = VM_UNMAPPED_AREA_TOPDOWN; |
| 73 | info.length = len; |
| 74 | info.low_limit = PAGE_SIZE; |
| 75 | info.high_limit = mm->mmap_base; |
| 76 | info.align_mask = PAGE_MASK & ~huge_page_mask(h); |
| 77 | info.align_offset = 0; |
| 78 | addr = vm_unmapped_area(&info); |
| 79 | |
| 80 | /* |
| 81 | * A failed mmap() very likely causes application failure, |
| 82 | * so fall back to the bottom-up function here. This scenario |
| 83 | * can happen with large stack limits and large mmap() |
| 84 | * allocations. |
| 85 | */ |
| 86 | if (addr & ~PAGE_MASK) { |
| 87 | VM_BUG_ON(addr != -ENOMEM); |
| 88 | info.flags = 0; |
| 89 | info.low_limit = TASK_UNMAPPED_BASE; |
| 90 | info.high_limit = STACK_TOP32; |
| 91 | addr = vm_unmapped_area(&info); |
| 92 | } |
| 93 | |
| 94 | return addr; |
| 95 | } |
| 96 | |
| 97 | unsigned long |
| 98 | hugetlb_get_unmapped_area(struct file *file, unsigned long addr, |
| 99 | unsigned long len, unsigned long pgoff, unsigned long flags) |
| 100 | { |
| 101 | struct hstate *h = hstate_file(file); |
| 102 | struct mm_struct *mm = current->mm; |
| 103 | struct vm_area_struct *vma; |
| 104 | unsigned long task_size = TASK_SIZE; |
| 105 | |
| 106 | if (test_thread_flag(TIF_32BIT)) |
| 107 | task_size = STACK_TOP32; |
| 108 | |
| 109 | if (len & ~huge_page_mask(h)) |
| 110 | return -EINVAL; |
| 111 | if (len > task_size) |
| 112 | return -ENOMEM; |
| 113 | |
| 114 | if (flags & MAP_FIXED) { |
| 115 | if (prepare_hugepage_range(file, addr, len)) |
| 116 | return -EINVAL; |
| 117 | return addr; |
| 118 | } |
| 119 | |
| 120 | if (addr) { |
| 121 | addr = ALIGN(addr, huge_page_size(h)); |
| 122 | vma = find_vma(mm, addr); |
| 123 | if (task_size - len >= addr && |
| 124 | (!vma || addr + len <= vm_start_gap(vma))) |
| 125 | return addr; |
| 126 | } |
| 127 | if (mm->get_unmapped_area == arch_get_unmapped_area) |
| 128 | return hugetlb_get_unmapped_area_bottomup(file, addr, len, |
| 129 | pgoff, flags); |
| 130 | else |
| 131 | return hugetlb_get_unmapped_area_topdown(file, addr, len, |
| 132 | pgoff, flags); |
| 133 | } |
| 134 | |
/*
 * sun4u counterpart of sun4v_hugepage_shift_to_tte(): returns @entry
 * unchanged; @shift is ignored.
 */
static pte_t sun4u_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
{
	return entry;
}
| 139 | |
| 140 | static pte_t sun4v_hugepage_shift_to_tte(pte_t entry, unsigned int shift) |
| 141 | { |
| 142 | unsigned long hugepage_size = _PAGE_SZ4MB_4V; |
| 143 | |
| 144 | pte_val(entry) = pte_val(entry) & ~_PAGE_SZALL_4V; |
| 145 | |
| 146 | switch (shift) { |
| 147 | case HPAGE_16GB_SHIFT: |
| 148 | hugepage_size = _PAGE_SZ16GB_4V; |
| 149 | pte_val(entry) |= _PAGE_PUD_HUGE; |
| 150 | break; |
| 151 | case HPAGE_2GB_SHIFT: |
| 152 | hugepage_size = _PAGE_SZ2GB_4V; |
| 153 | pte_val(entry) |= _PAGE_PMD_HUGE; |
| 154 | break; |
| 155 | case HPAGE_256MB_SHIFT: |
| 156 | hugepage_size = _PAGE_SZ256MB_4V; |
| 157 | pte_val(entry) |= _PAGE_PMD_HUGE; |
| 158 | break; |
| 159 | case HPAGE_SHIFT: |
| 160 | pte_val(entry) |= _PAGE_PMD_HUGE; |
| 161 | break; |
| 162 | case HPAGE_64K_SHIFT: |
| 163 | hugepage_size = _PAGE_SZ64K_4V; |
| 164 | break; |
| 165 | default: |
| 166 | WARN_ONCE(1, "unsupported hugepage shift=%u\n", shift); |
| 167 | } |
| 168 | |
| 169 | pte_val(entry) = pte_val(entry) | hugepage_size; |
| 170 | return entry; |
| 171 | } |
| 172 | |
| 173 | static pte_t hugepage_shift_to_tte(pte_t entry, unsigned int shift) |
| 174 | { |
| 175 | if (tlb_type == hypervisor) |
| 176 | return sun4v_hugepage_shift_to_tte(entry, shift); |
| 177 | else |
| 178 | return sun4u_hugepage_shift_to_tte(entry, shift); |
| 179 | } |
| 180 | |
| 181 | pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, |
| 182 | struct page *page, int writeable) |
| 183 | { |
| 184 | unsigned int shift = huge_page_shift(hstate_vma(vma)); |
| 185 | pte_t pte; |
| 186 | |
| 187 | pte = hugepage_shift_to_tte(entry, shift); |
| 188 | |
| 189 | #ifdef CONFIG_SPARC64 |
| 190 | /* If this vma has ADI enabled on it, turn on TTE.mcd |
| 191 | */ |
| 192 | if (vma->vm_flags & VM_SPARC_ADI) |
| 193 | return pte_mkmcd(pte); |
| 194 | else |
| 195 | return pte_mknotmcd(pte); |
| 196 | #else |
| 197 | return pte; |
| 198 | #endif |
| 199 | } |
| 200 | |
| 201 | static unsigned int sun4v_huge_tte_to_shift(pte_t entry) |
| 202 | { |
| 203 | unsigned long tte_szbits = pte_val(entry) & _PAGE_SZALL_4V; |
| 204 | unsigned int shift; |
| 205 | |
| 206 | switch (tte_szbits) { |
| 207 | case _PAGE_SZ16GB_4V: |
| 208 | shift = HPAGE_16GB_SHIFT; |
| 209 | break; |
| 210 | case _PAGE_SZ2GB_4V: |
| 211 | shift = HPAGE_2GB_SHIFT; |
| 212 | break; |
| 213 | case _PAGE_SZ256MB_4V: |
| 214 | shift = HPAGE_256MB_SHIFT; |
| 215 | break; |
| 216 | case _PAGE_SZ4MB_4V: |
| 217 | shift = REAL_HPAGE_SHIFT; |
| 218 | break; |
| 219 | case _PAGE_SZ64K_4V: |
| 220 | shift = HPAGE_64K_SHIFT; |
| 221 | break; |
| 222 | default: |
| 223 | shift = PAGE_SHIFT; |
| 224 | break; |
| 225 | } |
| 226 | return shift; |
| 227 | } |
| 228 | |
| 229 | static unsigned int sun4u_huge_tte_to_shift(pte_t entry) |
| 230 | { |
| 231 | unsigned long tte_szbits = pte_val(entry) & _PAGE_SZALL_4U; |
| 232 | unsigned int shift; |
| 233 | |
| 234 | switch (tte_szbits) { |
| 235 | case _PAGE_SZ256MB_4U: |
| 236 | shift = HPAGE_256MB_SHIFT; |
| 237 | break; |
| 238 | case _PAGE_SZ4MB_4U: |
| 239 | shift = REAL_HPAGE_SHIFT; |
| 240 | break; |
| 241 | case _PAGE_SZ64K_4U: |
| 242 | shift = HPAGE_64K_SHIFT; |
| 243 | break; |
| 244 | default: |
| 245 | shift = PAGE_SHIFT; |
| 246 | break; |
| 247 | } |
| 248 | return shift; |
| 249 | } |
| 250 | |
| 251 | static unsigned int huge_tte_to_shift(pte_t entry) |
| 252 | { |
| 253 | unsigned long shift; |
| 254 | |
| 255 | if (tlb_type == hypervisor) |
| 256 | shift = sun4v_huge_tte_to_shift(entry); |
| 257 | else |
| 258 | shift = sun4u_huge_tte_to_shift(entry); |
| 259 | |
| 260 | if (shift == PAGE_SHIFT) |
| 261 | WARN_ONCE(1, "tto_to_shift: invalid hugepage tte=0x%lx\n", |
| 262 | pte_val(entry)); |
| 263 | |
| 264 | return shift; |
| 265 | } |
| 266 | |
| 267 | static unsigned long huge_tte_to_size(pte_t pte) |
| 268 | { |
| 269 | unsigned long size = 1UL << huge_tte_to_shift(pte); |
| 270 | |
| 271 | if (size == REAL_HPAGE_SIZE) |
| 272 | size = HPAGE_SIZE; |
| 273 | return size; |
| 274 | } |
| 275 | |
| 276 | pte_t *huge_pte_alloc(struct mm_struct *mm, |
| 277 | unsigned long addr, unsigned long sz) |
| 278 | { |
| 279 | pgd_t *pgd; |
| 280 | pud_t *pud; |
| 281 | pmd_t *pmd; |
| 282 | |
| 283 | pgd = pgd_offset(mm, addr); |
| 284 | pud = pud_alloc(mm, pgd, addr); |
| 285 | if (!pud) |
| 286 | return NULL; |
| 287 | if (sz >= PUD_SIZE) |
| 288 | return (pte_t *)pud; |
| 289 | pmd = pmd_alloc(mm, pud, addr); |
| 290 | if (!pmd) |
| 291 | return NULL; |
| 292 | if (sz >= PMD_SIZE) |
| 293 | return (pte_t *)pmd; |
| 294 | return pte_alloc_map(mm, pmd, addr); |
| 295 | } |
| 296 | |
| 297 | pte_t *huge_pte_offset(struct mm_struct *mm, |
| 298 | unsigned long addr, unsigned long sz) |
| 299 | { |
| 300 | pgd_t *pgd; |
| 301 | pud_t *pud; |
| 302 | pmd_t *pmd; |
| 303 | |
| 304 | pgd = pgd_offset(mm, addr); |
| 305 | if (pgd_none(*pgd)) |
| 306 | return NULL; |
| 307 | pud = pud_offset(pgd, addr); |
| 308 | if (pud_none(*pud)) |
| 309 | return NULL; |
| 310 | if (is_hugetlb_pud(*pud)) |
| 311 | return (pte_t *)pud; |
| 312 | pmd = pmd_offset(pud, addr); |
| 313 | if (pmd_none(*pmd)) |
| 314 | return NULL; |
| 315 | if (is_hugetlb_pmd(*pmd)) |
| 316 | return (pte_t *)pmd; |
| 317 | return pte_offset_map(pmd, addr); |
| 318 | } |
| 319 | |
/*
 * Install the huge mapping @entry at @ptep for address @addr in @mm.
 *
 * The page size is decoded from @entry.  Depending on that size the
 * mapping is written as one or more consecutive entries starting at
 * @ptep, each stepping by PUD_SIZE, PMD_SIZE or PAGE_SIZE.  The previous
 * contents of *@ptep are handed to maybe_tlb_batch_add().
 */
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t entry)
{
	unsigned int nptes, orig_shift, shift;
	unsigned long i, size;
	pte_t orig;

	size = huge_tte_to_size(entry);

	/*
	 * Pick the shift that one written entry covers.  The initial
	 * assignment is redundant: every branch below sets shift.
	 */
	shift = PAGE_SHIFT;
	if (size >= PUD_SIZE)
		shift = PUD_SHIFT;
	else if (size >= PMD_SIZE)
		shift = PMD_SHIFT;
	else
		shift = PAGE_SHIFT;

	/* Number of consecutive entries needed to cover the huge page. */
	nptes = size >> shift;

	/* Only count the mapping when going from not-present to present. */
	if (!pte_present(*ptep) && pte_present(entry))
		mm->context.hugetlb_pte_count += nptes;

	/* Round addr down to the start of the huge page. */
	addr &= ~(size - 1);
	/* Capture the old entry before the loop below overwrites it. */
	orig = *ptep;
	orig_shift = pte_none(orig) ? PAGE_SHIFT : huge_tte_to_shift(orig);

	/* Entry i carries the value of entry 0 advanced by i << shift. */
	for (i = 0; i < nptes; i++)
		ptep[i] = __pte(pte_val(entry) + (i << shift));

	maybe_tlb_batch_add(mm, addr, ptep, orig, 0, orig_shift);
	/* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
	if (size == HPAGE_SIZE)
		maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, orig, 0,
				    orig_shift);
}
| 355 | |
/*
 * Clear the huge mapping at @ptep for address @addr in @mm and return
 * the entry that was there.
 *
 * The page size is decoded from the current entry; every consecutive
 * entry that makes up the huge page is zeroed, and the old entry is
 * handed to maybe_tlb_batch_add().
 */
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep)
{
	unsigned int i, nptes, orig_shift, shift;
	unsigned long size;
	pte_t entry;

	/* Snapshot the entry before it is cleared below. */
	entry = *ptep;
	size = huge_tte_to_size(entry);

	/*
	 * Pick the shift that one entry covers.  The initial assignment
	 * is redundant: every branch below sets shift.
	 */
	shift = PAGE_SHIFT;
	if (size >= PUD_SIZE)
		shift = PUD_SHIFT;
	else if (size >= PMD_SIZE)
		shift = PMD_SHIFT;
	else
		shift = PAGE_SHIFT;

	/* Number of consecutive entries that make up the huge page. */
	nptes = size >> shift;
	orig_shift = pte_none(entry) ? PAGE_SHIFT : huge_tte_to_shift(entry);

	/* Undo the accounting for a mapping that was present. */
	if (pte_present(entry))
		mm->context.hugetlb_pte_count -= nptes;

	/* Round addr down to the start of the huge page. */
	addr &= ~(size - 1);
	for (i = 0; i < nptes; i++)
		ptep[i] = __pte(0UL);

	maybe_tlb_batch_add(mm, addr, ptep, entry, 0, orig_shift);
	/* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
	if (size == HPAGE_SIZE)
		maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0,
				    orig_shift);

	return entry;
}
| 392 | |
| 393 | int pmd_huge(pmd_t pmd) |
| 394 | { |
| 395 | return !pmd_none(pmd) && |
| 396 | (pmd_val(pmd) & (_PAGE_VALID|_PAGE_PMD_HUGE)) != _PAGE_VALID; |
| 397 | } |
| 398 | |
| 399 | int pud_huge(pud_t pud) |
| 400 | { |
| 401 | return !pud_none(pud) && |
| 402 | (pud_val(pud) & (_PAGE_VALID|_PAGE_PUD_HUGE)) != _PAGE_VALID; |
| 403 | } |
| 404 | |
/*
 * Detach and free the pte table that @pmd points to: the pmd entry is
 * cleared, the table is handed to pte_free_tlb(), and the mm's pte-table
 * count is decremented.
 */
static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
				   unsigned long addr)
{
	/* Fetch the table reference before the pmd entry is cleared. */
	pgtable_t token = pmd_pgtable(*pmd);

	pmd_clear(pmd);
	pte_free_tlb(tlb, token, addr);
	mm_dec_nr_ptes(tlb->mm);
}
| 414 | |
/*
 * Tear down the pmd entries covering [@addr, @end) under @pud, then free
 * the pmd table itself if the PUD-aligned span it serves lies entirely
 * within [@floor, @ceiling).
 *
 * Entries that are none are skipped, hugetlb pmds are simply cleared,
 * and any other entry has its pte table freed.
 */
static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
				   unsigned long addr, unsigned long end,
				   unsigned long floor, unsigned long ceiling)
{
	pmd_t *pmd;
	unsigned long next;
	unsigned long start;

	start = addr;
	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none(*pmd))
			continue;
		if (is_hugetlb_pmd(*pmd))
			pmd_clear(pmd);
		else
			hugetlb_free_pte_range(tlb, pmd, addr);
	} while (pmd++, addr = next, addr != end);

	/* Keep the pmd table if its PUD-aligned start is below floor. */
	start &= PUD_MASK;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= PUD_MASK;
		if (!ceiling)
			return;
	}
	/*
	 * Keep the table if the range ends past the ceiling.  Subtracting
	 * one from both sides makes a ceiling of 0 wrap to the maximum
	 * unsigned long, so this test then never fires.
	 */
	if (end - 1 > ceiling - 1)
		return;

	/* Look up the table start before the pud entry is cleared. */
	pmd = pmd_offset(pud, start);
	pud_clear(pud);
	pmd_free_tlb(tlb, pmd, start);
	mm_dec_nr_pmds(tlb->mm);
}
| 451 | |
/*
 * Tear down the pud entries covering [@addr, @end) under @pgd, then free
 * the pud table itself if the PGDIR-aligned span it serves lies entirely
 * within [@floor, @ceiling).
 *
 * Entries rejected by pud_none_or_clear_bad() are skipped, hugetlb puds
 * are simply cleared, and any other entry is descended into via
 * hugetlb_free_pmd_range().
 */
static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
				   unsigned long addr, unsigned long end,
				   unsigned long floor, unsigned long ceiling)
{
	pud_t *pud;
	unsigned long next;
	unsigned long start;

	start = addr;
	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		if (is_hugetlb_pud(*pud))
			pud_clear(pud);
		else
			hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
					       ceiling);
	} while (pud++, addr = next, addr != end);

	/* Keep the pud table if its PGDIR-aligned start is below floor. */
	start &= PGDIR_MASK;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= PGDIR_MASK;
		if (!ceiling)
			return;
	}
	/*
	 * Keep the table if the range ends past the ceiling.  Subtracting
	 * one from both sides makes a ceiling of 0 wrap to the maximum
	 * unsigned long, so this test then never fires.
	 */
	if (end - 1 > ceiling - 1)
		return;

	/* Look up the table start before the pgd entry is cleared. */
	pud = pud_offset(pgd, start);
	pgd_clear(pgd);
	pud_free_tlb(tlb, pud, start);
	mm_dec_nr_puds(tlb->mm);
}
| 489 | |
/*
 * Free the page tables that cover [@addr, @end) in tlb->mm, limited by
 * @floor and @ceiling.
 *
 * The range is first trimmed to PMD granularity against @floor and
 * @ceiling; if nothing remains the function returns without touching
 * any table.  Each pgd entry in the remaining range that passes
 * pgd_none_or_clear_bad() is handed to hugetlb_free_pud_range().
 */
void hugetlb_free_pgd_range(struct mmu_gather *tlb,
			    unsigned long addr, unsigned long end,
			    unsigned long floor, unsigned long ceiling)
{
	pgd_t *pgd;
	unsigned long next;

	/*
	 * Round addr down to a PMD boundary; if that drops below floor,
	 * step up one PMD, bailing out if the addition wraps to 0.
	 */
	addr &= PMD_MASK;
	if (addr < floor) {
		addr += PMD_SIZE;
		if (!addr)
			return;
	}
	/* A non-zero ceiling that rounds down to 0 leaves nothing to free. */
	if (ceiling) {
		ceiling &= PMD_MASK;
		if (!ceiling)
			return;
	}
	/*
	 * Pull end back by one PMD if it lies past the ceiling.  The -1 on
	 * both sides makes a ceiling of 0 wrap to the maximum unsigned
	 * long, so this test then never fires.
	 */
	if (end - 1 > ceiling - 1)
		end -= PMD_SIZE;
	/* Nothing left after trimming. */
	if (addr > end - 1)
		return;

	pgd = pgd_offset(tlb->mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
	} while (pgd++, addr = next, addr != end);
}