| /* |
| * SPDX-License-Identifier: BSD-3-Clause |
| * SPDX-FileCopyrightText: Copyright TF-RMM Contributors. |
| */ |
| |
| #include <arch.h> |
| #include <arch_helpers.h> |
| #include <assert.h> |
| #include <attestation_token.h> |
| #include <buffer.h> |
| #include <cpuid.h> |
| #include <debug.h> |
| #include <errno.h> |
| #include <gic.h> |
| #include <granule.h> |
| #include <memory_alloc.h> |
| #include <sizes.h> |
| #include <slot_buf_arch.h> |
| #include <stdbool.h> |
| #include <stdint.h> |
| #include <table.h> |
| #include <xlat_contexts.h> |
| #include <xlat_tables.h> |
| |
| /* |
| * The VA space size for the high region, which maps the slot buffers, |
| * needs to be a power of two, so round NR_CPU_SLOTS up to the closest |
| * power of two. |
| */ |
| #define ROUNDED_NR_CPU_SLOTS (1ULL << (64ULL - \ |
| __builtin_clzll((NR_CPU_SLOTS) - 1))) |
| |
| #define RMM_SLOT_BUF_VA_SIZE ((ROUNDED_NR_CPU_SLOTS) * (GRANULE_SIZE)) |
| |
| #define SLOT_VIRT ((ULL(0xffffffffffffffff) - \ |
| RMM_SLOT_BUF_VA_SIZE + ULL(1))) |
| |
| /* |
| * All the slot buffers for a given CPU must be mapped by a single translation |
| * table, which means the max VA size should be <= 4KB * 512 |
| */ |
| COMPILER_ASSERT((RMM_SLOT_BUF_VA_SIZE) <= (GRANULE_SIZE * XLAT_TABLE_ENTRIES)); |
| |
| /* |
| * For all translation stages if FEAT_TTST is implemented, while |
| * the PE is executing in AArch64 state and is using 4KB |
| * translation granules, the min address space size is 64KB |
| */ |
| COMPILER_ASSERT((RMM_SLOT_BUF_VA_SIZE) >= (1 << 16U)); |
| |
| #define RMM_SLOT_BUF_MMAP MAP_REGION_TRANSIENT( \ |
| SLOT_VIRT, \ |
| RMM_SLOT_BUF_VA_SIZE, \ |
| PAGE_SIZE) |
| |
| #define SLOT_BUF_MMAP_REGIONS UL(1) |
| |
| /* |
| * Attributes for a buffer slot page descriptor. |
| * Note that the AF bit on the descriptor is handled by the translation |
| * library (it assumes that access faults are not handled) so it does not |
| * need to be specified here. |
| */ |
| #define SLOT_DESC_ATTR \ |
| (MT_RW_DATA | MT_SHAREABILITY_ISH | MT_NG) |
| |
| /* |
| * The base tables for all the contexts are manually allocated as a continous |
| * block of memory. |
| */ |
| static uint64_t transient_base_table[XLAT_TABLE_ENTRIES * MAX_CPUS] |
| __aligned(BASE_XLAT_TABLES_ALIGNMENT) |
| __section("slot_buffer_xlat_tbls"); |
| |
| /* Allocate per-cpu xlat_ctx_tbls */ |
| static struct xlat_ctx_tbls slot_buf_tbls[MAX_CPUS]; |
| |
| /* |
| * Allocate mmap regions and define common xlat_ctx_cfg shared will |
| * all slot_buf_xlat_ctx |
| */ |
| XLAT_REGISTER_VA_SPACE(slot_buf, VA_HIGH_REGION, |
| SLOT_BUF_MMAP_REGIONS, |
| RMM_SLOT_BUF_VA_SIZE); |
| |
| /* context definition */ |
| static struct xlat_ctx slot_buf_xlat_ctx[MAX_CPUS]; |
| |
| /* |
| * Allocate a cache to store the last level table entry where the slot buffers |
| * are mapped to avoid needing to perform a table walk every time a buffer |
| * slot operation is needed. |
| */ |
| static struct xlat_table_entry te_cache[MAX_CPUS]; |
| |
| static uintptr_t slot_to_va(enum buffer_slot slot) |
| { |
| assert(slot < NR_CPU_SLOTS); |
| |
| return (uintptr_t)(SLOT_VIRT + (GRANULE_SIZE * slot)); |
| } |
| |
| static inline struct xlat_ctx *get_slot_buf_xlat_ctx(void) |
| { |
| return &slot_buf_xlat_ctx[my_cpuid()]; |
| } |
| |
| static inline struct xlat_table_entry *get_cache_entry(void) |
| { |
| return &te_cache[my_cpuid()]; |
| } |
| |
| __unused static uint64_t slot_to_descriptor(enum buffer_slot slot) |
| { |
| uint64_t *entry = xlat_get_pte_from_table(get_cache_entry(), |
| slot_to_va(slot)); |
| |
| return xlat_read_descriptor(entry); |
| } |
| |
| /* |
| * Setup xlat table for slot buffer mechanism for each PE. |
| * Must be called for every PE in the system |
| */ |
| void slot_buf_setup_xlat(void) |
| { |
| unsigned int cpuid = my_cpuid(); |
| int ret = xlat_ctx_create_dynamic(get_slot_buf_xlat_ctx(), |
| &slot_buf_xlat_ctx_cfg, |
| &slot_buf_tbls[cpuid], |
| &transient_base_table[ |
| XLAT_TABLE_ENTRIES * cpuid], |
| GET_NUM_BASE_LEVEL_ENTRIES( |
| RMM_SLOT_BUF_VA_SIZE), |
| NULL, |
| 0U); |
| |
| if (ret == -EINVAL) { |
| /* |
| * If the context was already created, carry on with the |
| * initialization. If it cannot be created, panic. |
| */ |
| ERROR("%s (%u): Failed to create the empty context for the slot buffers\n", |
| __func__, __LINE__); |
| panic(); |
| } |
| |
| if (xlat_ctx_cfg_initialized(get_slot_buf_xlat_ctx()) == false) { |
| /* Add necessary mmap regions during cold boot */ |
| struct xlat_mmap_region slot_buf_regions[] = { |
| RMM_SLOT_BUF_MMAP, |
| {0} |
| }; |
| |
| if (xlat_mmap_add_ctx(get_slot_buf_xlat_ctx(), |
| slot_buf_regions, true) != 0) { |
| ERROR("%s (%u): Failed to map slot buffer memory on high region\n", |
| __func__, __LINE__); |
| panic(); |
| } |
| |
| } |
| |
| if (xlat_ctx_tbls_initialized(get_slot_buf_xlat_ctx()) == false) { |
| /* |
| * Initialize the translation tables for the current context. |
| * This is done on the first boot of each CPU. |
| */ |
| int err; |
| |
| err = xlat_init_tables_ctx(get_slot_buf_xlat_ctx()); |
| if (err != 0) { |
| ERROR("%s (%u): xlat initialization failed with code %i\n", |
| __func__, __LINE__, err); |
| panic(); |
| } |
| } |
| |
| /* |
| * Confugure MMU registers. This function assumes that all the |
| * contexts of a particular VA region (HIGH or LOW VA) use the same |
| * limits for VA and PA spaces. |
| */ |
| if (xlat_arch_setup_mmu_cfg(get_slot_buf_xlat_ctx())) { |
| ERROR("%s (%u): MMU registers failed to initialize\n", |
| __func__, __LINE__); |
| panic(); |
| } |
| } |
| |
| /* |
| * Finishes initializing the slot buffer mechanism. |
| * This function must be called after the MMU is enabled. |
| */ |
| void slot_buf_init(void) |
| { |
| if (is_mmu_enabled() == false) { |
| ERROR("%s: MMU must be enabled\n", __func__); |
| panic(); |
| } |
| |
| /* |
| * Initialize (if not done yet) the internal cache with the last level |
| * translation table that holds the MMU descriptors for the slot |
| * buffers, so we can access them faster when we need to map/unmap. |
| */ |
| if ((get_cache_entry())->table == NULL) { |
| if (xlat_get_table_from_va(get_cache_entry(), |
| get_slot_buf_xlat_ctx(), |
| slot_to_va(SLOT_NS)) != 0) { |
| ERROR("%s (%u): Failed to initialize table entry cache for CPU %u\n", |
| __func__, __LINE__, my_cpuid()); |
| panic(); |
| |
| } |
| } |
| } |
| |
| /* |
| * Buffer slots are intended to be transient, and should not be live at |
| * entry/exit of the RMM. |
| */ |
| void assert_cpu_slots_empty(void) |
| { |
| unsigned int i; |
| |
| for (i = 0; i < NR_CPU_SLOTS; i++) { |
| assert(slot_to_descriptor(i) == INVALID_DESC); |
| } |
| } |
| |
| static inline bool is_ns_slot(enum buffer_slot slot) |
| { |
| return slot == SLOT_NS; |
| } |
| |
| static inline bool is_realm_slot(enum buffer_slot slot) |
| { |
| return (slot != SLOT_NS) && (slot < NR_CPU_SLOTS); |
| } |
| |
| static void *ns_granule_map(enum buffer_slot slot, struct granule *granule) |
| { |
| unsigned long addr = granule_addr(granule); |
| |
| assert(is_ns_slot(slot)); |
| return buffer_arch_map(slot, addr, true); |
| } |
| |
| static void ns_buffer_unmap(enum buffer_slot slot) |
| { |
| assert(is_ns_slot(slot)); |
| |
| buffer_arch_unmap((void *)slot_to_va(slot)); |
| } |
| |
| /* |
| * Maps a granule @g into the provided @slot, returning |
| * the virtual address. |
| * |
| * The caller must either hold @g::lock or hold a reference. |
| */ |
| void *granule_map(struct granule *g, enum buffer_slot slot) |
| { |
| unsigned long addr = granule_addr(g); |
| |
| assert(is_realm_slot(slot)); |
| |
| return buffer_arch_map(slot, addr, false); |
| } |
| |
/*
 * Unmap the buffer located at virtual address @buf. The work is delegated
 * to the architecture specific buffer_arch_unmap().
 */
void buffer_unmap(void *buf)
{
	buffer_arch_unmap(buf);
}
| |
/*
 * Copy helpers for Non-secure memory, defined outside of this file.
 * Their result is returned unchanged by ns_buffer_read()/ns_buffer_write(),
 * where 'false' means that not all the data were copied.
 */
bool memcpy_ns_read(void *dest, const void *ns_src, unsigned long size);
bool memcpy_ns_write(void *ns_dest, const void *src, unsigned long size);
| |
| /* |
| * Map a Non secure granule @g into the slot @slot and read data from |
| * this granule to @dest. Unmap the granule once the read is done. |
| * |
| * It returns 'true' on success or `false` if not all data are copied. |
| * Only the least significant bits of @offset are considered, which allows the |
| * full PA of a non-granule aligned buffer to be used for the @offset parameter. |
| */ |
| bool ns_buffer_read(enum buffer_slot slot, |
| struct granule *ns_gr, |
| unsigned int offset, |
| unsigned int size, |
| void *dest) |
| { |
| uintptr_t src; |
| bool retval; |
| |
| assert(is_ns_slot(slot)); |
| assert(ns_gr != NULL); |
| |
| /* |
| * To simplify the trapping mechanism around NS access, |
| * memcpy_ns_read uses a single 8-byte LDR instruction and |
| * all parameters must be aligned accordingly. |
| */ |
| assert(ALIGNED(size, 8)); |
| assert(ALIGNED(offset, 8)); |
| assert(ALIGNED(dest, 8)); |
| |
| offset &= ~GRANULE_MASK; |
| assert(offset + size <= GRANULE_SIZE); |
| |
| src = (uintptr_t)ns_granule_map(slot, ns_gr) + offset; |
| retval = memcpy_ns_read(dest, (void *)src, size); |
| ns_buffer_unmap(slot); |
| |
| return retval; |
| } |
| |
| /* |
| * Map a Non secure granule @g into the slot @slot and write data from |
| * this granule to @dest. Unmap the granule once the write is done. |
| * |
| * It returns 'true' on success or `false` if not all data are copied. |
| * Only the least significant bits of @offset are considered, which allows the |
| * full PA of a non-granule aligned buffer to be used for the @offset parameter. |
| */ |
| bool ns_buffer_write(enum buffer_slot slot, |
| struct granule *ns_gr, |
| unsigned int offset, |
| unsigned int size, |
| void *src) |
| { |
| uintptr_t dest; |
| bool retval; |
| |
| assert(is_ns_slot(slot)); |
| assert(ns_gr != NULL); |
| |
| /* |
| * To simplify the trapping mechanism around NS access, |
| * memcpy_ns_write uses a single 8-byte STR instruction and |
| * all parameters must be aligned accordingly. |
| */ |
| assert(ALIGNED(size, 8)); |
| assert(ALIGNED(offset, 8)); |
| assert(ALIGNED(src, 8)); |
| |
| offset &= ~GRANULE_MASK; |
| assert(offset + size <= GRANULE_SIZE); |
| |
| dest = (uintptr_t)ns_granule_map(slot, ns_gr) + offset; |
| retval = memcpy_ns_write((void *)dest, src, size); |
| ns_buffer_unmap(slot); |
| |
| return retval; |
| } |
| |
| /****************************************************************************** |
| * Internal helpers |
| ******************************************************************************/ |
| |
| void *buffer_map_internal(enum buffer_slot slot, unsigned long addr, bool ns) |
| { |
| uint64_t attr = SLOT_DESC_ATTR; |
| uintptr_t va = slot_to_va(slot); |
| struct xlat_table_entry *entry = get_cache_entry(); |
| |
| assert(GRANULE_ALIGNED(addr)); |
| |
| attr |= (ns == true ? MT_NS : MT_REALM); |
| |
| if (xlat_map_memory_page_with_attrs(entry, va, |
| (uintptr_t)addr, attr) != 0) { |
| /* Error mapping the buffer */ |
| return NULL; |
| } |
| |
| return (void *)va; |
| } |
| |
/*
 * Remove the mapping of the buffer at virtual address @buf from the calling
 * CPU's cached table entry.
 */
void buffer_unmap_internal(void *buf)
{
	uintptr_t va = (uintptr_t)buf;

	/*
	 * Earlier loads/stores to buf must not be moved by the compiler past
	 * the translation table update below, or they could fault.
	 */
	COMPILER_BARRIER();

	xlat_unmap_memory_page(get_cache_entry(), va);
}