feat(rdv3): enable numa aware per-cpu for RD-V3-Cfg2
RD-V3-Cfg2, being a quad-chip platform, can make use of NUMA-aware
allocation within the per-cpu framework. With NUMA allocation, the
platform can place per-cpu objects in memory that is local to a
particular node. On RD-V3-Cfg2, the per-cpu objects are distributed
across the different SRAMs present on the system.
Introduce platform-specific helper functions to enhance the per-cpu
framework. Add a helper function to zero-initialize the per-cpu
sections, ensuring clean initialization of per-cpu data. Add a function
to obtain the base address of a per-cpu section, facilitating efficient
access to per-cpu data structures. Together these enhance the per-cpu
framework's capability to handle platform-specific requirements.
These additions are crucial for maintaining the integrity and performance
of per-cpu operations.
Change-Id: If08169ba0de8fd7263db07d1587e598cffcf959a
Signed-off-by: Sammit Joshi <sammit.joshi@arm.com>
Signed-off-by: Rohit Mathew <rohit.mathew@arm.com>
diff --git a/plat/arm/board/neoverse_rd/common/include/nrd3/nrd_css_fw_def3.h b/plat/arm/board/neoverse_rd/common/include/nrd3/nrd_css_fw_def3.h
index bbfbe01..e4cccaa 100644
--- a/plat/arm/board/neoverse_rd/common/include/nrd3/nrd_css_fw_def3.h
+++ b/plat/arm/board/neoverse_rd/common/include/nrd3/nrd_css_fw_def3.h
@@ -118,6 +118,10 @@
BL32_LIMIT - BL32_BASE, \
MT_MEMORY | MT_RW | MT_SECURE)
#endif
+/*******************************************************************************
+ * Helper macros: NRD_CSS_ALIGN_TO_4K rounds 'size' up to a multiple of 0x1000
+ ******************************************************************************/
+#define NRD_CSS_ALIGN_TO_4K(size) (((size) + 0xFFF) & ~0xFFF)
#if RESET_TO_BL31
/*******************************************************************************
diff --git a/plat/arm/board/neoverse_rd/common/include/nrd3/nrd_plat_arm_def3.h b/plat/arm/board/neoverse_rd/common/include/nrd3/nrd_plat_arm_def3.h
index 4936344..5d56cba 100644
--- a/plat/arm/board/neoverse_rd/common/include/nrd3/nrd_plat_arm_def3.h
+++ b/plat/arm/board/neoverse_rd/common/include/nrd3/nrd_plat_arm_def3.h
@@ -30,6 +30,8 @@
NRD_MAX_CPUS_PER_CLUSTER * \
NRD_MAX_PE_PER_CPU)
+#define PLATFORM_NODE_CORE_COUNT (PLATFORM_CORE_COUNT/NRD_CHIP_COUNT)
+
/*******************************************************************************
* PA/VA config
******************************************************************************/
@@ -56,8 +58,8 @@
* chips are accessed - secure ram, css device and soc device regions.
*/
#if defined(IMAGE_BL31)
-# define PLAT_ARM_MMAP_ENTRIES (10 + ((NRD_CHIP_COUNT - 1) * 3))
-# define MAX_XLAT_TABLES (10 + ((NRD_CHIP_COUNT - 1) * 3))
+# define PLAT_ARM_MMAP_ENTRIES (11 + ((NRD_CHIP_COUNT - 1) * 3))
+# define MAX_XLAT_TABLES (11 + ((NRD_CHIP_COUNT - 1) * 3))
#elif defined(IMAGE_BL32)
# define PLAT_ARM_MMAP_ENTRIES U(8)
# define MAX_XLAT_TABLES U(5)
diff --git a/plat/arm/board/neoverse_rd/platform/rdv3/platform.mk b/plat/arm/board/neoverse_rd/platform/rdv3/platform.mk
index c587a55..394fe9b 100644
--- a/plat/arm/board/neoverse_rd/platform/rdv3/platform.mk
+++ b/plat/arm/board/neoverse_rd/platform/rdv3/platform.mk
@@ -53,6 +53,10 @@
# RD-V3 uses MHUv3
PLAT_MHU := MHUv3
+ifeq (${NRD_PLATFORM_VARIANT}, 2)
+override NUMA_AWARE_PER_CPU := 1
+endif
+
include plat/arm/board/neoverse_rd/common/nrd-common.mk
include drivers/arm/rse/rse_comms.mk
include drivers/auth/mbedtls/mbedtls_common.mk
@@ -116,6 +120,7 @@
${RDV3_BASE}/rdv3_topology.c \
${RDV3_BASE}/rdv3_plat_attest_token.c \
${RDV3_BASE}/rdv3_realm_attest_key.c \
+ ${RDV3_BASE}/rdv3_per_cpu.S \
drivers/arm/smmu/smmu_v3.c \
drivers/cfi/v2m/v2m_flash.c \
lib/psa/cca_attestation.c \
diff --git a/plat/arm/board/neoverse_rd/platform/rdv3/rdv3_bl31_setup.c b/plat/arm/board/neoverse_rd/platform/rdv3/rdv3_bl31_setup.c
index e32e761..6e851b7 100644
--- a/plat/arm/board/neoverse_rd/platform/rdv3/rdv3_bl31_setup.c
+++ b/plat/arm/board/neoverse_rd/platform/rdv3/rdv3_bl31_setup.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2024, Arm Limited and Contributors. All rights reserved.
+ * Copyright (c) 2025, Arm Limited and Contributors. All rights reserved.
*
* SPDX-License-Identifier: BSD-3-Clause
*/
@@ -14,6 +14,8 @@
#include <nrd_plat.h>
#include <nrd_variant.h>
#include <rdv3_rse_comms.h>
+#include <lib/per_cpu/per_cpu.h>
+#include <nrd_css_fw_def3.h>
#define RT_OWNER 0
@@ -132,6 +134,29 @@
};
#endif /* NRD_PLATFORM_VARIANT == 2 */
+void __init bl31_plat_arch_setup(void)
+{
+	/*
+	 * Run the common Arm BL31 arch setup on every variant; this function
+	 * must not reduce to an empty body when NRD_PLATFORM_VARIANT != 2.
+	 */
+	arm_bl31_plat_arch_setup();
+#if (NRD_PLATFORM_VARIANT == 2) && NUMA_AWARE_PER_CPU
+	/* Add one per-cpu sized, VA == PA region for each remote chip */
+	for (unsigned int i = 1U; i < NRD_CHIP_COUNT; i++) {
+		int ret = mmap_add_dynamic_region(
+				NRD_REMOTE_CHIP_MEM_OFFSET(i),
+				NRD_REMOTE_CHIP_MEM_OFFSET(i),
+				NRD_CSS_ALIGN_TO_4K(PER_CPU_SIZE),
+				MT_MEMORY | MT_RW | EL3_PAS);
+		if (ret != 0) {
+			ERROR("Failed to add per-cpu mmap (ret=%d)\n", ret);
+			panic();
+		}
+	}
+#endif /* (NRD_PLATFORM_VARIANT == 2) && NUMA_AWARE_PER_CPU */
+}
+
void bl31_platform_setup(void)
{
/*
@@ -183,6 +208,16 @@
}
}
+/* Per-CPU section base address of each node; entry 0 is PER_CPU_START */
+const uintptr_t per_cpu_remote_section_base[] = {
+	[0] = (uintptr_t)PER_CPU_START,
+#if NUMA_AWARE_PER_CPU
+	[1] = (uintptr_t)NRD_REMOTE_CHIP_MEM_OFFSET(1),
+	[2] = (uintptr_t)NRD_REMOTE_CHIP_MEM_OFFSET(2),
+	[3] = (uintptr_t)NRD_REMOTE_CHIP_MEM_OFFSET(3),
+#endif
+};
+
#if RESET_TO_BL31
/*
* The GPT library might modify the gpt regions structure to optimize
diff --git a/plat/arm/board/neoverse_rd/platform/rdv3/rdv3_per_cpu.S b/plat/arm/board/neoverse_rd/platform/rdv3/rdv3_per_cpu.S
new file mode 100644
index 0000000..632f6f3
--- /dev/null
+++ b/plat/arm/board/neoverse_rd/platform/rdv3/rdv3_per_cpu.S
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2025, Arm Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef RDV3_PER_CPU_S
+#define RDV3_PER_CPU_S
+
+#include <asm_macros.S>
+#include <lib/per_cpu/per_cpu_defs.h>
+
+.globl plat_zero_init_per_cpu_section
+.globl plat_per_cpu_section_base
+
+/* ----------------------------------------------------------------------------
+ * Zero-initialise the .per_cpu section of every remote node, i.e. entries
+ * 1 .. NRD_CHIP_COUNT - 1 of per_cpu_remote_section_base. Entry 0 is skipped.
+ * x0 - x2 are recomputed on every pass so that the loop does not depend on
+ * zeromem preserving them. NOTE(review): assumes zeromem keeps x3 and x10.
+ * Clobbers : x0 - x3, x10, x30
+ * ----------------------------------------------------------------------------
+ */
+func plat_zero_init_per_cpu_section
+	/* The function calls zeromem, so park the return address in x10 */
+	mov	x10, x30
+
+	/* x3 = node index, starting with node 1 */
+	mov	x3, #1
+1:
+	/* Leave the loop once x3 reaches NRD_CHIP_COUNT */
+	cmp	x3, #NRD_CHIP_COUNT
+	b.hs	2f
+
+	/* x0 = per_cpu_remote_section_base[x3], assuming 8-byte uintptr_t */
+	adrp	x2, per_cpu_remote_section_base
+	add	x2, x2, :lo12:per_cpu_remote_section_base
+	ldr	x0, [x2, x3, lsl #3]
+
+	/* Zeromem requires x1 to be populated with the size */
+	adrp	x1, __PER_CPU_SIZE__
+	add	x1, x1, :lo12:__PER_CPU_SIZE__
+	bl	zeromem
+	add	x3, x3, #1
+	b	1b
+2:
+	mov	x30, x10
+	ret
+endfunc plat_zero_init_per_cpu_section
+
+/* ----------------------------------------------------------------------------
+ * Return the base address of the per-cpu section used by a given cpu.
+ * In  : x0 - cpu index
+ * Out : x0 - per_cpu_remote_section_base[cpu index / PLATFORM_NODE_CORE_COUNT]
+ * Clobbers : x0 - x1
+ * ----------------------------------------------------------------------------
+ */
+func plat_per_cpu_section_base
+	/* x0 = table index = cpu index / PLATFORM_NODE_CORE_COUNT (unsigned) */
+	mov	x1, #PLATFORM_NODE_CORE_COUNT
+	udiv	x0, x0, x1
+
+	/* x1 = address of per_cpu_remote_section_base[0] */
+	adrp	x1, per_cpu_remote_section_base
+	add	x1, x1, :lo12:per_cpu_remote_section_base
+
+	/*
+	 * x0 = table[index]; lsl #3 scales by 8, assuming 8-byte uintptr_t.
+	 */
+	ldr	x0, [x1, x0, lsl #3]
+	ret
+endfunc plat_per_cpu_section_base
+
+#endif /* RDV3_PER_CPU_S*/