aboutsummaryrefslogtreecommitdiff
path: root/tftf
diff options
context:
space:
mode:
authorVarun Wadekar <vwadekar@nvidia.com>2020-05-11 16:45:56 -0700
committerManish Pandey <manish.pandey2@arm.com>2020-08-17 21:39:10 +0000
commit073a9e46483b51ecd45dbdbc39a5367523ff05f4 (patch)
tree464964bc7346ad41b5c6f30cbdc61690dcb97478 /tftf
parent83aa8e4b6d3abbaf43ab453c95befc3a8c281be8 (diff)
downloadtf-a-tests-073a9e46483b51ecd45dbdbc39a5367523ff05f4.tar.gz
Tegra194: support for RAS corrected error testing
This patch introduces a test to inject RAS corrected errors for all supported nodes from all CPUs. On injecting an error, the CPU verifies that the error was detected with the help of an IRQ. Signed-off-by: Varun Wadekar <vwadekar@nvidia.com> Change-Id: I4a679e432d36e20aac64e7062fbaab438e96dfc0
Diffstat (limited to 'tftf')
-rw-r--r--tftf/tests/plat/nvidia/tegra194/test_ras_corrected.c403
-rw-r--r--tftf/tests/tests-tegra194.mk1
-rw-r--r--tftf/tests/tests-tegra194.xml1
3 files changed, 405 insertions, 0 deletions
diff --git a/tftf/tests/plat/nvidia/tegra194/test_ras_corrected.c b/tftf/tests/plat/nvidia/tegra194/test_ras_corrected.c
new file mode 100644
index 00000000..a7be1a72
--- /dev/null
+++ b/tftf/tests/plat/nvidia/tegra194/test_ras_corrected.c
@@ -0,0 +1,403 @@
+/*
+ * Copyright (c) 2020, NVIDIA Corporation. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <assert.h>
+#include <debug.h>
+#include <drivers/arm/arm_gic.h>
+#include <events.h>
+#include <lib/irq.h>
+#include <power_management.h>
+#include <test_helpers.h>
+#include <tftf_lib.h>
+
+#include <platform.h>
+
+#include "include/tegra194_ras.h"
+
+/* Macro to indicate CPU to start an action */
+#define START U(0xAA55)
+
+/* Global flag to indicate that a fault was received */
+static volatile uint64_t irq_received;
+
+/* NVIDIA Pseudo fault generation registers */
+#define T194_ERXPFGCTL_EL1 S3_0_C15_C1_4
+#define T194_ERXPFGCDN_EL1 S3_0_C15_C1_6
+DEFINE_RENAME_SYSREG_RW_FUNCS(erxpfgctl_el1, T194_ERXPFGCTL_EL1)
+DEFINE_RENAME_SYSREG_RW_FUNCS(erxpfgcdn_el1, T194_ERXPFGCDN_EL1)
+
+/* Instantiate RAS nodes */
+PER_CORE_RAS_NODE_LIST(DEFINE_ONE_RAS_NODE);
+PER_CLUSTER_RAS_NODE_LIST(DEFINE_ONE_RAS_NODE);
+SCF_L3_BANK_RAS_NODE_LIST(DEFINE_ONE_RAS_NODE);
+CCPLEX_RAS_NODE_LIST(DEFINE_ONE_RAS_NODE);
+
+/* Instantiate RAS node groups */
+static __unused struct ras_aux_data per_core_ras_group[] = {
+ PER_CORE_RAS_GROUP_NODES
+};
+
+static __unused struct ras_aux_data per_cluster_ras_group[] = {
+ PER_CLUSTER_RAS_GROUP_NODES
+};
+
+static __unused struct ras_aux_data scf_l3_ras_group[] = {
+ SCF_L3_BANK_RAS_GROUP_NODES
+};
+
+static __unused struct ras_aux_data ccplex_ras_group[] = {
+ CCPLEX_RAS_GROUP_NODES
+};
+
+/*
+ * we have same probe and handler for each error record group, use a macro to
+ * simply the record definition.
+ */
+#define ADD_ONE_ERR_GROUP(errselr_start, group) \
+ { \
+ .sysreg.idx_start = (errselr_start), \
+ .sysreg.num_idx = ARRAY_SIZE((group)), \
+ .aux_data = (group) \
+ }
+
+/* RAS error record group information */
+static struct err_record_info tegra194_ras_records[] = {
+ /*
+ * Per core ras error records
+ *
+ * ERRSELR starts from (0*256 + Logical_CPU_ID*16 + 0) to
+ * (0*256 + Logical_CPU_ID*16 + 5) for each group.
+ * 8 cores/groups, 6 * 8 nodes in total.
+ */
+ ADD_ONE_ERR_GROUP(0x000, per_core_ras_group),
+ ADD_ONE_ERR_GROUP(0x010, per_core_ras_group),
+ ADD_ONE_ERR_GROUP(0x020, per_core_ras_group),
+ ADD_ONE_ERR_GROUP(0x030, per_core_ras_group),
+ ADD_ONE_ERR_GROUP(0x040, per_core_ras_group),
+ ADD_ONE_ERR_GROUP(0x050, per_core_ras_group),
+ ADD_ONE_ERR_GROUP(0x060, per_core_ras_group),
+ ADD_ONE_ERR_GROUP(0x070, per_core_ras_group),
+
+ /*
+ * Per cluster ras error records
+ *
+ * ERRSELR starts from 2*256 + Logical_Cluster_ID*16 + 0 to
+ * 2*256 + Logical_Cluster_ID*16 + 3.
+ * 4 clusters/groups, 3 * 4 nodes in total.
+ */
+ ADD_ONE_ERR_GROUP(0x200, per_cluster_ras_group),
+ ADD_ONE_ERR_GROUP(0x210, per_cluster_ras_group),
+ ADD_ONE_ERR_GROUP(0x220, per_cluster_ras_group),
+ ADD_ONE_ERR_GROUP(0x230, per_cluster_ras_group),
+
+ /*
+ * SCF L3_Bank ras error records
+ *
+ * ERRSELR: 3*256 + L3_Bank_ID, L3_Bank_ID: 0-3
+ * 1 groups, 4 nodes in total.
+ */
+ ADD_ONE_ERR_GROUP(0x300, scf_l3_ras_group),
+
+ /*
+ * CCPLEX ras error records
+ *
+ * ERRSELR: 4*256 + Unit_ID, Unit_ID: 0 - 4
+ * 1 groups, 5 nodes in total.
+ */
+ ADD_ONE_ERR_GROUP(0x400, ccplex_ras_group),
+};
+
+static void test_ras_inject_error(uint32_t errselr_el1, unsigned int errctlr_bit)
+{
+ uint64_t pfg_ctlr = BIT_64(errctlr_bit);
+
+ INFO("Injecting on 0x%lx:\n\terrctlr_el1=%d\n\terrselr_el1=0x%x\n\tpfg_ctlr=0x%llx\n",
+ read_mpidr_el1(), errctlr_bit, errselr_el1, pfg_ctlr);
+
+ /* clear the flag before we inject error */
+ irq_received = 0;
+ dccvac((uint64_t)&irq_received);
+ dmbish();
+
+ /* Choose error record */
+ write_errselr_el1(errselr_el1);
+
+ /* Program count down timer to 1 */
+ write_erxpfgcdn_el1(1);
+
+ /* Start count down to generate error on expiry */
+ write_erxpfgctl_el1(3 << 6 | ERXPFGCTL_CDEN_BIT | pfg_ctlr);
+ waitms(5);
+
+ /* Wait until IRQ fires */
+ do {
+ /*
+ * clean+invalidate cache lines before reading the global
+ * flag populated by another CPU
+ */
+ dccivac((uint64_t)&irq_received);
+ dmbish();
+ } while (irq_received == 0);
+
+ /* write 1-to-clear */
+ write_erxstatus_el1(read_erxstatus_el1() | (3 << 24));
+}
+
+static void generate_corrected_faults(void)
+{
+ unsigned int i;
+ unsigned int j;
+ unsigned int k;
+ unsigned int total = 0;
+
+ for (i = 0; i < ARRAY_SIZE(tegra194_ras_records); i++)
+ total += tegra194_ras_records[i].sysreg.num_idx;
+
+ VERBOSE("Total Nodes:%u\n", total);
+
+ for (i = 0; i < ARRAY_SIZE(tegra194_ras_records); i++) {
+
+ const struct err_record_info *info = &tegra194_ras_records[i];
+ uint32_t idx_start = info->sysreg.idx_start;
+ uint32_t num_idx = info->sysreg.num_idx;
+ const struct ras_aux_data *aux_data =
+ (const struct ras_aux_data *)info->aux_data;
+
+ /* No corrected errors for this node */
+ if (idx_start == 0x400) {
+ VERBOSE("0x%lx skipping errselr_el1=0x%x\n",
+ read_mpidr_el1(), idx_start);
+ continue;
+ }
+
+ for (j = 0; j < num_idx; j++) {
+ uint32_t errselr_el1 = idx_start + j;
+ uint64_t __unused err_fr;
+ uint64_t uncorr_errs, corr_errs;
+
+ /* Write to ERRSELR_EL1 to select the error record */
+ write_errselr_el1(errselr_el1);
+
+ /*
+ * all supported errors for this node exist in the
+ * top 32 bits
+ */
+ err_fr = read_erxfr_el1();
+ err_fr >>= 32;
+ err_fr <<= 32;
+
+ /*
+ * Mask the corrected errors that are disabled
+ * in the ERXFR register
+ */
+ uncorr_errs = aux_data[j].err_ctrl();
+ corr_errs = ~uncorr_errs & err_fr;
+
+ for (k = 32; k < 64; k++) {
+ /*
+ * JSR_MTS node, errctlr_bit = 32 or 34
+ * are uncorrected errors and should be
+ * skipped
+ */
+ if ((idx_start < 0x200) && ((errselr_el1 & 0xF) == 2) && (k == 32 || k == 34)) {
+ VERBOSE("0x%lx skipping errselr_el1=0x%x\n",
+ read_mpidr_el1(), errselr_el1);
+ continue;
+ }
+
+ if (corr_errs & BIT_64(k))
+ test_ras_inject_error(errselr_el1, k);
+ }
+ }
+ }
+}
+
+static int ce_irq_handler(void *data)
+{
+ unsigned int __unused irq_num = *(unsigned int *)data;
+
+ /* write 1-to-clear */
+ write_erxstatus_el1(read_erxstatus_el1() | (3 << 24));
+
+ irq_received = 1;
+
+ /*
+ * clean cache lines after writing the global flag so that
+ * latest value is visible to other CPUs
+ */
+ dccvac((uint64_t)&irq_received);
+ dsbish();
+
+ /* Return value doesn't matter */
+ return 0;
+}
+
+static event_t cpu_booted[PLATFORM_CORE_COUNT];
+static volatile uint64_t cpu_powerdown[PLATFORM_CORE_COUNT];
+static volatile uint64_t cpu_start_test[PLATFORM_CORE_COUNT];
+static volatile uint64_t cpu_test_completed[PLATFORM_CORE_COUNT];
+
+static test_result_t test_corrected_errors(void)
+{
+ unsigned int mpid = read_mpidr_el1() & MPID_MASK;
+ unsigned int core_pos = platform_get_core_pos(mpid);
+
+ VERBOSE("Hello from core 0x%x\n", mpid);
+
+ /* Tell the lead CPU that the calling CPU has entered the test */
+ tftf_send_event(&cpu_booted[core_pos]);
+
+ /* Wait until lead CPU asks us to start the test */
+ do {
+ /*
+ * clean+invalidate cache lines before reading the global
+ * flag populated by another CPU
+ */
+ dccivac((uintptr_t)&cpu_start_test[core_pos]);
+ dmbish();
+ } while (!cpu_start_test[core_pos]);
+
+ generate_corrected_faults();
+
+ VERBOSE("0x%lx: test complete\n", read_mpidr_el1());
+
+ /* Inform lead CPU of test completion */
+ cpu_test_completed[core_pos] = true;
+ dccvac((uintptr_t)&cpu_test_completed[core_pos]);
+ dsbish();
+
+ /* Wait until lead CPU asks us to power down */
+ do {
+ /*
+ * clean+invalidate cache lines before reading the global
+ * flag populated by another CPU
+ */
+ dccivac((uintptr_t)&cpu_powerdown[core_pos]);
+ dmbish();
+ } while (!cpu_powerdown[core_pos]);
+
+ return TEST_RESULT_SUCCESS;
+}
+
+test_result_t test_ras_corrected(void)
+{
+ int64_t __unused ret = 0;
+ unsigned int cpu_node, cpu_mpid;
+ unsigned int lead_mpid = read_mpidr_el1() & MPID_MASK;
+ unsigned int core_pos;
+
+ tftf_testcase_printf("Tegra194 corrected RAS error verification\n");
+
+ /* long execution test; reset watchdog */
+ tftf_platform_watchdog_reset();
+
+ /* register IRQ handler */
+ for (uint32_t irq = 424; irq <= 431; irq++) {
+
+ ret = tftf_irq_register_handler(irq, ce_irq_handler);
+ if (ret < 0)
+ return TEST_RESULT_FAIL;
+
+ /* enable the IRQ */
+ tftf_irq_enable(irq, GIC_HIGHEST_NS_PRIORITY);
+ }
+
+ /* Power on all CPUs */
+ for_each_cpu(cpu_node) {
+
+ cpu_mpid = tftf_get_mpidr_from_node(cpu_node);
+ /* Skip lead CPU, it is already powered on */
+ if (cpu_mpid == lead_mpid)
+ continue;
+
+ ret = tftf_cpu_on(cpu_mpid,
+ (uintptr_t) test_corrected_errors,
+ 0);
+ if (ret != PSCI_E_SUCCESS)
+ return TEST_RESULT_FAIL;
+ }
+
+ /*
+ * The lead CPU needs to wait for all other CPUs to enter the test.
+ * This is because the test framework declares the end of a test when no
+ * CPU is in the test. Therefore, if the lead CPU goes ahead and exits
+ * the test then potentially there could be no CPU executing the test at
+ * this time because none of them have entered the test yet, hence the
+ * framework will be misled in thinking the test is finished.
+ */
+ for_each_cpu(cpu_node) {
+ cpu_mpid = tftf_get_mpidr_from_node(cpu_node);
+ /* Skip lead CPU */
+ if (cpu_mpid == lead_mpid)
+ continue;
+
+ core_pos = platform_get_core_pos(cpu_mpid);
+ tftf_wait_for_event(&cpu_booted[core_pos]);
+ }
+
+ /* Ask all CPUs to start the test */
+ for_each_cpu(cpu_node) {
+ cpu_mpid = tftf_get_mpidr_from_node(cpu_node);
+ /*
+ * Except lead CPU, Wait for all cores to be powered off
+ * by framework
+ */
+ if (cpu_mpid == lead_mpid)
+ continue;
+
+ /* Allow the CPU to start the test */
+ core_pos = platform_get_core_pos(cpu_mpid);
+ cpu_start_test[core_pos] = START;
+
+ /*
+ * clean cache lines after writing the global flag so that
+ * latest value is visible to other CPUs
+ */
+ dccvac((uintptr_t)&cpu_start_test[core_pos]);
+ dsbish();
+
+ /* Wait for the CPU to complete the test */
+ do {
+ /*
+ * clean+invalidate cache lines before reading the global
+ * flag populated by another CPU
+ */
+ dccivac((uintptr_t)&cpu_test_completed[core_pos]);
+ dmbish();
+ } while (!cpu_test_completed[core_pos]);
+ }
+
+ /* run through all supported corrected faults */
+ generate_corrected_faults();
+
+ /* Wait for all CPUs to power off */
+ for_each_cpu(cpu_node) {
+ cpu_mpid = tftf_get_mpidr_from_node(cpu_node);
+ /*
+ * Except lead CPU, Wait for all cores to be powered off
+ * by framework
+ */
+ if (cpu_mpid == lead_mpid)
+ continue;
+
+ /* Allow other CPUs to power down */
+ core_pos = platform_get_core_pos(cpu_mpid);
+ cpu_powerdown[core_pos] = START;
+
+ /*
+ * clean cache lines after writing the global flag so that
+ * latest value is visible to other CPUs
+ */
+ dccvac((uintptr_t)&cpu_powerdown[core_pos]);
+ dsbish();
+
+ /* Wait for the CPU to actually power off */
+ while (tftf_psci_affinity_info(cpu_mpid, MPIDR_AFFLVL0) != PSCI_STATE_OFF)
+ dsbsy();
+ }
+
+ return TEST_RESULT_SUCCESS;
+}
diff --git a/tftf/tests/tests-tegra194.mk b/tftf/tests/tests-tegra194.mk
index 7b26945a..890c8407 100644
--- a/tftf/tests/tests-tegra194.mk
+++ b/tftf/tests/tests-tegra194.mk
@@ -5,6 +5,7 @@
#
TESTS_SOURCES += $(addprefix tftf/tests/plat/nvidia/tegra194/, \
+ test_ras_corrected.c \
test_ras_uncorrectable.c \
serror_handler.S \
)
diff --git a/tftf/tests/tests-tegra194.xml b/tftf/tests/tests-tegra194.xml
index c1371f73..49f6f9e5 100644
--- a/tftf/tests/tests-tegra194.xml
+++ b/tftf/tests/tests-tegra194.xml
@@ -8,6 +8,7 @@
<testsuites>
<testsuite name="Tegra194 platform tests" description="Tests for Tegra194 platforms">
+ <testcase name="RAS corrected error test" function="test_ras_corrected" />
<testcase name="RAS uncorrectable error test" function="test_ras_uncorrectable" />
</testsuite>