feat(pmu): add PMU support for Realms

This patch adds support for using the PMU in Realms.
It adds the 'bool pmu_enabled' and 'unsigned int pmu_num_cnts'
fields to 'struct rd' and to 'realm_info' in 'struct rec'.

Signed-off-by: AlexeiFedorov <Alexei.Fedorov@arm.com>
Change-Id: I13aad600a0215ba66d25be12ede5f4b86e6b018a
diff --git a/runtime/core/init.c b/runtime/core/init.c
index c067d12..81bdfeb 100644
--- a/runtime/core/init.c
+++ b/runtime/core/init.c
@@ -3,7 +3,7 @@
  * SPDX-FileCopyrightText: Copyright TF-RMM Contributors.
  */
 
-#include <arch_helpers.h>
+#include <arch_features.h>
 #include <attestation.h>
 #include <buffer.h>
 #include <debug.h>
@@ -28,7 +28,10 @@
 	SPE(write_pmscr_el2(PMSCR_EL2_INIT));
 
 	write_cnthctl_el2(CNTHCTL_EL2_INIT);
-	write_mdcr_el2(MDCR_EL2_INIT);
+	write_vpidr_el2(read_midr_el1());
+	write_mdcr_el2(MDCR_EL2_INIT |
+			INPLACE(MDCR_EL2_HPMN,
+			EXTRACT(PMCR_EL0_N, read_pmcr_el0())));
 }
 
 void rmm_warmboot_main(void)
diff --git a/runtime/core/run.c b/runtime/core/run.c
index f136947..64f4b93 100644
--- a/runtime/core/run.c
+++ b/runtime/core/run.c
@@ -11,6 +11,7 @@
 #include <cpuid.h>
 #include <exit.h>
 #include <fpu_helpers.h>
+#include <pmu.h>
 #include <rec.h>
 #include <run.h>
 #include <smc-rmi.h>
@@ -19,7 +20,8 @@
 
 static struct ns_state g_ns_data[MAX_CPUS];
 static uint8_t g_sve_data[MAX_CPUS][sizeof(struct sve_state)]
-		__attribute__((aligned(sizeof(__uint128_t))));
+		__aligned(sizeof(__uint128_t));
+static struct pmu_state g_pmu_data[MAX_CPUS];
 
 /*
  * Initialize the aux data and any buffer pointers to the aux granule memory for
@@ -29,10 +31,12 @@
 			  void *rec_aux,
 			  unsigned int num_rec_aux)
 {
-	aux_data->attest_heap_buf = (uint8_t *)rec_aux;
-
 	/* Ensure we have enough aux granules for use by REC */
-	assert(num_rec_aux >= REC_HEAP_PAGES);
+	assert(num_rec_aux >= REC_NUM_PAGES);
+
+	aux_data->attest_heap_buf = (uint8_t *)rec_aux;
+	aux_data->pmu = (struct pmu_state *)((uint8_t *)rec_aux +
+						REC_HEAP_SIZE);
 }
 
 /*
@@ -69,7 +73,6 @@
 	sysregs->elr_el1 = read_elr_el12();
 	sysregs->spsr_el1 = read_spsr_el12();
 	sysregs->pmcr_el0 = read_pmcr_el0();
-	sysregs->pmuserenr_el0 = read_pmuserenr_el0();
 	sysregs->tpidrro_el0 = read_tpidrro_el0();
 	sysregs->tpidr_el0 = read_tpidr_el0();
 	sysregs->csselr_el1 = read_csselr_el1();
@@ -105,7 +108,7 @@
 	sysregs->cntv_cval_el0 = read_cntv_cval_el02();
 }
 
-static void save_realm_state(struct rec *rec)
+static void save_realm_state(struct rec *rec, struct rmi_rec_exit *rec_exit)
 {
 	save_sysreg_state(&rec->sysregs);
 
@@ -113,6 +116,15 @@
 	rec->pstate = read_spsr_el2();
 
 	gic_save_state(&rec->sysregs.gicstate);
+
+	if (rec->realm_info.pmu_enabled) {
+		/* Expose PMU Realm state to NS */
+		pmu_update_rec_exit(rec_exit);
+
+		/* Save PMU context */
+		pmu_save_state(rec->aux_data.pmu,
+				rec->realm_info.pmu_num_cnts);
+	}
 }
 
 static void restore_sysreg_state(struct sysreg_state *sysregs)
@@ -122,7 +134,6 @@
 	write_elr_el12(sysregs->elr_el1);
 	write_spsr_el12(sysregs->spsr_el1);
 	write_pmcr_el0(sysregs->pmcr_el0);
-	write_pmuserenr_el0(sysregs->pmuserenr_el0);
 	write_tpidrro_el0(sysregs->tpidrro_el0);
 	write_tpidr_el0(sysregs->tpidr_el0);
 	write_csselr_el1(sysregs->csselr_el1);
@@ -166,6 +177,12 @@
 	write_cntv_ctl_el02(sysregs->cntv_ctl_el0);
 }
 
+static void configure_realm_stage2(struct rec *rec)
+{
+	write_vtcr_el2(rec->common_sysregs.vtcr_el2);
+	write_vttbr_el2(rec->common_sysregs.vttbr_el2);
+}
+
 static void restore_realm_state(struct rec *rec)
 {
 	/*
@@ -177,21 +194,29 @@
 	isb();
 
 	restore_sysreg_state(&rec->sysregs);
+
 	write_elr_el2(rec->pc);
 	write_spsr_el2(rec->pstate);
 	write_hcr_el2(rec->sysregs.hcr_el2);
 
+	/* Control trapping of accesses to PMU registers */
+	write_mdcr_el2(rec->common_sysregs.mdcr_el2);
+
 	gic_restore_state(&rec->sysregs.gicstate);
+
+	configure_realm_stage2(rec);
+
+	if (rec->realm_info.pmu_enabled) {
+		/* Restore PMU context */
+		pmu_restore_state(rec->aux_data.pmu,
+				  rec->realm_info.pmu_num_cnts);
+	}
 }
 
-static void configure_realm_stage2(struct rec *rec)
+static void save_ns_state(struct rec *rec)
 {
-	write_vtcr_el2(rec->common_sysregs.vtcr_el2);
-	write_vttbr_el2(rec->common_sysregs.vttbr_el2);
-}
+	struct ns_state *ns_state = rec->ns;
 
-static void save_ns_state(struct ns_state *ns_state)
-{
 	save_sysreg_state(&ns_state->sysregs);
 
 	/*
@@ -202,10 +227,17 @@
 	ns_state->sysregs.cnthctl_el2 = read_cnthctl_el2();
 
 	ns_state->icc_sre_el2 = read_icc_sre_el2();
+
+	if (rec->realm_info.pmu_enabled) {
+		/* Save PMU context */
+		pmu_save_state(ns_state->pmu, rec->realm_info.pmu_num_cnts);
+	}
 }
 
-static void restore_ns_state(struct ns_state *ns_state)
+static void restore_ns_state(struct rec *rec)
 {
+	struct ns_state *ns_state = rec->ns;
+
 	restore_sysreg_state(&ns_state->sysregs);
 
 	/*
@@ -216,6 +248,12 @@
 	write_cnthctl_el2(ns_state->sysregs.cnthctl_el2);
 
 	write_icc_sre_el2(ns_state->icc_sre_el2);
+
+	if (rec->realm_info.pmu_enabled) {
+		/* Restore PMU context */
+		pmu_restore_state(ns_state->pmu,
+				  rec->realm_info.pmu_num_cnts);
+	}
 }
 
 static void activate_events(struct rec *rec)
@@ -243,14 +281,18 @@
 	void *rec_aux;
 	unsigned int cpuid = my_cpuid();
 
-	assert(rec->ns == NULL);
-
 	assert(cpuid < MAX_CPUS);
+	assert(rec->ns == NULL);
+	assert(rec->fpu_ctx.used == false);
+
 	ns_state = &g_ns_data[cpuid];
 
-	/* ensure SVE/FPU context is cleared */
+	/* Ensure SVE/FPU and PMU contexts are cleared */
 	assert(ns_state->sve == NULL);
 	assert(ns_state->fpu == NULL);
+	assert(ns_state->pmu == NULL);
+
+	rec->ns = ns_state;
 
 	/* Map auxiliary granules */
 	rec_aux = map_rec_aux(rec->g_aux, rec->num_rec_aux);
@@ -270,7 +312,7 @@
 	 */
 	if (!rec->alloc_info.ctx_initialised) {
 		(void)attestation_heap_ctx_init(rec->aux_data.attest_heap_buf,
-						REC_HEAP_PAGES * SZ_4K);
+						REC_HEAP_SIZE);
 		rec->alloc_info.ctx_initialised = true;
 	}
 
@@ -280,15 +322,11 @@
 		ns_state->fpu = (struct fpu_state *)&g_sve_data[cpuid];
 	}
 
-	save_ns_state(ns_state);
+	ns_state->pmu = &g_pmu_data[cpuid];
+
+	save_ns_state(rec);
 	restore_realm_state(rec);
 
-	/* Prepare for lazy save/restore of FPU/SIMD registers. */
-	rec->ns = ns_state;
-	assert(rec->fpu_ctx.used == false);
-
-	configure_realm_stage2(rec);
-
 	do {
 		/*
 		 * We must check the status of the arch timers in every
@@ -335,22 +373,23 @@
 		rec->fpu_ctx.used = false;
 	}
 
+	report_timer_state_to_ns(rec_exit);
+
+	save_realm_state(rec, rec_exit);
+	restore_ns_state(rec);
+
 	/*
-	 * Clear FPU/SVE context while exiting
+	 * Clear FPU/SVE and PMU context while exiting
 	 */
 	ns_state->sve = NULL;
 	ns_state->fpu = NULL;
+	ns_state->pmu = NULL;
 
 	/*
 	 * Clear NS pointer since that struct is local to this function.
 	 */
 	rec->ns = NULL;
 
-	report_timer_state_to_ns(rec_exit);
-
-	save_realm_state(rec);
-	restore_ns_state(ns_state);
-
 	/* Undo the heap association */
 	attestation_heap_ctx_unassign_pe();
 	/* Unmap auxiliary granules */
diff --git a/runtime/core/sysregs.c b/runtime/core/sysregs.c
index c45b474..940b0b4 100644
--- a/runtime/core/sysregs.c
+++ b/runtime/core/sysregs.c
@@ -36,30 +36,32 @@
  * - Debug architecture version:
  *   set in ID_AA64DFR0_EL1_SET
  * - Trace unit System registers not implemented
- * - PMU is not implemented
  * - Number of breakpoints:
  *   set in ID_AA64DFR0_EL1_SET
+ * - PMU Snapshot extension not implemented
  * - Number of watchpoints:
  *   set in ID_AA64DFR0_EL1_SET
+ * - Synchronous-exception-based event profiling not implemented
  * - Number of breakpoints that are context-aware
  * - Statistical Profiling Extension not implemented
  * - Armv8.4 Self-hosted Trace Extension not implemented
  * - Trace Buffer Extension not implemented
- * - FEAT_MTPMU not implemented
  * - Branch Record Buffer Extension not implemented
+ * - Trace Buffer External Mode not implemented
  */
 #define ID_AA64DFR0_EL1_CLEAR			  \
 	MASK(ID_AA64DFR0_EL1_DebugVer)		| \
 	MASK(ID_AA64DFR0_EL1_TraceVer)		| \
-	MASK(ID_AA64DFR0_EL1_PMUVer)		| \
 	MASK(ID_AA64DFR0_EL1_BRPs)		| \
+	MASK(ID_AA64DFR0_EL1_PMSS)		| \
 	MASK(ID_AA64DFR0_EL1_WRPs)		| \
+	MASK(ID_AA64DFR0_EL1_SEBEP)		| \
 	MASK(ID_AA64DFR0_EL1_CTX_CMPS)		| \
 	MASK(ID_AA64DFR0_EL1_PMSVer)		| \
 	MASK(ID_AA64DFR0_EL1_TraceFilt)		| \
 	MASK(ID_AA64DFR0_EL1_TraceBuffer)	| \
-	MASK(ID_AA64DFR0_EL1_MTPMU)		| \
-	MASK(ID_AA64DFR0_EL1_BRBE)
+	MASK(ID_AA64DFR0_EL1_BRBE)		| \
+	MASK(ID_AA64DFR0_EL1_ExtTrcBuff)
 
 /*
  * Set fields:
@@ -67,12 +69,23 @@
  * - Number of breakpoints: 2
  * - Number of watchpoints: 2
  */
-#define ID_AA64DFR0_EL1_SET			  \
-	ID_AA64DFR0_EL1_DebugVer_8		| \
-	INPLACE(ID_AA64DFR0_EL1_BRPs, 1UL)	| \
+#define ID_AA64DFR0_EL1_SET						  \
+	INPLACE(ID_AA64DFR0_EL1_DebugVer, ID_AA64DFR0_EL1_Debugv8)	| \
+	INPLACE(ID_AA64DFR0_EL1_BRPs, 1UL)				| \
 	INPLACE(ID_AA64DFR0_EL1_WRPs, 1UL)
 
 /*
+ * ID_AA64DFR1_EL1:
+ *
+ * Cleared fields:
+ * - Exception-based event profiling not implemented
+ * - PMU fixed-function instruction counter not implemented
+ */
+#define ID_AA64DFR1_EL1_CLEAR		  \
+	MASK(ID_AA64DFR1_EL1_EBEP)	| \
+	MASK(ID_AA64DFR1_EL1_ICNTR)
+
+/*
  * ID_AA64ISAR1_EL1:
  *
  * Cleared fields:
@@ -147,7 +160,7 @@
 		value = SYSREG_READ_CLEAR_SET(DFR0);
 		break;
 	SYSREG_CASE(DFR1)
-		value = SYSREG_READ(DFR1);
+		value = SYSREG_READ_CLEAR(DFR1);
 		break;
 	SYSREG_CASE(ISAR0)
 		value = SYSREG_READ(ISAR0);
diff --git a/runtime/rmi/feature.c b/runtime/rmi/feature.c
index a5da413..a5c4a6a 100644
--- a/runtime/rmi/feature.c
+++ b/runtime/rmi/feature.c
@@ -9,8 +9,9 @@
 #include <smc-handler.h>
 #include <smc-rmi.h>
 #include <status.h>
+#include <utils_def.h>
 
-#define RMM_FEATURE_MIN_IPA_SIZE		PARANGE_0000_WIDTH
+#define RMM_FEATURE_MIN_IPA_SIZE	PARANGE_0000_WIDTH
 
 static unsigned long get_feature_register_0(void)
 {
@@ -29,10 +30,16 @@
 	feat_reg0 |= INPLACE(RMM_FEATURE_REGISTER_0_HASH_SHA_512,
 				RMI_SUPPORTED);
 
-	/* PMU is not supported */
+	/* RMM requires PMUv3p7 or later to be implemented */
+	assert(read_pmu_version() >= ID_AA64DFR0_EL1_PMUv3p7);
+
+	/* Set support for PMUv3 */
 	feat_reg0 |= INPLACE(RMM_FEATURE_REGISTER_0_PMU_EN,
-				RMI_NOT_SUPPORTED);
-	feat_reg0 |= INPLACE(RMM_FEATURE_REGISTER_0_PMU_NUM_CTRS, 0U);
+				RMI_SUPPORTED);
+
+	/* Set number of PMU counters available */
+	feat_reg0 |= INPLACE(RMM_FEATURE_REGISTER_0_PMU_NUM_CTRS,
+				EXTRACT(PMCR_EL0_N, read_pmcr_el0()));
 
 	return feat_reg0;
 }
@@ -63,13 +70,19 @@
 
 	/* Validate LPA2 flag */
 	if ((EXTRACT(RMM_FEATURE_REGISTER_0_LPA2, value) == RMI_LPA2) &&
-	    !is_feat_lpa2_4k_present()) {
+	    (EXTRACT(RMM_FEATURE_REGISTER_0_LPA2, feat_reg0) == RMI_NO_LPA2)) {
 		return false;
 	}
 
-	/* Validate PMU_EN flag */
-	if ((EXTRACT(RMM_FEATURE_REGISTER_0_PMU_EN, value) == RMI_SUPPORTED) ||
-	    (EXTRACT(RMM_FEATURE_REGISTER_0_PMU_NUM_CTRS, value) != 0U)) {
+	/*
+	 * Skip validation of RMM_FEATURE_REGISTER_0_PMU_EN flag
+	 * as RMM always assumes that PMUv3p7+ is present.
+	 */
+
+	/* Validate number of PMU counters if PMUv3 is enabled */
+	if ((EXTRACT(RMM_FEATURE_REGISTER_0_PMU_EN, value) == RMI_SUPPORTED) &&
+	    (EXTRACT(RMM_FEATURE_REGISTER_0_PMU_NUM_CTRS, value) !=
+	     EXTRACT(RMM_FEATURE_REGISTER_0_PMU_NUM_CTRS, feat_reg0))) {
 		return false;
 	}
 
diff --git a/runtime/rmi/realm.c b/runtime/rmi/realm.c
index b348e90..f8f59fc 100644
--- a/runtime/rmi/realm.c
+++ b/runtime/rmi/realm.c
@@ -297,7 +297,7 @@
 	rd->s2_ctx.ipa_bits = requested_ipa_bits(&p);
 	rd->s2_ctx.s2_starting_level = p.rtt_level_start;
 	rd->s2_ctx.num_root_rtts = p.rtt_num_start;
-	memcpy(&rd->rpv[0], &p.rpv[0], RPV_SIZE);
+	(void)memcpy(&rd->rpv[0], &p.rpv[0], RPV_SIZE);
 
 	rd->s2_ctx.vmid = (unsigned int)p.vmid;
 
@@ -315,6 +315,10 @@
 		rd->algorithm = HASH_ALGO_SHA512;
 		break;
 	}
+
+	rd->pmu_enabled = EXTRACT(RMM_FEATURE_REGISTER_0_PMU_EN, p.features_0);
+	rd->pmu_num_cnts = EXTRACT(RMM_FEATURE_REGISTER_0_PMU_NUM_CTRS, p.features_0);
+
 	realm_params_measure(rd, &p);
 
 	buffer_unmap(rd);
diff --git a/runtime/rmi/rec.c b/runtime/rmi/rec.c
index 5f82ad1..4bb4ebe 100644
--- a/runtime/rmi/rec.c
+++ b/runtime/rmi/rec.c
@@ -74,11 +74,12 @@
 static void init_rec_sysregs(struct rec *rec, unsigned long mpidr)
 {
 	/* Set non-zero values only */
-	rec->sysregs.pmcr_el0 = PMCR_EL0_RES1;
+	rec->sysregs.pmcr_el0 = rec->realm_info.pmu_enabled ?
+				PMCR_EL0_INIT_RESET : PMCR_EL0_INIT;
+
 	rec->sysregs.sctlr_el1 = SCTLR_EL1_FLAGS;
 	rec->sysregs.mdscr_el1 = MDSCR_EL1_TDCC_BIT;
 	rec->sysregs.vmpidr_el2 = mpidr | VMPIDR_EL2_RES1;
-
 	rec->sysregs.cnthctl_el2 = CNTHCTL_EL2_NO_TRAPS;
 }
 
@@ -115,12 +116,23 @@
 
 static void init_common_sysregs(struct rec *rec, struct rd *rd)
 {
+	unsigned long mdcr_el2_val = read_mdcr_el2();
+
 	/* Set non-zero values only */
 	rec->common_sysregs.hcr_el2 = HCR_FLAGS;
 	rec->common_sysregs.vtcr_el2 =  realm_vtcr(rd);
-	rec->common_sysregs.vttbr_el2 = granule_addr(rd->s2_ctx.g_rtt);
-	rec->common_sysregs.vttbr_el2 &= MASK(TTBRx_EL2_BADDR);
-	rec->common_sysregs.vttbr_el2 |= INPLACE(VTTBR_EL2_VMID, rd->s2_ctx.vmid);
+	rec->common_sysregs.vttbr_el2 = (granule_addr(rd->s2_ctx.g_rtt) &
+					MASK(TTBRx_EL2_BADDR)) |
+					INPLACE(VTTBR_EL2_VMID, rd->s2_ctx.vmid);
+
+	/* Control trapping of accesses to PMU registers */
+	if (rd->pmu_enabled) {
+		mdcr_el2_val &= ~(MDCR_EL2_TPM_BIT | MDCR_EL2_TPMCR_BIT);
+	} else {
+		mdcr_el2_val |= (MDCR_EL2_TPM_BIT | MDCR_EL2_TPMCR_BIT);
+	}
+
+	rec->common_sysregs.mdcr_el2 = mdcr_el2_val;
 }
 
 static void init_rec_regs(struct rec *rec,
@@ -267,6 +279,9 @@
 	rec->realm_info.g_rtt = rd->s2_ctx.g_rtt;
 	rec->realm_info.g_rd = g_rd;
 
+	rec->realm_info.pmu_enabled = rd->pmu_enabled;
+	rec->realm_info.pmu_num_cnts = rd->pmu_num_cnts;
+
 	rec_params_measure(rd, &rec_params);
 
 	/*
diff --git a/runtime/rsi/realm_attest.c b/runtime/rsi/realm_attest.c
index 54ddb8e..8bb3c89 100644
--- a/runtime/rsi/realm_attest.c
+++ b/runtime/rsi/realm_attest.c
@@ -186,7 +186,7 @@
 
 		rec->token_sign_ctx.state = ATTEST_SIGN_NOT_STARTED;
 		restart = attestation_heap_reinit_pe(rec->aux_data.attest_heap_buf,
-						      REC_HEAP_PAGES * SZ_4K);
+							REC_HEAP_SIZE);
 		if (restart != 0) {
 			/* There is no provision for this failure so panic */
 			panic();