feat(pmu): add PMU support for Realms

This patch adds support for using the PMU in Realms.
It adds 'bool pmu_enabled' and 'unsigned int pmu_num_cnts'
fields to 'struct rd' and 'struct rec.realm_info'.

Signed-off-by: AlexeiFedorov <Alexei.Fedorov@arm.com>
Change-Id: I13aad600a0215ba66d25be12ede5f4b86e6b018a
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
index efe5625..7b9b2ed 100644
--- a/lib/CMakeLists.txt
+++ b/lib/CMakeLists.txt
@@ -16,9 +16,9 @@
               rmm-lib-realm
               rmm-lib-rmm_el3_ifc
               rmm-lib-smc
-              t_cose
               rmm-lib-timers
-              rmm-lib-xlat)
+              rmm-lib-xlat
+              t_cose)
 
 add_subdirectory("allocator")
 add_subdirectory("arch")
@@ -29,8 +29,8 @@
 add_subdirectory("mbedtls")
 add_subdirectory("measurement")
 add_subdirectory("realm")
-add_subdirectory("smc")
 add_subdirectory("rmm_el3_ifc")
+add_subdirectory("smc")
 add_subdirectory("t_cose")
 add_subdirectory("timers")
 add_subdirectory("xlat")
diff --git a/lib/allocator/include/memory_alloc.h b/lib/allocator/include/memory_alloc.h
index 5346395..1ff2b9e 100644
--- a/lib/allocator/include/memory_alloc.h
+++ b/lib/allocator/include/memory_alloc.h
@@ -11,11 +11,15 @@
 struct _memory_header;
 typedef struct memory_header_s memory_header_t;
 
-/*
- * Number of pages per REC to be allocated. MbedTLS needs 8K of heap
- * for attestation usecases.
- */
+/* MbedTLS needs 8K of heap for attestation usecases */
 #define REC_HEAP_PAGES		2
+#define REC_HEAP_SIZE		(REC_HEAP_PAGES * SZ_4K)
+
+/* Number of pages per REC for PMU state */
+#define REC_PMU_PAGES		1
+
+/* Number of pages per REC to be allocated */
+#define REC_NUM_PAGES		(REC_HEAP_PAGES + REC_PMU_PAGES)
 
 struct buffer_alloc_ctx {
 	unsigned char		*buf;
@@ -36,7 +40,6 @@
 	size_t			magic2;
 };
 
-
 /*
  * Function to assign a heap context to the current CPU for
  * use by the MbedCrypto. In case the heap needs to be isolated
diff --git a/lib/arch/CMakeLists.txt b/lib/arch/CMakeLists.txt
index bb0367e..8afba1a 100644
--- a/lib/arch/CMakeLists.txt
+++ b/lib/arch/CMakeLists.txt
@@ -6,7 +6,8 @@
 add_library(rmm-lib-arch)
 
 target_link_libraries(rmm-lib-arch
-    PRIVATE rmm-lib-common)
+    PRIVATE rmm-lib-common
+            rmm-lib-smc)
 
 target_include_directories(rmm-lib-arch
     PUBLIC "include"
@@ -14,7 +15,8 @@
 
 target_sources(rmm-lib-arch
         PRIVATE "src/arch_features.c"
-                "src/fpu_helpers.c")
+                "src/fpu_helpers.c"
+                "src/pmu.c")
 
 if(NOT RMM_ARCH STREQUAL fake_host)
     target_sources(rmm-lib-arch
diff --git a/lib/arch/include/arch.h b/lib/arch/include/arch.h
index cc46639..6d898fe 100644
--- a/lib/arch/include/arch.h
+++ b/lib/arch/include/arch.h
@@ -395,6 +395,9 @@
 #define ID_AA64DFR0_EL1_HPMN0_SHIFT		UL(60)
 #define ID_AA64DFR0_EL1_HPMN0_WIDTH		UL(4)
 
+#define ID_AA64DFR0_EL1_ExtTrcBuff_SHIFT	UL(56)
+#define ID_AA64DFR0_EL1_ExtTrcBuff_WIDTH	UL(4)
+
 #define ID_AA64DFR0_EL1_BRBE_SHIFT		UL(52)
 #define ID_AA64DFR0_EL1_BRBE_WIDTH		UL(4)
 
@@ -416,15 +419,26 @@
 #define ID_AA64DFR0_EL1_CTX_CMPS_SHIFT		UL(28)
 #define ID_AA64DFR0_EL1_CTX_CMPS_WIDTH		UL(4)
 
+#define ID_AA64DFR0_EL1_SEBEP_SHIFT		UL(24)
+#define ID_AA64DFR0_EL1_SEBEP_WIDTH		UL(4)
+
 #define ID_AA64DFR0_EL1_WRPs_SHIFT		UL(20)
 #define ID_AA64DFR0_EL1_WRPs_WIDTH		UL(4)
 
+#define ID_AA64DFR0_EL1_PMSS_SHIFT		UL(16)
+#define ID_AA64DFR0_EL1_PMSS_WIDTH		UL(4)
+
 #define ID_AA64DFR0_EL1_BRPs_SHIFT		UL(12)
 #define ID_AA64DFR0_EL1_BRPs_WIDTH		UL(4)
 
 #define ID_AA64DFR0_EL1_PMUVer_SHIFT		UL(8)
 #define ID_AA64DFR0_EL1_PMUVer_WIDTH		UL(4)
 
+/* Performance Monitors Extension version */
+#define ID_AA64DFR0_EL1_PMUv3p7			UL(7)
+#define ID_AA64DFR0_EL1_PMUv3p8			UL(8)
+#define ID_AA64DFR0_EL1_PMUv3p9			UL(9)
+
 #define ID_AA64DFR0_EL1_TraceVer_SHIFT		UL(4)
 #define ID_AA64DFR0_EL1_TraceVer_WIDTH		UL(4)
 
@@ -432,11 +446,18 @@
 #define ID_AA64DFR0_EL1_DebugVer_WIDTH		UL(4)
 
 /* Debug architecture version */
-#define ID_AA64DFR0_EL1_DebugVer_8	UL(6)
-#define ID_AA64DFR0_EL1_DebugVer_8_VHE	UL(7)
-#define ID_AA64DFR0_EL1_DebugVer_8_2	UL(8)
-#define ID_AA64DFR0_EL1_DebugVer_8_4	UL(9)
-#define ID_AA64DFR0_EL1_DebugVer_8_8	UL(10)
+#define ID_AA64DFR0_EL1_Debugv8			UL(6)
+#define ID_AA64DFR0_EL1_DebugVHE		UL(7)
+#define ID_AA64DFR0_EL1_Debugv8p2		UL(8)
+#define ID_AA64DFR0_EL1_Debugv8p4		UL(9)
+#define ID_AA64DFR0_EL1_Debugv8p8		UL(10)
+
+/* ID_AA64DFR1_EL1 definitions */
+#define ID_AA64DFR1_EL1_EBEP_SHIFT		UL(48)
+#define ID_AA64DFR1_EL1_EBEP_WIDTH		UL(4)
+
+#define ID_AA64DFR1_EL1_ICNTR_SHIFT		UL(36)
+#define ID_AA64DFR1_EL1_ICNTR_WIDTH		UL(4)
 
 /* ID_AA64PFR0_EL1 definitions */
 #define ID_AA64PFR0_EL1_SVE_SHIFT	UL(32)
@@ -463,34 +484,35 @@
 
 #define ID_AA64MMFR0_EL1_ECV_SHIFT		UL(60)
 #define ID_AA64MMFR0_EL1_ECV_WIDTH		UL(4)
-#define ID_AA64MMFR0_EL1_ECV_NOT_SUPPORTED	ULL(0x0)
-#define ID_AA64MMFR0_EL1_ECV_SUPPORTED		ULL(0x1)
+#define ID_AA64MMFR0_EL1_ECV_NOT_SUPPORTED	UL(0x0)
+#define ID_AA64MMFR0_EL1_ECV_SUPPORTED		UL(0x1)
 #define ID_AA64MMFR0_EL1_ECV_SELF_SYNCH	ULL(0x2)
 
 #define ID_AA64MMFR0_EL1_FGT_SHIFT		UL(56)
 #define ID_AA64MMFR0_EL1_FGT_WIDTH		UL(4)
-#define ID_AA64MMFR0_EL1_FGT_SUPPORTED		ULL(0x1)
-#define ID_AA64MMFR0_EL1_FGT_NOT_SUPPORTED	ULL(0x0)
+#define ID_AA64MMFR0_EL1_FGT_NOT_SUPPORTED	UL(0x0)
+#define ID_AA64MMFR0_EL1_FGT_SUPPORTED		UL(0x1)
+#define ID_AA64MMFR0_EL1_FGT2_SUPPORTED		UL(0x2)
 
 #define ID_AA64MMFR0_EL1_TGRAN4_2_SHIFT		U(40)
 #define ID_AA64MMFR0_EL1_TGRAN4_2_WIDTH		U(4)
-#define ID_AA64MMFR0_EL1_TGRAN4_2_TGRAN4	ULL(0x0)
-#define ID_AA64MMFR0_EL1_TGRAN4_2_NOT_SUPPORTED	ULL(0x1)
-#define ID_AA64MMFR0_EL1_TGRAN4_2_SUPPORTED	ULL(0x2)
-#define ID_AA64MMFR0_EL1_TGRAN4_2_LPA2		ULL(0x3)
+#define ID_AA64MMFR0_EL1_TGRAN4_2_TGRAN4	UL(0x0)
+#define ID_AA64MMFR0_EL1_TGRAN4_2_NOT_SUPPORTED	UL(0x1)
+#define ID_AA64MMFR0_EL1_TGRAN4_2_SUPPORTED	UL(0x2)
+#define ID_AA64MMFR0_EL1_TGRAN4_2_LPA2		UL(0x3)
 
 #define ID_AA64MMFR0_EL1_TGRAN16_2_SHIFT		UL(32)
 #define ID_AA64MMFR0_EL1_TGRAN16_2_WIDTH		UL(4)
-#define ID_AA64MMFR0_EL1_TGRAN16_2_TGRAN16		ULL(0x0)
-#define ID_AA64MMFR0_EL1_TGRAN16_2_NOT_SUPPORTED	ULL(0x1)
-#define ID_AA64MMFR0_EL1_TGRAN16_2_SUPPORTED		ULL(0x2)
-#define ID_AA64MMFR0_EL1_TGRAN16_2_LPA2			ULL(0x3)
+#define ID_AA64MMFR0_EL1_TGRAN16_2_TGRAN16		UL(0x0)
+#define ID_AA64MMFR0_EL1_TGRAN16_2_NOT_SUPPORTED	UL(0x1)
+#define ID_AA64MMFR0_EL1_TGRAN16_2_SUPPORTED		UL(0x2)
+#define ID_AA64MMFR0_EL1_TGRAN16_2_LPA2			UL(0x3)
 
 #define ID_AA64MMFR0_EL1_TGRAN4_SHIFT		UL(28)
 #define ID_AA64MMFR0_EL1_TGRAN4_WIDTH		UL(4)
-#define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED	ULL(0x0)
-#define ID_AA64MMFR0_EL1_TGRAN4_LPA2		ULL(0x1)
-#define ID_AA64MMFR0_EL1_TGRAN4_NOT_SUPPORTED	ULL(0xf)
+#define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED	UL(0x0)
+#define ID_AA64MMFR0_EL1_TGRAN4_LPA2		UL(0x1)
+#define ID_AA64MMFR0_EL1_TGRAN4_NOT_SUPPORTED	UL(0xf)
 
 #define ID_AA64MMFR0_EL1_TGRAN64_SHIFT		UL(24)
 #define ID_AA64MMFR0_EL1_TGRAN64_WIDTH		UL(4)
@@ -627,10 +649,17 @@
 	SCTLR_EL1_SA0 | SCTLR_EL1_SA)
 
 /* PMCR_EL0 Definitions */
-#define PMCR_EL0_LC_BIT		(UL(1) << 6)
+#define PMCR_EL0_N_SHIFT		11
+#define PMCR_EL0_N_WIDTH		5
+#define PMCR_EL0_LC_BIT			(UL(1) << 6)
+#define PMCR_EL0_DP_BIT			(UL(1) << 5)
+#define PMCR_EL0_C_BIT			(UL(1) << 2)
+#define PMCR_EL0_P_BIT			(UL(1) << 1)
+#define PMCR_EL0_E_BIT			(UL(1) << 0)
 
-#define PMCR_EL0_RES1		PMCR_EL0_LC_BIT
-
+#define PMCR_EL0_INIT			(PMCR_EL0_LC_BIT | PMCR_EL0_DP_BIT)
+#define PMCR_EL0_INIT_RESET		(PMCR_EL0_INIT | PMCR_EL0_C_BIT | \
+					 PMCR_EL0_P_BIT)
 
 /* MDSCR_EL1 Definitions */
 #define MDSCR_EL1_TDCC_BIT	(UL(1) << 12)
@@ -721,6 +750,10 @@
 				 CPTR_EL2_RES1)
 
 /* MDCR_EL2 definitions */
+#define MDCR_EL2_HPMFZS		(UL(1) << 36)
+#define MDCR_EL2_HPMFZO		(UL(1) << 29)
+#define MDCR_EL2_MTPME		(UL(1) << 28)
+#define MDCR_EL2_TDCC		(UL(1) << 27)
 #define MDCR_EL2_HLP		(UL(1) << 26)
 #define MDCR_EL2_HCCD		(UL(1) << 23)
 #define MDCR_EL2_TTRF		(UL(1) << 19)
@@ -735,9 +768,16 @@
 #define MDCR_EL2_HPME_BIT	(UL(1) << 7)
 #define MDCR_EL2_TPM_BIT	(UL(1) << 6)
 #define MDCR_EL2_TPMCR_BIT	(UL(1) << 5)
-#define MDCR_EL2_INIT		(MDCR_EL2_TPMCR_BIT \
-				| MDCR_EL2_TPM_BIT \
-				| MDCR_EL2_TDA_BIT)
+
+#define MDCR_EL2_HPMN_SHIFT	UL(0)
+#define MDCR_EL2_HPMN_WIDTH	UL(5)
+
+#define MDCR_EL2_INIT		(MDCR_EL2_MTPME		| \
+				 MDCR_EL2_HCCD		| \
+				 MDCR_EL2_HPMD		| \
+				 MDCR_EL2_TDA_BIT	| \
+				 MDCR_EL2_TPM_BIT	| \
+				 MDCR_EL2_TPMCR_BIT)
 
 /* MPIDR definitions */
 #define MPIDR_EL1_AFF_MASK	0xFF
diff --git a/lib/arch/include/arch_features.h b/lib/arch/include/arch_features.h
index 23fe6c0..8efed1e 100644
--- a/lib/arch/include/arch_features.h
+++ b/lib/arch/include/arch_features.h
@@ -51,7 +51,7 @@
 
 /*
  * Check if FEAT_LPA2 is implemented.
- * 4KB granule  at stage 2 supports 52-bit input and output addresses:
+ * 4KB granule at stage 2 supports 52-bit input and output addresses:
  * ID_AA64MMFR0_EL1.TGran4_2 bits [43:40]: 0b0011
  */
 static inline bool is_feat_lpa2_4k_present(void)
@@ -60,6 +60,16 @@
 		read_id_aa64mmfr0_el1()) == ID_AA64MMFR0_EL1_TGRAN4_2_LPA2);
 }
 
+/*
+ * Returns Performance Monitors Extension version.
+ * ID_AA64DFR0_EL1.PMUVer, bits [11:8]:
+ * 0b0000: Performance Monitors Extension not implemented
+ */
+static inline unsigned int read_pmu_version(void)
+{
+	return EXTRACT(ID_AA64DFR0_EL1_PMUVer, read_id_aa64dfr0_el1());
+}
+
 unsigned int arch_feat_get_pa_width(void);
 
 #endif /* ARCH_FEATURES_H */
diff --git a/lib/arch/include/arch_helpers.h b/lib/arch/include/arch_helpers.h
index 805e488..2338604 100644
--- a/lib/arch/include/arch_helpers.h
+++ b/lib/arch/include/arch_helpers.h
@@ -186,7 +186,85 @@
 DEFINE_SYSREG_RW_FUNCS(sp_el1)
 DEFINE_SYSREG_RW_FUNCS(elr_el12)
 DEFINE_SYSREG_RW_FUNCS(spsr_el12)
+
+DEFINE_SYSREG_RW_FUNCS(pmccfiltr_el0)
+DEFINE_SYSREG_RW_FUNCS(pmccntr_el0)
+DEFINE_SYSREG_RW_FUNCS(pmcntenclr_el0)
+DEFINE_SYSREG_RW_FUNCS(pmcntenset_el0)
+DEFINE_SYSREG_RW_FUNCS(pmcr_el0)
+DEFINE_SYSREG_RW_FUNCS(pmintenclr_el1)
+DEFINE_SYSREG_RW_FUNCS(pmintenset_el1)
+DEFINE_SYSREG_RW_FUNCS(pmovsclr_el0)
+DEFINE_SYSREG_RW_FUNCS(pmovsset_el0)
+DEFINE_SYSREG_RW_FUNCS(pmselr_el0)
 DEFINE_SYSREG_RW_FUNCS(pmuserenr_el0)
+DEFINE_SYSREG_RW_FUNCS(pmxevcntr_el0)
+DEFINE_SYSREG_RW_FUNCS(pmxevtyper_el0)
+
+DEFINE_SYSREG_RW_FUNCS(pmevcntr0_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevcntr1_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevcntr2_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevcntr3_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevcntr4_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevcntr5_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevcntr6_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevcntr7_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevcntr8_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevcntr9_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevcntr10_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevcntr11_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevcntr12_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevcntr13_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevcntr14_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevcntr15_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevcntr16_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevcntr17_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevcntr18_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevcntr19_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevcntr20_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevcntr21_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevcntr22_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevcntr23_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevcntr24_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevcntr25_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevcntr26_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevcntr27_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevcntr28_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevcntr29_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevcntr30_el0)
+
+DEFINE_SYSREG_RW_FUNCS(pmevtyper0_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevtyper1_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevtyper2_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevtyper3_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevtyper4_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevtyper5_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevtyper6_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevtyper7_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevtyper8_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevtyper9_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevtyper10_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevtyper11_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevtyper12_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevtyper13_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevtyper14_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevtyper15_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevtyper16_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevtyper17_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevtyper18_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevtyper19_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevtyper20_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevtyper21_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevtyper22_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevtyper23_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevtyper24_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevtyper25_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevtyper26_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevtyper27_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevtyper28_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevtyper29_el0)
+DEFINE_SYSREG_RW_FUNCS(pmevtyper30_el0)
+
 DEFINE_SYSREG_RW_FUNCS(tpidrro_el0)
 DEFINE_SYSREG_RW_FUNCS(tpidr_el0)
 DEFINE_SYSREG_RW_FUNCS(tpidr_el2)
@@ -226,12 +304,17 @@
 DEFINE_RENAME_SYSREG_READ_FUNC(ID_AA64MMFR2_EL1, id_aa64mmfr2_el1)
 DEFINE_RENAME_SYSREG_READ_FUNC(ID_AA64PFR0_EL1, id_aa64pfr0_el1)
 DEFINE_RENAME_SYSREG_READ_FUNC(ID_AA64PFR1_EL1, id_aa64pfr1_el1)
-DEFINE_RENAME_SYSREG_RW_FUNCS(icc_hppir1_el1, ICC_HPPIR1_EL1)
+DEFINE_SYSREG_READ_FUNC(id_aa64afr0_el1)
+DEFINE_SYSREG_READ_FUNC(id_aa64afr1_el1)
+DEFINE_SYSREG_READ_FUNC(id_aa64dfr0_el1)
+DEFINE_SYSREG_READ_FUNC(id_aa64dfr1_el1)
 DEFINE_SYSREG_READ_FUNC(id_aa64isar0_el1)
+DEFINE_SYSREG_READ_FUNC(id_aa64isar1_el1)
 DEFINE_SYSREG_READ_FUNC(id_aa64mmfr0_el1)
 DEFINE_SYSREG_READ_FUNC(id_aa64mmfr1_el1)
 DEFINE_SYSREG_READ_FUNC(id_aa64mmfr2_el1)
 DEFINE_SYSREG_READ_FUNC(id_aa64pfr0_el1)
+DEFINE_SYSREG_READ_FUNC(id_aa64pfr1_el1)
 DEFINE_RENAME_SYSREG_RW_FUNCS(mpam0_el1, MPAM0_EL1)
 DEFINE_SYSREG_READ_FUNC(id_afr0_el1)
 DEFINE_SYSREG_READ_FUNC(CurrentEl)
@@ -303,7 +386,6 @@
 
 DEFINE_SYSREG_RW_FUNCS(mdcr_el2)
 DEFINE_SYSREG_RW_FUNCS(hstr_el2)
-DEFINE_SYSREG_RW_FUNCS(pmcr_el0)
 DEFINE_SYSREG_RW_FUNCS(mpam2_el2)
 DEFINE_SYSREG_RW_FUNCS(mpamhcr_el2)
 DEFINE_SYSREG_RW_FUNCS(pmscr_el2)
@@ -335,7 +417,7 @@
  ******************************************************************************/
 DEFINE_RENAME_SYSREG_RW_FUNCS(icc_sre_el2, ICC_SRE_EL2)
 DEFINE_RENAME_SYSREG_RW_FUNCS(icc_ctrl_el1, ICC_CTLR_EL1)
-DEFINE_RENAME_SYSREG_READ_FUNC(icc_hppir1_el2, ICC_HPPIR1_EL1)
+DEFINE_RENAME_SYSREG_RW_FUNCS(icc_hppir1_el1, ICC_HPPIR1_EL1)
 
 /*******************************************************************************
  * Virtual GIC register accessor prototypes
diff --git a/lib/arch/include/pmu.h b/lib/arch/include/pmu.h
new file mode 100644
index 0000000..5d1588d
--- /dev/null
+++ b/lib/arch/include/pmu.h
@@ -0,0 +1,47 @@
+/*
+ * SPDX-License-Identifier: BSD-3-Clause
+ * SPDX-FileCopyrightText: Copyright TF-RMM Contributors.
+ */
+
+#ifndef PMU_H
+#define PMU_H
+
+#include <arch_helpers.h>
+#include <utils_def.h>
+
+struct rmi_rec_exit;
+struct rec;
+
+struct pmev_regs {
+	unsigned long pmevcntr_el0;
+	unsigned long pmevtyper_el0;
+};
+
+/*
+ * PMU context structure.
+ * Align on cache writeback granule to minimise cache line
+ * thashing when allocated as an array for use by each CPU.
+ */
+struct pmu_state {
+	unsigned long pmccfiltr_el0;
+	unsigned long pmccntr_el0;
+	unsigned long pmcntenset_el0;
+	unsigned long pmcntenclr_el0;
+	unsigned long pmintenset_el1;
+	unsigned long pmintenclr_el1;
+	unsigned long pmovsset_el0;
+	unsigned long pmovsclr_el0;
+	unsigned long pmselr_el0;
+	unsigned long pmuserenr_el0;
+	unsigned long pmxevcntr_el0;
+	unsigned long pmxevtyper_el0;
+
+	struct pmev_regs pmev_regs[31];
+
+} __aligned(CACHE_WRITEBACK_GRANULE);
+
+void pmu_save_state(struct pmu_state *pmu, unsigned int num_cnts);
+void pmu_restore_state(struct pmu_state *pmu, unsigned int num_cnts);
+void pmu_update_rec_exit(struct rmi_rec_exit *rec_exit);
+
+#endif /* PMU_H */
diff --git a/lib/arch/src/pmu.c b/lib/arch/src/pmu.c
new file mode 100644
index 0000000..53c6f6b
--- /dev/null
+++ b/lib/arch/src/pmu.c
@@ -0,0 +1,165 @@
+/*
+ * SPDX-License-Identifier: BSD-3-Clause
+ * SPDX-FileCopyrightText: Copyright TF-RMM Contributors.
+ */
+
+#include <arch_features.h>
+#include <arch_helpers.h>
+#include <assert.h>
+#include <pmu.h>
+#include <smc-rmi.h>
+
+/* Clear bits P0-P30, C and F0 */
+#define PMU_CLEAR_ALL	0x1FFFFFFFF
+
+/*
+ * Each macro below expands to a 'case n:' label followed by the accesses
+ * for event counter 'n'. The cases deliberately fall through, so switching
+ * on the highest counter index handles counters n, n-1, ..., 0.
+ */
+#define READ_PMEV_EL0(n) {					     \
+	case n:							     \
+	pmu->pmev_regs[n].pmevcntr_el0 = read_pmevcntr##n##_el0();   \
+	pmu->pmev_regs[n].pmevtyper_el0 = read_pmevtyper##n##_el0(); \
+}
+
+#define WRITE_PMEV_EL0(n) {					   \
+	case n:							   \
+	write_pmevcntr##n##_el0(pmu->pmev_regs[n].pmevcntr_el0);   \
+	write_pmevtyper##n##_el0(pmu->pmev_regs[n].pmevtyper_el0); \
+}
+
+/*
+ * Save PMU context to memory with number of event counters
+ * passed in 'num_cnts'. No PMU register is modified here.
+ */
+void pmu_save_state(struct pmu_state *pmu, unsigned int num_cnts)
+{
+	assert(pmu != NULL);
+
+	pmu->pmccfiltr_el0 = read_pmccfiltr_el0();
+	pmu->pmccntr_el0 = read_pmccntr_el0();
+	pmu->pmcntenset_el0 = read_pmcntenset_el0();
+	pmu->pmcntenclr_el0 = read_pmcntenclr_el0();
+	pmu->pmintenset_el1 = read_pmintenset_el1();
+	pmu->pmintenclr_el1 = read_pmintenclr_el1();
+	pmu->pmovsset_el0 = read_pmovsset_el0();
+	pmu->pmovsclr_el0 = read_pmovsclr_el0();
+	pmu->pmselr_el0 = read_pmselr_el0();
+	pmu->pmuserenr_el0 = read_pmuserenr_el0();
+	pmu->pmxevcntr_el0 = read_pmxevcntr_el0();
+	pmu->pmxevtyper_el0 = read_pmxevtyper_el0();
+
+	if (num_cnts != 0U) {
+		switch (--num_cnts) {
+		READ_PMEV_EL0(30);
+		READ_PMEV_EL0(29);
+		READ_PMEV_EL0(28);
+		READ_PMEV_EL0(27);
+		READ_PMEV_EL0(26);
+		READ_PMEV_EL0(25);
+		READ_PMEV_EL0(24);
+		READ_PMEV_EL0(23);
+		READ_PMEV_EL0(22);
+		READ_PMEV_EL0(21);
+		READ_PMEV_EL0(20);
+		READ_PMEV_EL0(19);
+		READ_PMEV_EL0(18);
+		READ_PMEV_EL0(17);
+		READ_PMEV_EL0(16);
+		READ_PMEV_EL0(15);
+		READ_PMEV_EL0(14);
+		READ_PMEV_EL0(13);
+		READ_PMEV_EL0(12);
+		READ_PMEV_EL0(11);
+		READ_PMEV_EL0(10);
+		READ_PMEV_EL0(9);
+		READ_PMEV_EL0(8);
+		READ_PMEV_EL0(7);
+		READ_PMEV_EL0(6);
+		READ_PMEV_EL0(5);
+		READ_PMEV_EL0(4);
+		READ_PMEV_EL0(3);
+		READ_PMEV_EL0(2);
+		READ_PMEV_EL0(1);
+		default:
+			pmu->pmev_regs[0].pmevcntr_el0 = read_pmevcntr0_el0();
+			pmu->pmev_regs[0].pmevtyper_el0 = read_pmevtyper0_el0();
+		}
+	}
+}
+
+/*
+ * Restore PMU context from memory with
+ * number of event counters passed in 'num_cnts'.
+ * The cycle counter and its filter are restored as well, and the
+ * complement of each saved *CLR value is written so that bits which
+ * were clear at save time are cleared again.
+ */
+void pmu_restore_state(struct pmu_state *pmu, unsigned int num_cnts)
+{
+	assert(pmu != NULL);
+
+	write_pmccfiltr_el0(pmu->pmccfiltr_el0);
+	write_pmccntr_el0(pmu->pmccntr_el0);
+	write_pmcntenset_el0(pmu->pmcntenset_el0);
+	write_pmcntenclr_el0(pmu->pmcntenclr_el0 ^ PMU_CLEAR_ALL);
+	write_pmintenset_el1(pmu->pmintenset_el1);
+	write_pmintenclr_el1(pmu->pmintenclr_el1 ^ PMU_CLEAR_ALL);
+	write_pmovsset_el0(pmu->pmovsset_el0);
+	write_pmovsclr_el0(pmu->pmovsclr_el0 ^ PMU_CLEAR_ALL);
+	write_pmselr_el0(pmu->pmselr_el0);
+	write_pmuserenr_el0(pmu->pmuserenr_el0);
+	write_pmxevcntr_el0(pmu->pmxevcntr_el0);
+	write_pmxevtyper_el0(pmu->pmxevtyper_el0);
+
+	if (num_cnts != 0U) {
+		switch (--num_cnts) {
+		WRITE_PMEV_EL0(30);
+		WRITE_PMEV_EL0(29);
+		WRITE_PMEV_EL0(28);
+		WRITE_PMEV_EL0(27);
+		WRITE_PMEV_EL0(26);
+		WRITE_PMEV_EL0(25);
+		WRITE_PMEV_EL0(24);
+		WRITE_PMEV_EL0(23);
+		WRITE_PMEV_EL0(22);
+		WRITE_PMEV_EL0(21);
+		WRITE_PMEV_EL0(20);
+		WRITE_PMEV_EL0(19);
+		WRITE_PMEV_EL0(18);
+		WRITE_PMEV_EL0(17);
+		WRITE_PMEV_EL0(16);
+		WRITE_PMEV_EL0(15);
+		WRITE_PMEV_EL0(14);
+		WRITE_PMEV_EL0(13);
+		WRITE_PMEV_EL0(12);
+		WRITE_PMEV_EL0(11);
+		WRITE_PMEV_EL0(10);
+		WRITE_PMEV_EL0(9);
+		WRITE_PMEV_EL0(8);
+		WRITE_PMEV_EL0(7);
+		WRITE_PMEV_EL0(6);
+		WRITE_PMEV_EL0(5);
+		WRITE_PMEV_EL0(4);
+		WRITE_PMEV_EL0(3);
+		WRITE_PMEV_EL0(2);
+		WRITE_PMEV_EL0(1);
+		default:
+			write_pmevcntr0_el0(pmu->pmev_regs[0].pmevcntr_el0);
+			write_pmevtyper0_el0(pmu->pmev_regs[0].pmevtyper_el0);
+		}
+	}
+}
+
+/*
+ * Expose Realm PMU state on REC exit.
+ */
+void pmu_update_rec_exit(struct rmi_rec_exit *rec_exit)
+{
+	assert(rec_exit != NULL);
+
+	rec_exit->pmu_ovf = read_pmovsset_el0();
+	rec_exit->pmu_intr_en = read_pmintenset_el1();
+	rec_exit->pmu_cntr_en = read_pmcntenset_el0();
+}
diff --git a/lib/realm/include/realm.h b/lib/realm/include/realm.h
index 24c0202..cd1ec79 100644
--- a/lib/realm/include/realm.h
+++ b/lib/realm/include/realm.h
@@ -72,6 +72,12 @@
 	/* Algorithm to use for measurements */
 	enum hash_algo algorithm;
 
+	/* PMU enabled flag */
+	bool pmu_enabled;
+
+	/* Number of PMU counters */
+	unsigned int pmu_num_cnts;
+
 	/* Realm measurement */
 	unsigned char measurement[MEASUREMENT_SLOT_NR][MAX_MEASUREMENT_SIZE];
 
diff --git a/lib/realm/include/rec.h b/lib/realm/include/rec.h
index 457226d..c1677e4 100644
--- a/lib/realm/include/rec.h
+++ b/lib/realm/include/rec.h
@@ -13,6 +13,7 @@
 #include <fpu_helpers.h>
 #include <gic.h>
 #include <memory_alloc.h>
+#include <pmu.h>
 #include <ripas.h>
 #include <sizes.h>
 #include <smc-rmi.h>
@@ -29,7 +30,6 @@
 	unsigned long elr_el1;
 	unsigned long spsr_el1;
 	unsigned long pmcr_el0;
-	unsigned long pmuserenr_el0;
 	unsigned long tpidrro_el0;
 	unsigned long tpidr_el0;
 	unsigned long csselr_el1;
@@ -84,6 +84,7 @@
 	unsigned long vttbr_el2;
 	unsigned long vtcr_el2;
 	unsigned long hcr_el2;
+	unsigned long mdcr_el2;
 };
 
 /*
@@ -96,25 +97,27 @@
 	unsigned long icc_sre_el2;
 	struct fpu_state *fpu; /* FPU/SVE saved lazily. */
 	struct sve_state *sve;
+	struct pmu_state *pmu;
 } __attribute__((aligned(CACHE_WRITEBACK_GRANULE)));
 
 /*
- * This structure contains pointers to data that is allocated
- * in auxilary granules.
+ * This structure contains pointers to data that are allocated
+ * in auxiliary granules for a REC.
  */
 struct rec_aux_data {
-	uint8_t *attest_heap_buf; /* Pointer to the heap buffer of this REC. */
+	uint8_t *attest_heap_buf; /* pointer to the heap buffer */
+	struct pmu_state *pmu;	  /* pointer to PMU state */
 };
 
-/* This structure is used for storing FPU/SIMD context for realm. */
+/* This structure is used for storing FPU/SIMD context for REC */
 struct rec_fpu_context {
 	struct fpu_state fpu;
 	bool used;
 };
 
 struct rec {
-	struct granule *g_rec; /* the granule in which this rec lives */
-	unsigned long rec_idx; /* Which rec is this */
+	struct granule *g_rec;	/* the granule in which this REC lives */
+	unsigned long rec_idx;	/* which REC is this */
 	bool runnable;
 
 	unsigned long regs[31];
@@ -139,6 +142,8 @@
 		int s2_starting_level;
 		struct granule *g_rtt;
 		struct granule *g_rd;
+		bool pmu_enabled;
+		unsigned int pmu_num_cnts;
 	} realm_info;
 
 	struct {
@@ -153,7 +158,7 @@
 		unsigned long far;
 	} last_run_info;
 
-	/* Structure for storing FPU/SIMD context for realm. */
+	/* Structure for storing FPU/SIMD context for Realm */
 	struct rec_fpu_context fpu_ctx;
 
 	/* Pointer to per-cpu non-secure state */
diff --git a/lib/smc/include/smc-rmi.h b/lib/smc/include/smc-rmi.h
index 678036b..6c6268b 100644
--- a/lib/smc/include/smc-rmi.h
+++ b/lib/smc/include/smc-rmi.h
@@ -429,7 +429,16 @@
 			unsigned char ripas_value;	/* 0x510 */
 		   }, 0x500, 0x600);
 	/* Host call immediate value */
-	SET_MEMBER_RMI(unsigned int imm, 0x600, 0x800);	/* 0x600 */
+	SET_MEMBER_RMI(unsigned int imm, 0x600, 0x700);	/* 0x600 */
+
+	/* PMU overflow */
+	SET_MEMBER_RMI(unsigned long pmu_ovf, 0x700, 0x708);	 /* 0x700 */
+
+	/* PMU interrupt enable */
+	SET_MEMBER_RMI(unsigned long pmu_intr_en, 0x708, 0x710); /* 0x708 */
+
+	/* PMU counter enable */
+	SET_MEMBER_RMI(unsigned long pmu_cntr_en, 0x710, 0x800); /* 0x710 */
 };
 
 /*
@@ -440,7 +449,7 @@
 	/* Entry information */
 	SET_MEMBER_RMI(struct rmi_rec_entry entry, 0, 0x800);	/* Offset 0 */
 	/* Exit information */
-	SET_MEMBER_RMI(struct rmi_rec_exit exit, 0x800, 0x1000);	/* 0x800 */
+	SET_MEMBER_RMI(struct rmi_rec_exit exit, 0x800, 0x1000);/* 0x800 */
 };
 
 #endif /* __ASSEMBLER__ */
diff --git a/lib/smc/src/smc-rmi-offsets.c b/lib/smc/src/smc-rmi-offsets.c
index 4750dac..9b594c2 100644
--- a/lib/smc/src/smc-rmi-offsets.c
+++ b/lib/smc/src/smc-rmi-offsets.c
@@ -48,6 +48,9 @@
 COMPILER_ASSERT(offsetof(struct rmi_rec_exit, ripas_size) == 0x508);
 COMPILER_ASSERT(offsetof(struct rmi_rec_exit, ripas_value) == 0x510);
 COMPILER_ASSERT(offsetof(struct rmi_rec_exit, imm) == 0x600);
+COMPILER_ASSERT(offsetof(struct rmi_rec_exit, pmu_ovf) == 0x700);
+COMPILER_ASSERT(offsetof(struct rmi_rec_exit, pmu_intr_en) == 0x708);
+COMPILER_ASSERT(offsetof(struct rmi_rec_exit, pmu_cntr_en) == 0x710);
 
 COMPILER_ASSERT(sizeof(struct rmi_rec_run) <= GRANULE_SIZE);
 COMPILER_ASSERT(offsetof(struct rmi_rec_run, entry) == 0);
diff --git a/lib/timers/src/timers.c b/lib/timers/src/timers.c
index f16a6ad..fa76307 100644
--- a/lib/timers/src/timers.c
+++ b/lib/timers/src/timers.c
@@ -58,7 +58,7 @@
 
 	/*
 	 * We don't want to run the Realm just to immediately exit due a
-	 * physical interrupt casused by one of the timer interrupts not having
+	 * physical interrupt caused by one of the timer interrupts not having
 	 * been retired from the CPU interface yet. Check that the interrupts
 	 * are retired before entering the Realm.
 	 */
diff --git a/plat/host/common/src/host_utils.c b/plat/host/common/src/host_utils.c
index 263f518..910bb6a 100644
--- a/plat/host/common/src/host_utils.c
+++ b/plat/host/common/src/host_utils.c
@@ -137,6 +137,13 @@
 	int ret;
 
 	/*
+	 * Initialize ID_AA64DFR0_EL1 with the PMUVer field set to PMUv3p7.
+	 * (ID_AA64DFR0_EL1.PMUVer, bits [11:8] set to 7)
+	 */
+	ret = host_util_set_default_sysreg_cb("id_aa64dfr0_el1",
+			INPLACE(ID_AA64DFR0_EL1_PMUVer, 7UL));
+
+	/*
 	 * Initialize ID_AA64MMFR0_EL1 with a physical address
 	 * range of 48 bits (PARange bits set to 0b0101)
 	 */
@@ -167,6 +174,13 @@
 	ret = host_util_set_default_sysreg_cb("elr_el2", 0UL);
 
 	/*
+	 * Set number of event counters implemented to 31.
+	 * (PMCR_EL0.N, bits [15:11] set to 31)
+	 */
+	ret = host_util_set_default_sysreg_cb("pmcr_el0",
+			INPLACE(PMCR_EL0_N, 31UL));
+
+	/*
 	 * Only check the return value of the last callback setup, to detect
 	 * if we are out of callback slots.
 	 */
diff --git a/runtime/core/init.c b/runtime/core/init.c
index c067d12..81bdfeb 100644
--- a/runtime/core/init.c
+++ b/runtime/core/init.c
@@ -3,7 +3,7 @@
  * SPDX-FileCopyrightText: Copyright TF-RMM Contributors.
  */
 
-#include <arch_helpers.h>
+#include <arch_features.h>
 #include <attestation.h>
 #include <buffer.h>
 #include <debug.h>
@@ -28,7 +28,10 @@
 	SPE(write_pmscr_el2(PMSCR_EL2_INIT));
 
 	write_cnthctl_el2(CNTHCTL_EL2_INIT);
-	write_mdcr_el2(MDCR_EL2_INIT);
+	write_vpidr_el2(read_midr_el1());
+	write_mdcr_el2(MDCR_EL2_INIT |
+			INPLACE(MDCR_EL2_HPMN,
+			EXTRACT(PMCR_EL0_N, read_pmcr_el0())));
 }
 
 void rmm_warmboot_main(void)
diff --git a/runtime/core/run.c b/runtime/core/run.c
index f136947..64f4b93 100644
--- a/runtime/core/run.c
+++ b/runtime/core/run.c
@@ -11,6 +11,7 @@
 #include <cpuid.h>
 #include <exit.h>
 #include <fpu_helpers.h>
+#include <pmu.h>
 #include <rec.h>
 #include <run.h>
 #include <smc-rmi.h>
@@ -19,7 +20,8 @@
 
 static struct ns_state g_ns_data[MAX_CPUS];
 static uint8_t g_sve_data[MAX_CPUS][sizeof(struct sve_state)]
-		__attribute__((aligned(sizeof(__uint128_t))));
+		__aligned(sizeof(__uint128_t));
+static struct pmu_state g_pmu_data[MAX_CPUS];
 
 /*
  * Initialize the aux data and any buffer pointers to the aux granule memory for
@@ -29,10 +31,12 @@
 			  void *rec_aux,
 			  unsigned int num_rec_aux)
 {
-	aux_data->attest_heap_buf = (uint8_t *)rec_aux;
-
 	/* Ensure we have enough aux granules for use by REC */
-	assert(num_rec_aux >= REC_HEAP_PAGES);
+	assert(num_rec_aux >= REC_NUM_PAGES);
+
+	aux_data->attest_heap_buf = (uint8_t *)rec_aux;
+	aux_data->pmu = (struct pmu_state *)((uint8_t *)rec_aux +
+						REC_HEAP_SIZE);
 }
 
 /*
@@ -69,7 +73,6 @@
 	sysregs->elr_el1 = read_elr_el12();
 	sysregs->spsr_el1 = read_spsr_el12();
 	sysregs->pmcr_el0 = read_pmcr_el0();
-	sysregs->pmuserenr_el0 = read_pmuserenr_el0();
 	sysregs->tpidrro_el0 = read_tpidrro_el0();
 	sysregs->tpidr_el0 = read_tpidr_el0();
 	sysregs->csselr_el1 = read_csselr_el1();
@@ -105,7 +108,7 @@
 	sysregs->cntv_cval_el0 = read_cntv_cval_el02();
 }
 
-static void save_realm_state(struct rec *rec)
+static void save_realm_state(struct rec *rec, struct rmi_rec_exit *rec_exit)
 {
 	save_sysreg_state(&rec->sysregs);
 
@@ -113,6 +116,15 @@
 	rec->pstate = read_spsr_el2();
 
 	gic_save_state(&rec->sysregs.gicstate);
+
+	if (rec->realm_info.pmu_enabled) {
+		/* Expose PMU Realm state to NS */
+		pmu_update_rec_exit(rec_exit);
+
+		/* Save PMU context */
+		pmu_save_state(rec->aux_data.pmu,
+				rec->realm_info.pmu_num_cnts);
+	}
 }
 
 static void restore_sysreg_state(struct sysreg_state *sysregs)
@@ -122,7 +134,6 @@
 	write_elr_el12(sysregs->elr_el1);
 	write_spsr_el12(sysregs->spsr_el1);
 	write_pmcr_el0(sysregs->pmcr_el0);
-	write_pmuserenr_el0(sysregs->pmuserenr_el0);
 	write_tpidrro_el0(sysregs->tpidrro_el0);
 	write_tpidr_el0(sysregs->tpidr_el0);
 	write_csselr_el1(sysregs->csselr_el1);
@@ -166,6 +177,12 @@
 	write_cntv_ctl_el02(sysregs->cntv_ctl_el0);
 }
 
+static void configure_realm_stage2(struct rec *rec)
+{
+	write_vtcr_el2(rec->common_sysregs.vtcr_el2);
+	write_vttbr_el2(rec->common_sysregs.vttbr_el2);
+}
+
 static void restore_realm_state(struct rec *rec)
 {
 	/*
@@ -177,21 +194,29 @@
 	isb();
 
 	restore_sysreg_state(&rec->sysregs);
+
 	write_elr_el2(rec->pc);
 	write_spsr_el2(rec->pstate);
 	write_hcr_el2(rec->sysregs.hcr_el2);
 
+	/* Control trapping of accesses to PMU registers */
+	write_mdcr_el2(rec->common_sysregs.mdcr_el2);
+
 	gic_restore_state(&rec->sysregs.gicstate);
+
+	configure_realm_stage2(rec);
+
+	if (rec->realm_info.pmu_enabled) {
+		/* Restore PMU context */
+		pmu_restore_state(rec->aux_data.pmu,
+				  rec->realm_info.pmu_num_cnts);
+	}
 }
 
-static void configure_realm_stage2(struct rec *rec)
+static void save_ns_state(struct rec *rec)
 {
-	write_vtcr_el2(rec->common_sysregs.vtcr_el2);
-	write_vttbr_el2(rec->common_sysregs.vttbr_el2);
-}
+	struct ns_state *ns_state = rec->ns;
 
-static void save_ns_state(struct ns_state *ns_state)
-{
 	save_sysreg_state(&ns_state->sysregs);
 
 	/*
@@ -202,10 +227,17 @@
 	ns_state->sysregs.cnthctl_el2 = read_cnthctl_el2();
 
 	ns_state->icc_sre_el2 = read_icc_sre_el2();
+
+	if (rec->realm_info.pmu_enabled) {
+		/* Save PMU context */
+		pmu_save_state(ns_state->pmu, rec->realm_info.pmu_num_cnts);
+	}
 }
 
-static void restore_ns_state(struct ns_state *ns_state)
+static void restore_ns_state(struct rec *rec)
 {
+	struct ns_state *ns_state = rec->ns;
+
 	restore_sysreg_state(&ns_state->sysregs);
 
 	/*
@@ -216,6 +248,12 @@
 	write_cnthctl_el2(ns_state->sysregs.cnthctl_el2);
 
 	write_icc_sre_el2(ns_state->icc_sre_el2);
+
+	if (rec->realm_info.pmu_enabled) {
+		/* Restore PMU state */
+		pmu_restore_state(ns_state->pmu,
+				  rec->realm_info.pmu_num_cnts);
+	}
 }
 
 static void activate_events(struct rec *rec)
@@ -243,14 +281,18 @@
 	void *rec_aux;
 	unsigned int cpuid = my_cpuid();
 
-	assert(rec->ns == NULL);
-
 	assert(cpuid < MAX_CPUS);
+	assert(rec->ns == NULL);
+	assert(rec->fpu_ctx.used == false);
+
 	ns_state = &g_ns_data[cpuid];
 
-	/* ensure SVE/FPU context is cleared */
+	/* Ensure SVE/FPU and PMU context is cleared */
 	assert(ns_state->sve == NULL);
 	assert(ns_state->fpu == NULL);
+	assert(ns_state->pmu == NULL);
+
+	rec->ns = ns_state;
 
 	/* Map auxiliary granules */
 	rec_aux = map_rec_aux(rec->g_aux, rec->num_rec_aux);
@@ -270,7 +312,7 @@
 	 */
 	if (!rec->alloc_info.ctx_initialised) {
 		(void)attestation_heap_ctx_init(rec->aux_data.attest_heap_buf,
-						REC_HEAP_PAGES * SZ_4K);
+						REC_HEAP_SIZE);
 		rec->alloc_info.ctx_initialised = true;
 	}
 
@@ -280,15 +322,11 @@
 		ns_state->fpu = (struct fpu_state *)&g_sve_data[cpuid];
 	}
 
-	save_ns_state(ns_state);
+	ns_state->pmu = &g_pmu_data[cpuid];
+
+	save_ns_state(rec);
 	restore_realm_state(rec);
 
-	/* Prepare for lazy save/restore of FPU/SIMD registers. */
-	rec->ns = ns_state;
-	assert(rec->fpu_ctx.used == false);
-
-	configure_realm_stage2(rec);
-
 	do {
 		/*
 		 * We must check the status of the arch timers in every
@@ -335,22 +373,23 @@
 		rec->fpu_ctx.used = false;
 	}
 
+	report_timer_state_to_ns(rec_exit);
+
+	save_realm_state(rec, rec_exit);
+	restore_ns_state(rec);
+
 	/*
-	 * Clear FPU/SVE context while exiting
+	 * Clear FPU/SVE and PMU context while exiting
 	 */
 	ns_state->sve = NULL;
 	ns_state->fpu = NULL;
+	ns_state->pmu = NULL;
 
 	/*
 	 * Clear NS pointer since that struct is local to this function.
 	 */
 	rec->ns = NULL;
 
-	report_timer_state_to_ns(rec_exit);
-
-	save_realm_state(rec);
-	restore_ns_state(ns_state);
-
 	/* Undo the heap association */
 	attestation_heap_ctx_unassign_pe();
 	/* Unmap auxiliary granules */
diff --git a/runtime/core/sysregs.c b/runtime/core/sysregs.c
index c45b474..940b0b4 100644
--- a/runtime/core/sysregs.c
+++ b/runtime/core/sysregs.c
@@ -36,30 +36,32 @@
  * - Debug architecture version:
  *   set in ID_AA64DFR0_EL1_SET
  * - Trace unit System registers not implemented
- * - PMU is not implemented
  * - Number of breakpoints:
  *   set in ID_AA64DFR0_EL1_SET
+ * - PMU Snapshot extension not implemented
  * - Number of watchpoints:
  *   set in ID_AA64DFR0_EL1_SET
+ * - Synchronous-exception-based event profiling not implemented
  * - Number of breakpoints that are context-aware
  * - Statistical Profiling Extension not implemented
  * - Armv8.4 Self-hosted Trace Extension not implemented
  * - Trace Buffer Extension not implemented
- * - FEAT_MTPMU not implemented
  * - Branch Record Buffer Extension not implemented
+ * - Trace Buffer External Mode not implemented
  */
 #define ID_AA64DFR0_EL1_CLEAR			  \
 	MASK(ID_AA64DFR0_EL1_DebugVer)		| \
 	MASK(ID_AA64DFR0_EL1_TraceVer)		| \
-	MASK(ID_AA64DFR0_EL1_PMUVer)		| \
 	MASK(ID_AA64DFR0_EL1_BRPs)		| \
+	MASK(ID_AA64DFR0_EL1_PMSS)		| \
 	MASK(ID_AA64DFR0_EL1_WRPs)		| \
+	MASK(ID_AA64DFR0_EL1_SEBEP)		| \
 	MASK(ID_AA64DFR0_EL1_CTX_CMPS)		| \
 	MASK(ID_AA64DFR0_EL1_PMSVer)		| \
 	MASK(ID_AA64DFR0_EL1_TraceFilt)		| \
 	MASK(ID_AA64DFR0_EL1_TraceBuffer)	| \
-	MASK(ID_AA64DFR0_EL1_MTPMU)		| \
-	MASK(ID_AA64DFR0_EL1_BRBE)
+	MASK(ID_AA64DFR0_EL1_BRBE)		| \
+	MASK(ID_AA64DFR0_EL1_ExtTrcBuff)
 
 /*
  * Set fields:
@@ -67,12 +69,23 @@
  * - Number of breakpoints: 2
  * - Number of watchpoints: 2
  */
-#define ID_AA64DFR0_EL1_SET			  \
-	ID_AA64DFR0_EL1_DebugVer_8		| \
-	INPLACE(ID_AA64DFR0_EL1_BRPs, 1UL)	| \
+#define ID_AA64DFR0_EL1_SET						  \
+	INPLACE(ID_AA64DFR0_EL1_DebugVer, ID_AA64DFR0_EL1_Debugv8)	| \
+	INPLACE(ID_AA64DFR0_EL1_BRPs, 1UL)				| \
 	INPLACE(ID_AA64DFR0_EL1_WRPs, 1UL)
 
 /*
+ * ID_AA64DFR1_EL1:
+ *
+ * Cleared fields:
+ * - Exception-based event profiling not implemented
+ * - PMU fixed-function instruction counter not implemented
+ */
+#define ID_AA64DFR1_EL1_CLEAR		  \
+	MASK(ID_AA64DFR1_EL1_EBEP)	| \
+	MASK(ID_AA64DFR1_EL1_ICNTR)
+
+/*
  * ID_AA64ISAR1_EL1:
  *
  * Cleared fields:
@@ -147,7 +160,7 @@
 		value = SYSREG_READ_CLEAR_SET(DFR0);
 		break;
 	SYSREG_CASE(DFR1)
-		value = SYSREG_READ(DFR1);
+		value = SYSREG_READ_CLEAR(DFR1);
 		break;
 	SYSREG_CASE(ISAR0)
 		value = SYSREG_READ(ISAR0);
diff --git a/runtime/rmi/feature.c b/runtime/rmi/feature.c
index a5da413..a5c4a6a 100644
--- a/runtime/rmi/feature.c
+++ b/runtime/rmi/feature.c
@@ -9,8 +9,9 @@
 #include <smc-handler.h>
 #include <smc-rmi.h>
 #include <status.h>
+#include <utils_def.h>
 
-#define RMM_FEATURE_MIN_IPA_SIZE		PARANGE_0000_WIDTH
+#define RMM_FEATURE_MIN_IPA_SIZE	PARANGE_0000_WIDTH
 
 static unsigned long get_feature_register_0(void)
 {
@@ -29,10 +30,16 @@
 	feat_reg0 |= INPLACE(RMM_FEATURE_REGISTER_0_HASH_SHA_512,
 				RMI_SUPPORTED);
 
-	/* PMU is not supported */
+	/* RMM requires PMUv3p7 or later */
+	assert(read_pmu_version() >= ID_AA64DFR0_EL1_PMUv3p7);
+
+	/* Set support for PMUv3 */
 	feat_reg0 |= INPLACE(RMM_FEATURE_REGISTER_0_PMU_EN,
-				RMI_NOT_SUPPORTED);
-	feat_reg0 |= INPLACE(RMM_FEATURE_REGISTER_0_PMU_NUM_CTRS, 0U);
+				RMI_SUPPORTED);
+
+	/* Set number of PMU counters available */
+	feat_reg0 |= INPLACE(RMM_FEATURE_REGISTER_0_PMU_NUM_CTRS,
+				EXTRACT(PMCR_EL0_N, read_pmcr_el0()));
 
 	return feat_reg0;
 }
@@ -63,13 +70,19 @@
 
 	/* Validate LPA2 flag */
 	if ((EXTRACT(RMM_FEATURE_REGISTER_0_LPA2, value) == RMI_LPA2) &&
-	    !is_feat_lpa2_4k_present()) {
+	    (EXTRACT(RMM_FEATURE_REGISTER_0_LPA2, feat_reg0) == RMI_NO_LPA2)) {
 		return false;
 	}
 
-	/* Validate PMU_EN flag */
-	if ((EXTRACT(RMM_FEATURE_REGISTER_0_PMU_EN, value) == RMI_SUPPORTED) ||
-	    (EXTRACT(RMM_FEATURE_REGISTER_0_PMU_NUM_CTRS, value) != 0U)) {
+	/*
+	 * Skip validation of RMM_FEATURE_REGISTER_0_PMU_EN flag
+	 * as RMM always assumes that PMUv3p7+ is present.
+	 */
+
+	/* Validate number of PMU counters if PMUv3 is enabled */
+	if ((EXTRACT(RMM_FEATURE_REGISTER_0_PMU_EN, value) == RMI_SUPPORTED) &&
+	    (EXTRACT(RMM_FEATURE_REGISTER_0_PMU_NUM_CTRS, value) !=
+	     EXTRACT(RMM_FEATURE_REGISTER_0_PMU_NUM_CTRS, feat_reg0))) {
 		return false;
 	}
 
diff --git a/runtime/rmi/realm.c b/runtime/rmi/realm.c
index b348e90..f8f59fc 100644
--- a/runtime/rmi/realm.c
+++ b/runtime/rmi/realm.c
@@ -297,7 +297,7 @@
 	rd->s2_ctx.ipa_bits = requested_ipa_bits(&p);
 	rd->s2_ctx.s2_starting_level = p.rtt_level_start;
 	rd->s2_ctx.num_root_rtts = p.rtt_num_start;
-	memcpy(&rd->rpv[0], &p.rpv[0], RPV_SIZE);
+	(void)memcpy(&rd->rpv[0], &p.rpv[0], RPV_SIZE);
 
 	rd->s2_ctx.vmid = (unsigned int)p.vmid;
 
@@ -315,6 +315,10 @@
 		rd->algorithm = HASH_ALGO_SHA512;
 		break;
 	}
+
+	rd->pmu_enabled = EXTRACT(RMM_FEATURE_REGISTER_0_PMU_EN, p.features_0);
+	rd->pmu_num_cnts = EXTRACT(RMM_FEATURE_REGISTER_0_PMU_NUM_CTRS, p.features_0);
+
 	realm_params_measure(rd, &p);
 
 	buffer_unmap(rd);
diff --git a/runtime/rmi/rec.c b/runtime/rmi/rec.c
index 5f82ad1..4bb4ebe 100644
--- a/runtime/rmi/rec.c
+++ b/runtime/rmi/rec.c
@@ -74,11 +74,12 @@
 static void init_rec_sysregs(struct rec *rec, unsigned long mpidr)
 {
 	/* Set non-zero values only */
-	rec->sysregs.pmcr_el0 = PMCR_EL0_RES1;
+	rec->sysregs.pmcr_el0 = rec->realm_info.pmu_enabled ?
+				PMCR_EL0_INIT_RESET : PMCR_EL0_INIT;
+
 	rec->sysregs.sctlr_el1 = SCTLR_EL1_FLAGS;
 	rec->sysregs.mdscr_el1 = MDSCR_EL1_TDCC_BIT;
 	rec->sysregs.vmpidr_el2 = mpidr | VMPIDR_EL2_RES1;
-
 	rec->sysregs.cnthctl_el2 = CNTHCTL_EL2_NO_TRAPS;
 }
 
@@ -115,12 +116,23 @@
 
 static void init_common_sysregs(struct rec *rec, struct rd *rd)
 {
+	unsigned long mdcr_el2_val = read_mdcr_el2();
+
 	/* Set non-zero values only */
 	rec->common_sysregs.hcr_el2 = HCR_FLAGS;
 	rec->common_sysregs.vtcr_el2 =  realm_vtcr(rd);
-	rec->common_sysregs.vttbr_el2 = granule_addr(rd->s2_ctx.g_rtt);
-	rec->common_sysregs.vttbr_el2 &= MASK(TTBRx_EL2_BADDR);
-	rec->common_sysregs.vttbr_el2 |= INPLACE(VTTBR_EL2_VMID, rd->s2_ctx.vmid);
+	rec->common_sysregs.vttbr_el2 = (granule_addr(rd->s2_ctx.g_rtt) &
+					MASK(TTBRx_EL2_BADDR)) |
+					INPLACE(VTTBR_EL2_VMID, rd->s2_ctx.vmid);
+
+	/* Control trapping of accesses to PMU registers */
+	if (rd->pmu_enabled) {
+		mdcr_el2_val &= ~(MDCR_EL2_TPM_BIT | MDCR_EL2_TPMCR_BIT);
+	} else {
+		mdcr_el2_val |= (MDCR_EL2_TPM_BIT | MDCR_EL2_TPMCR_BIT);
+	}
+
+	rec->common_sysregs.mdcr_el2 = mdcr_el2_val;
 }
 
 static void init_rec_regs(struct rec *rec,
@@ -267,6 +279,9 @@
 	rec->realm_info.g_rtt = rd->s2_ctx.g_rtt;
 	rec->realm_info.g_rd = g_rd;
 
+	rec->realm_info.pmu_enabled = rd->pmu_enabled;
+	rec->realm_info.pmu_num_cnts = rd->pmu_num_cnts;
+
 	rec_params_measure(rd, &rec_params);
 
 	/*
diff --git a/runtime/rsi/realm_attest.c b/runtime/rsi/realm_attest.c
index 54ddb8e..8bb3c89 100644
--- a/runtime/rsi/realm_attest.c
+++ b/runtime/rsi/realm_attest.c
@@ -186,7 +186,7 @@
 
 		rec->token_sign_ctx.state = ATTEST_SIGN_NOT_STARTED;
 		restart = attestation_heap_reinit_pe(rec->aux_data.attest_heap_buf,
-						      REC_HEAP_PAGES * SZ_4K);
+							REC_HEAP_SIZE);
 		if (restart != 0) {
 			/* There is no provision for this failure so panic */
 			panic();