diff --git a/Makefile b/Makefile
index f33a12e..4b96be1 100644
--- a/Makefile
+++ b/Makefile
@@ -1245,6 +1245,7 @@
 	ENABLE_FEAT_S1POE \
 	ENABLE_FEAT_GCS \
 	ENABLE_FEAT_VHE \
+	ENABLE_FEAT_MTE_PERM \
 	ENABLE_MPAM_FOR_LOWER_ELS \
 	ENABLE_RME \
 	ENABLE_SPE_FOR_NS \
@@ -1389,6 +1390,7 @@
 	ENABLE_FEAT_S2POE \
 	ENABLE_FEAT_S1POE \
 	ENABLE_FEAT_GCS \
+	ENABLE_FEAT_MTE_PERM \
 	FEATURE_DETECTION \
 	TWED_DELAY \
 	ENABLE_FEAT_TWED \
diff --git a/common/feat_detect.c b/common/feat_detect.c
index d2e94e9..2aa0c5c 100644
--- a/common/feat_detect.c
+++ b/common/feat_detect.c
@@ -213,6 +213,8 @@
 		      "S2POE", 1, 1);
 	check_feature(ENABLE_FEAT_S1POE, read_feat_s1poe_id_field(),
 		      "S1POE", 1, 1);
+	check_feature(ENABLE_FEAT_MTE_PERM, read_feat_mte_perm_id_field(),
+		      "MTE_PERM", 1, 1);
 
 	/* v9.0 features */
 	check_feature(ENABLE_BRBE_FOR_NS, read_feat_brbe_id_field(),
diff --git a/docs/about/maintainers.rst b/docs/about/maintainers.rst
index 663859f..3de09c7 100644
--- a/docs/about/maintainers.rst
+++ b/docs/about/maintainers.rst
@@ -196,12 +196,14 @@
 
 JTAG DCC console driver
 ^^^^^^^^^^^^^^^^^^^^^^^
-:M: Michal Simek <michal.simek@amd.com>
-:G: `michalsimek`_
-:M: Venkatesh Yadav Abbarapu <venkatesh.abbarapu@amd.com>
-:G: `venkatesh`_
-:F: drivers/arm/dcc/
-:F: include/drivers/arm/dcc.h
+:|M|: Michal Simek <michal.simek@amd.com>
+:|G|: `michalsimek`_
+:|M|: Amit Nagal <amit.nagal@amd.com>
+:|G|: `amit-nagal`_
+:|M|: Akshay Belsare <akshay.belsare@amd.com>
+:|G|: `Akshay-Belsare`_
+:|F|: drivers/arm/dcc/
+:|F|: include/drivers/arm/dcc.h
 
 Power State Coordination Interface (PSCI)
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -698,7 +700,10 @@
 :|M|: Nikita Travkin <nikita@trvn.ru>
 :|G|: `TravMurav`_
 :|F|: docs/plat/qti-msm8916.rst
+:|F|: plat/qti/mdm9607/
+:|F|: plat/qti/msm8909/
 :|F|: plat/qti/msm8916/
+:|F|: plat/qti/msm8939/
 
 Raspberry Pi 3 platform port
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -795,8 +800,10 @@
 ^^^^^^^^^^^^^^^^^^^^
 :|M|: Michal Simek <michal.simek@amd.com>
 :|G|: `michalsimek`_
-:|M|: Venkatesh Yadav Abbarapu <venkatesh.abbarapu@amd.com>
-:|G|: `venkatesh`_
+:|M|: Amit Nagal <amit.nagal@amd.com>
+:|G|: `amit-nagal`_
+:|M|: Akshay Belsare <akshay.belsare@amd.com>
+:|G|: `Akshay-Belsare`_
 :|F|: docs/plat/xilinx\*
 :|F|: plat/xilinx/
 
@@ -960,7 +967,6 @@
 .. _TonyXie06: https://github.com/TonyXie06
 .. _TravMurav: https://github.com/TravMurav
 .. _vwadekar: https://github.com/vwadekar
-.. _venkatesh: https://github.com/vabbarap
 .. _Yann-lms: https://github.com/Yann-lms
 .. _manish-pandey-arm: https://github.com/manish-pandey-arm
 .. _mardyk01: https://github.com/mardyk01
@@ -994,3 +1000,5 @@
 .. _jimmy-brisson: https://github.com/theotherjimmy
 .. _ChiaweiW: https://github.com/chiaweiw
 .. _Neal-liu: https://github.com/neal-liu
+.. _amit-nagal: https://github.com/amit-nagal
+.. _Akshay-Belsare: https://github.com/Akshay-Belsare
diff --git a/docs/getting_started/build-options.rst b/docs/getting_started/build-options.rst
index a5633e9..47fd450 100644
--- a/docs/getting_started/build-options.rst
+++ b/docs/getting_started/build-options.rst
@@ -45,6 +45,12 @@
    compiling TF-A. Its value must be a numeric, and defaults to 0. See also,
    *Armv8 Architecture Extensions* in :ref:`Firmware Design`.
 
+-  ``ARM_BL2_SP_LIST_DTS``: Path to DTS file snippet to override the hardcoded
+   SP nodes in tb_fw_config.
+
+-  ``ARM_SPMC_MANIFEST_DTS``: Path to an alternate manifest file used as the
+   SPMC Core manifest. Valid when ``SPD=spmd`` is selected.
+
 -  ``BL2``: This is an optional build option which specifies the path to BL2
    image for the ``fip`` target. In this case, the BL2 in the TF-A will not be
    built.
@@ -302,6 +308,13 @@
    flag can take the values 0 to 2, to align  with the ``FEATURE_DETECTION``
    mechanism. Default value is ``0``.
 
+-  ``ENABLE_FEAT_MTE_PERM``: Numeric value to enable support for
+   ``FEAT_MTE_PERM``, which introduces Allocation tag access permission to
+   memory region attributes. ``FEAT_MTE_PERM`` is an optional architectural
+   feature available from v8.9 and upwards. This flag can take the values 0 to
+   2, to align with the ``FEATURE_DETECTION`` mechanism. Default value is
+   ``0``.
+
 -  ``ENABLE_FEAT_PAN``: Numeric value to enable the ``FEAT_PAN`` (Privileged
    Access Never) extension. ``FEAT_PAN`` adds a bit to PSTATE, generating a
    permission fault for any privileged data access from EL1/EL2 to virtual
@@ -727,6 +740,10 @@
    1 (do save and restore). 0 is the default. An SPD may set this to 1 if it
    wants the timer registers to be saved and restored.
 
+-  ``OPTEE_SP_FW_CONFIG``: DTC build flag to include OP-TEE as SP in
+   tb_fw_config device tree. This flag is defined only when
+   ``ARM_SPMC_MANIFEST_DTS`` manifest file name contains pattern optee_sp.
+
 -  ``OVERRIDE_LIBC``: This option allows platforms to override the default libc
    for the BL image. It can be either 0 (include) or 1 (remove). The default
    value is 0.
@@ -936,6 +953,9 @@
       When ``EL3_EXCEPTION_HANDLING`` is ``1``, ``TSP_NS_INTR_ASYNC_PREEMPT``
       must also be set to ``1``.
 
+-  ``TS_SP_FW_CONFIG``: DTC build flag to include Trusted Services (Crypto and
+   internal-trusted-storage) as SP in tb_fw_config device tree.
+
 -  ``TWED_DELAY``: Numeric value to be set in order to delay the trapping of
    WFE instruction. ``ENABLE_FEAT_TWED`` build option must be enabled to set
    this delay. It can take values in the range (0-15). Default value is ``0``
diff --git a/docs/plat/arm/arm-build-options.rst b/docs/plat/arm/arm-build-options.rst
index e0b9242..3179267 100644
--- a/docs/plat/arm/arm-build-options.rst
+++ b/docs/plat/arm/arm-build-options.rst
@@ -109,19 +109,6 @@
    (```ethosn.bin```). This firmware image will be included in the FIP and
    loaded at runtime.
 
--  ``ARM_SPMC_MANIFEST_DTS`` : path to an alternate manifest file used as the
-   SPMC Core manifest. Valid when ``SPD=spmd`` is selected.
-
--  ``ARM_BL2_SP_LIST_DTS``: Path to DTS file snippet to override the hardcoded
-   SP nodes in tb_fw_config.
-
--  ``OPTEE_SP_FW_CONFIG``: DTC build flag to include OP-TEE as SP in tb_fw_config
-   device tree. This flag is defined only when ``ARM_SPMC_MANIFEST_DTS`` manifest
-   file name contains pattern optee_sp.
-
--  ``TS_SP_FW_CONFIG``: DTC build flag to include Trusted Services (Crypto and
-   internal-trusted-storage) as SP in tb_fw_config device tree.
-
 -  ``ARM_GPT_SUPPORT``: Enable GPT parser to get the entry address and length of
    the various partitions present in the GPT image. This support is available
    only for the BL2 component, and it is disabled by default.
diff --git a/docs/plat/qti-msm8916.rst b/docs/plat/qti-msm8916.rst
index d7c3642..3bc121a 100644
--- a/docs/plat/qti-msm8916.rst
+++ b/docs/plat/qti-msm8916.rst
@@ -1,16 +1,27 @@
-Qualcomm Snapdragon 410 (MSM8916/APQ8016)
-=========================================
+Qualcomm MSM8916
+================
+The MSM8916 platform port in TF-A supports multiple similar Qualcomm SoCs:
 
-The `Qualcomm Snapdragon 410`_ is Qualcomm's first 64-bit SoC, released in 2014
-with four ARM Cortex-A53 cores. There are differents variants (MSM8916,
-APQ8016(E), ...) that are all very similar. A popular device based on APQ8016E
-is the `DragonBoard 410c`_ single-board computer, but the SoC is also used in
-various mid-range smartphones/tablets.
++-----------------------+----------------+-------------------+-----------------+
+| System-on-Chip (SoC)  | TF-A Platform  | Application CPU   | Supports        |
++=======================+================+===================+=================+
+| `Snapdragon 410`_     |``PLAT=msm8916``| 4x ARM Cortex-A53 | AArch64/AArch32 |
+| (MSM8x16, APQ8016(E)) |                |                   |                 |
+| (`DragonBoard 410c`_) |                |                   |                 |
++-----------------------+----------------+-------------------+-----------------+
+| `Snapdragon 615`_     |``PLAT=msm8939``| 4x ARM Cortex-A53 | AArch64/AArch32 |
+| (MSM8x39, APQ8039)    |                | 4x ARM Cortex-A53 |                 |
++-----------------------+----------------+-------------------+-----------------+
+| `Snapdragon 210`_     |``PLAT=msm8909``| 4x ARM Cortex-A7  | AArch32 only    |
+| (MSM8x09, APQ8009)    |                |                   |                 |
++-----------------------+----------------+-------------------+-----------------+
+| `Snapdragon X5 Modem`_|``PLAT=mdm9607``| 1x ARM Cortex-A7  | AArch32 only    |
+| (MDM9x07)             |                |                   |                 |
++-----------------------+----------------+-------------------+-----------------+
 
-The TF-A port for MSM8916 provides a minimal, community-maintained
-EL3 firmware. It is primarily based on information from the public
-`Snapdragon 410E Technical Reference Manual`_ combined with a lot of
-trial and error to actually make it work.
+It provides a minimal, community-maintained EL3 firmware and PSCI implementation,
+based on information from the public `Snapdragon 410E Technical Reference Manual`_
+combined with a lot of trial and error to actually make it work.
 
 .. note::
 	Unlike the :doc:`QTI SC7180/SC7280 <qti>` ports, this port does **not**
@@ -56,24 +67,27 @@
 
 Build
 -----
-It is possible to build for either AArch64 or AArch32. AArch64 is the preferred
-build option.
+It is possible to build for either AArch64 or AArch32. Some platforms use 32-bit
+CPUs that only support AArch32 (see table above). For all others AArch64 is the
+preferred build option.
 
 AArch64 (BL31)
 ^^^^^^^^^^^^^^
-Setup the cross compiler for AArch64 and build BL31 for ``msm8916``::
+Setup the cross compiler for AArch64 and build BL31 for one of the platforms in
+the table above::
 
-	$ make CROSS_COMPILE=aarch64-none-elf- PLAT=msm8916
+	$ make CROSS_COMPILE=aarch64-none-elf- PLAT=...
 
-The BL31 ELF image is generated in ``build/msm8916/release/bl31/bl31.elf``.
+The BL31 ELF image is generated in ``build/$PLAT/release/bl31/bl31.elf``.
 
 AArch32 (BL32/SP_MIN)
 ^^^^^^^^^^^^^^^^^^^^^
-Setup the cross compiler for AArch32 and build BL32 with SP_MIN for ``msm8916``::
+Setup the cross compiler for AArch32 and build BL32 with SP_MIN for one of the
+platforms in the table above::
 
-	$ make CROSS_COMPILE=arm-none-eabi- PLAT=msm8916 ARCH=aarch32 AARCH32_SP=sp_min
+	$ make CROSS_COMPILE=arm-none-eabi- PLAT=... ARCH=aarch32 AARCH32_SP=sp_min
 
-The BL32 ELF image is generated in ``build/msm8916/release/bl32/bl32.elf``.
+The BL32 ELF image is generated in ``build/$PLAT/release/bl32/bl32.elf``.
 
 Build Options
 -------------
@@ -186,7 +200,10 @@
 	[0] welcome to lk
 	...
 
-.. _Qualcomm Snapdragon 410: https://www.qualcomm.com/products/snapdragon-processors-410
+.. _Snapdragon 210: https://www.qualcomm.com/products/snapdragon-processors-210
+.. _Snapdragon 410: https://www.qualcomm.com/products/snapdragon-processors-410
+.. _Snapdragon 615: https://www.qualcomm.com/products/snapdragon-processors-615
+.. _Snapdragon X5 Modem: https://www.qualcomm.com/products/snapdragon-modems-4g-lte-x5
 .. _DragonBoard 410c: https://www.96boards.org/product/dragonboard410c/
 .. _Snapdragon 410E Technical Reference Manual: https://developer.qualcomm.com/download/sd410/snapdragon-410e-technical-reference-manual.pdf
 .. _U-Boot for DragonBoard 410c: https://u-boot.readthedocs.io/en/latest/board/qualcomm/dragonboard410c.html
diff --git a/include/arch/aarch64/arch.h b/include/arch/aarch64/arch.h
index 5dbcd0a..698fad4 100644
--- a/include/arch/aarch64/arch.h
+++ b/include/arch/aarch64/arch.h
@@ -275,6 +275,9 @@
 /* ID_AA64ISAR2_EL1 definitions */
 #define ID_AA64ISAR2_EL1		S3_0_C0_C6_2
 
+/* ID_AA64PFR2_EL1 definitions */
+#define ID_AA64PFR2_EL1			S3_0_C0_C4_2
+
 #define ID_AA64ISAR2_GPA3_SHIFT		U(8)
 #define ID_AA64ISAR2_GPA3_MASK		ULL(0xf)
 
@@ -402,6 +405,16 @@
 #define ID_AA64PFR1_EL1_RNG_TRAP_SUPPORTED	ULL(0x1)
 #define ID_AA64PFR1_EL1_RNG_TRAP_NOT_SUPPORTED	ULL(0x0)
 
+/* ID_AA64PFR2_EL1 definitions */
+#define ID_AA64PFR2_EL1_MTEPERM_SHIFT		U(0)
+#define ID_AA64PFR2_EL1_MTEPERM_MASK		ULL(0xf)
+
+#define ID_AA64PFR2_EL1_MTESTOREONLY_SHIFT	U(4)
+#define ID_AA64PFR2_EL1_MTESTOREONLY_MASK	ULL(0xf)
+
+#define ID_AA64PFR2_EL1_MTEFAR_SHIFT		U(8)
+#define ID_AA64PFR2_EL1_MTEFAR_MASK		ULL(0xf)
+
 #define VDISR_EL2				S3_4_C12_C1_1
 #define VSESR_EL2				S3_4_C5_C2_3
 
diff --git a/include/arch/aarch64/arch_features.h b/include/arch/aarch64/arch_features.h
index 9d71987..9f11f15 100644
--- a/include/arch/aarch64/arch_features.h
+++ b/include/arch/aarch64/arch_features.h
@@ -148,6 +148,11 @@
 	return ISOLATE_FIELD(read_id_aa64mmfr0_el1(), ID_AA64MMFR0_EL1_FGT);
 }
 
+static unsigned int read_feat_mte_perm_id_field(void)
+{
+	return ISOLATE_FIELD(read_id_aa64pfr2_el1(), ID_AA64PFR2_EL1_MTEPERM);
+}
+
 static inline bool is_feat_fgt_supported(void)
 {
 	if (ENABLE_FEAT_FGT == FEAT_STATE_DISABLED) {
@@ -161,6 +166,19 @@
 	return read_feat_fgt_id_field() != 0U;
 }
 
+static inline bool is_feat_mte_perm_supported(void)
+{
+	if (ENABLE_FEAT_MTE_PERM == FEAT_STATE_DISABLED) {
+		return false;
+	}
+
+	if (ENABLE_FEAT_MTE_PERM == FEAT_STATE_ALWAYS) {
+		return true;
+	}
+
+	return read_feat_mte_perm_id_field() != 0U;
+}
+
 static unsigned int read_feat_ecv_id_field(void)
 {
 	return ISOLATE_FIELD(read_id_aa64mmfr0_el1(), ID_AA64MMFR0_EL1_ECV);
diff --git a/include/arch/aarch64/arch_helpers.h b/include/arch/aarch64/arch_helpers.h
index 5b3d4c2..3121079 100644
--- a/include/arch/aarch64/arch_helpers.h
+++ b/include/arch/aarch64/arch_helpers.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2022, Arm Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2023, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -269,6 +269,7 @@
 DEFINE_RENAME_IDREG_READ_FUNC(id_aa64isar2_el1, ID_AA64ISAR2_EL1)
 DEFINE_IDREG_READ_FUNC(id_aa64pfr0_el1)
 DEFINE_IDREG_READ_FUNC(id_aa64pfr1_el1)
+DEFINE_RENAME_IDREG_READ_FUNC(id_aa64pfr2_el1, ID_AA64PFR2_EL1)
 DEFINE_IDREG_READ_FUNC(id_aa64dfr0_el1)
 DEFINE_IDREG_READ_FUNC(id_afr0_el1)
 DEFINE_SYSREG_READ_FUNC(CurrentEl)
diff --git a/include/lib/extensions/amu.h b/include/lib/extensions/amu.h
index de476e4..80ad68c 100644
--- a/include/lib/extensions/amu.h
+++ b/include/lib/extensions/amu.h
@@ -16,13 +16,21 @@
 
 #if ENABLE_FEAT_AMU
 #if __aarch64__
-void amu_enable(bool el2_unused, cpu_context_t *ctx);
+void amu_enable(cpu_context_t *ctx);
+void amu_init_el3(void);
+void amu_init_el2_unused(void);
 #else
 void amu_enable(bool el2_unused);
 #endif
 #else
 #if __aarch64__
-static inline void amu_enable(bool el2_unused, cpu_context_t *ctx)
+static inline void amu_enable(cpu_context_t *ctx)
+{
+}
+static inline void amu_init_el3(void)
+{
+}
+static inline void amu_init_el2_unused(void)
 {
 }
 #else
diff --git a/include/lib/extensions/brbe.h b/include/lib/extensions/brbe.h
index 9ee2444..194efba 100644
--- a/include/lib/extensions/brbe.h
+++ b/include/lib/extensions/brbe.h
@@ -8,9 +8,9 @@
 #define BRBE_H
 
 #if ENABLE_BRBE_FOR_NS
-void brbe_enable(void);
+void brbe_init_el3(void);
 #else
-static inline void brbe_enable(void)
+static inline void brbe_init_el3(void)
 {
 }
 #endif /* ENABLE_BRBE_FOR_NS */
diff --git a/include/lib/extensions/mpam.h b/include/lib/extensions/mpam.h
index 4327278..e5438ce 100644
--- a/include/lib/extensions/mpam.h
+++ b/include/lib/extensions/mpam.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2018-2023, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -10,11 +10,15 @@
 #include <stdbool.h>
 
 #if ENABLE_MPAM_FOR_LOWER_ELS
-void mpam_enable(bool el2_unused);
+void mpam_init_el3(void);
+void mpam_init_el2_unused(void);
 #else
-static inline void mpam_enable(bool el2_unused)
+static inline void mpam_init_el3(void)
 {
 }
-#endif
+static inline void mpam_init_el2_unused(void)
+{
+}
+#endif /* ENABLE_MPAM_FOR_LOWER_ELS */
 
 #endif /* MPAM_H */
diff --git a/include/lib/extensions/pmuv3.h b/include/lib/extensions/pmuv3.h
index 5d5d055..62fee7b 100644
--- a/include/lib/extensions/pmuv3.h
+++ b/include/lib/extensions/pmuv3.h
@@ -9,7 +9,7 @@
 
 #include <context.h>
 
-void pmuv3_disable_el3(void);
+void pmuv3_init_el3(void);
 
 #ifdef __aarch64__
 void pmuv3_enable(cpu_context_t *ctx);
diff --git a/include/lib/extensions/sme.h b/include/lib/extensions/sme.h
index 0e9c4b9..dbefdfc 100644
--- a/include/lib/extensions/sme.h
+++ b/include/lib/extensions/sme.h
@@ -22,11 +22,19 @@
 
 #if ENABLE_SME_FOR_NS
 void sme_enable(cpu_context_t *context);
+void sme_init_el3(void);
+void sme_init_el2_unused(void);
 void sme_disable(cpu_context_t *context);
 #else
 static inline void sme_enable(cpu_context_t *context)
 {
 }
+static inline void sme_init_el3(void)
+{
+}
+static inline void sme_init_el2_unused(void)
+{
+}
 static inline void sme_disable(cpu_context_t *context)
 {
 }
diff --git a/include/lib/extensions/spe.h b/include/lib/extensions/spe.h
index 02fccae..7b39037 100644
--- a/include/lib/extensions/spe.h
+++ b/include/lib/extensions/spe.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2023, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -10,15 +10,19 @@
 #include <stdbool.h>
 
 #if ENABLE_SPE_FOR_NS
-void spe_enable(bool el2_unused);
+void spe_init_el3(void);
+void spe_init_el2_unused(void);
 void spe_disable(void);
 #else
-static inline void spe_enable(bool el2_unused)
+static inline void spe_init_el3(void)
+{
+}
+static inline void spe_init_el2_unused(void)
 {
 }
 static inline void spe_disable(void)
 {
 }
-#endif
+#endif /* ENABLE_SPE_FOR_NS */
 
 #endif /* SPE_H */
diff --git a/include/lib/extensions/sve.h b/include/lib/extensions/sve.h
index 1faed2d..fc76a16 100644
--- a/include/lib/extensions/sve.h
+++ b/include/lib/extensions/sve.h
@@ -11,11 +11,15 @@
 
 #if (ENABLE_SME_FOR_NS || ENABLE_SVE_FOR_NS)
 void sve_enable(cpu_context_t *context);
+void sve_init_el2_unused(void);
 void sve_disable(cpu_context_t *context);
 #else
 static inline void sve_enable(cpu_context_t *context)
 {
 }
+static inline void sve_init_el2_unused(void)
+{
+}
 static inline void sve_disable(cpu_context_t *context)
 {
 }
diff --git a/include/lib/extensions/sys_reg_trace.h b/include/lib/extensions/sys_reg_trace.h
index 5915c55..d9f7f1b 100644
--- a/include/lib/extensions/sys_reg_trace.h
+++ b/include/lib/extensions/sys_reg_trace.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, Arm Limited. All rights reserved.
+ * Copyright (c) 2021-2023, Arm Limited. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -10,10 +10,12 @@
 #include <context.h>
 
 #if ENABLE_SYS_REG_TRACE_FOR_NS
+
 #if __aarch64__
 void sys_reg_trace_enable(cpu_context_t *context);
+void sys_reg_trace_init_el2_unused(void);
 #else
-void sys_reg_trace_enable(void);
+void sys_reg_trace_init_el3(void);
 #endif /* __aarch64__ */
 
 #else /* !ENABLE_SYS_REG_TRACE_FOR_NS */
@@ -22,11 +24,18 @@
 static inline void sys_reg_trace_enable(cpu_context_t *context)
 {
 }
+static inline void sys_reg_trace_disable(cpu_context_t *context)
+{
+}
+static inline void sys_reg_trace_init_el2_unused(void)
+{
+}
 #else
-static inline void sys_reg_trace_enable(void)
+static inline void sys_reg_trace_init_el3(void)
 {
 }
 #endif /* __aarch64__ */
+
 #endif /* ENABLE_SYS_REG_TRACE_FOR_NS */
 
 #endif /* SYS_REG_TRACE_H */
diff --git a/include/lib/extensions/trbe.h b/include/lib/extensions/trbe.h
index 861a4ad..0bed433 100644
--- a/include/lib/extensions/trbe.h
+++ b/include/lib/extensions/trbe.h
@@ -8,9 +8,13 @@
 #define TRBE_H
 
 #if ENABLE_TRBE_FOR_NS
-void trbe_enable(void);
+void trbe_init_el3(void);
+void trbe_init_el2_unused(void);
 #else
-static inline void trbe_enable(void)
+static inline void trbe_init_el3(void)
+{
+}
+static inline void trbe_init_el2_unused(void)
 {
 }
 #endif /* ENABLE_TRBE_FOR_NS */
diff --git a/include/lib/extensions/trf.h b/include/lib/extensions/trf.h
index 91a9615..1ac7cda 100644
--- a/include/lib/extensions/trf.h
+++ b/include/lib/extensions/trf.h
@@ -8,9 +8,13 @@
 #define TRF_H
 
 #if ENABLE_TRF_FOR_NS
-void trf_enable(void);
+void trf_init_el3(void);
+void trf_init_el2_unused(void);
 #else
-static inline void trf_enable(void)
+static inline void trf_init_el3(void)
+{
+}
+static inline void trf_init_el2_unused(void)
 {
 }
 #endif /* ENABLE_TRF_FOR_NS */
diff --git a/include/lib/psa/psa_manifest/sid.h b/include/lib/psa/psa_manifest/sid.h
index be78bae..7183112 100644
--- a/include/lib/psa/psa_manifest/sid.h
+++ b/include/lib/psa/psa_manifest/sid.h
@@ -8,6 +8,9 @@
 #ifndef PSA_MANIFEST_SID_H
 #define PSA_MANIFEST_SID_H
 
+/******** RSS_SP_CRYPTO ********/
+#define RSS_CRYPTO_HANDLE				(0x40000100U)
+
 /******** RSS_SP_PLATFORM ********/
 #define RSS_PLATFORM_SERVICE_HANDLE			(0x40000105U)
 
diff --git a/include/lib/psa/rss_crypto_defs.h b/include/lib/psa/rss_crypto_defs.h
new file mode 100644
index 0000000..b8c7426
--- /dev/null
+++ b/include/lib/psa/rss_crypto_defs.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2023, Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ */
+
+#ifndef RSS_CRYPTO_DEFS_H
+#define RSS_CRYPTO_DEFS_H
+
+/* Declares types that encode errors, algorithms, key types, policies, etc. */
+#include "psa/crypto_types.h"
+
+/*
+ * Value identifying export public key function API, used to dispatch the request
+ * to the corresponding API implementation in the Crypto service backend.
+ *
+ */
+#define RSS_CRYPTO_EXPORT_PUBLIC_KEY_SID	((uint16_t)0x701)
+
+/*
+ * The persistent key identifiers for RSS builtin keys.
+ */
+enum rss_key_id_builtin_t {
+	RSS_BUILTIN_KEY_ID_HOST_S_ROTPK = 0x7FFF816Cu,
+	RSS_BUILTIN_KEY_ID_HOST_NS_ROTPK,
+	RSS_BUILTIN_KEY_ID_HOST_CCA_ROTPK,
+};
+
+/*
+ * This type is used to overcome a limitation within RSS firmware in the maximum
+ * number of IOVECs it can use, especially in psa_aead_encrypt and psa_aead_decrypt.
+ */
+#define RSS_CRYPTO_MAX_NONCE_LENGTH (16u)
+struct rss_crypto_aead_pack_input {
+	uint8_t nonce[RSS_CRYPTO_MAX_NONCE_LENGTH];
+	uint32_t nonce_length;
+};
+
+/*
+ * Structure used to pack non-pointer types in a call
+ */
+struct rss_crypto_pack_iovec {
+	psa_key_id_t key_id;	/* Key id */
+	psa_algorithm_t alg;	/* Algorithm */
+	uint32_t op_handle;	/* Frontend context handle associated
+				   to a multipart operation */
+	uint32_t capacity;	/* Key derivation capacity */
+	uint32_t ad_length;	/* Additional Data length for multipart AEAD */
+	uint32_t plaintext_length;	/* Plaintext length for multipart AEAD */
+	struct rss_crypto_aead_pack_input aead_in;	/* Packs AEAD-related inputs */
+	uint16_t function_id;	/* Used to identify the function in the API dispatcher
+				   to the service backend. See rss_crypto_func_sid for
+				   detail */
+	uint16_t step;		/* Key derivation step */
+};
+
+#endif /* RSS_CRYPTO_DEFS_H */
diff --git a/include/lib/psa/rss_platform_api.h b/include/lib/psa/rss_platform_api.h
index 1dd7d05..8f74a51 100644
--- a/include/lib/psa/rss_platform_api.h
+++ b/include/lib/psa/rss_platform_api.h
@@ -11,6 +11,7 @@
 #include <stdint.h>
 
 #include "psa/error.h"
+#include <rss_crypto_defs.h>
 
 #define RSS_PLATFORM_API_ID_NV_READ       (1010)
 #define RSS_PLATFORM_API_ID_NV_INCREMENT  (1011)
@@ -41,4 +42,19 @@
 rss_platform_nv_counter_read(uint32_t counter_id,
 		uint32_t size, uint8_t *val);
 
+/*
+ * Reads the public key or the public part of a key pair in binary format.
+ *
+ * key		Identifier of the key to export.
+ * data		Buffer where the key data is to be written.
+ * data_size	Size of the data buffer in bytes.
+ * data_length	On success, the number of bytes that make up the key data.
+ *
+ * Returns PSA_SUCCESS if the key is read correctly. Otherwise,
+ *	it returns a PSA_ERROR.
+ */
+psa_status_t
+rss_platform_key_read(enum rss_key_id_builtin_t key, uint8_t *data,
+		size_t data_size, size_t *data_length);
+
 #endif /* RSS_PLATFORM_API_H */
diff --git a/lib/el3_runtime/aarch32/context_mgmt.c b/lib/el3_runtime/aarch32/context_mgmt.c
index 6414aaa..b60b8e0 100644
--- a/lib/el3_runtime/aarch32/context_mgmt.c
+++ b/lib/el3_runtime/aarch32/context_mgmt.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2023, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2023, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -142,19 +142,19 @@
 	}
 
 	if (is_feat_sys_reg_trace_supported()) {
-		sys_reg_trace_enable();
+		sys_reg_trace_init_el3();
 	}
 
 	if (is_feat_trf_supported()) {
-		trf_enable();
+		trf_init_el3();
 	}
 
 	/*
 	 * Also applies to PMU < v3. The PMU is only disabled for EL3 and Secure
 	 * state execution. This does not affect lower NS ELs.
 	 */
-	pmuv3_disable_el3();
-#endif
+	pmuv3_init_el3();
+#endif /* IMAGE_BL32 */
 }
 
 /*******************************************************************************
diff --git a/lib/el3_runtime/aarch64/context_mgmt.c b/lib/el3_runtime/aarch64/context_mgmt.c
index 4a6598a..d9ff0b6 100644
--- a/lib/el3_runtime/aarch64/context_mgmt.c
+++ b/lib/el3_runtime/aarch64/context_mgmt.c
@@ -496,21 +496,53 @@
 }
 
 /*******************************************************************************
- * Enable architecture extensions on first entry to Non-secure world.
- * When EL2 is implemented but unused `el2_unused` is non-zero, otherwise
- * it is zero. This function updates some registers in-place and its contents
- * are being prepared to be moved to cm_manage_extensions_el3 and
- * cm_manage_extensions_nonsecure.
+ * Enable architecture extensions for EL3 execution. This function only updates
+ * registers in-place which are expected to either never change or be
+ * overwritten by el3_exit.
  ******************************************************************************/
-static void manage_extensions_nonsecure_mixed(bool el2_unused, cpu_context_t *ctx)
-{
 #if IMAGE_BL31
+void cm_manage_extensions_el3(void)
+{
 	if (is_feat_spe_supported()) {
-		spe_enable(el2_unused);
+		spe_init_el3();
 	}
 
 	if (is_feat_amu_supported()) {
-		amu_enable(el2_unused, ctx);
+		amu_init_el3();
+	}
+
+	if (is_feat_sme_supported()) {
+		sme_init_el3();
+	}
+
+	if (is_feat_mpam_supported()) {
+		mpam_init_el3();
+	}
+
+	if (is_feat_trbe_supported()) {
+		trbe_init_el3();
+	}
+
+	if (is_feat_brbe_supported()) {
+		brbe_init_el3();
+	}
+
+	if (is_feat_trf_supported()) {
+		trf_init_el3();
+	}
+
+	pmuv3_init_el3();
+}
+#endif /* IMAGE_BL31 */
+
+/*******************************************************************************
+ * Enable architecture extensions on first entry to Non-secure world.
+ ******************************************************************************/
+static void manage_extensions_nonsecure(cpu_context_t *ctx)
+{
+#if IMAGE_BL31
+	if (is_feat_amu_supported()) {
+		amu_enable(ctx);
 	}
 
 	/* Enable SVE and FPU/SIMD */
@@ -522,46 +554,10 @@
 		sme_enable(ctx);
 	}
 
-	if (is_feat_mpam_supported()) {
-		mpam_enable(el2_unused);
-	}
-
-	if (is_feat_trbe_supported()) {
-		trbe_enable();
-	}
-
-	if (is_feat_brbe_supported()) {
-		brbe_enable();
-	}
-
 	if (is_feat_sys_reg_trace_supported()) {
 		sys_reg_trace_enable(ctx);
 	}
 
-	if (is_feat_trf_supported()) {
-		trf_enable();
-	}
-#endif
-}
-
-/*******************************************************************************
- * Enable architecture extensions for EL3 execution. This function only updates
- * registers in-place which are expected to either never change or be
- * overwritten by el3_exit.
- ******************************************************************************/
-#if IMAGE_BL31
-void cm_manage_extensions_el3(void)
-{
-	pmuv3_disable_el3();
-}
-#endif /* IMAGE_BL31 */
-
-/*******************************************************************************
- * Enable architecture extensions on first entry to Non-secure world.
- ******************************************************************************/
-static void manage_extensions_nonsecure(cpu_context_t *ctx)
-{
-#if IMAGE_BL31
 	pmuv3_enable(ctx);
 #endif /* IMAGE_BL31 */
 }
@@ -573,7 +569,39 @@
 static void manage_extensions_nonsecure_el2_unused(void)
 {
 #if IMAGE_BL31
+	if (is_feat_spe_supported()) {
+		spe_init_el2_unused();
+	}
+
+	if (is_feat_amu_supported()) {
+		amu_init_el2_unused();
+	}
+
+	if (is_feat_mpam_supported()) {
+		mpam_init_el2_unused();
+	}
+
+	if (is_feat_trbe_supported()) {
+		trbe_init_el2_unused();
+	}
+
+	if (is_feat_sys_reg_trace_supported()) {
+		sys_reg_trace_init_el2_unused();
+	}
+
+	if (is_feat_trf_supported()) {
+		trf_init_el2_unused();
+	}
+
 	pmuv3_init_el2_unused();
+
+	if (is_feat_sve_supported()) {
+		sve_init_el2_unused();
+	}
+
+	if (is_feat_sme_supported()) {
+		sme_init_el2_unused();
+	}
 #endif /* IMAGE_BL31 */
 }
 
@@ -606,6 +634,7 @@
 		 * Enable SME, SVE, FPU/SIMD in secure context, secure manager
 		 * must ensure SME, SVE, and FPU/SIMD context properly managed.
 		 */
+			sme_init_el3();
 			sme_enable(ctx);
 		} else {
 		/*
@@ -656,7 +685,6 @@
 {
 	u_register_t sctlr_elx, scr_el3, mdcr_el2;
 	cpu_context_t *ctx = cm_get_context(security_state);
-	bool el2_unused = false;
 	uint64_t hcr_el2 = 0U;
 
 	assert(ctx != NULL);
@@ -694,8 +722,6 @@
 #endif
 			write_sctlr_el2(sctlr_elx);
 		} else if (el2_implemented != EL_IMPL_NONE) {
-			el2_unused = true;
-
 			/*
 			 * EL2 present but unused, need to disable safely.
 			 * SCTLR_EL2 can be ignored in this case.
@@ -719,24 +745,8 @@
 			 * Initialise CPTR_EL2 setting all fields rather than
 			 * relying on the hw. All fields have architecturally
 			 * UNKNOWN reset values.
-			 *
-			 * CPTR_EL2.TCPAC: Set to zero so that Non-secure EL1
-			 *  accesses to the CPACR_EL1 or CPACR from both
-			 *  Execution states do not trap to EL2.
-			 *
-			 * CPTR_EL2.TTA: Set to zero so that Non-secure System
-			 *  register accesses to the trace registers from both
-			 *  Execution states do not trap to EL2.
-			 *  If PE trace unit System registers are not implemented
-			 *  then this bit is reserved, and must be set to zero.
-			 *
-			 * CPTR_EL2.TFP: Set to zero so that Non-secure accesses
-			 *  to SIMD and floating-point functionality from both
-			 *  Execution states do not trap to EL2.
 			 */
-			write_cptr_el2(CPTR_EL2_RESET_VAL &
-					~(CPTR_EL2_TCPAC_BIT | CPTR_EL2_TTA_BIT
-					| CPTR_EL2_TFP_BIT));
+			write_cptr_el2(CPTR_EL2_RESET_VAL);
 
 			/*
 			 * Initialise CNTHCTL_EL2. All fields are
@@ -787,16 +797,6 @@
 			 * relying on hw. Some fields are architecturally
 			 * UNKNOWN on reset.
 			 *
-			 * MDCR_EL2.TTRF: Set to zero so that access to Trace
-			 *  Filter Control register TRFCR_EL1 at EL1 is not
-			 *  trapped to EL2. This bit is RES0 in versions of
-			 *  the architecture earlier than ARMv8.4.
-			 *
-			 * MDCR_EL2.TPMS: Set to zero so that accesses to
-			 *  Statistical Profiling control registers from EL1
-			 *  do not trap to EL2. This bit is RES0 when SPE is
-			 *  not implemented.
-			 *
 			 * MDCR_EL2.TDRA: Set to zero so that Non-secure EL0 and
 			 *  EL1 System register accesses to the Debug ROM
 			 *  registers are not trapped to EL2.
@@ -810,16 +810,10 @@
 			 *
 			 * MDCR_EL2.TDE: Set to zero so that debug exceptions
 			 *  are not routed to EL2.
-			 *
-			 * MDCR_EL2.E2TB: Set to zero so that the trace Buffer
-			 *  owning exception level is NS-EL1 and, tracing is
-			 *  prohibited at NS-EL2. These bits are RES0 when
-			 *  FEAT_TRBE is not implemented.
 			 */
-			mdcr_el2 = ((MDCR_EL2_RESET_VAL) & ~(MDCR_EL2_TTRF |
-				     MDCR_EL2_TDRA_BIT | MDCR_EL2_TDOSA_BIT |
-				     MDCR_EL2_TDA_BIT | MDCR_EL2_TDE_BIT |
-				     MDCR_EL2_E2TB(MDCR_EL2_E2TB_EL1)));
+			mdcr_el2 = ((MDCR_EL2_RESET_VAL) &
+				   ~(MDCR_EL2_TDRA_BIT | MDCR_EL2_TDOSA_BIT |
+				     MDCR_EL2_TDA_BIT | MDCR_EL2_TDE_BIT));
 
 			write_mdcr_el2(mdcr_el2);
 
@@ -844,7 +838,6 @@
 
 			manage_extensions_nonsecure_el2_unused();
 		}
-		manage_extensions_nonsecure_mixed(el2_unused, ctx);
 	}
 
 	cm_el1_sysregs_context_restore(security_state);
@@ -1149,23 +1142,15 @@
 void cm_prepare_el3_exit_ns(void)
 {
 #if CTX_INCLUDE_EL2_REGS
+#if ENABLE_ASSERTIONS
 	cpu_context_t *ctx = cm_get_context(NON_SECURE);
 	assert(ctx != NULL);
 
 	/* Assert that EL2 is used. */
-#if ENABLE_ASSERTIONS
-	el3_state_t *state = get_el3state_ctx(ctx);
-	u_register_t scr_el3 = read_ctx_reg(state, CTX_SCR_EL3);
-#endif
+	u_register_t scr_el3 = read_ctx_reg(get_el3state_ctx(ctx), CTX_SCR_EL3);
 	assert(((scr_el3 & SCR_HCE_BIT) != 0UL) &&
 			(el_implemented(2U) != EL_IMPL_NONE));
-
-	/*
-	 * Currently some extensions are configured using
-	 * direct register updates. Therefore, do this here
-	 * instead of when setting up context.
-	 */
-	manage_extensions_nonsecure_mixed(0, ctx);
+#endif /* ENABLE_ASSERTIONS */
 
 	/*
 	 * Set the NS bit to be able to access the ICC_SRE_EL2
diff --git a/lib/extensions/amu/aarch64/amu.c b/lib/extensions/amu/aarch64/amu.c
index c650629..e0d3399 100644
--- a/lib/extensions/amu/aarch64/amu.c
+++ b/lib/extensions/amu/aarch64/amu.c
@@ -188,95 +188,66 @@
  * Enable counters. This function is meant to be invoked by the context
  * management library before exiting from EL3.
  */
-void amu_enable(bool el2_unused, cpu_context_t *ctx)
+void amu_enable(cpu_context_t *ctx)
 {
-	uint64_t amcfgr_el0_ncg;		/* Number of counter groups */
-	uint64_t amcgcr_el0_cg0nc;		/* Number of group 0 counters */
-
-	uint64_t amcntenset0_el0_px = 0x0;	/* Group 0 enable mask */
-	uint64_t amcntenset1_el0_px = 0x0;	/* Group 1 enable mask */
-
-	if (el2_unused) {
-		/*
-		 * CPTR_EL2.TAM: Set to zero so any accesses to the Activity
-		 * Monitor registers do not trap to EL2.
-		 */
-		write_cptr_el2_tam(0U);
-	}
-
 	/*
-	 * Retrieve and update the CPTR_EL3 value from the context mentioned
-	 * in 'ctx'. Set CPTR_EL3.TAM to zero so that any accesses to
-	 * the Activity Monitor registers do not trap to EL3.
+	 * Set CPTR_EL3.TAM to zero so that any accesses to the Activity Monitor
+	 * registers do not trap to EL3.
 	 */
 	ctx_write_cptr_el3_tam(ctx, 0U);
 
-	/*
-	 * Retrieve the number of architected counters. All of these counters
-	 * are enabled by default.
-	 */
+	/* Initialize FEAT_AMUv1p1 features if present. */
+	if (is_feat_amuv1p1_supported()) {
+		/*
+		 * Set SCR_EL3.AMVOFFEN to one so that accesses to virtual
+		 * offset registers at EL2 do not trap to EL3
+		 */
+		ctx_write_scr_el3_amvoffen(ctx, 1U);
+	}
+}
 
-	amcgcr_el0_cg0nc = read_amcgcr_el0_cg0nc();
-	amcntenset0_el0_px = (UINT64_C(1) << (amcgcr_el0_cg0nc)) - 1U;
+void amu_init_el3(void)
+{
+	uint64_t group0_impl_ctr = read_amcgcr_el0_cg0nc();
+	uint64_t group0_en_mask = (UINT64_C(1) << group0_impl_ctr) - 1U;
+	uint64_t num_ctr_groups = read_amcfgr_el0_ncg();
 
-	assert(amcgcr_el0_cg0nc <= AMU_AMCGCR_CG0NC_MAX);
-
-	/*
-	 * The platform may opt to enable specific auxiliary counters. This can
-	 * be done via the common FCONF getter, or via the platform-implemented
-	 * function.
-	 */
+	/* Enable all architected counters by default */
+	write_amcntenset0_el0_px(group0_en_mask);
 
 #if ENABLE_AMU_AUXILIARY_COUNTERS
-	const struct amu_topology *topology;
+	if (num_ctr_groups > 0U) {
+		uint64_t amcntenset1_el0_px = 0x0; /* Group 1 enable mask */
+		const struct amu_topology *topology;
 
+		/*
+		 * The platform may opt to enable specific auxiliary counters.
+		 * This can be done via the common FCONF getter, or via the
+		 * platform-implemented function.
+		 */
 #if ENABLE_AMU_FCONF
-	topology = FCONF_GET_PROPERTY(amu, config, topology);
+		topology = FCONF_GET_PROPERTY(amu, config, topology);
 #else
-	topology = plat_amu_topology();
+		topology = plat_amu_topology();
 #endif /* ENABLE_AMU_FCONF */
 
-	if (topology != NULL) {
-		unsigned int core_pos = plat_my_core_pos();
+		if (topology != NULL) {
+			unsigned int core_pos = plat_my_core_pos();
 
-		amcntenset1_el0_px = topology->cores[core_pos].enable;
-	} else {
-		ERROR("AMU: failed to generate AMU topology\n");
+			amcntenset1_el0_px = topology->cores[core_pos].enable;
+		} else {
+			ERROR("AMU: failed to generate AMU topology\n");
+		}
+
+		write_amcntenset1_el0_px(amcntenset1_el0_px);
+	}
+#else /* ENABLE_AMU_AUXILIARY_COUNTERS */
+	if (num_ctr_groups > 0U) {
+		VERBOSE("AMU: auxiliary counters detected but support is disabled\n");
 	}
 #endif /* ENABLE_AMU_AUXILIARY_COUNTERS */
 
-	/*
-	 * Enable the requested counters.
-	 */
-
-	write_amcntenset0_el0_px(amcntenset0_el0_px);
-
-	amcfgr_el0_ncg = read_amcfgr_el0_ncg();
-	if (amcfgr_el0_ncg > 0U) {
-		write_amcntenset1_el0_px(amcntenset1_el0_px);
-
-#if !ENABLE_AMU_AUXILIARY_COUNTERS
-		VERBOSE("AMU: auxiliary counters detected but support is disabled\n");
-#endif
-	}
-
-	/* Initialize FEAT_AMUv1p1 features if present. */
 	if (is_feat_amuv1p1_supported()) {
-		if (el2_unused) {
-			/*
-			 * Make sure virtual offsets are disabled if EL2 not
-			 * used.
-			 */
-			write_hcr_el2_amvoffen(0U);
-		} else {
-			/*
-			 * Virtual offset registers are only accessible from EL3
-			 * and EL2, when clear, this bit traps accesses from EL2
-			 * so we set it to 1 when EL2 is present.
-			 */
-			ctx_write_scr_el3_amvoffen(ctx, 1U);
-		}
-
 #if AMU_RESTRICT_COUNTERS
 		/*
 		 * FEAT_AMUv1p1 adds a register field to restrict access to
@@ -297,6 +268,21 @@
 #endif
 }
 
+void amu_init_el2_unused(void)
+{
+	/*
+	 * CPTR_EL2.TAM: Set to zero so any accesses to the Activity Monitor
+	 *  registers do not trap to EL2.
+	 */
+	write_cptr_el2_tam(0U);
+
+	/* Initialize FEAT_AMUv1p1 features if present. */
+	if (is_feat_amuv1p1_supported()) {
+		/* Make sure virtual offsets are disabled if EL2 not used. */
+		write_hcr_el2_amvoffen(0U);
+	}
+}
+
 /* Read the group 0 counter identified by the given `idx`. */
 static uint64_t amu_group0_cnt_read(unsigned int idx)
 {
@@ -526,10 +512,10 @@
 
 	uint64_t hcr_el2_amvoffen = 0;	/* AMU virtual offsets enabled */
 
-	uint64_t amcfgr_el0_ncg;	/* Number of counter groups */
 	uint64_t amcgcr_el0_cg0nc;	/* Number of group 0 counters */
 
 #if ENABLE_AMU_AUXILIARY_COUNTERS
+	uint64_t amcfgr_el0_ncg;	/* Number of counter groups */
 	uint64_t amcgcr_el0_cg1nc;	/* Number of group 1 counters */
 	uint64_t amcg1idr_el0_voff;	/* Auxiliary counters with virtual offsets */
 #endif
@@ -541,7 +527,6 @@
 	core_pos = plat_my_core_pos();
 	ctx = &amu_ctxs_[core_pos];
 
-	amcfgr_el0_ncg = read_amcfgr_el0_ncg();
 	amcgcr_el0_cg0nc = read_amcgcr_el0_cg0nc();
 
 	if (is_feat_amuv1p1_supported()) {
@@ -549,22 +534,12 @@
 	}
 
 #if ENABLE_AMU_AUXILIARY_COUNTERS
+	amcfgr_el0_ncg = read_amcfgr_el0_ncg();
 	amcgcr_el0_cg1nc = (amcfgr_el0_ncg > 0U) ? read_amcgcr_el0_cg1nc() : 0U;
 	amcg1idr_el0_voff = (hcr_el2_amvoffen != 0U) ? read_amcg1idr_el0_voff() : 0U;
 #endif
 
 	/*
-	 * Sanity check that all counters were disabled when the context was
-	 * previously saved.
-	 */
-
-	assert(read_amcntenset0_el0_px() == 0U);
-
-	if (amcfgr_el0_ncg > 0U) {
-		assert(read_amcntenset1_el0_px() == 0U);
-	}
-
-	/*
 	 * Restore the counter values from the local context.
 	 */
 
diff --git a/lib/extensions/brbe/brbe.c b/lib/extensions/brbe/brbe.c
index 329cf98..37bd834 100644
--- a/lib/extensions/brbe/brbe.c
+++ b/lib/extensions/brbe/brbe.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, Arm Limited. All rights reserved.
+ * Copyright (c) 2022-2023, Arm Limited. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -7,8 +7,9 @@
 #include <arch.h>
 #include <arch_features.h>
 #include <arch_helpers.h>
+#include <lib/extensions/brbe.h>
 
-void brbe_enable(void)
+void brbe_init_el3(void)
 {
 	uint64_t val;
 
diff --git a/lib/extensions/mpam/mpam.c b/lib/extensions/mpam/mpam.c
index 62533fc..6462c97 100644
--- a/lib/extensions/mpam/mpam.c
+++ b/lib/extensions/mpam/mpam.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2022, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2018-2023, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -11,7 +11,7 @@
 #include <arch_helpers.h>
 #include <lib/extensions/mpam.h>
 
-void mpam_enable(bool el2_unused)
+void mpam_init_el3(void)
 {
 	/*
 	 * Enable MPAM, and disable trapping to EL3 when lower ELs access their
@@ -19,15 +19,18 @@
 	 */
 	write_mpam3_el3(MPAM3_EL3_MPAMEN_BIT);
 
-	/*
-	 * If EL2 is implemented but unused, disable trapping to EL2 when lower
-	 * ELs access their own MPAM registers.
-	 */
-	if (el2_unused) {
-		write_mpam2_el2(0ULL);
+}
 
-		if ((read_mpamidr_el1() & MPAMIDR_HAS_HCR_BIT) != 0U) {
-			write_mpamhcr_el2(0ULL);
-		}
+/*
+ * If EL2 is implemented but unused, disable trapping to EL2 when lower ELs
+ * access their own MPAM registers.
+ */
+void mpam_init_el2_unused(void)
+{
+	write_mpam2_el2(0ULL);
+
+	if ((read_mpamidr_el1() & MPAMIDR_HAS_HCR_BIT) != 0U) {
+		write_mpamhcr_el2(0ULL);
 	}
+
 }
diff --git a/lib/extensions/pmuv3/aarch32/pmuv3.c b/lib/extensions/pmuv3/aarch32/pmuv3.c
index fe4205e..effb7e0 100644
--- a/lib/extensions/pmuv3/aarch32/pmuv3.c
+++ b/lib/extensions/pmuv3/aarch32/pmuv3.c
@@ -29,7 +29,7 @@
  * Applies to all PMU versions. Name is PMUv3 for compatibility with aarch64 and
  * to not clash with platforms which reuse the PMU name
  */
-void pmuv3_disable_el3(void)
+void pmuv3_init_el3(void)
 {
 	u_register_t sdcr = read_sdcr();
 
diff --git a/lib/extensions/pmuv3/aarch64/pmuv3.c b/lib/extensions/pmuv3/aarch64/pmuv3.c
index f83a5ee..fda71aa 100644
--- a/lib/extensions/pmuv3/aarch64/pmuv3.c
+++ b/lib/extensions/pmuv3/aarch64/pmuv3.c
@@ -48,7 +48,7 @@
 	return mdcr_el3;
 }
 
-void pmuv3_disable_el3(void)
+void pmuv3_init_el3(void)
 {
 	u_register_t mdcr_el3 = read_mdcr_el3();
 
diff --git a/lib/extensions/sme/sme.c b/lib/extensions/sme/sme.c
index 3423dba..d705b64 100644
--- a/lib/extensions/sme/sme.c
+++ b/lib/extensions/sme/sme.c
@@ -17,7 +17,6 @@
 void sme_enable(cpu_context_t *context)
 {
 	u_register_t reg;
-	u_register_t cptr_el3;
 	el3_state_t *state;
 
 	/* Get the context state. */
@@ -32,9 +31,14 @@
 	reg = read_ctx_reg(state, CTX_SCR_EL3);
 	reg |= SCR_ENTP2_BIT;
 	write_ctx_reg(state, CTX_SCR_EL3, reg);
+}
 
-	/* Set CPTR_EL3.ESM bit so we can write SMCR_EL3 without trapping. */
-	cptr_el3 = read_cptr_el3();
+void sme_init_el3(void)
+{
+	u_register_t cptr_el3 = read_cptr_el3();
+	u_register_t smcr_el3;
+
+	/* Set CPTR_EL3.ESM bit so we can access SMCR_EL3 without trapping. */
 	write_cptr_el3(cptr_el3 | ESM_BIT);
 	isb();
 
@@ -43,11 +47,10 @@
 	 * to be the least restrictive, then lower ELs can restrict as needed
 	 * using SMCR_EL2 and SMCR_EL1.
 	 */
-	reg = SMCR_ELX_LEN_MAX;
-
+	smcr_el3 = SMCR_ELX_LEN_MAX;
 	if (read_feat_sme_fa64_id_field() != 0U) {
 		VERBOSE("[SME] FA64 enabled\n");
-		reg |= SMCR_ELX_FA64_BIT;
+		smcr_el3 |= SMCR_ELX_FA64_BIT;
 	}
 
 	/*
@@ -58,15 +61,24 @@
 	 */
 	if (is_feat_sme2_supported()) {
 		VERBOSE("SME2 enabled\n");
-		reg |= SMCR_ELX_EZT0_BIT;
+		smcr_el3 |= SMCR_ELX_EZT0_BIT;
 	}
-	write_smcr_el3(reg);
+	write_smcr_el3(smcr_el3);
 
 	/* Reset CPTR_EL3 value. */
 	write_cptr_el3(cptr_el3);
 	isb();
 }
 
+void sme_init_el2_unused(void)
+{
+	/*
+	 * CPTR_EL2.TCPAC: Set to zero so that Non-secure EL1 accesses to the
+	 *  CPACR_EL1 or CPACR from both Execution states do not trap to EL2.
+	 */
+	write_cptr_el2(read_cptr_el2() & ~CPTR_EL2_TCPAC_BIT);
+}
+
 void sme_disable(cpu_context_t *context)
 {
 	u_register_t reg;
diff --git a/lib/extensions/spe/spe.c b/lib/extensions/spe/spe.c
index b1fe39f..236b102 100644
--- a/lib/extensions/spe/spe.c
+++ b/lib/extensions/spe/spe.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2022, Arm Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2023, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -21,25 +21,10 @@
 	__asm__ volatile("hint #17");
 }
 
-void spe_enable(bool el2_unused)
+void spe_init_el3(void)
 {
 	uint64_t v;
 
-	if (el2_unused) {
-		/*
-		 * MDCR_EL2.TPMS (ARM v8.2): Do not trap statistical
-		 * profiling controls to EL2.
-		 *
-		 * MDCR_EL2.E2PB (ARM v8.2): SPE enabled in Non-secure
-		 * state. Accesses to profiling buffer controls at
-		 * Non-secure EL1 are not trapped to EL2.
-		 */
-		v = read_mdcr_el2();
-		v &= ~MDCR_EL2_TPMS;
-		v |= MDCR_EL2_E2PB(MDCR_EL2_E2PB_EL1);
-		write_mdcr_el2(v);
-	}
-
 	/*
 	 * MDCR_EL2.NSPB (ARM v8.2): SPE enabled in Non-secure state
 	 * and disabled in secure state. Accesses to SPE registers at
@@ -55,6 +40,24 @@
 	write_mdcr_el3(v);
 }
 
+void spe_init_el2_unused(void)
+{
+	uint64_t v;
+
+	/*
+	 * MDCR_EL2.TPMS (ARM v8.2): Do not trap statistical
+	 * profiling controls to EL2.
+	 *
+	 * MDCR_EL2.E2PB (ARM v8.2): SPE enabled in Non-secure
+	 * state. Accesses to profiling buffer controls at
+	 * Non-secure EL1 are not trapped to EL2.
+	 */
+	v = read_mdcr_el2();
+	v &= ~MDCR_EL2_TPMS;
+	v |= MDCR_EL2_E2PB(MDCR_EL2_E2PB_EL1);
+	write_mdcr_el2(v);
+}
+
 void spe_disable(void)
 {
 	uint64_t v;
diff --git a/lib/extensions/sve/sve.c b/lib/extensions/sve/sve.c
index f551ca7..eb4ac8d 100644
--- a/lib/extensions/sve/sve.c
+++ b/lib/extensions/sve/sve.c
@@ -37,6 +37,16 @@
 		(ZCR_EL3_LEN_MASK & CONVERT_SVE_LENGTH(SVE_VECTOR_LEN)));
 }
 
+void sve_init_el2_unused(void)
+{
+	/*
+	 * CPTR_EL2.TFP: Set to zero so that Non-secure accesses to Advanced
+	 *  SIMD and floating-point functionality from both Execution states do
+	 *  not trap to EL2.
+	 */
+	write_cptr_el2(read_cptr_el2() & ~CPTR_EL2_TFP_BIT);
+}
+
 void sve_disable(cpu_context_t *context)
 {
 	u_register_t reg;
diff --git a/lib/extensions/sys_reg_trace/aarch32/sys_reg_trace.c b/lib/extensions/sys_reg_trace/aarch32/sys_reg_trace.c
index b3f44b7..6da504e 100644
--- a/lib/extensions/sys_reg_trace/aarch32/sys_reg_trace.c
+++ b/lib/extensions/sys_reg_trace/aarch32/sys_reg_trace.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, Arm Limited. All rights reserved.
+ * Copyright (c) 2021-2023, Arm Limited. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -10,7 +10,7 @@
 #include <arch_helpers.h>
 #include <lib/extensions/sys_reg_trace.h>
 
-void sys_reg_trace_enable(void)
+void sys_reg_trace_init_el3(void)
 {
 	uint32_t val;
 
diff --git a/lib/extensions/sys_reg_trace/aarch64/sys_reg_trace.c b/lib/extensions/sys_reg_trace/aarch64/sys_reg_trace.c
index e61cb90..4b57f67 100644
--- a/lib/extensions/sys_reg_trace/aarch64/sys_reg_trace.c
+++ b/lib/extensions/sys_reg_trace/aarch64/sys_reg_trace.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, Arm Limited. All rights reserved.
+ * Copyright (c) 2021-2023, Arm Limited. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -24,3 +24,14 @@
 	val &= ~TTA_BIT;
 	write_ctx_reg(get_el3state_ctx(ctx), CTX_CPTR_EL3, val);
 }
+
+void sys_reg_trace_init_el2_unused(void)
+{
+	/*
+	 * CPTR_EL2.TTA: Set to zero so that Non-secure System register accesses
+	 *  to the trace registers from both Execution states do not trap to
+	 *  EL2. If PE trace unit System registers are not implemented then this
+	 *  bit is reserved, and must be set to zero.
+	 */
+	write_cptr_el2(read_cptr_el2() & ~CPTR_EL2_TTA_BIT);
+}
diff --git a/lib/extensions/trbe/trbe.c b/lib/extensions/trbe/trbe.c
index fa139ca..461ea73 100644
--- a/lib/extensions/trbe/trbe.c
+++ b/lib/extensions/trbe/trbe.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, Arm Limited. All rights reserved.
+ * Copyright (c) 2021-2023, Arm Limited. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -19,9 +19,9 @@
 	__asm__ volatile("hint #18");
 }
 
-void trbe_enable(void)
+void trbe_init_el3(void)
 {
-	uint64_t val;
+	u_register_t val;
 
 	/*
 	 * MDCR_EL3.NSTB = 0b11
@@ -34,6 +34,17 @@
 	write_mdcr_el3(val);
 }
 
+void trbe_init_el2_unused(void)
+{
+	/*
+	 * MDCR_EL2.E2TB: Set to zero so that the trace Buffer
+	 *  owning exception level is NS-EL1 and, tracing is
+	 *  prohibited at NS-EL2. These bits are RES0 when
+	 *  FEAT_TRBE is not implemented.
+	 */
+	write_mdcr_el2(read_mdcr_el2() & ~MDCR_EL2_E2TB(MDCR_EL2_E2TB_EL1));
+}
+
 static void *trbe_drain_trace_buffers_hook(const void *arg __unused)
 {
 	if (is_feat_trbe_supported()) {
diff --git a/lib/extensions/trf/aarch32/trf.c b/lib/extensions/trf/aarch32/trf.c
index 0c63efa..e13b4db 100644
--- a/lib/extensions/trf/aarch32/trf.c
+++ b/lib/extensions/trf/aarch32/trf.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, Arm Limited. All rights reserved.
+ * Copyright (c) 2021-2023, Arm Limited. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -10,7 +10,7 @@
 #include <arch_helpers.h>
 #include <lib/extensions/trf.h>
 
-void trf_enable(void)
+void trf_init_el3(void)
 {
 	uint32_t val;
 
diff --git a/lib/extensions/trf/aarch64/trf.c b/lib/extensions/trf/aarch64/trf.c
index 941692b..f681b28 100644
--- a/lib/extensions/trf/aarch64/trf.c
+++ b/lib/extensions/trf/aarch64/trf.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, Arm Limited. All rights reserved.
+ * Copyright (c) 2021-2023, Arm Limited. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -9,9 +9,9 @@
 #include <arch_helpers.h>
 #include <lib/extensions/trf.h>
 
-void trf_enable(void)
+void trf_init_el3(void)
 {
-	uint64_t val;
+	u_register_t val;
 
 	/*
 	 * MDCR_EL3.TTRF = b0
@@ -22,3 +22,15 @@
 	val &= ~MDCR_TTRF_BIT;
 	write_mdcr_el3(val);
 }
+
+void trf_init_el2_unused(void)
+{
+	/*
+	 * MDCR_EL2.TTRF: Set to zero so that access to Trace
+	 *  Filter Control register TRFCR_EL1 at EL1 is not
+	 *  trapped to EL2. This bit is RES0 in versions of
+	 *  the architecture earlier than ARMv8.4.
+	 *
+	 */
+	write_mdcr_el2(read_mdcr_el2() & ~MDCR_EL2_TTRF);
+}
diff --git a/lib/libc/printf.c b/lib/libc/printf.c
index faccfdf..6931a7e 100644
--- a/lib/libc/printf.c
+++ b/lib/libc/printf.c
@@ -36,7 +36,7 @@
 }
 
 static int unsigned_num_print(unsigned long long int unum, unsigned int radix,
-			      char padc, int padn)
+			      char padc, int padn, bool uppercase)
 {
 	/* Just need enough space to store 64 bit decimal integer */
 	char num_buf[20];
@@ -51,10 +51,13 @@
 
 	do {
 		rem = unum % radix;
-		if (rem < 0xa)
+		if (rem < 0xa) {
 			num_buf[i] = '0' + rem;
-		else
+		} else if (uppercase) {
+			num_buf[i] = 'A' + (rem - 0xa);
+		} else {
 			num_buf[i] = 'a' + (rem - 0xa);
+		}
 		i++;
 		unum /= radix;
 	} while (unum > 0U);
@@ -105,8 +108,10 @@
 	char padc = '\0'; /* Padding character */
 	int padn; /* Number of characters to pad */
 	int count = 0; /* Number of printed characters */
+	bool uppercase; /* Print characters in uppercase */
 
 	while (*fmt != '\0') {
+		uppercase = false;
 		l_count = 0;
 		padn = 0;
 
@@ -129,7 +134,7 @@
 					unum = (unsigned long long int)num;
 
 				count += unsigned_num_print(unum, 10,
-							    padc, padn);
+							    padc, padn, uppercase);
 				break;
 			case 'c':
 				(void)putchar(va_arg(args, int));
@@ -147,12 +152,15 @@
 				}
 
 				count += unsigned_num_print(unum, 16,
-							    padc, padn);
+							    padc, padn, uppercase);
 				break;
+			case 'X':
+				uppercase = true;
+				// fall through
 			case 'x':
 				unum = get_unum_va_args(args, l_count);
 				count += unsigned_num_print(unum, 16,
-							    padc, padn);
+							    padc, padn, uppercase);
 				break;
 			case 'z':
 				if (sizeof(size_t) == 8U)
@@ -167,7 +175,7 @@
 			case 'u':
 				unum = get_unum_va_args(args, l_count);
 				count += unsigned_num_print(unum, 10,
-							    padc, padn);
+							    padc, padn, uppercase);
 				break;
 			case '0':
 				padc = '0';
diff --git a/lib/psa/rss_platform.c b/lib/psa/rss_platform.c
index 359f894..7d90bfc 100644
--- a/lib/psa/rss_platform.c
+++ b/lib/psa/rss_platform.c
@@ -5,10 +5,9 @@
  *
  */
 
-#include <stdint.h>
-
 #include <psa/client.h>
 #include <psa_manifest/sid.h>
+#include <rss_crypto_defs.h>
 #include <rss_platform_api.h>
 
 psa_status_t
@@ -41,3 +40,30 @@
 			RSS_PLATFORM_API_ID_NV_READ,
 			in_vec, 1, out_vec, 1);
 }
+
+psa_status_t
+rss_platform_key_read(enum rss_key_id_builtin_t key, uint8_t *data,
+		size_t data_size, size_t *data_length)
+{
+	psa_status_t status;
+
+	struct rss_crypto_pack_iovec iov = {
+		.function_id = RSS_CRYPTO_EXPORT_PUBLIC_KEY_SID,
+		.key_id = key,
+	};
+
+	psa_invec in_vec[] = {
+		{.base = &iov, .len = sizeof(struct rss_crypto_pack_iovec)},
+	};
+	psa_outvec out_vec[] = {
+		{.base = data, .len = data_size}
+	};
+
+	status = psa_call(RSS_CRYPTO_HANDLE, PSA_IPC_CALL,
+			in_vec, IOVEC_LEN(in_vec),
+			out_vec, IOVEC_LEN(out_vec));
+
+	*data_length = out_vec[0].len;
+
+	return status;
+}
diff --git a/lib/xlat_tables_v2/aarch64/xlat_tables_arch.c b/lib/xlat_tables_v2/aarch64/xlat_tables_arch.c
index c847a9e..bb6a35c 100644
--- a/lib/xlat_tables_v2/aarch64/xlat_tables_arch.c
+++ b/lib/xlat_tables_v2/aarch64/xlat_tables_arch.c
@@ -27,16 +27,14 @@
 	if (size == PAGE_SIZE_4KB) {
 		tgranx = read_id_aa64mmfr0_el0_tgran4_field();
 		/* MSB of TGRAN4 field will be '1' for unsupported feature */
-		return ((tgranx >= ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED) &&
-			(tgranx < 8U));
+		return (tgranx < 8U);
 	} else if (size == PAGE_SIZE_16KB) {
 		tgranx = read_id_aa64mmfr0_el0_tgran16_field();
 		return (tgranx >= ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED);
 	} else if (size == PAGE_SIZE_64KB) {
 		tgranx = read_id_aa64mmfr0_el0_tgran64_field();
 		/* MSB of TGRAN64 field will be '1' for unsupported feature */
-		return ((tgranx >= ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED) &&
-			(tgranx < 8U));
+		return (tgranx < 8U);
 	} else {
 		return false;
 	}
diff --git a/make_helpers/defaults.mk b/make_helpers/defaults.mk
index f9077eb..a065039 100644
--- a/make_helpers/defaults.mk
+++ b/make_helpers/defaults.mk
@@ -191,6 +191,9 @@
 # Flag to enable access to Guarded Control Stack (FEAT_GCS)
 ENABLE_FEAT_GCS			:= 0
 
+# Flag to enable NoTagAccess memory region attribute for stage 2 of translation.
+ENABLE_FEAT_MTE_PERM		:= 0
+
 # By default BL31 encryption disabled
 ENCRYPT_BL31			:= 0
 
diff --git a/plat/arm/board/fvp/include/platform_def.h b/plat/arm/board/fvp/include/platform_def.h
index 99dd6c7..df04617 100644
--- a/plat/arm/board/fvp/include/platform_def.h
+++ b/plat/arm/board/fvp/include/platform_def.h
@@ -205,14 +205,18 @@
 #endif
 
 /*
- * PLAT_ARM_MAX_BL2_SIZE is calculated using the current BL2 debug size plus a
- * little space for growth.
+ * Set the maximum size of BL2 to be close to half of the Trusted SRAM.
+ * Maximum size of BL2 increases as Trusted SRAM size increases.
  */
 #if CRYPTO_SUPPORT
 #if (TF_MBEDTLS_KEY_ALG_ID == TF_MBEDTLS_RSA_AND_ECDSA) || COT_DESC_IN_DTB
-# define PLAT_ARM_MAX_BL2_SIZE	(UL(0x1E000) - FVP_BL2_ROMLIB_OPTIMIZATION)
+# define PLAT_ARM_MAX_BL2_SIZE	((PLAT_ARM_TRUSTED_SRAM_SIZE / 2) - \
+				 (2 * PAGE_SIZE) - \
+				 FVP_BL2_ROMLIB_OPTIMIZATION)
 #else
-# define PLAT_ARM_MAX_BL2_SIZE	(UL(0x1D000) - FVP_BL2_ROMLIB_OPTIMIZATION)
+# define PLAT_ARM_MAX_BL2_SIZE	((PLAT_ARM_TRUSTED_SRAM_SIZE / 2) - \
+				 (3 * PAGE_SIZE) - \
+				 FVP_BL2_ROMLIB_OPTIMIZATION)
 #endif
 #elif ARM_BL31_IN_DRAM
 /* When ARM_BL31_IN_DRAM is set, BL2 can use almost all of Trusted SRAM. */
diff --git a/plat/arm/board/fvp/platform.mk b/plat/arm/board/fvp/platform.mk
index 436cab3..7df150e 100644
--- a/plat/arm/board/fvp/platform.mk
+++ b/plat/arm/board/fvp/platform.mk
@@ -80,6 +80,7 @@
 ENABLE_FEAT_CSV2_2		:= 2
 ENABLE_FEAT_DIT			:= 2
 ENABLE_FEAT_PAN			:= 2
+ENABLE_FEAT_MTE_PERM		:= 2
 ENABLE_FEAT_VHE			:= 2
 CTX_INCLUDE_NEVE_REGS		:= 2
 ENABLE_FEAT_SEL2		:= 2
diff --git a/plat/arm/board/tc/include/tc_plat.h b/plat/arm/board/tc/include/tc_plat.h
index 117fbb4..a6b2b0d 100644
--- a/plat/arm/board/tc/include/tc_plat.h
+++ b/plat/arm/board/tc/include/tc_plat.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2023, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2021-2023, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -7,6 +7,10 @@
 #ifndef TC_PLAT_H
 #define TC_PLAT_H
 
+#ifdef PLATFORM_TEST_ROTPK
+#include <rss_crypto_defs.h>
+#endif
+
 void tc_bl31_common_platform_setup(void);
 
 #ifdef PLATFORM_TEST_TFM_TESTSUITE
@@ -17,4 +21,13 @@
 int nv_counter_test(void);
 #endif
 
+#ifdef PLATFORM_TEST_ROTPK
+struct key_id_info {
+	enum rss_key_id_builtin_t key_id;
+	const char *key_id_name;
+};
+
+int rotpk_test(void);
+#endif
+
 #endif /* TC_PLAT_H */
diff --git a/plat/arm/board/tc/nv_counter_test.c b/plat/arm/board/tc/nv_counter_test.c
index f9e001e..179ec4b 100644
--- a/plat/arm/board/tc/nv_counter_test.c
+++ b/plat/arm/board/tc/nv_counter_test.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, ARM Limited. All rights reserved.
+ * Copyright (c) 2023, Arm Limited. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -22,29 +22,29 @@
 
 	status = rss_comms_init(PLAT_RSS_AP_SND_MHU_BASE, PLAT_RSS_AP_RCV_MHU_BASE);
 	if (status != PSA_SUCCESS) {
-		printf("Failed to initialize RSS communication channel\n");
+		printf("Failed to initialize RSS communication channel - psa_status = %d\n", status);
 		return -1;
 	}
 
 	for (id = 0; id < 3; id++) {
 		status = rss_platform_nv_counter_read(id, sizeof(old_val), (uint8_t *)&old_val);
 		if (status != PSA_SUCCESS) {
-			printf("Failed during first id=(%d) rss_platform_nv_counter_read\n",
-				       id);
+			printf("Failed during first id=(%d) rss_platform_nv_counter_read - psa_status = %d\n",
+				       id, status);
 			return -1;
 		}
 
 		status = rss_platform_nv_counter_increment(id);
 		if (status != PSA_SUCCESS) {
-			printf("Failed during id=(%d) rss_platform_nv_counter_increment\n",
-					id);
+			printf("Failed during id=(%d) rss_platform_nv_counter_increment - psa_status = %d\n",
+					id, status);
 			return -1;
 		}
 
 		status = rss_platform_nv_counter_read(id, sizeof(new_val), (uint8_t *)&new_val);
 		if (status != PSA_SUCCESS) {
-			printf("Failed during second id=(%d) rss_platform_nv_counter_read\n",
-					id);
+			printf("Failed during second id=(%d) rss_platform_nv_counter_read - psa_status = %d\n",
+					id, status);
 			return -1;
 		}
 
diff --git a/plat/arm/board/tc/platform.mk b/plat/arm/board/tc/platform.mk
index 5a1d83a..8ca33ca 100644
--- a/plat/arm/board/tc/platform.mk
+++ b/plat/arm/board/tc/platform.mk
@@ -189,31 +189,11 @@
 endif
 
 ifneq (${PLATFORM_TEST},)
-    $(eval $(call add_define,PLATFORM_TESTS))
-
-    ifeq (${PLATFORM_TEST},rss-nv-counters)
-        include drivers/arm/rss/rss_comms.mk
-
-        # Test code.
-        BL31_SOURCES	+=	plat/arm/board/tc/nv_counter_test.c
-
-        # Code under testing.
-        BL31_SOURCES	+=	lib/psa/rss_platform.c \
-				drivers/arm/rss/rss_comms.c \
-				${RSS_COMMS_SOURCES}
-
-        PLAT_INCLUDES	+=	-Iinclude/lib/psa
-
-        $(eval $(call add_define,PLATFORM_TEST_NV_COUNTERS))
-    else ifeq (${PLATFORM_TEST},tfm-testsuite)
-        # Add this include as first, before arm_common.mk. This is necessary
-        # because arm_common.mk builds Mbed TLS, and platform_test.mk can
-        # change the list of Mbed TLS files that are to be compiled
-        # (LIBMBEDTLS_SRCS).
-        include plat/arm/board/tc/platform_test.mk
-    else
-        $(error "Unsupported PLATFORM_TEST value")
-    endif
+    # Add this include as first, before arm_common.mk. This is necessary
+    # because arm_common.mk builds Mbed TLS, and platform_test.mk can
+    # change the list of Mbed TLS files that are to be compiled
+    # (LIBMBEDTLS_SRCS).
+    include plat/arm/board/tc/platform_test.mk
 endif
 
 
diff --git a/plat/arm/board/tc/platform_test.mk b/plat/arm/board/tc/platform_test.mk
index e974855..2fd5ea0 100644
--- a/plat/arm/board/tc/platform_test.mk
+++ b/plat/arm/board/tc/platform_test.mk
@@ -3,7 +3,37 @@
 # SPDX-License-Identifier: BSD-3-Clause
 #
 
-ifeq (${PLATFORM_TEST},tfm-testsuite)
+$(eval $(call add_define,PLATFORM_TESTS))
+
+ifeq (${PLATFORM_TEST},rss-nv-counters)
+    include drivers/arm/rss/rss_comms.mk
+
+    # Test code.
+    BL31_SOURCES	+=	plat/arm/board/tc/nv_counter_test.c
+
+    # Code under testing.
+    BL31_SOURCES	+=	lib/psa/rss_platform.c \
+				drivers/arm/rss/rss_comms.c \
+				${RSS_COMMS_SOURCES}
+
+    PLAT_INCLUDES	+=	-Iinclude/lib/psa
+
+    $(eval $(call add_define,PLATFORM_TEST_NV_COUNTERS))
+else ifeq (${PLATFORM_TEST},rss-rotpk)
+    include drivers/arm/rss/rss_comms.mk
+
+    # Test code.
+    BL31_SOURCES	+=	plat/arm/board/tc/rotpk_test.c
+
+    # Code under testing.
+    BL31_SOURCES	+=	lib/psa/rss_platform.c \
+				drivers/arm/rss/rss_comms.c \
+				${RSS_COMMS_SOURCES}
+
+    PLAT_INCLUDES	+=	-Iinclude/lib/psa
+
+    $(eval $(call add_define,PLATFORM_TEST_ROTPK))
+else ifeq (${PLATFORM_TEST},tfm-testsuite)
 
     # The variables need to be set to compile the platform test:
     ifeq (${TF_M_TESTS_PATH},)
@@ -77,4 +107,6 @@
     $(eval $(call add_define,DELEG_ATTEST_DUMP_TOKEN_AND_KEY))
 
     $(eval $(call add_define,PLATFORM_TEST_TFM_TESTSUITE))
+else
+    $(error "Unsupported PLATFORM_TEST value")
 endif
diff --git a/plat/arm/board/tc/rotpk_test.c b/plat/arm/board/tc/rotpk_test.c
new file mode 100644
index 0000000..ed56c31
--- /dev/null
+++ b/plat/arm/board/tc/rotpk_test.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2023, Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include <drivers/arm/rss_comms.h>
+#include <plat/common/platform.h>
+#include <rss_platform_api.h>
+#include <tc_plat.h>
+
+/*
+ * Print `key_id_name`, then the first `key_size` bytes of `key_buf` as
+ * two-digit lowercase hex, followed by a blank line.
+ */
+static void print_hex(const char *key_id_name, size_t key_size, const uint8_t *key_buf)
+{
+	printf("%s = ", key_id_name);
+	/* Index with size_t so the comparison against key_size is unsigned. */
+	for (size_t i = 0; i < key_size; i++) {
+		printf("%02x", key_buf[i]);
+	}
+	printf("\n\n");
+}
+
+/*
+ * Platform test: read each host ROTPK listed in key_ids[] through
+ * rss_platform_key_read() and print it in hex.
+ *
+ * Returns 0 when every key was read, or -1 as soon as initialising the RSS
+ * communication channel or reading a key fails.
+ */
+int rotpk_test(void)
+{
+	psa_status_t status;
+	uint8_t key_buf[128];
+	size_t key_size;
+
+	struct key_id_info key_ids[3] = {
+		{.key_id = RSS_BUILTIN_KEY_ID_HOST_S_ROTPK, .key_id_name = "Secure-ROTPK"},
+		{.key_id = RSS_BUILTIN_KEY_ID_HOST_NS_ROTPK, .key_id_name = "NS-ROTPK"},
+		{.key_id = RSS_BUILTIN_KEY_ID_HOST_CCA_ROTPK, .key_id_name = "CCA-ROTPK"}
+	};
+
+	status = rss_comms_init(PLAT_RSS_AP_SND_MHU_BASE, PLAT_RSS_AP_RCV_MHU_BASE);
+	if (status != PSA_SUCCESS) {
+		printf("Failed to initialize RSS communication channel - psa_status = %d\n", status);
+		return -1;
+	}
+
+	for (size_t i = 0; i < ARRAY_SIZE(key_ids); i++) {
+		status = rss_platform_key_read(key_ids[i].key_id, key_buf,
+			       sizeof(key_buf), &key_size);
+		if (status != PSA_SUCCESS) {
+			printf("Failed to retrieve %s - psa_status = %d\n", key_ids[i].key_id_name, status);
+			return -1;
+		}
+		print_hex(key_ids[i].key_id_name, key_size, key_buf);
+	}
+
+	printf("Passed rotpk_test\n");
+
+	return 0;
+}
diff --git a/plat/arm/board/tc/tc_bl31_setup.c b/plat/arm/board/tc/tc_bl31_setup.c
index ca3a032..ff7809d 100644
--- a/plat/arm/board/tc/tc_bl31_setup.c
+++ b/plat/arm/board/tc/tc_bl31_setup.c
@@ -59,6 +59,8 @@
 
 #ifdef PLATFORM_TEST_NV_COUNTERS
 	tests_failed = nv_counter_test();
+#elif PLATFORM_TEST_ROTPK
+	tests_failed = rotpk_test();
 #elif PLATFORM_TEST_TFM_TESTSUITE
 	tests_failed = run_platform_tests();
 #endif
@@ -120,12 +122,9 @@
 {
 	/* Trusted Watchdog timer is the only source of Group0 interrupt now. */
 	if (intid == SBSA_SECURE_WDOG_INTID) {
-		INFO("Watchdog restarted\n");
 		/* Refresh the timer. */
 		plat_arm_secure_wdt_refresh();
 
-		/* Deactivate the corresponding interrupt. */
-		plat_ic_end_of_interrupt(intid);
 		return 0;
 	}
 
diff --git a/plat/mediatek/drivers/apusys/devapc/apusys_dapc_v1.h b/plat/mediatek/drivers/apusys/devapc/apusys_dapc_v1.h
index 2f5d47b..1b77942 100644
--- a/plat/mediatek/drivers/apusys/devapc/apusys_dapc_v1.h
+++ b/plat/mediatek/drivers/apusys/devapc/apusys_dapc_v1.h
@@ -155,19 +155,4 @@
 			   FORBIDDEN,     NO_PROTECTION, FORBIDDEN, FORBIDDEN, \
 			   FORBIDDEN,     FORBIDDEN,     FORBIDDEN, FORBIDDEN, \
 			   FORBIDDEN,     FORBIDDEN,     FORBIDDEN, FORBIDDEN)
-
-#define SLAVE_FORBID_EXCEPT_D0_D5_NO_PROTECT_D3_SEC_RW(domain)			 \
-	APUSYS_APC_AO_ATTR(domain,						 \
-			   NO_PROTECTION, FORBIDDEN,     FORBIDDEN, SEC_RW_ONLY, \
-			   FORBIDDEN,     NO_PROTECTION, FORBIDDEN, FORBIDDEN,   \
-			   FORBIDDEN,     FORBIDDEN,     FORBIDDEN, FORBIDDEN,   \
-			   FORBIDDEN,     FORBIDDEN,     FORBIDDEN, FORBIDDEN)
-
-#define SLAVE_FORBID_EXCEPT_D0_D3_SEC_RW_D5_NO_PROTECT(domain)		       \
-	APUSYS_APC_AO_ATTR(domain,					       \
-			   SEC_RW_ONLY, FORBIDDEN,     FORBIDDEN, SEC_RW_ONLY, \
-			   FORBIDDEN,   NO_PROTECTION, FORBIDDEN, FORBIDDEN,   \
-			   FORBIDDEN,   FORBIDDEN,     FORBIDDEN, FORBIDDEN,   \
-			   FORBIDDEN,   FORBIDDEN,     FORBIDDEN, FORBIDDEN)
-
 #endif /* APUSYS_DAPC_V1_H */
diff --git a/plat/mediatek/drivers/apusys/mt8188/apusys_devapc_def.h b/plat/mediatek/drivers/apusys/mt8188/apusys_devapc_def.h
index e74b022..47a2a94 100644
--- a/plat/mediatek/drivers/apusys/mt8188/apusys_devapc_def.h
+++ b/plat/mediatek/drivers/apusys/mt8188/apusys_devapc_def.h
@@ -29,14 +29,14 @@
 #define SLAVE_AO_BCRM			SLAVE_FORBID_EXCEPT_D5_NO_PROTECT
 #define SLAVE_AO_DAPC_WRAP		SLAVE_FORBID_EXCEPT_D5_NO_PROTECT
 #define SLAVE_AO_DAPC_CON		SLAVE_FORBID_EXCEPT_D0_SEC_RW_D5_NO_PROTECT
-#define SLAVE_RCX_ACX_BULK		SLAVE_FORBID_EXCEPT_D0_D5_NO_PROTECT_D3_SEC_RW
-#define SLAVE_ACX0_BCRM			SLAVE_FORBID_EXCEPT_D0_D5_NO_PROTECT_D3_SEC_RW
+#define SLAVE_RCX_ACX_BULK		SLAVE_FORBID_EXCEPT_D0_D5_NO_PROTECT
+#define SLAVE_ACX0_BCRM			SLAVE_FORBID_EXCEPT_D0_D5_NO_PROTECT
 #define SLAVE_RPCTOP_LITE_ACX0		SLAVE_FORBID_EXCEPT_D0_D5_NO_PROTECT
-#define SLAVE_ACX1_BCRM			SLAVE_FORBID_EXCEPT_D0_D5_NO_PROTECT_D3_SEC_RW
+#define SLAVE_ACX1_BCRM			SLAVE_FORBID_EXCEPT_D0_D5_NO_PROTECT
 #define SLAVE_RPCTOP_LITE_ACX1		SLAVE_FORBID_EXCEPT_D0_D5_NO_PROTECT
-#define SLAVE_RCX_TO_ACX0_0		SLAVE_FORBID_EXCEPT_D0_D5_NO_PROTECT_D3_SEC_RW
+#define SLAVE_RCX_TO_ACX0_0		SLAVE_FORBID_EXCEPT_D0_D5_NO_PROTECT
 #define SLAVE_RCX_TO_ACX0_1		SLAVE_FORBID_EXCEPT_D0_D5_NO_PROTECT
-#define SLAVE_SAE_TO_ACX0_0		SLAVE_FORBID_EXCEPT_D0_D5_NO_PROTECT_D3_SEC_RW
+#define SLAVE_SAE_TO_ACX0_0		SLAVE_FORBID_EXCEPT_D0_D5_NO_PROTECT
 #define SLAVE_SAE_TO_ACX0_1		SLAVE_FORBID_EXCEPT_D0_D5_NO_PROTECT
 #define SLAVE_RCX_TO_ACX1_0		SLAVE_FORBID_EXCEPT_D0_D5_NO_PROTECT
 #define SLAVE_RCX_TO_ACX1_1		SLAVE_FORBID_EXCEPT_D0_D5_NO_PROTECT
@@ -79,7 +79,7 @@
 #define SLAVE_SENSOR_WRAP_ACX1_DLA1	SLAVE_FORBID_EXCEPT_D5_NO_PROTECT
 #define SLAVE_SENSOR_WRAP_ACX1_VPU0	SLAVE_FORBID_EXCEPT_D5_NO_PROTECT
 #define SLAVE_REVISER			SLAVE_FORBID_EXCEPT_D0_SEC_RW
-#define SLAVE_NOC			SLAVE_FORBID_EXCEPT_D0_D3_SEC_RW_D5_NO_PROTECT
+#define SLAVE_NOC			SLAVE_FORBID_EXCEPT_D0_SEC_RW
 #define SLAVE_BCRM			SLAVE_FORBID_EXCEPT_D5_NO_PROTECT
 #define SLAVE_DAPC_WRAP			SLAVE_FORBID_EXCEPT_D5_NO_PROTECT
 #define SLAVE_DAPC_CON			SLAVE_FORBID_EXCEPT_D0_SEC_RW_D5_NO_PROTECT
diff --git a/plat/qti/mdm9607/platform.mk b/plat/qti/mdm9607/platform.mk
new file mode 100644
index 0000000..4c6938c
--- /dev/null
+++ b/plat/qti/mdm9607/platform.mk
@@ -0,0 +1,14 @@
+#
+# Copyright (c) 2022-2023, Stephan Gerhold <stephan@gerhold.net>
+#
+# SPDX-License-Identifier: BSD-3-Clause
+#
+
+ARM_ARCH_MAJOR		:= 7
+ARM_CORTEX_A7		:= yes
+
+BL31_BASE		?= 0x87e00000
+PRELOADED_BL33_BASE	?= 0x82900000
+QTI_UART_NUM		?= 5
+
+include plat/qti/msm8916/platform.mk
diff --git a/plat/qti/mdm9607/sp_min/sp_min-mdm9607.mk b/plat/qti/mdm9607/sp_min/sp_min-mdm9607.mk
new file mode 100644
index 0000000..28a6f01
--- /dev/null
+++ b/plat/qti/mdm9607/sp_min/sp_min-mdm9607.mk
@@ -0,0 +1,7 @@
+#
+# Copyright (c) 2022-2023, Stephan Gerhold <stephan@gerhold.net>
+#
+# SPDX-License-Identifier: BSD-3-Clause
+#
+
+include plat/qti/msm8916/sp_min/sp_min-msm8916.mk
diff --git a/plat/qti/msm8909/platform.mk b/plat/qti/msm8909/platform.mk
new file mode 100644
index 0000000..8a88aa5
--- /dev/null
+++ b/plat/qti/msm8909/platform.mk
@@ -0,0 +1,13 @@
+#
+# Copyright (c) 2022-2023, Stephan Gerhold <stephan@gerhold.net>
+#
+# SPDX-License-Identifier: BSD-3-Clause
+#
+
+ARM_ARCH_MAJOR	:= 7
+ARM_CORTEX_A7	:= yes
+
+BL31_BASE	?= 0x87e80000
+QTI_UART_NUM	?= 1
+
+include plat/qti/msm8916/platform.mk
diff --git a/plat/qti/msm8909/sp_min/sp_min-msm8909.mk b/plat/qti/msm8909/sp_min/sp_min-msm8909.mk
new file mode 100644
index 0000000..28a6f01
--- /dev/null
+++ b/plat/qti/msm8909/sp_min/sp_min-msm8909.mk
@@ -0,0 +1,7 @@
+#
+# Copyright (c) 2022-2023, Stephan Gerhold <stephan@gerhold.net>
+#
+# SPDX-License-Identifier: BSD-3-Clause
+#
+
+include plat/qti/msm8916/sp_min/sp_min-msm8916.mk
diff --git a/plat/qti/msm8916/aarch32/msm8916_helpers.S b/plat/qti/msm8916/aarch32/msm8916_helpers.S
index ea39663..dc35043 100644
--- a/plat/qti/msm8916/aarch32/msm8916_helpers.S
+++ b/plat/qti/msm8916/aarch32/msm8916_helpers.S
@@ -6,10 +6,15 @@
 
 #include <arch.h>
 #include <asm_macros.S>
+#include <platform_def.h>
 
 #include <msm8916_mmap.h>
 
+#if PLATFORM_CORE_COUNT > 1
 #define APCS_TCM_START_ADDR	0x10
+#else
+#define APCS_TCM_START_ADDR	0x34
+#endif
 #define APCS_TCM_REDIRECT_EN_0	BIT_32(0)
 
 	.globl	plat_crash_console_init
@@ -79,9 +84,18 @@
 	 * -------------------------------------------------
 	 */
 func plat_my_core_pos
-	/* There is just a single cluster so this is very simple */
-	ldcopr	r0, MPIDR
-	and	r0, r0, #MPIDR_CPU_MASK
+	.if PLATFORM_CORE_COUNT > 1
+		ldcopr	r1, MPIDR
+		and	r0, r1, #MPIDR_CPU_MASK
+		.if PLATFORM_CLUSTER_COUNT > 1
+			and	r1, r1, #MPIDR_CLUSTER_MASK
+			orr	r0, r0, r1, LSR #(MPIDR_AFFINITY_BITS - \
+						  PLATFORM_CPU_PER_CLUSTER_SHIFT)
+		.endif
+	.else
+		/* There is just a single core so always 0 */
+		mov r0, #0
+	.endif
 	bx	lr
 endfunc plat_my_core_pos
 
@@ -102,7 +116,7 @@
 	 * Cold boot: Disable TCM redirect to L2 cache as early as
 	 * possible to avoid crashes when making use of the cache.
 	 */
-	ldr	r1, =APCS_CFG
+	ldr	r1, =APCS_CFG(0)
 	ldr	r2, [r1, #APCS_TCM_START_ADDR]
 	and	r2, r2, #~APCS_TCM_REDIRECT_EN_0
 	str	r2, [r1, #APCS_TCM_START_ADDR]
diff --git a/plat/qti/msm8916/aarch64/msm8916_helpers.S b/plat/qti/msm8916/aarch64/msm8916_helpers.S
index bccc5e5..de9438a 100644
--- a/plat/qti/msm8916/aarch64/msm8916_helpers.S
+++ b/plat/qti/msm8916/aarch64/msm8916_helpers.S
@@ -6,10 +6,15 @@
 
 #include <arch.h>
 #include <asm_macros.S>
+#include <platform_def.h>
 
 #include <msm8916_mmap.h>
 
+#if PLATFORM_CORE_COUNT > 1
 #define APCS_TCM_START_ADDR	0x10
+#else
+#define APCS_TCM_START_ADDR	0x34
+#endif
 #define APCS_TCM_REDIRECT_EN_0	BIT_32(0)
 
 	.globl	plat_crash_console_init
@@ -78,9 +83,18 @@
 	 * -------------------------------------------------
 	 */
 func plat_my_core_pos
-	/* There is just a single cluster so this is very simple */
-	mrs	x0, mpidr_el1
-	and	x0, x0, #MPIDR_CPU_MASK
+	.if PLATFORM_CORE_COUNT > 1
+		mrs	x1, mpidr_el1
+		and	x0, x1, #MPIDR_CPU_MASK
+		.if PLATFORM_CLUSTER_COUNT > 1
+			and	x1, x1, #MPIDR_CLUSTER_MASK
+			orr	x0, x0, x1, LSR #(MPIDR_AFFINITY_BITS - \
+						  PLATFORM_CPU_PER_CLUSTER_SHIFT)
+		.endif
+	.else
+		/* There is just a single core so always 0 */
+		mov	x0, #0
+	.endif
 	ret
 endfunc plat_my_core_pos
 
@@ -100,7 +114,7 @@
 	 * Cold boot: Disable TCM redirect to L2 cache as early as
 	 * possible to avoid crashes when making use of the cache.
 	 */
-	mov_imm	x1, APCS_CFG
+	mov_imm	x1, APCS_CFG(0)
 	ldr	w2, [x1, #APCS_TCM_START_ADDR]
 	and	w2, w2, #~APCS_TCM_REDIRECT_EN_0
 	str	w2, [x1, #APCS_TCM_START_ADDR]
diff --git a/plat/qti/msm8916/include/msm8916_mmap.h b/plat/qti/msm8916/include/msm8916_mmap.h
index 35e3b86..20c5a57 100644
--- a/plat/qti/msm8916/include/msm8916_mmap.h
+++ b/plat/qti/msm8916/include/msm8916_mmap.h
@@ -22,6 +22,7 @@
 
 #define APPS_SMMU_BASE		(PCNOC_BASE + 0x1e00000)
 #define APPS_SMMU_QCOM		(APPS_SMMU_BASE + 0xf0000)
+#define GPU_SMMU_BASE		(PCNOC_BASE + 0x1f00000)
 
 #define BLSP1_BASE		(PCNOC_BASE + 0x7880000)
 #define BLSP1_UART_BASE(n)	(BLSP1_BASE + 0x2f000 + (((n) - 1) * 0x1000))
@@ -32,11 +33,19 @@
 #define APCS_QGIC2_GICC		(APCS_QGIC2_BASE + 0x2000)
 #define APCS_BANKED_ACS		(APCS_BASE + 0x08000)
 #define APCS_BANKED_SAW2	(APCS_BASE + 0x09000)
-#define APCS_CFG		(APCS_BASE + 0x10000)
-#define APCS_GLB		(APCS_BASE + 0x11000)
-#define APCS_L2_SAW2		(APCS_BASE + 0x12000)
-#define APCS_QTMR		(APCS_BASE + 0x20000)
-#define APCS_ALIAS_ACS(cpu)	(APCS_BASE + 0x88000 + ((cpu) * 0x10000))
-#define APCS_ALIAS_SAW2(cpu)	(APCS_BASE + 0x89000 + ((cpu) * 0x10000))
+
+#define _APCS_CLUSTER(cluster)	(APCS_BASE + ((cluster) * 0x100000))
+#define _APCS_CPU(cluster, cpu)	(_APCS_CLUSTER(cluster) + ((cpu) * 0x10000))
+#define APCS_CFG(cluster)	(_APCS_CLUSTER(cluster) + 0x10000)
+#define APCS_GLB(cluster)	(_APCS_CLUSTER(cluster) + 0x11000)
+#define APCS_L2_SAW2(cluster)	(_APCS_CLUSTER(cluster) + 0x12000)
+#define APCS_QTMR(cluster)	(_APCS_CLUSTER(cluster) + 0x20000)
+#define APCS_ALIAS_ACS(cluster, cpu)	(_APCS_CPU(cluster, cpu) + 0x88000)
+#define APCS_ALIAS_SAW2(cluster, cpu)	(_APCS_CPU(cluster, cpu) + 0x89000)
+
+/* Only on platforms with multiple clusters (e.g. MSM8939) */
+#define APCS_CCI_BASE		(APCS_BASE + 0x1c0000)
+#define APCS_CCI_SAW2		(APCS_BASE + 0x1d2000)
+#define APCS_CCI_ACS		(APCS_BASE + 0x1d4000)
 
 #endif /* MSM8916_MMAP_H */
diff --git a/plat/qti/msm8916/include/platform_def.h b/plat/qti/msm8916/include/platform_def.h
index f6ba1cc..a5baacd 100644
--- a/plat/qti/msm8916/include/platform_def.h
+++ b/plat/qti/msm8916/include/platform_def.h
@@ -25,11 +25,20 @@
 #define CACHE_WRITEBACK_GRANULE		U(64)
 #define PLATFORM_STACK_SIZE		SZ_4K
 
-/* CPU topology: single cluster with 4 cores */
+/* CPU topology: one or two clusters with 4 cores each */
+#ifdef PLAT_msm8939
+#define PLATFORM_CLUSTER_COUNT		U(2)
+#else
 #define PLATFORM_CLUSTER_COUNT		U(1)
-#define PLATFORM_MAX_CPUS_PER_CLUSTER	U(4)
+#endif
+#if defined(PLAT_mdm9607)
+#define PLATFORM_CPU_PER_CLUSTER_SHIFT	U(0)	/* 1 */
+#else
+#define PLATFORM_CPU_PER_CLUSTER_SHIFT	U(2)	/* 4 */
+#endif
+#define PLATFORM_CPUS_PER_CLUSTER	(1 << PLATFORM_CPU_PER_CLUSTER_SHIFT)
 #define PLATFORM_CORE_COUNT		(PLATFORM_CLUSTER_COUNT * \
-					 PLATFORM_MAX_CPUS_PER_CLUSTER)
+					 PLATFORM_CPUS_PER_CLUSTER)
 
 /* Power management */
 #define PLATFORM_SYSTEM_COUNT		U(1)
diff --git a/plat/qti/msm8916/msm8916_bl31_setup.c b/plat/qti/msm8916/msm8916_bl31_setup.c
index 449be7f..c588020 100644
--- a/plat/qti/msm8916/msm8916_bl31_setup.c
+++ b/plat/qti/msm8916/msm8916_bl31_setup.c
@@ -33,6 +33,7 @@
 				u_register_t arg2, u_register_t arg3)
 {
 	msm8916_early_platform_setup();
+	msm8916_configure_early();
 }
 
 void bl31_plat_arch_setup(void)
diff --git a/plat/qti/msm8916/msm8916_config.c b/plat/qti/msm8916/msm8916_config.c
index 350ed5c..0ac604b 100644
--- a/plat/qti/msm8916/msm8916_config.c
+++ b/plat/qti/msm8916/msm8916_config.c
@@ -7,6 +7,7 @@
 #include <assert.h>
 
 #include <arch.h>
+#include <drivers/arm/cci.h>
 #include <lib/mmio.h>
 
 #include "msm8916_config.h"
@@ -14,13 +15,23 @@
 #include <msm8916_mmap.h>
 #include <platform_def.h>
 
-static void msm8916_configure_timer(void)
+static const int cci_map[] = { 3, 4 };	/* map handed to cci_init() below */
+/* Early setup hook: on multi-cluster builds, initialise the CCI driver. */
+void msm8916_configure_early(void)
+{
+	if (PLATFORM_CLUSTER_COUNT > 1) {
+		cci_init(APCS_CCI_BASE, cci_map, ARRAY_SIZE(cci_map));
+		cci_enable_snoop_dvm_reqs(MPIDR_AFFLVL1_VAL(read_mpidr_el1()));
+	}
+}
+
+static void msm8916_configure_timer(uintptr_t base)
 {
 	/* Set timer frequency */
-	mmio_write_32(APCS_QTMR + CNTCTLBASE_CNTFRQ, PLAT_SYSCNT_FREQ);
+	mmio_write_32(base + CNTCTLBASE_CNTFRQ, PLAT_SYSCNT_FREQ);
 
 	/* Make all timer frames available to non-secure world */
-	mmio_write_32(APCS_QTMR + CNTNSAR, GENMASK_32(7, 0));
+	mmio_write_32(base + CNTNSAR, GENMASK_32(7, 0));
 }
 
 /*
@@ -30,16 +41,21 @@
  */
 #define APCS_GLB_SECURE_STS_NS		BIT_32(0)
 #define APCS_GLB_SECURE_PWR_NS		BIT_32(1)
-#define APCS_BOOT_START_ADDR_SEC	(APCS_CFG + 0x04)
+#if PLATFORM_CORE_COUNT > 1
+#define APCS_BOOT_START_ADDR_SEC	0x04
+#define APCS_AA64NAA32_REG		0x0c
+#else
+#define APCS_BOOT_START_ADDR_SEC	0x18
+#endif
 #define REMAP_EN			BIT_32(0)
-#define APCS_AA64NAA32_REG		(APCS_CFG + 0x0c)
 
-static void msm8916_configure_cpu_pm(void)
+static void msm8916_configure_apcs_cluster(unsigned int cluster)
 {
+	uintptr_t cfg = APCS_CFG(cluster);
 	unsigned int cpu;
 
 	/* Disallow non-secure access to boot remapper / TCM registers */
-	mmio_write_32(APCS_CFG, 0);
+	mmio_write_32(cfg, 0);
 
 	/*
 	 * Disallow non-secure access to power management registers.
@@ -47,27 +63,51 @@
 	 * to CPU frequency related registers (e.g. APCS_CMD_RCGR). If these
 	 * bits are not set, CPU frequency control fails in the non-secure world.
 	 */
-	mmio_write_32(APCS_GLB, APCS_GLB_SECURE_STS_NS | APCS_GLB_SECURE_PWR_NS);
+	mmio_write_32(APCS_GLB(cluster),
+		      APCS_GLB_SECURE_STS_NS | APCS_GLB_SECURE_PWR_NS);
 
-	/* Disallow non-secure access to L2 SAW2 */
-	mmio_write_32(APCS_L2_SAW2, 0);
+	if (PLATFORM_CORE_COUNT > 1) {
+		/* Disallow non-secure access to L2 SAW2 */
+		mmio_write_32(APCS_L2_SAW2(cluster), 0);
 
-	/* Disallow non-secure access to CPU ACS and SAW2 */
-	for (cpu = 0; cpu < PLATFORM_CORE_COUNT; cpu++) {
-		mmio_write_32(APCS_ALIAS_ACS(cpu), 0);
-		mmio_write_32(APCS_ALIAS_SAW2(cpu), 0);
+		/* Disallow non-secure access to CPU ACS and SAW2 */
+		for (cpu = 0; cpu < PLATFORM_CPUS_PER_CLUSTER; cpu++) {
+			mmio_write_32(APCS_ALIAS_ACS(cluster, cpu), 0);
+			mmio_write_32(APCS_ALIAS_SAW2(cluster, cpu), 0);
+		}
+	} else {
+		/* There is just one core so no aliases exist */
+		mmio_write_32(APCS_BANKED_ACS, 0);
+		mmio_write_32(APCS_BANKED_SAW2, 0);
 	}
 
 #ifdef __aarch64__
 	/* Make sure all further warm boots end up in BL31 and aarch64 state */
 	CASSERT((BL31_BASE & 0xffff) == 0, assert_bl31_base_64k_aligned);
-	mmio_write_32(APCS_BOOT_START_ADDR_SEC, BL31_BASE | REMAP_EN);
-	mmio_write_32(APCS_AA64NAA32_REG, 1);
+	mmio_write_32(cfg + APCS_BOOT_START_ADDR_SEC, BL31_BASE | REMAP_EN);
+	mmio_write_32(cfg + APCS_AA64NAA32_REG, 1);
 #else
 	/* Make sure all further warm boots end up in BL32 */
 	CASSERT((BL32_BASE & 0xffff) == 0, assert_bl32_base_64k_aligned);
-	mmio_write_32(APCS_BOOT_START_ADDR_SEC, BL32_BASE | REMAP_EN);
+	mmio_write_32(cfg + APCS_BOOT_START_ADDR_SEC, BL32_BASE | REMAP_EN);
 #endif
+
+	msm8916_configure_timer(APCS_QTMR(cluster));
+}
+
+static void msm8916_configure_apcs(void)
+{
+	unsigned int idx;
+
+	/* Run the per-cluster APCS configuration for every cluster index */
+	for (idx = 0U; idx < PLATFORM_CLUSTER_COUNT; ++idx) {
+		msm8916_configure_apcs_cluster(idx);
+	}
+	if (PLATFORM_CLUSTER_COUNT >= 2) {
+		/* Multi-cluster only: disallow non-secure access to CCI ACS/SAW2 */
+		mmio_write_32(APCS_CCI_ACS, 0);
+		mmio_write_32(APCS_CCI_SAW2, 0);
+	}
 }
 
 /*
@@ -77,30 +117,79 @@
  * by default to avoid special setup on the non-secure side.
  */
 #define CLK_OFF					BIT_32(31)
+#define GCC_APSS_TCU_CBCR			(GCC_BASE + 0x12018)
+#define GCC_GFX_TCU_CBCR			(GCC_BASE + 0x12020)
 #define GCC_SMMU_CFG_CBCR			(GCC_BASE + 0x12038)
+#define GCC_RPM_SMMU_CLOCK_BRANCH_ENA_VOTE	(GCC_BASE + 0x3600c)
 #define GCC_APCS_SMMU_CLOCK_BRANCH_ENA_VOTE	(GCC_BASE + 0x4500c)
+#define APSS_TCU_CLK_ENA			BIT_32(1)
+#define GFX_TCU_CLK_ENA				BIT_32(2)
+#define GFX_TBU_CLK_ENA				BIT_32(3)
 #define SMMU_CFG_CLK_ENA			BIT_32(12)
 #define APPS_SMMU_INTR_SEL_NS			(APPS_SMMU_QCOM + 0x2000)
 #define APPS_SMMU_INTR_SEL_NS_EN_ALL		U(0xffffffff)
 
+#define SMMU_SACR				0x010
+#define SMMU_SACR_CACHE_LOCK			BIT_32(26)
+#define SMMU_IDR7				0x03c
+#define SMMU_IDR7_MINOR(val)			(((val) >> 0) & 0xf)
+#define SMMU_IDR7_MAJOR(val)			(((val) >> 4) & 0xf)
+
+static void msm8916_smmu_cache_unlock(uintptr_t smmu_base, uintptr_t clk_cbcr)
+{
+	uint32_t idr7;
+
+	/* Spin until the clock branch at clk_cbcr no longer reports CLK_OFF */
+	while ((mmio_read_32(clk_cbcr) & CLK_OFF) != 0U) {
+	}
+
+	idr7 = mmio_read_32(smmu_base + SMMU_IDR7);
+	VERBOSE("SMMU(0x%lx) r%dp%d\n", smmu_base,
+		SMMU_IDR7_MAJOR(idr7), SMMU_IDR7_MINOR(idr7));
+
+	/* Major revision 2 or later: clear sACR.CACHE_LOCK so CBn_ACTLR is writable */
+	if (SMMU_IDR7_MAJOR(idr7) > 1) {
+		mmio_clrbits_32(smmu_base + SMMU_SACR, SMMU_SACR_CACHE_LOCK);
+	}
+}
+
 static void msm8916_configure_smmu(void)
 {
-	/* Enable SMMU configuration clock to enable register access */
-	mmio_setbits_32(GCC_APCS_SMMU_CLOCK_BRANCH_ENA_VOTE, SMMU_CFG_CLK_ENA);
-	while (mmio_read_32(GCC_SMMU_CFG_CBCR) & CLK_OFF)
-		;
+	uint32_t ena_bits = APSS_TCU_CLK_ENA | SMMU_CFG_CLK_ENA;
+
+	/* Single core (MDM) platforms do not have a GPU */
+	if (PLATFORM_CORE_COUNT > 1) {
+		ena_bits |= GFX_TCU_CLK_ENA | GFX_TBU_CLK_ENA;
+	}
+
+	/* Enable SMMU clocks to enable register access */
+	mmio_write_32(GCC_APCS_SMMU_CLOCK_BRANCH_ENA_VOTE, ena_bits);
+
+	/* Wait for configuration clock */
+	while (mmio_read_32(GCC_SMMU_CFG_CBCR) & CLK_OFF) {
+	}
 
 	/* Route all context bank interrupts to non-secure interrupt */
 	mmio_write_32(APPS_SMMU_INTR_SEL_NS, APPS_SMMU_INTR_SEL_NS_EN_ALL);
 
-	/* Disable configuration clock again */
-	mmio_clrbits_32(GCC_APCS_SMMU_CLOCK_BRANCH_ENA_VOTE, SMMU_CFG_CLK_ENA);
+	/* Clear sACR.CACHE_LOCK bit if needed for MMU-500 r2p0+ */
+	msm8916_smmu_cache_unlock(APPS_SMMU_BASE, GCC_APSS_TCU_CBCR);
+	if (PLATFORM_CORE_COUNT > 1) {
+		msm8916_smmu_cache_unlock(GPU_SMMU_BASE, GCC_GFX_TCU_CBCR);
+	}
+
+	/*
+	 * Keep APCS vote for SMMU clocks for rest of booting process, but make
+	 * sure other vote registers (such as RPM) do not keep permanent votes.
+	 */
+	VERBOSE("Clearing GCC_RPM_SMMU_CLOCK_BRANCH_ENA_VOTE (was: 0x%x)\n",
+		mmio_read_32(GCC_RPM_SMMU_CLOCK_BRANCH_ENA_VOTE));
+	mmio_write_32(GCC_RPM_SMMU_CLOCK_BRANCH_ENA_VOTE, 0);
 }
 
 void msm8916_configure(void)
 {
 	msm8916_gicv2_configure();
-	msm8916_configure_timer();
-	msm8916_configure_cpu_pm();
+	msm8916_configure_apcs();
 	msm8916_configure_smmu();
 }
diff --git a/plat/qti/msm8916/msm8916_config.h b/plat/qti/msm8916/msm8916_config.h
index 992625b..977d02c 100644
--- a/plat/qti/msm8916/msm8916_config.h
+++ b/plat/qti/msm8916/msm8916_config.h
@@ -8,5 +8,6 @@
 #define MSM8916_CONFIG_H
 
 void msm8916_configure(void);
+void msm8916_configure_early(void);
 
 #endif /* MSM8916_CONFIG_H */
diff --git a/plat/qti/msm8916/msm8916_cpu_boot.c b/plat/qti/msm8916/msm8916_cpu_boot.c
index b3f51f6..d6faa59 100644
--- a/plat/qti/msm8916/msm8916_cpu_boot.c
+++ b/plat/qti/msm8916/msm8916_cpu_boot.c
@@ -1,14 +1,14 @@
 /*
- * Copyright (c) 2021, Stephan Gerhold <stephan@gerhold.net>
+ * Copyright (c) 2021-2022, Stephan Gerhold <stephan@gerhold.net>
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
 
 #include <arch_helpers.h>
+#include <common/debug.h>
 #include <drivers/delay_timer.h>
 #include <lib/mmio.h>
 
-#include <msm8916_mmap.h>
 #include "msm8916_pm.h"
 
 #define CPU_PWR_CTL			0x4
@@ -26,12 +26,39 @@
 #define APC_PWR_GATE_CTL_GHDS_EN	BIT_32(0)
 #define APC_PWR_GATE_CTL_GHDS_CNT(cnt)	((cnt) << 24)
 
+#define PWR_CTL_OVERRIDE		0xc
+#define L2_PWR_CTL			0x14
+#define L2_PWR_STATUS			0x18
+#define CORE_CBCR			0x58
+
+#define PWR_CTL_OVERRIDE_PRESETDBG	BIT_32(22)
+
+#define L2_PWR_CTL_L2_ARRAY_HS		BIT_32(0)
+#define L2_PWR_CTL_SCU_ARRAY_HS		BIT_32(1)
+#define L2_PWR_CTL_L2_RST_DIS		BIT_32(2)
+#define L2_PWR_CTL_L2_HS_CLAMP		BIT_32(8)
+#define L2_PWR_CTL_L2_HS_EN		BIT_32(9)
+#define L2_PWR_CTL_L2_HS_RST		BIT_32(10)
+#define L2_PWR_CTL_L2_SLEEP_STATE	BIT_32(11)
+#define L2_PWR_CTL_SYS_RESET		BIT_32(12)
+#define L2_PWR_CTL_L2_RET_SLP		BIT_32(13)
+#define L2_PWR_CTL_SCU_ARRAY_HS_CLAMP	BIT_32(14)
+#define L2_PWR_CTL_L2_ARRAY_HS_CLAMP	BIT_32(15)
+#define L2_PWR_CTL_L2_HS_CNT(cnt)	((cnt) << 16)
+#define L2_PWR_CTL_PMIC_APC_ON		BIT_32(28)
+
+#define L2_PWR_STATUS_L2_HS_STS		BIT_32(9)
+
+#define CORE_CBCR_CLK_ENABLE		BIT_32(0)
+#define CORE_CBCR_HW_CTL		BIT_32(1)
+
 /* Boot a secondary CPU core for the first time. */
-void msm8916_cpu_boot(unsigned int core)
+void msm8916_cpu_boot(uintptr_t acs)
 {
-	uintptr_t acs = APCS_ALIAS_ACS(core);
 	uint32_t pwr_ctl;
 
+	VERBOSE("PSCI: Powering on CPU @ 0x%08lx\n", acs);
+
 	pwr_ctl = CPU_PWR_CTL_CLAMP | CPU_PWR_CTL_CORE_MEM_CLAMP |
 		  CPU_PWR_CTL_CORE_RST | CPU_PWR_CTL_COREPOR_RST;
 	mmio_write_32(acs + CPU_PWR_CTL, pwr_ctl);
@@ -64,3 +91,60 @@
 	mmio_write_32(acs + CPU_PWR_CTL, pwr_ctl);
 	dsb();
 }
+
+/* Power on the cluster L2 cache controlled via the registers at base; no-op if already on. */
+void msm8916_l2_boot(uintptr_t base)
+{
+	uint32_t pwr_ctl, cbcr, ovr;
+
+	/* Skip if cluster L2 is already powered on */
+	if (mmio_read_32(base + L2_PWR_STATUS) & L2_PWR_STATUS_L2_HS_STS) {
+		VERBOSE("PSCI: L2 cache @ 0x%08lx is already powered on\n", base);
+		return;
+	}
+
+	VERBOSE("PSCI: Powering on L2 cache @ 0x%08lx\n", base);
+	/* Initial L2_PWR_CTL value: clamp and reset bits set, L2_HS_EN set, HS count 16 */
+	pwr_ctl = L2_PWR_CTL_L2_HS_CLAMP | L2_PWR_CTL_L2_HS_EN |
+		  L2_PWR_CTL_L2_HS_RST | L2_PWR_CTL_SYS_RESET |
+		  L2_PWR_CTL_SCU_ARRAY_HS_CLAMP | L2_PWR_CTL_L2_ARRAY_HS_CLAMP |
+		  L2_PWR_CTL_L2_HS_CNT(16);
+	mmio_write_32(base + L2_PWR_CTL, pwr_ctl);
+	/* Set the PRESETDBG override bit, then wait 2us */
+	ovr = PWR_CTL_OVERRIDE_PRESETDBG;
+	mmio_write_32(base + PWR_CTL_OVERRIDE, ovr);
+	dsb();
+	udelay(2);
+	/* Clear the SCU and L2 array HS clamp bits */
+	pwr_ctl &= ~(L2_PWR_CTL_SCU_ARRAY_HS_CLAMP |
+		     L2_PWR_CTL_L2_ARRAY_HS_CLAMP);
+	mmio_write_32(base + L2_PWR_CTL, pwr_ctl);
+	/* Set the L2 and SCU array HS bits, then wait 2us */
+	pwr_ctl |= (L2_PWR_CTL_L2_ARRAY_HS | L2_PWR_CTL_SCU_ARRAY_HS);
+	mmio_write_32(base + L2_PWR_CTL, pwr_ctl);
+	dsb();
+	udelay(2);
+	/* Write CLK_ENABLE to CORE_CBCR */
+	cbcr = CORE_CBCR_CLK_ENABLE;
+	mmio_write_32(base + CORE_CBCR, cbcr);
+	/* Clear the L2 HS clamp bit, then wait 2us */
+	pwr_ctl &= ~L2_PWR_CTL_L2_HS_CLAMP;
+	mmio_write_32(base + L2_PWR_CTL, pwr_ctl);
+	dsb();
+	udelay(2);
+	/* Clear the PRESETDBG override bit again */
+	ovr &= ~PWR_CTL_OVERRIDE_PRESETDBG;
+	mmio_write_32(base + PWR_CTL_OVERRIDE, ovr);
+	/* Clear L2_HS_RST and SYS_RESET, then wait 54us */
+	pwr_ctl &= ~(L2_PWR_CTL_L2_HS_RST | L2_PWR_CTL_SYS_RESET);
+	mmio_write_32(base + L2_PWR_CTL, pwr_ctl);
+	dsb();
+	udelay(54);
+	/* Set PMIC_APC_ON in L2_PWR_CTL */
+	pwr_ctl |= L2_PWR_CTL_PMIC_APC_ON;
+	mmio_write_32(base + L2_PWR_CTL, pwr_ctl);
+	/* Additionally set HW_CTL in CORE_CBCR; final dsb() before returning */
+	cbcr |= CORE_CBCR_HW_CTL;
+	mmio_write_32(base + CORE_CBCR, cbcr);
+	dsb();
+}
diff --git a/plat/qti/msm8916/msm8916_pm.c b/plat/qti/msm8916/msm8916_pm.c
index 792a096..fd44f04 100644
--- a/plat/qti/msm8916/msm8916_pm.c
+++ b/plat/qti/msm8916/msm8916_pm.c
@@ -4,9 +4,12 @@
  * SPDX-License-Identifier: BSD-3-Clause
  */
 
+#include <assert.h>
+
 #include <arch.h>
 #include <arch_helpers.h>
 #include <common/debug.h>
+#include <drivers/arm/cci.h>
 #include <drivers/arm/gicv2.h>
 #include <drivers/delay_timer.h>
 #include <lib/mmio.h>
@@ -16,18 +19,51 @@
 #include <msm8916_mmap.h>
 #include "msm8916_pm.h"
 
+/*
+ * On platforms with two clusters the index of the APCS memory region is swapped
+ * compared to the MPIDR cluster affinity level: APCS cluster 0 manages CPUs
+ * with cluster affinity level 1, while APCS cluster 1 manages CPUs with level 0.
+ * MPIDR_APCS_CLUSTER() maps an MPIDR value to that APCS region index.
+ * On platforms with a single cluster there is only one APCS memory region.
+ */
+#if PLATFORM_CLUSTER_COUNT == 2
+#define MPIDR_APCS_CLUSTER(mpidr)	(!MPIDR_AFFLVL1_VAL(mpidr))
+#else
+#define MPIDR_APCS_CLUSTER(mpidr)	0
+#endif
+
+#define CLUSTER_PWR_STATE(state) ((state)->pwr_domain_state[MPIDR_AFFLVL1])
+
 static int msm8916_pwr_domain_on(u_register_t mpidr)
 {
-	unsigned int core = MPIDR_AFFLVL0_VAL(mpidr);
+	/* Should never be called on single-core platforms */
+	if (PLATFORM_CORE_COUNT == 1) {
+		assert(false);
+		return PSCI_E_ALREADY_ON;
+	}
 
-	VERBOSE("PSCI: Booting CPU %d\n", core);
-	msm8916_cpu_boot(core);
-
+	/* Power on L2 cache and secondary CPU core for the first time */
+	if (PLATFORM_CLUSTER_COUNT > 1) {
+		msm8916_l2_boot(APCS_GLB(MPIDR_APCS_CLUSTER(mpidr)));
+	}
+	msm8916_cpu_boot(APCS_ALIAS_ACS(MPIDR_APCS_CLUSTER(mpidr),
+					MPIDR_AFFLVL0_VAL(mpidr)));
 	return PSCI_E_SUCCESS;
 }
 
 static void msm8916_pwr_domain_on_finish(const psci_power_state_t *target_state)
 {
+	/* Should never be called on single-core platforms */
+	if (PLATFORM_CORE_COUNT == 1) {
+		assert(false);
+		return;
+	}
+
+	if (PLATFORM_CLUSTER_COUNT > 1 &&
+	    CLUSTER_PWR_STATE(target_state) == PLAT_MAX_OFF_STATE) {
+		cci_enable_snoop_dvm_reqs(MPIDR_AFFLVL1_VAL(read_mpidr_el1()));
+	}
+
 	gicv2_pcpu_distif_init();
 	gicv2_cpuif_enable();
 }
diff --git a/plat/qti/msm8916/msm8916_pm.h b/plat/qti/msm8916/msm8916_pm.h
index 5473bfa..f301d3c 100644
--- a/plat/qti/msm8916/msm8916_pm.h
+++ b/plat/qti/msm8916/msm8916_pm.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, Stephan Gerhold <stephan@gerhold.net>
+ * Copyright (c) 2021-2022, Stephan Gerhold <stephan@gerhold.net>
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -7,6 +7,7 @@
 #ifndef MSM8916_PM_H
 #define MSM8916_PM_H
 
-void msm8916_cpu_boot(unsigned int core);
+void msm8916_cpu_boot(uintptr_t acs);
+void msm8916_l2_boot(uintptr_t base);
 
 #endif /* MSM8916_PM_H */
diff --git a/plat/qti/msm8916/msm8916_setup.c b/plat/qti/msm8916/msm8916_setup.c
index 26039a9..69c0d78 100644
--- a/plat/qti/msm8916/msm8916_setup.c
+++ b/plat/qti/msm8916/msm8916_setup.c
@@ -47,7 +47,14 @@
 };
 
 static const struct uartdm_gpios uartdm_gpio_map[] = {
+#if defined(PLAT_msm8909)
+	{4, 5, 0x2}, {20, 21, 0x3},
+#elif defined(PLAT_msm8916) || defined(PLAT_msm8939)
 	{0, 1, 0x2}, {4, 5, 0x2},
+#elif defined(PLAT_mdm9607)
+	{12, 13, 0x2}, {4, 5, 0x2}, {0, 1, 0x1},
+	{16, 17, 0x2}, {8, 9, 0x2}, {20, 21, 0x2},
+#endif
 };
 
 /*
@@ -69,13 +76,13 @@
 
 	/* Enable AHB clock */
 	mmio_setbits_32(GCC_APCS_CLOCK_BRANCH_ENA_VOTE, BLSP1_AHB_CLK_ENA);
-	while (mmio_read_32(GCC_BLSP1_AHB_CBCR) & CLK_OFF)
-		;
+	while (mmio_read_32(GCC_BLSP1_AHB_CBCR) & CLK_OFF) {
+	}
 
 	/* Enable BLSP UART clock */
 	mmio_setbits_32(GCC_BLSP1_UART_APPS_CBCR(QTI_UART_NUM), CLK_ENABLE);
-	while (mmio_read_32(GCC_BLSP1_UART_APPS_CBCR(QTI_UART_NUM)) & CLK_OFF)
-		;
+	while (mmio_read_32(GCC_BLSP1_UART_APPS_CBCR(QTI_UART_NUM)) & CLK_OFF) {
+	}
 }
 
 void msm8916_early_platform_setup(void)
diff --git a/plat/qti/msm8916/msm8916_topology.c b/plat/qti/msm8916/msm8916_topology.c
index 4d0ed8f..d8cdc0e 100644
--- a/plat/qti/msm8916/msm8916_topology.c
+++ b/plat/qti/msm8916/msm8916_topology.c
@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2022, Stephan Gerhold <stephan@gerhold.net>
  * Copyright (c) 2017-2021, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
@@ -9,24 +10,27 @@
 
 #include <platform_def.h>
 
-static const unsigned char plat_power_domain_tree_desc[PLAT_MAX_PWR_LVL + 1] = {
+static const unsigned char plat_power_domain_tree_desc[] = {
 	PLATFORM_SYSTEM_COUNT,
 	PLATFORM_CLUSTER_COUNT,
-	PLATFORM_MAX_CPUS_PER_CLUSTER,
+	PLATFORM_CPUS_PER_CLUSTER,
+#if PLATFORM_CLUSTER_COUNT > 1
+	PLATFORM_CPUS_PER_CLUSTER,
+#endif
 };
 
 int plat_core_pos_by_mpidr(u_register_t mpidr)
 {
+	unsigned int cluster = MPIDR_AFFLVL1_VAL(mpidr);
 	unsigned int core = MPIDR_AFFLVL0_VAL(mpidr);
 
 	if (MPIDR_AFFLVL3_VAL(mpidr) > 0 ||
 	    MPIDR_AFFLVL2_VAL(mpidr) > 0 ||
-	    MPIDR_AFFLVL1_VAL(mpidr) > 0 ||
-	    core >= PLATFORM_MAX_CPUS_PER_CLUSTER) {
+	    cluster >= PLATFORM_CLUSTER_COUNT ||
+	    core >= PLATFORM_CPUS_PER_CLUSTER) {
 		return -1;
 	}
-
-	return core;
+	return core | (cluster << PLATFORM_CPU_PER_CLUSTER_SHIFT);
 }
 
 const unsigned char *plat_get_power_domain_tree_desc(void)
diff --git a/plat/qti/msm8916/platform.mk b/plat/qti/msm8916/platform.mk
index 7e698fb..4f4dcb4 100644
--- a/plat/qti/msm8916/platform.mk
+++ b/plat/qti/msm8916/platform.mk
@@ -17,7 +17,9 @@
 				plat/qti/msm8916/${ARCH}/msm8916_helpers.S	\
 				plat/qti/msm8916/${ARCH}/uartdm_console.S
 
-MSM8916_PM_SOURCES	:=	lib/cpus/${ARCH}/cortex_a53.S			\
+MSM8916_CPU		:=	$(if ${ARM_CORTEX_A7},cortex_a7,cortex_a53)
+MSM8916_PM_SOURCES	:=	drivers/arm/cci/cci.c				\
+				lib/cpus/${ARCH}/${MSM8916_CPU}.S		\
 				plat/common/plat_psci_common.c			\
 				plat/qti/msm8916/msm8916_config.c		\
 				plat/qti/msm8916/msm8916_cpu_boot.c		\
@@ -48,11 +50,14 @@
 ENABLE_SPE_FOR_NS		:= 0
 ENABLE_SVE_FOR_NS		:= 0
 
-# Disable workarounds unnecessary for Cortex-A53
+# Disable workarounds unnecessary for Cortex-A7/A53
 WORKAROUND_CVE_2017_5715	:= 0
 WORKAROUND_CVE_2022_23960	:= 0
 
-# MSM8916 uses ARM Cortex-A53 r0p0 so likely all the errata apply
+ifeq (${MSM8916_CPU},cortex_a53)
+# The Cortex-A53 revision varies depending on the SoC revision.
+# msm8916 uses r0p0, msm8939 uses r0p1 or r0p4. Enable all errata
+# and rely on the runtime detection to apply them only if needed.
 ERRATA_A53_819472		:= 1
 ERRATA_A53_824069		:= 1
 ERRATA_A53_826319		:= 1
@@ -60,8 +65,9 @@
 ERRATA_A53_835769		:= 1
 ERRATA_A53_836870		:= 1
 ERRATA_A53_843419		:= 1
-ERRATA_A53_855873		:= 0	# Workaround works only for >= r0p3
+ERRATA_A53_855873		:= 1
 ERRATA_A53_1530924		:= 1
+endif
 
 # Build config flags
 # ------------------
diff --git a/plat/qti/msm8916/sp_min/msm8916_sp_min_setup.c b/plat/qti/msm8916/sp_min/msm8916_sp_min_setup.c
index 78ab0c7..3c93305 100644
--- a/plat/qti/msm8916/sp_min/msm8916_sp_min_setup.c
+++ b/plat/qti/msm8916/sp_min/msm8916_sp_min_setup.c
@@ -27,6 +27,7 @@
 				  u_register_t arg2, u_register_t arg3)
 {
 	msm8916_early_platform_setup();
+	msm8916_configure_early();
 }
 
 void sp_min_plat_arch_setup(void)
diff --git a/plat/qti/msm8939/platform.mk b/plat/qti/msm8939/platform.mk
new file mode 100644
index 0000000..9bf6d4d
--- /dev/null
+++ b/plat/qti/msm8939/platform.mk
@@ -0,0 +1,11 @@
+#
+# Copyright (c) 2022-2023, Stephan Gerhold <stephan@gerhold.net>
+#
+# SPDX-License-Identifier: BSD-3-Clause
+#
+
+# Cache cannot be enabled early on MSM8939 because the CCI-400 must be
+# enabled before the CPUs in both clusters become cache-coherent.
+override WARMBOOT_ENABLE_DCACHE_EARLY := 0
+
+include plat/qti/msm8916/platform.mk
diff --git a/plat/qti/msm8939/sp_min/sp_min-msm8939.mk b/plat/qti/msm8939/sp_min/sp_min-msm8939.mk
new file mode 100644
index 0000000..28a6f01
--- /dev/null
+++ b/plat/qti/msm8939/sp_min/sp_min-msm8939.mk
@@ -0,0 +1,7 @@
+#
+# Copyright (c) 2022-2023, Stephan Gerhold <stephan@gerhold.net>
+#
+# SPDX-License-Identifier: BSD-3-Clause
+#
+
+include plat/qti/msm8916/sp_min/sp_min-msm8916.mk
diff --git a/plat/qti/msm8939/tsp/tsp-msm8939.mk b/plat/qti/msm8939/tsp/tsp-msm8939.mk
new file mode 100644
index 0000000..4eefc64
--- /dev/null
+++ b/plat/qti/msm8939/tsp/tsp-msm8939.mk
@@ -0,0 +1,7 @@
+#
+# Copyright (c) 2023, Stephan Gerhold <stephan@gerhold.net>
+#
+# SPDX-License-Identifier: BSD-3-Clause
+#
+
+include plat/qti/msm8916/tsp/tsp-msm8916.mk
diff --git a/plat/xilinx/common/include/plat_fdt.h b/plat/xilinx/common/include/plat_fdt.h
new file mode 100644
index 0000000..a1ee1e1
--- /dev/null
+++ b/plat/xilinx/common/include/plat_fdt.h
@@ -0,0 +1,12 @@
+/*
+ * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ */
+#ifndef PLAT_FDT_H
+#define PLAT_FDT_H
+
+void prepare_dtb(void);
+
+#endif /* PLAT_FDT_H */
diff --git a/plat/xilinx/common/plat_fdt.c b/plat/xilinx/common/plat_fdt.c
new file mode 100644
index 0000000..3d12d51
--- /dev/null
+++ b/plat/xilinx/common/plat_fdt.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ */
+#include <common/fdt_fixup.h>
+#include <common/fdt_wrappers.h>
+
+#include <plat_fdt.h>
+#include <platform_def.h>
+
+#if (defined(XILINX_OF_BOARD_DTB_ADDR) && !IS_TFA_IN_OCM(BL31_BASE))
+/* Add a "tf-a" reserved-memory entry for BL31 to the DT at XILINX_OF_BOARD_DTB_ADDR. */
+void prepare_dtb(void)
+{
+	void *fdt = (void *)XILINX_OF_BOARD_DTB_ADDR;
+	int rc;
+
+	/* Nothing to do unless a valid device tree header is found */
+	if (fdt_check_header(fdt) != 0) {
+		NOTICE("Can't read DT at %p\n", fdt);
+		return;
+	}
+	/* Re-open the blob in place with XILINX_OF_BOARD_DTB_MAX_SIZE as buffer size */
+	rc = fdt_open_into(fdt, fdt, XILINX_OF_BOARD_DTB_MAX_SIZE);
+	if (rc < 0) {
+		ERROR("Invalid Device Tree at %p: error %d\n", fdt, rc);
+		return;
+	}
+	/* Mark the memory used by Trusted Firmware as reserved */
+	if (fdt_add_reserved_memory(fdt, "tf-a", BL31_BASE, BL31_LIMIT - BL31_BASE + 1) != 0) {
+		WARN("Failed to add reserved memory nodes for BL31 to DT.\n");
+		return;
+	}
+	/* Pack the blob again before handing it on */
+	rc = fdt_pack(fdt);
+	if (rc < 0) {
+		ERROR("Failed to pack Device Tree at %p: error %d\n", fdt, rc);
+		return;
+	}
+
+	clean_dcache_range((uintptr_t)fdt, fdt_blob_size(fdt));
+	INFO("Changed device tree to advertise PSCI and reserved memories.\n");
+}
+#else
+void prepare_dtb(void)
+{
+}
+#endif
diff --git a/plat/xilinx/versal/bl31_versal_setup.c b/plat/xilinx/versal/bl31_versal_setup.c
index 0ef92a6..6cb529b 100644
--- a/plat/xilinx/versal/bl31_versal_setup.c
+++ b/plat/xilinx/versal/bl31_versal_setup.c
@@ -20,6 +20,7 @@
 #include <plat/common/platform.h>
 #include <plat_arm.h>
 
+#include <plat_fdt.h>
 #include <plat_private.h>
 #include <plat_startup.h>
 #include "pm_api_sys.h"
@@ -191,8 +192,11 @@
 
 	return 0;
 }
+
 void bl31_platform_setup(void)
 {
+	prepare_dtb();
+
 	/* Initialize the gic cpu and distributor interfaces */
 	plat_versal_gic_driver_init();
 	plat_versal_gic_init();
@@ -220,6 +224,10 @@
 	plat_arm_interconnect_enter_coherency();
 
 	const mmap_region_t bl_regions[] = {
+#if (defined(XILINX_OF_BOARD_DTB_ADDR) && !IS_TFA_IN_OCM(BL31_BASE))
+		MAP_REGION_FLAT(XILINX_OF_BOARD_DTB_ADDR, XILINX_OF_BOARD_DTB_MAX_SIZE,
+				MT_MEMORY | MT_RW | MT_NS),
+#endif
 		MAP_REGION_FLAT(BL31_BASE, BL31_END - BL31_BASE,
 			MT_MEMORY | MT_RW | MT_SECURE),
 		MAP_REGION_FLAT(BL_CODE_BASE, BL_CODE_END - BL_CODE_BASE,
diff --git a/plat/xilinx/versal/include/platform_def.h b/plat/xilinx/versal/include/platform_def.h
index 6c1d8b6..4c0df4f 100644
--- a/plat/xilinx/versal/include/platform_def.h
+++ b/plat/xilinx/versal/include/platform_def.h
@@ -76,8 +76,29 @@
  ******************************************************************************/
 #define PLAT_PHY_ADDR_SPACE_SIZE	(1ull << 32)
 #define PLAT_VIRT_ADDR_SPACE_SIZE	(1ull << 32)
+
+#define XILINX_OF_BOARD_DTB_MAX_SIZE	U(0x200000)
+
+#define PLAT_OCM_BASE			U(0xFFFE0000)
+#define PLAT_OCM_LIMIT			U(0xFFFFFFFF)
+/* True when x lies in [PLAT_OCM_BASE, PLAT_OCM_LIMIT); usable in #if. */
+#define IS_TFA_IN_OCM(x)	(((x) >= PLAT_OCM_BASE) && ((x) < PLAT_OCM_LIMIT))
+
+#ifndef MAX_MMAP_REGIONS
+#if (defined(XILINX_OF_BOARD_DTB_ADDR) && !IS_TFA_IN_OCM(BL31_BASE))
+#define MAX_MMAP_REGIONS		9	/* extra slot for the DTB mapping */
+#else
 #define MAX_MMAP_REGIONS		8
-#define MAX_XLAT_TABLES			5
+#endif /* XILINX_OF_BOARD_DTB_ADDR && !IS_TFA_IN_OCM(BL31_BASE) */
+#endif /* MAX_MMAP_REGIONS */
+
+#ifndef MAX_XLAT_TABLES
+#if !IS_TFA_IN_OCM(BL31_BASE)
+#define MAX_XLAT_TABLES		9
+#else
+#define MAX_XLAT_TABLES		5
+#endif /* !IS_TFA_IN_OCM(BL31_BASE) */
+#endif /* MAX_XLAT_TABLES */
 
 #define CACHE_WRITEBACK_SHIFT	6
 #define CACHE_WRITEBACK_GRANULE	(1 << CACHE_WRITEBACK_SHIFT)
diff --git a/plat/xilinx/versal/platform.mk b/plat/xilinx/versal/platform.mk
index 0b6aea0..c936220 100644
--- a/plat/xilinx/versal/platform.mk
+++ b/plat/xilinx/versal/platform.mk
@@ -45,12 +45,17 @@
 VERSAL_PLATFORM ?= silicon
 $(eval $(call add_define_val,VERSAL_PLATFORM,VERSAL_PLATFORM_ID_${VERSAL_PLATFORM}))
 
+ifdef XILINX_OF_BOARD_DTB_ADDR
+$(eval $(call add_define,XILINX_OF_BOARD_DTB_ADDR))
+endif
+
 PLAT_INCLUDES		:=	-Iinclude/plat/arm/common/			\
 				-Iplat/xilinx/common/include/			\
 				-Iplat/xilinx/common/ipi_mailbox_service/	\
 				-Iplat/xilinx/versal/include/			\
 				-Iplat/xilinx/versal/pm_service/
 
+include lib/libfdt/libfdt.mk
 # Include GICv3 driver files
 include drivers/arm/gic/v3/gicv3.mk
 include lib/xlat_tables_v2/xlat_tables.mk
@@ -80,6 +85,7 @@
 				lib/cpus/aarch64/cortex_a72.S			\
 				plat/common/plat_psci_common.c			\
 				plat/xilinx/common/ipi.c			\
+				plat/xilinx/common/plat_fdt.c			\
 				plat/xilinx/common/plat_startup.c		\
 				plat/xilinx/common/ipi_mailbox_service/ipi_mailbox_svc.c \
 				plat/xilinx/common/pm_service/pm_ipi.c		\
@@ -93,7 +99,9 @@
 				plat/xilinx/versal/sip_svc_setup.c		\
 				plat/xilinx/versal/versal_gicv3.c		\
 				plat/xilinx/versal/versal_ipi.c			\
-				plat/xilinx/versal/pm_service/pm_client.c
+				plat/xilinx/versal/pm_service/pm_client.c	\
+				common/fdt_fixup.c				\
+				${LIBFDT_SRCS}
 
 ifeq ($(HARDEN_SLS_ALL), 1)
 TF_CFLAGS_aarch64      +=      -mharden-sls=all
diff --git a/plat/xilinx/versal_net/bl31_versal_net_setup.c b/plat/xilinx/versal_net/bl31_versal_net_setup.c
index 79205a3..d2b6c7b 100644
--- a/plat/xilinx/versal_net/bl31_versal_net_setup.c
+++ b/plat/xilinx/versal_net/bl31_versal_net_setup.c
@@ -12,17 +12,15 @@
 #include <bl31/bl31.h>
 #include <common/bl_common.h>
 #include <common/debug.h>
-#include <common/fdt_fixup.h>
-#include <common/fdt_wrappers.h>
 #include <drivers/arm/dcc.h>
 #include <drivers/arm/pl011.h>
 #include <drivers/console.h>
 #include <lib/mmio.h>
 #include <lib/xlat_tables/xlat_tables_v2.h>
-#include <libfdt.h>
 #include <plat/common/platform.h>
 #include <plat_arm.h>
 
+#include <plat_fdt.h>
 #include <plat_private.h>
 #include <plat_startup.h>
 #include <pm_api_sys.h>
@@ -226,6 +224,8 @@
 
 void bl31_platform_setup(void)
 {
+	prepare_dtb();
+
 	/* Initialize the gic cpu and distributor interfaces */
 	plat_versal_net_gic_driver_init();
 	plat_versal_net_gic_init();
@@ -250,6 +250,10 @@
 void bl31_plat_arch_setup(void)
 {
 	const mmap_region_t bl_regions[] = {
+#if (defined(XILINX_OF_BOARD_DTB_ADDR) && !IS_TFA_IN_OCM(BL31_BASE))
+		MAP_REGION_FLAT(XILINX_OF_BOARD_DTB_ADDR, XILINX_OF_BOARD_DTB_MAX_SIZE,
+				MT_MEMORY | MT_RW | MT_NS),
+#endif
 		MAP_REGION_FLAT(BL31_BASE, BL31_END - BL31_BASE,
 			MT_MEMORY | MT_RW | MT_SECURE),
 		MAP_REGION_FLAT(BL_CODE_BASE, BL_CODE_END - BL_CODE_BASE,
diff --git a/plat/xilinx/versal_net/include/platform_def.h b/plat/xilinx/versal_net/include/platform_def.h
index b256b05..872b6ee 100644
--- a/plat/xilinx/versal_net/include/platform_def.h
+++ b/plat/xilinx/versal_net/include/platform_def.h
@@ -84,13 +84,25 @@
 
 #define PLAT_PHY_ADDR_SPACE_SIZE	(1ULL << 32U)
 #define PLAT_VIRT_ADDR_SPACE_SIZE	(1ULL << 32U)
-#if (BL31_LIMIT < PLAT_DDR_LOWMEM_MAX)
-#define MAX_MMAP_REGIONS		U(10)
+
+#define XILINX_OF_BOARD_DTB_MAX_SIZE	U(0x200000)
+
+#define PLAT_OCM_BASE			U(0xBBF00000)
+#define PLAT_OCM_LIMIT			U(0xBC000000)
+/* True when x lies in [PLAT_OCM_BASE, PLAT_OCM_LIMIT); usable in #if. */
+#define IS_TFA_IN_OCM(x)	(((x) >= PLAT_OCM_BASE) && ((x) < PLAT_OCM_LIMIT))
+
+#ifndef MAX_MMAP_REGIONS
+#if (defined(XILINX_OF_BOARD_DTB_ADDR) && !IS_TFA_IN_OCM(BL31_BASE))
+#define MAX_MMAP_REGIONS		9	/* extra slot for the DTB mapping */
 #else
-#define MAX_MMAP_REGIONS		U(9)
+#define MAX_MMAP_REGIONS		8
+#endif /* XILINX_OF_BOARD_DTB_ADDR && !IS_TFA_IN_OCM(BL31_BASE) */
 #endif
 
-#define MAX_XLAT_TABLES			U(8)
+#ifndef MAX_XLAT_TABLES
+#define MAX_XLAT_TABLES			U(9)
+#endif /* MAX_XLAT_TABLES */
 
 #define CACHE_WRITEBACK_SHIFT	U(6)
 #define CACHE_WRITEBACK_GRANULE	(1 << CACHE_WRITEBACK_SHIFT)
diff --git a/plat/xilinx/versal_net/platform.mk b/plat/xilinx/versal_net/platform.mk
index 398ef85..9c4cfa0 100644
--- a/plat/xilinx/versal_net/platform.mk
+++ b/plat/xilinx/versal_net/platform.mk
@@ -65,6 +65,10 @@
 
 $(eval $(call add_define_val,VERSAL_NET_CONSOLE,VERSAL_NET_CONSOLE_ID_${VERSAL_NET_CONSOLE}))
 
+ifdef XILINX_OF_BOARD_DTB_ADDR
+$(eval $(call add_define,XILINX_OF_BOARD_DTB_ADDR))
+endif
+
 PLAT_INCLUDES		:=	-Iinclude/plat/arm/common/			\
 				-Iplat/xilinx/common/include/			\
 				-Iplat/xilinx/common/ipi_mailbox_service/	\
@@ -101,7 +105,8 @@
 else
 BL31_SOURCES		+=	${PLAT_PATH}/plat_psci.c
 endif
-BL31_SOURCES		+=	plat/xilinx/common/plat_startup.c		\
+BL31_SOURCES		+=	plat/xilinx/common/plat_fdt.c			\
+				plat/xilinx/common/plat_startup.c		\
 				plat/xilinx/common/ipi.c			\
 				plat/xilinx/common/ipi_mailbox_service/ipi_mailbox_svc.c \
 				plat/xilinx/common/versal.c			\
diff --git a/services/std_svc/spmd/spmd_main.c b/services/std_svc/spmd/spmd_main.c
index 6b16373..587e60f 100644
--- a/services/std_svc/spmd/spmd_main.c
+++ b/services/std_svc/spmd/spmd_main.c
@@ -273,13 +273,16 @@
 
 	assert(plat_ic_get_pending_interrupt_type() == INTR_TYPE_EL3);
 
-	intid = plat_ic_get_pending_interrupt_id();
+	intid = plat_ic_acknowledge_interrupt();
 
 	if (plat_spmd_handle_group0_interrupt(intid) < 0) {
 		ERROR("Group0 interrupt %u not handled\n", intid);
 		panic();
 	}
 
+	/* Deactivate the corresponding Group0 interrupt. */
+	plat_ic_end_of_interrupt(intid);
+
 	return 0U;
 }
 #endif
@@ -300,7 +303,7 @@
 
 	assert(plat_ic_get_pending_interrupt_type() == INTR_TYPE_EL3);
 
-	intid = plat_ic_get_pending_interrupt_id();
+	intid = plat_ic_acknowledge_interrupt();
 
 	/*
 	 * TODO: Currently due to a limitation in SPMD implementation, the
@@ -313,6 +316,9 @@
 		panic();
 	}
 
+	/* Deactivate the corresponding Group0 interrupt. */
+	plat_ic_end_of_interrupt(intid);
+
 	/* Return success. */
 	SMC_RET8(handle, FFA_SUCCESS_SMC32, FFA_PARAM_MBZ, FFA_PARAM_MBZ,
 		 FFA_PARAM_MBZ, FFA_PARAM_MBZ, FFA_PARAM_MBZ, FFA_PARAM_MBZ,
@@ -1187,7 +1193,7 @@
 		if (secure_origin) {
 			return spmd_handle_group0_intr_swd(handle);
 		} else {
-			return spmd_ffa_error_return(handle, FFA_ERROR_DENIED);
+			return spmd_ffa_error_return(handle, FFA_ERROR_NOT_SUPPORTED);
 		}
 	default:
 		WARN("SPM: Unsupported call 0x%08x\n", smc_fid);
