Merge pull request #976 from etienne-lms/minor-psci

psci: minor fixes in lib
diff --git a/Makefile b/Makefile
index aec10c9..31964de 100644
--- a/Makefile
+++ b/Makefile
@@ -454,6 +454,7 @@
 $(eval $(call assert_boolean,USE_COHERENT_MEM))
 $(eval $(call assert_boolean,USE_TBBR_DEFS))
 $(eval $(call assert_boolean,WARMBOOT_ENABLE_DCACHE_EARLY))
+$(eval $(call assert_boolean,ENABLE_SPE_FOR_LOWER_ELS))
 
 $(eval $(call assert_numeric,ARM_ARCH_MAJOR))
 $(eval $(call assert_numeric,ARM_ARCH_MINOR))
@@ -493,6 +494,7 @@
 $(eval $(call add_define,USE_COHERENT_MEM))
 $(eval $(call add_define,USE_TBBR_DEFS))
 $(eval $(call add_define,WARMBOOT_ENABLE_DCACHE_EARLY))
+$(eval $(call add_define,ENABLE_SPE_FOR_LOWER_ELS))
 
 # Define the EL3_PAYLOAD_BASE flag only if it is provided.
 ifdef EL3_PAYLOAD_BASE
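
`assert_boolean` fails the build unless the flag is 0 or 1, and `add_define`
forwards it to the compiler as a preprocessor define. A minimal sketch of the
resulting compile-time switch (hosted C; the messages are illustrative only):

    #include <stdio.h>

    #ifndef ENABLE_SPE_FOR_LOWER_ELS
    #define ENABLE_SPE_FOR_LOWER_ELS 1	/* the documented default */
    #endif

    int main(void)
    {
    #if ENABLE_SPE_FOR_LOWER_ELS
    	puts("SPE will be enabled for lower ELs where implemented");
    #else
    	puts("SPE support compiled out");
    #endif
    	return 0;
    }
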
diff --git a/bl1/aarch32/bl1_entrypoint.S b/bl1/aarch32/bl1_entrypoint.S
index 39ebcf7..7780626 100644
--- a/bl1/aarch32/bl1_entrypoint.S
+++ b/bl1/aarch32/bl1_entrypoint.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -44,7 +44,7 @@
 * ---------------------------------------------------------------------
 */
 	el3_entrypoint_common					\
-		_set_endian=1					\
+		_init_sctlr=1					\
 		_warm_boot_mailbox=!PROGRAMMABLE_RESET_ADDRESS	\
 		_secondary_cold_boot=!COLD_BOOT_SINGLE_CPU	\
 		_init_memory=1					\
diff --git a/bl1/aarch64/bl1_entrypoint.S b/bl1/aarch64/bl1_entrypoint.S
index 36ce0d0..f7e02e9 100644
--- a/bl1/aarch64/bl1_entrypoint.S
+++ b/bl1/aarch64/bl1_entrypoint.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -25,7 +25,7 @@
 	 * ---------------------------------------------------------------------
 	 */
 	el3_entrypoint_common					\
-		_set_endian=1					\
+		_init_sctlr=1					\
 		_warm_boot_mailbox=!PROGRAMMABLE_RESET_ADDRESS	\
 		_secondary_cold_boot=!COLD_BOOT_SINGLE_CPU	\
 		_init_memory=1					\
diff --git a/bl31/aarch64/bl31_entrypoint.S b/bl31/aarch64/bl31_entrypoint.S
index 6d10bce..419927d 100644
--- a/bl31/aarch64/bl31_entrypoint.S
+++ b/bl31/aarch64/bl31_entrypoint.S
@@ -36,12 +36,12 @@
 	 * bl31_entrypoint() during the cold boot flow, so the cold/warm boot
 	 * and primary/secondary CPU logic should not be executed in this case.
 	 *
-	 * Also, assume that the previous bootloader has already set up the CPU
-	 * endianness and has initialised the memory.
+	 * Also, assume that the previous bootloader has already initialised the
+	 * SCTLR_EL3, including the endianness, and has initialised the memory.
 	 * ---------------------------------------------------------------------
 	 */
 	el3_entrypoint_common					\
-		_set_endian=0					\
+		_init_sctlr=0					\
 		_warm_boot_mailbox=0				\
 		_secondary_cold_boot=0				\
 		_init_memory=0					\
@@ -62,7 +62,7 @@
 	 * ---------------------------------------------------------------------
 	 */
 	el3_entrypoint_common					\
-		_set_endian=1					\
+		_init_sctlr=1					\
 		_warm_boot_mailbox=!PROGRAMMABLE_RESET_ADDRESS	\
 		_secondary_cold_boot=!COLD_BOOT_SINGLE_CPU	\
 		_init_memory=1					\
@@ -136,7 +136,7 @@
 	 * 'el3_entrypoint_common' must be skipped:
 	 *
 	 *  - Only when the platform bypasses the BL1/BL31 entrypoint by
-	 *    programming the reset address do we need to set the CPU endianness.
+	 *    programming the reset address do we need to initialise SCTLR_EL3.
 	 *    In other cases, we assume this has been taken care by the
 	 *    entrypoint code.
 	 *
@@ -149,7 +149,7 @@
 	 *    it has been done once and for all on the cold boot path.
 	 */
 	el3_entrypoint_common					\
-		_set_endian=PROGRAMMABLE_RESET_ADDRESS		\
+		_init_sctlr=PROGRAMMABLE_RESET_ADDRESS		\
 		_warm_boot_mailbox=0				\
 		_secondary_cold_boot=0				\
 		_init_memory=0					\
diff --git a/bl32/sp_min/aarch32/entrypoint.S b/bl32/sp_min/aarch32/entrypoint.S
index e145511..b3fccde 100644
--- a/bl32/sp_min/aarch32/entrypoint.S
+++ b/bl32/sp_min/aarch32/entrypoint.S
@@ -49,12 +49,12 @@
 	 * sp_min_entrypoint() during the cold boot flow, so the cold/warm boot
 	 * and primary/secondary CPU logic should not be executed in this case.
 	 *
-	 * Also, assume that the previous bootloader has already set up the CPU
-	 * endianness and has initialised the memory.
+	 * Also, assume that the previous bootloader has already initialised the
+	 * SCTLR, including the CPU endianness, and has initialised the memory.
 	 * ---------------------------------------------------------------------
 	 */
 	el3_entrypoint_common					\
-		_set_endian=0					\
+		_init_sctlr=0					\
 		_warm_boot_mailbox=0				\
 		_secondary_cold_boot=0				\
 		_init_memory=0					\
@@ -75,7 +75,7 @@
 	 * ---------------------------------------------------------------------
 	 */
 	el3_entrypoint_common					\
-		_set_endian=1					\
+		_init_sctlr=1					\
 		_warm_boot_mailbox=!PROGRAMMABLE_RESET_ADDRESS	\
 		_secondary_cold_boot=!COLD_BOOT_SINGLE_CPU	\
 		_init_memory=1					\
@@ -174,7 +174,7 @@
 	 * 'el3_entrypoint_common' must be skipped:
 	 *
 	 *  - Only when the platform bypasses the BL1/BL32 (SP_MIN) entrypoint by
-	 *    programming the reset address do we need to set the CPU endianness.
+	 *    programming the reset address do we need to initialise the SCTLR.
 	 *    In other cases, we assume this has been taken care by the
 	 *    entrypoint code.
 	 *
@@ -187,7 +187,7 @@
 	 *    it has been done once and for all on the cold boot path.
 	 */
 	el3_entrypoint_common					\
-		_set_endian=PROGRAMMABLE_RESET_ADDRESS		\
+		_init_sctlr=PROGRAMMABLE_RESET_ADDRESS		\
 		_warm_boot_mailbox=0				\
 		_secondary_cold_boot=0				\
 		_init_memory=0					\
diff --git a/bl32/sp_min/sp_min_main.c b/bl32/sp_min/sp_min_main.c
index 45ad03f..d27c023 100644
--- a/bl32/sp_min/sp_min_main.c
+++ b/bl32/sp_min/sp_min_main.c
@@ -8,6 +8,7 @@
 #include <arch_helpers.h>
 #include <assert.h>
 #include <bl_common.h>
+#include <console.h>
 #include <context.h>
 #include <context_mgmt.h>
 #include <debug.h>
@@ -176,6 +177,14 @@
 	 * corresponding to the desired security state after the next ERET.
 	 */
 	sp_min_prepare_next_image_entry();
+
+	/*
+	 * Perform any platform specific runtime setup prior to cold boot exit
+	 * from SP_MIN.
+	 */
+	sp_min_plat_runtime_setup();
+
+	console_flush();
 }
 
 /******************************************************************************
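
The new `sp_min_plat_runtime_setup()` hook runs just before the final
`console_flush()`, giving platforms a chance to redirect output to a runtime
UART before SP_MIN exits the cold boot path. A plausible minimal sketch for an
ARM CSS platform, assuming the generic `console_init()`/`console_uninit()`
hooks and the `ARM_CONSOLE_BAUDRATE` definition (names outside this patch are
assumptions, not a definitive implementation):

    #include <console.h>
    #include <platform_def.h>

    /* Sketch only: re-target the console to the runtime UART described by
     * the PLAT_ARM_SP_MIN_RUN_UART_* macros added in this patch. */
    void sp_min_plat_runtime_setup(void)
    {
    	console_uninit();
    	console_init(PLAT_ARM_SP_MIN_RUN_UART_BASE,
    		PLAT_ARM_SP_MIN_RUN_UART_CLK_IN_HZ,
    		ARM_CONSOLE_BAUDRATE);
    }
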
diff --git a/docs/firmware-design.md b/docs/firmware-design.md
index 358292a..746e413 100644
--- a/docs/firmware-design.md
+++ b/docs/firmware-design.md
@@ -220,6 +220,12 @@
     -   `DAIF`. The SError interrupt is enabled by clearing the SError interrupt
         mask bit.
 
+    -   `MDCR_EL3`. The trap controls, `MDCR_EL3.TDOSA`, `MDCR_EL3.TDA` and
+        `MDCR_EL3.TPM`, are cleared so that accesses to the registers they
+        control do not trap to EL3. AArch64 Secure self-hosted debug is
+        disabled by setting the `MDCR_EL3.SDD` bit. Also, `MDCR_EL3.SPD32` is
+        set to disable AArch32 Secure self-hosted privileged debug from S-EL1.
+
 *   Control register setup (for AArch32)
     -   `SCTLR`. Instruction cache is enabled by setting the `SCTLR.I` bit.
         Alignment checking is enabled by setting the `SCTLR.A` bit.
@@ -243,6 +249,9 @@
     -   `CPSR.A`. The Asynchronous data abort interrupt is enabled by clearing
         the Asynchronous data abort interrupt mask bit.
 
+    -   `SDCR`. The `SDCR.SPD` field is set to disable AArch32 Secure
+        self-hosted privileged debug.
+
 #### Platform initialization
 
 On ARM platforms, BL1 performs the following platform initializations:
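
The `MDCR_EL3` value described above is composed from the definitions this
patch adds to include/lib/aarch64/arch.h. A sketch of the same composition in
C, mirroring the `mov_imm` sequence in el3_common_macros.S (the macro name
`MDCR_EL3_INIT_VAL` is illustrative):

    #include <arch.h>

    /* Known-state initialisation: start from the reset value, set the
     * debug-disable fields (SDD, SPD32 = disable) and clear the trap
     * controls (TDOSA, TDA, TPM) so that lower-EL debug and PMU register
     * accesses do not trap to EL3. */
    #define MDCR_EL3_INIT_VAL						\
    	((MDCR_EL3_RESET_VAL | MDCR_SDD_BIT |				\
    	  MDCR_SPD32(MDCR_SPD32_DISABLE)) &				\
    	 ~(MDCR_TDOSA_BIT | MDCR_TDA_BIT | MDCR_TPM_BIT))
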
diff --git a/docs/user-guide.md b/docs/user-guide.md
index 0065ac0..d5423ca 100644
--- a/docs/user-guide.md
+++ b/docs/user-guide.md
@@ -542,6 +542,11 @@
     cluster platforms). If this option is enabled, then warm boot path
     enables D-caches immediately after enabling MMU. This option defaults to 0.
 
+*   `ENABLE_SPE_FOR_LOWER_ELS`: Boolean option to enable the Statistical
+    Profiling Extension. This is an optional architectural feature of AArch64,
+    available from ARMv8.2 onwards. This option defaults to 1 but is
+    automatically disabled when the target architecture is AArch32 or
+    ARMv8.0/ARMv8.1.
+
 #### ARM development platform specific build options
 
 *   `ARM_BL31_IN_DRAM`: Boolean option to select loading of BL31 in TZC secured
diff --git a/include/bl32/sp_min/platform_sp_min.h b/include/bl32/sp_min/platform_sp_min.h
index 5b4a5c3..70c5c14 100644
--- a/include/bl32/sp_min/platform_sp_min.h
+++ b/include/bl32/sp_min/platform_sp_min.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -12,8 +12,9 @@
  ******************************************************************************/
 void sp_min_early_platform_setup(void *from_bl2,
 		void *plat_params_from_bl2);
-void sp_min_plat_arch_setup(void);
 void sp_min_platform_setup(void);
+void sp_min_plat_runtime_setup(void);
+void sp_min_plat_arch_setup(void);
 entry_point_info_t *sp_min_plat_get_bl33_ep_info(void);
 
 #endif /* __PLATFORM_SP_MIN_H__ */
diff --git a/include/common/aarch32/el3_common_macros.S b/include/common/aarch32/el3_common_macros.S
index e1261ea..6fc00dd 100644
--- a/include/common/aarch32/el3_common_macros.S
+++ b/include/common/aarch32/el3_common_macros.S
@@ -16,10 +16,18 @@
 	 */
 	.macro el3_arch_init_common _exception_vectors
 	/* ---------------------------------------------------------------------
-	 * Enable the instruction cache and alignment checks
+	 * SCTLR has already been initialised - read current value before
+	 * modifying.
+	 *
+	 * SCTLR.I: Enable the instruction cache.
+	 *
+	 * SCTLR.A: Enable Alignment fault checking. All instructions that load
+	 *  or store one or more registers have an alignment check that the
+	 *  address being accessed is aligned to the size of the data element(s)
+	 *  being accessed.
 	 * ---------------------------------------------------------------------
 	 */
-	ldr	r1, =(SCTLR_RES1 | SCTLR_I_BIT | SCTLR_A_BIT)
+	ldr	r1, =(SCTLR_I_BIT | SCTLR_A_BIT)
 	ldcopr	r0, SCTLR
 	orr	r0, r0, r1
 	stcopr	r0, SCTLR
@@ -34,13 +42,14 @@
 	stcopr	r0, MVBAR
 	isb
 
-	/* -----------------------------------------------------
-	 * Enable the SIF bit to disable instruction fetches
-	 * from Non-secure memory.
-	 * -----------------------------------------------------
+	/* ---------------------------------------------------------------------
+	 * Initialise SCR, setting all fields rather than relying on the hw.
+	 *
+	 * SCR.SIF: Enabled so that Secure state instruction fetches from
+	 *  Non-secure memory are not permitted.
+	 * ---------------------------------------------------------------------
 	 */
-	ldcopr	r0, SCR
-	orr	r0, r0, #SCR_SIF_BIT
+	ldr	r0, =(SCR_RESET_VAL | SCR_SIF_BIT)
 	stcopr	r0, SCR
 
 	/* -----------------------------------------------------
@@ -51,32 +60,61 @@
 	cpsie   a
 	isb
 
-	/* Enable access to Advanced SIMD registers */
+	/* ---------------------------------------------------------------------
+	 * Initialise NSACR, setting all the fields, except for the
+	 * IMPLEMENTATION DEFINED field, rather than relying on the hw. Some
+	 * fields are architecturally UNKNOWN on reset.
+	 *
+	 * NSACR_ENABLE_FP_ACCESS: Represents NSACR.cp11 and NSACR.cp10. The
+	 *  cp11 field is ignored, but is set to the same value as cp10. The cp10
+	 *  field is set to allow access to Advanced SIMD and floating point
+	 *  features from both Security states.
+	 * ---------------------------------------------------------------------
+	 */
 	ldcopr	r0, NSACR
-	bic	r0, r0, #NSASEDIS_BIT
-	bic	r0, r0, #NSTRCDIS_BIT
-	orr	r0, r0, #(NASCR_CP10_BIT | NASCR_CP11_BIT)
+	and	r0, r0, #NSACR_IMP_DEF_MASK
+	orr	r0, r0, #(NSACR_RESET_VAL | NSACR_ENABLE_FP_ACCESS)
 	stcopr	r0, NSACR
 	isb
 
-	/*
-	 * Enable access to Advanced SIMD, Floating point and to the Trace
-	 * functionality as well.
+	/* ---------------------------------------------------------------------
+	 * Initialise CPACR, setting all fields rather than relying on hw. Some
+	 * fields are architecturally UNKNOWN on reset.
+	 *
+	 * CPACR.TRCDIS: Trap control for PL0 and PL1 System register accesses
+	 *  to trace registers. Set to zero to allow access.
+	 *
+	 * CPACR_ENABLE_FP_ACCESS: Represents CPACR.cp11 and CPACR.cp10. The
+	 *  cp11 field is ignored, but is set to the same value as cp10. The cp10
+	 *  field is set to allow full access from PL0 and PL1 to floating-point
+	 *  and Advanced SIMD features.
+	 * ---------------------------------------------------------------------
 	 */
-	ldcopr	r0, CPACR
-	bic	r0, r0, #ASEDIS_BIT
-	bic	r0, r0, #TRCDIS_BIT
-	orr	r0, r0, #CPACR_ENABLE_FP_ACCESS
+	ldr	r0, =((CPACR_RESET_VAL | CPACR_ENABLE_FP_ACCESS) & ~(TRCDIS_BIT))
 	stcopr	r0, CPACR
 	isb
 
-	vmrs	r0, FPEXC
-	orr	r0, r0, #FPEXC_EN_BIT
+	/* ---------------------------------------------------------------------
+	 * Initialise FPEXC, setting all fields rather than relying on hw. Some
+	 * fields are architecturally UNKNOWN on reset and are set to zero
+	 * except for field(s) listed below.
+	 *
+	 * FPEXC.EN: Enable access to Advanced SIMD and floating point features
+	 *  from all exception levels.
+	 * ---------------------------------------------------------------------
+	 */
+	ldr	r0, =(FPEXC_RESET_VAL | FPEXC_EN_BIT)
 	vmsr	FPEXC, r0
 	isb
 
-	/* Disable secure self-hosted invasive debug. */
-	ldr	r0, =SDCR_DEF_VAL
+	/* ---------------------------------------------------------------------
+	 * Initialise SDCR, setting all the fields rather than relying on hw.
+	 *
+	 * SDCR.SPD: Disable AArch32 Secure self-hosted privileged debug.
+	 *  Debug exceptions from Secure EL1 are disabled.
+	 * ---------------------------------------------------------------------
+	 */
+	ldr	r0, =(SDCR_RESET_VAL | SDCR_SPD(SDCR_SPD_DISABLE))
 	stcopr	r0, SDCR
 
 	.endm
@@ -91,8 +129,9 @@
  * why this macro is parameterised ; each parameter allows to enable/disable
  * some actions.
  *
- *  _set_endian:
- *	Whether the macro needs to configure the endianness of data accesses.
+ *  _init_sctlr:
+ *	Whether the macro needs to initialise the SCTLR register including
+ *	configuring the endianness of data accesses.
  *
  *  _warm_boot_mailbox:
  *	Whether the macro needs to detect the type of boot (cold/warm). The
@@ -120,7 +159,7 @@
  * -----------------------------------------------------------------------------
  */
 	.macro el3_entrypoint_common					\
-		_set_endian, _warm_boot_mailbox, _secondary_cold_boot,	\
+		_init_sctlr, _warm_boot_mailbox, _secondary_cold_boot,	\
 		_init_memory, _init_c_runtime, _exception_vectors
 
 	/* Make sure we are in Secure Mode */
@@ -130,17 +169,27 @@
 	ASM_ASSERT(eq)
 #endif
 
-	.if \_set_endian
+	.if \_init_sctlr
 		/* -------------------------------------------------------------
-		 * Set the CPU endianness before doing anything that might
-		 * involve memory reads or writes.
+		 * This is the initialisation of SCTLR and so must ensure that
+		 * all fields are explicitly set rather than relying on hw. Some
+		 * fields reset to an IMPLEMENTATION DEFINED value.
+		 *
+		 * SCTLR.TE: Set to zero so that exceptions to an Exception
+		 *  Level executing at PL1 are taken to A32 state.
+		 *
+		 * SCTLR.EE: Set the CPU endianness before doing anything that
+		 *  might involve memory reads or writes. Set to zero to select
+		 *  Little Endian.
+		 *
+		 * SCTLR.V: Set to zero to select the normal exception vectors
+		 *  with base address held in VBAR.
 		 * -------------------------------------------------------------
 		 */
-		ldcopr	r0, SCTLR
-		bic	r0, r0, #SCTLR_EE_BIT
+		ldr     r0, =(SCTLR_RESET_VAL & ~(SCTLR_TE_BIT | SCTLR_EE_BIT | SCTLR_V_BIT))
 		stcopr	r0, SCTLR
 		isb
-	.endif /* _set_endian */
+	.endif /* _init_sctlr */
 
 	/* Switch to monitor mode */
 	cps	#MODE32_mon
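
The recurring pattern in this hunk is the move away from read-modify-write on
registers whose reset state is partly UNKNOWN or IMPLEMENTATION DEFINED:
initialisation now starts from an architecturally defined reset value and
clears or sets named fields. A sketch of the cold-boot SCTLR computation in C,
using the bit positions from include/lib/aarch32/arch.h (the helper name is
illustrative):

    #include <stdint.h>

    #define SCTLR_V_BIT	(1U << 13)
    #define SCTLR_EE_BIT	(1U << 25)
    #define SCTLR_TE_BIT	(1U << 30)

    /* Derive the cold-boot SCTLR from the reset value (SCTLR_RES1 plus the
     * nTWE/nTWI/CP15BEN bits in this patch), clearing TE (A32 exception
     * vectors), EE (little-endian data) and V (vectors at VBAR). */
    static inline uint32_t sctlr_cold_boot_value(uint32_t sctlr_reset_val)
    {
    	return sctlr_reset_val &
    		~(SCTLR_TE_BIT | SCTLR_EE_BIT | SCTLR_V_BIT);
    }
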
diff --git a/include/common/aarch64/el3_common_macros.S b/include/common/aarch64/el3_common_macros.S
index 674d52f..34fdaee 100644
--- a/include/common/aarch64/el3_common_macros.S
+++ b/include/common/aarch64/el3_common_macros.S
@@ -15,8 +15,20 @@
 	 */
 	.macro el3_arch_init_common _exception_vectors
 	/* ---------------------------------------------------------------------
-	 * Enable the instruction cache, stack pointer and data access alignment
-	 * checks
+	 * SCTLR_EL3 has already been initialised - read current value before
+	 * modifying.
+	 *
+	 * SCTLR_EL3.I: Enable the instruction cache.
+	 *
+	 * SCTLR_EL3.SA: Enable Stack Alignment check. An SP alignment fault
+	 *  exception is generated if a load or store instruction executed at
+	 *  EL3 uses the SP as the base address and the SP is not aligned to a
+	 *  16-byte boundary.
+	 *
+	 * SCTLR_EL3.A: Enable Alignment fault checking. All instructions that
+	 *  load or store one or more registers have an alignment check that the
+	 *  address being accessed is aligned to the size of the data element(s)
+	 *  being accessed.
 	 * ---------------------------------------------------------------------
 	 */
 	mov	x1, #(SCTLR_I_BIT | SCTLR_A_BIT | SCTLR_SA_BIT)
@@ -46,19 +58,73 @@
 	isb
 
 	/* ---------------------------------------------------------------------
-	 * Early set RES1 bits in SCR_EL3. Set EA bit to catch both
-	 * External Aborts and SError Interrupts in EL3 and also the SIF bit
-	 * to disable instruction fetches from Non-secure memory.
+	 * Initialise SCR_EL3, setting all fields rather than relying on hw.
+	 * All fields are architecturally UNKNOWN on reset. The following fields
+	 * do not change during the TF lifetime. The remaining fields are set to
+	 * zero here but are updated ahead of transitioning to a lower EL in the
+	 * function cm_init_context_common().
+	 *
+	 * SCR_EL3.TWE: Set to zero so that execution of WFE instructions at
+	 *  EL2, EL1 and EL0 are not trapped to EL3.
+	 *
+	 * SCR_EL3.TWI: Set to zero so that execution of WFI instructions at
+	 *  EL2, EL1 and EL0 are not trapped to EL3.
+	 *
+	 * SCR_EL3.SIF: Set to one to disable instruction fetches from
+	 *  Non-secure memory.
+	 *
+	 * SCR_EL3.SMD: Set to zero to enable SMC calls at EL1 and above, from
+	 *  both Security states and both Execution states.
+	 *
+	 * SCR_EL3.EA: Set to one to route External Aborts and SError Interrupts
+	 *  to EL3 when executing at any EL.
 	 * ---------------------------------------------------------------------
 	 */
-	mov	x0, #(SCR_RES1_BITS | SCR_EA_BIT | SCR_SIF_BIT)
+	mov	x0, #((SCR_RESET_VAL | SCR_EA_BIT | SCR_SIF_BIT) \
+			& ~(SCR_TWE_BIT | SCR_TWI_BIT | SCR_SMD_BIT))
 	msr	scr_el3, x0
 
 	/* ---------------------------------------------------------------------
-	 * Disable secure self-hosted invasive debug.
+	 * Initialise MDCR_EL3, setting all fields rather than relying on hw.
+	 * Some fields are architecturally UNKNOWN on reset.
+	 *
+	 * MDCR_EL3.SDD: Set to one to disable AArch64 Secure self-hosted debug.
+	 *  Debug exceptions, other than Breakpoint Instruction exceptions, are
+	 *  disabled from all ELs in Secure state.
+	 *
+	 * MDCR_EL3.SPD32: Set to 0b10 to disable AArch32 Secure self-hosted
+	 *  privileged debug from S-EL1.
+	 *
+	 * MDCR_EL3.NSPB (ARM v8.2): Set to 0b11 so that SPE is enabled in
+	 *  Non-secure state and disabled in Secure state. Accesses to SPE
+	 *  registers at S-EL1 generate trap exceptions to EL3.
+	 *
+	 * MDCR_EL3.TDOSA: Set to zero so that EL1 and EL2 System register
+	 *  accesses to the powerdown debug registers do not trap to EL3.
+	 *
+	 * MDCR_EL3.TDA: Set to zero to allow EL0, EL1 and EL2 access to the
+	 *  debug registers, other than those registers that are controlled by
+	 *  MDCR_EL3.TDOSA.
+	 *
+	 * MDCR_EL3.TPM: Set to zero so that EL0, EL1, and EL2 System register
+	 *  accesses to all Performance Monitors registers do not trap to EL3.
 	 * ---------------------------------------------------------------------
 	 */
-	mov_imm	x0, MDCR_DEF_VAL
+	mov_imm	x0, ((MDCR_EL3_RESET_VAL | MDCR_SDD_BIT | MDCR_SPD32(MDCR_SPD32_DISABLE)) \
+			& ~(MDCR_TDOSA_BIT | MDCR_TDA_BIT | MDCR_TPM_BIT))
+
+#if ENABLE_SPE_FOR_LOWER_ELS
+	/* Detect if SPE is implemented */
+	mrs	x1, id_aa64dfr0_el1
+	ubfx	x1, x1, #ID_AA64DFR0_PMS_SHIFT, #ID_AA64DFR0_PMS_LENGTH
+	cmp	x1, #0x1
+	b.ne	1f
+
+	/* Enable SPE for use by normal world */
+	orr	x0, x0, #MDCR_NSPB(MDCR_NSPB_EL1)
+1:
+#endif
+
 	msr	mdcr_el3, x0
 
 	/* ---------------------------------------------------------------------
@@ -69,28 +135,20 @@
 	msr	daifclr, #DAIF_ABT_BIT
 
 	/* ---------------------------------------------------------------------
-	 * The initial state of the Architectural feature trap register
-	 * (CPTR_EL3) is unknown and it must be set to a known state. All
-	 * feature traps are disabled. Some bits in this register are marked as
-	 * reserved and should not be modified.
+	 * Initialise CPTR_EL3, setting all fields rather than relying on hw.
+	 * All fields are architecturally UNKNOWN on reset.
 	 *
-	 * CPTR_EL3.TCPAC: This causes a direct access to the CPACR_EL1 from EL1
-	 *  or the CPTR_EL2 from EL2 to trap to EL3 unless it is trapped at EL2.
+	 * CPTR_EL3.TCPAC: Set to zero so that any accesses to CPACR_EL1,
+	 *  CPTR_EL2, CPACR, or HCPTR do not trap to EL3.
 	 *
-	 * CPTR_EL3.TTA: This causes access to the Trace functionality to trap
-	 *  to EL3 when executed from EL0, EL1, EL2, or EL3. If system register
-	 *  access to trace functionality is not supported, this bit is RES0.
+	 * CPTR_EL3.TTA: Set to zero so that System register accesses to the
+	 *  trace registers do not trap to EL3.
 	 *
-	 * CPTR_EL3.TFP: This causes instructions that access the registers
-	 *  associated with Floating Point and Advanced SIMD execution to trap
-	 *  to EL3 when executed from any exception level, unless trapped to EL1
-	 *  or EL2.
+	 * CPTR_EL3.TFP: Set to zero so that accesses to Advanced SIMD and
+	 *  floating-point functionality do not trap to EL3.
 	 * ---------------------------------------------------------------------
 	 */
-	mrs	x0, cptr_el3
-	bic	w0, w0, #TCPAC_BIT
-	bic	w0, w0, #TTA_BIT
-	bic	w0, w0, #TFP_BIT
+	mov_imm x0, (CPTR_EL3_RESET_VAL & ~(TCPAC_BIT | TTA_BIT | TFP_BIT))
 	msr	cptr_el3, x0
 	.endm
 
@@ -104,8 +162,9 @@
  * why this macro is parameterised ; each parameter allows to enable/disable
  * some actions.
  *
- *  _set_endian:
- *	Whether the macro needs to configure the endianness of data accesses.
+ *  _init_sctlr:
+ *	Whether the macro needs to initialise SCTLR_EL3, including configuring
+ *	the endianness of data accesses.
  *
  *  _warm_boot_mailbox:
  *	Whether the macro needs to detect the type of boot (cold/warm). The
@@ -133,20 +192,35 @@
  * -----------------------------------------------------------------------------
  */
 	.macro el3_entrypoint_common					\
-		_set_endian, _warm_boot_mailbox, _secondary_cold_boot,	\
+		_init_sctlr, _warm_boot_mailbox, _secondary_cold_boot,	\
 		_init_memory, _init_c_runtime, _exception_vectors
 
-	.if \_set_endian
+	.if \_init_sctlr
 		/* -------------------------------------------------------------
-		 * Set the CPU endianness before doing anything that might
-		 * involve memory reads or writes.
+		 * This is the initialisation of SCTLR_EL3 and so must ensure
+		 * that all fields are explicitly set rather than relying on hw.
+		 * Some fields reset to an IMPLEMENTATION DEFINED value and
+		 * others are architecturally UNKNOWN on reset.
+		 *
+		 * SCTLR.EE: Set the CPU endianness before doing anything that
+		 *  might involve memory reads or writes. Set to zero to select
+		 *  Little Endian.
+		 *
+		 * SCTLR_EL3.WXN: For the EL3 translation regime, this field can
+		 *  force all memory regions that are writeable to be treated as
+		 *  XN (Execute-never). Set to zero so that this control has no
+		 *  effect on memory access permissions.
+		 *
+		 * SCTLR_EL3.SA: Set to zero to disable Stack Alignment check.
+		 *
+		 * SCTLR_EL3.A: Set to zero to disable Alignment fault checking.
 		 * -------------------------------------------------------------
 		 */
-		mrs	x0, sctlr_el3
-		bic	x0, x0, #SCTLR_EE_BIT
+		mov_imm	x0, (SCTLR_RESET_VAL & ~(SCTLR_EE_BIT | SCTLR_WXN_BIT \
+				| SCTLR_SA_BIT | SCTLR_A_BIT))
 		msr	sctlr_el3, x0
 		isb
-	.endif /* _set_endian */
+	.endif /* _init_sctlr */
 
 	.if \_warm_boot_mailbox
 		/* -------------------------------------------------------------
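
The `ubfx`/`cmp` pair in the SPE probe above extracts ID_AA64DFR0_EL1.PMS
(bits [35:32]) and compares it against 1. The same check expressed in C,
using the field definitions this patch adds (the helper name is illustrative):

    #include <stdint.h>

    #define ID_AA64DFR0_PMS_SHIFT	32U
    #define ID_AA64DFR0_PMS_MASK	0xfULL

    /* PMS == 1 indicates that the Statistical Profiling Extension is
     * implemented; 0 indicates that it is not. */
    static inline int spe_implemented(uint64_t id_aa64dfr0)
    {
    	return ((id_aa64dfr0 >> ID_AA64DFR0_PMS_SHIFT) &
    		ID_AA64DFR0_PMS_MASK) == 1ULL;
    }
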
diff --git a/include/common/ep_info.h b/include/common/ep_info.h
index 23d27c4..3f6213f 100644
--- a/include/common/ep_info.h
+++ b/include/common/ep_info.h
@@ -33,6 +33,7 @@
 			((x) = ((x) & ~PARAM_EP_SECURITY_MASK) | (security))
 
 #define EP_EE_MASK	U(0x2)
+#define EP_EE_SHIFT	1
 #define EP_EE_LITTLE	U(0x0)
 #define EP_EE_BIG	U(0x2)
 #define EP_GET_EE(x) (x & EP_EE_MASK)
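
`EP_EE_SHIFT` allows the entrypoint's endianness attribute (bit 1 of `h.attr`)
to be compared against a single-bit register field, as the new assert in the
AArch32 context code does. A sketch of that consistency check, assuming the
usual AArch32 CPSR/SPSR.E position of bit 9 (the helper name is illustrative):

    #include <stdint.h>

    #define EP_EE_MASK	0x2U
    #define EP_EE_SHIFT	1
    #define EP_GET_EE(x)	((x) & EP_EE_MASK)
    #define SPSR_E_SHIFT	9U	/* CPSR/SPSR.E bit in AArch32 */
    #define SPSR_E_MASK	0x1U

    /* The endianness requested via the entrypoint attributes must agree
     * with the endianness encoded in the entrypoint SPSR. */
    static inline int ep_ee_matches_spsr(uint32_t attr, uint32_t spsr)
    {
    	return ((spsr >> SPSR_E_SHIFT) & SPSR_E_MASK) ==
    		((uint32_t)EP_GET_EE(attr) >> EP_EE_SHIFT);
    }
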
diff --git a/include/lib/aarch32/arch.h b/include/lib/aarch32/arch.h
index d70e4c7..661dbf8 100644
--- a/include/lib/aarch32/arch.h
+++ b/include/lib/aarch32/arch.h
@@ -101,14 +101,19 @@
 #define SCTLR_TRE_BIT		(1 << 28)
 #define SCTLR_AFE_BIT		(1 << 29)
 #define SCTLR_TE_BIT		(1 << 30)
+#define SCTLR_RESET_VAL		(SCTLR_RES1 | SCTLR_NTWE_BIT |		\
+				SCTLR_NTWI_BIT | SCTLR_CP15BEN_BIT)
 
 /* SDCR definitions */
 #define SDCR_SPD(x)		((x) << 14)
 #define SDCR_SPD_LEGACY		0x0
 #define SDCR_SPD_DISABLE	0x2
 #define SDCR_SPD_ENABLE		0x3
+#define SDCR_RESET_VAL		0x0
 
+#if !ERROR_DEPRECATED
 #define SDCR_DEF_VAL		SDCR_SPD(SDCR_SPD_DISABLE)
+#endif
 
 /* HSCTLR definitions */
 #define HSCTLR_RES1 	((1 << 29) | (1 << 28) | (1 << 23) | (1 << 22)	\
@@ -145,6 +150,7 @@
 #define SCR_IRQ_BIT		(1 << 1)
 #define SCR_NS_BIT		(1 << 0)
 #define SCR_VALID_BIT_MASK	0x33ff
+#define SCR_RESET_VAL		0x0
 
 #define GET_NS_BIT(scr)		((scr) & SCR_NS_BIT)
 
@@ -152,9 +158,10 @@
 #define HCR_AMO_BIT		(1 << 5)
 #define HCR_IMO_BIT		(1 << 4)
 #define HCR_FMO_BIT		(1 << 3)
+#define HCR_RESET_VAL		0x0
 
 /* CNTHCTL definitions */
-#define EVNTEN_BIT		(1 << 2)
+#define CNTHCTL_RESET_VAL	0x0
 #define PL1PCEN_BIT		(1 << 1)
 #define PL1PCTEN_BIT		(1 << 0)
 
@@ -169,16 +176,42 @@
 #define EVNTI_MASK		0xf
 
 /* HCPTR definitions */
+#define HCPTR_RES1		((1 << 13) | (1<<12) | 0x3ff)
 #define TCPAC_BIT		(1 << 31)
 #define TTA_BIT			(1 << 20)
 #define TCP11_BIT		(1 << 10)
 #define TCP10_BIT		(1 << 10)
+#define HCPTR_RESET_VAL		HCPTR_RES1
+
+/* VTTBR definitions */
+#define VTTBR_RESET_VAL		ULL(0x0)
+#define VTTBR_VMID_MASK		ULL(0xff)
+#define VTTBR_VMID_SHIFT	48
+#define VTTBR_BADDR_MASK	0xffffffffffff
+#define VTTBR_BADDR_SHIFT	0
+
+/* HDCR definitions */
+#define HDCR_RESET_VAL		0x0
+
+/* HSTR definitions */
+#define HSTR_RESET_VAL		0x0
+
+/* CNTHP_CTL definitions */
+#define CNTHP_CTL_RESET_VAL	0x0
 
 /* NASCR definitions */
 #define NSASEDIS_BIT		(1 << 15)
 #define NSTRCDIS_BIT		(1 << 20)
+/*
+ * NOTE: the NASCR_* names below contain a historical typo (NASCR instead
+ * of NSACR); the corrected NSACR_* definitions follow.
+ */
+#if !ERROR_DEPRECATED
 #define NASCR_CP11_BIT		(1 << 11)
 #define NASCR_CP10_BIT		(1 << 10)
+#endif
+#define NSACR_CP11_BIT		(1 << 11)
+#define NSACR_CP10_BIT		(1 << 10)
+#define NSACR_IMP_DEF_MASK	(0x7 << 16)
+#define NSACR_ENABLE_FP_ACCESS	(NSACR_CP11_BIT | NSACR_CP10_BIT)
+#define NSACR_RESET_VAL		0x0
 
 /* CPACR definitions */
 #define ASEDIS_BIT		(1 << 31)
@@ -187,9 +220,12 @@
 #define CPACR_CP10_SHIFT	20
 #define CPACR_ENABLE_FP_ACCESS	(0x3 << CPACR_CP11_SHIFT |\
 					0x3 << CPACR_CP10_SHIFT)
+#define CPACR_RESET_VAL		0x0
 
 /* FPEXC definitions */
+#define FPEXC_RES1		((1 << 10) | (1 << 9) | (1 << 8))
 #define FPEXC_EN_BIT		(1 << 30)
+#define FPEXC_RESET_VAL		FPEXC_RES1
 
 /* SPSR/CPSR definitions */
 #define SPSR_FIQ_BIT		(1 << 0)
@@ -369,6 +405,7 @@
 #define HSCTLR		p15, 4, c1, c0, 0
 #define HCR		p15, 4, c1, c1, 0
 #define HCPTR		p15, 4, c1, c1, 2
+#define HSTR		p15, 4, c1, c1, 3
 #define CNTHCTL		p15, 4, c14, c1, 0
 #define CNTKCTL		p15, 0, c14, c1, 0
 #define VPIDR		p15, 4, c0, c0, 0
diff --git a/include/lib/aarch32/arch_helpers.h b/include/lib/aarch32/arch_helpers.h
index e652a59..5d31836 100644
--- a/include/lib/aarch32/arch_helpers.h
+++ b/include/lib/aarch32/arch_helpers.h
@@ -100,15 +100,30 @@
  * Macros to create inline functions for tlbi operations
  *********************************************************************/
 
+#if ERRATA_A57_813419
+/*
+ * Define function for TLBI instruction with type specifier that
+ * implements the workaround for errata 813419 of Cortex-A57
+ */
 #define _DEFINE_TLBIOP_FUNC(_op, coproc, opc1, CRn, CRm, opc2)		\
 static inline void tlbi##_op(void)					\
 {									\
 	u_register_t v = 0;						\
 	__asm__ volatile ("mcr "#coproc","#opc1",%0,"#CRn","#CRm","#opc2 : : "r" (v));\
+	__asm__ volatile ("dsb ish");\
+	__asm__ volatile ("mcr "#coproc","#opc1",%0,"#CRn","#CRm","#opc2 : : "r" (v));\
 }
 
-#define _DEFINE_BPIOP_FUNC(_op, coproc, opc1, CRn, CRm, opc2)		\
-static inline void bpi##_op(void)					\
+#define _DEFINE_TLBIOP_PARAM_FUNC(_op, coproc, opc1, CRn, CRm, opc2)	\
+static inline void tlbi##_op(u_register_t v)				\
+{									\
+	__asm__ volatile ("mcr "#coproc","#opc1",%0,"#CRn","#CRm","#opc2 : : "r" (v));\
+	__asm__ volatile ("dsb ish");\
+	__asm__ volatile ("mcr "#coproc","#opc1",%0,"#CRn","#CRm","#opc2 : : "r" (v));\
+}
+#else
+#define _DEFINE_TLBIOP_FUNC(_op, coproc, opc1, CRn, CRm, opc2)		\
+static inline void tlbi##_op(void)					\
 {									\
 	u_register_t v = 0;						\
 	__asm__ volatile ("mcr "#coproc","#opc1",%0,"#CRn","#CRm","#opc2 : : "r" (v));\
@@ -119,6 +134,14 @@
 {									\
 	__asm__ volatile ("mcr "#coproc","#opc1",%0,"#CRn","#CRm","#opc2 : : "r" (v));\
 }
+#endif /* ERRATA_A57_813419 */
+
+#define _DEFINE_BPIOP_FUNC(_op, coproc, opc1, CRn, CRm, opc2)		\
+static inline void bpi##_op(void)					\
+{									\
+	u_register_t v = 0;						\
+	__asm__ volatile ("mcr "#coproc","#opc1",%0,"#CRn","#CRm","#opc2 : : "r" (v));\
+}
 
 /* Define function for simple TLBI operation */
 #define DEFINE_TLBIOP_FUNC(_op, ...)					\
@@ -228,6 +251,7 @@
 DEFINE_COPROCR_RW_FUNCS_64(ttbr1, TTBR1_64)
 DEFINE_COPROCR_RW_FUNCS_64(cntvoff, CNTVOFF_64)
 DEFINE_COPROCR_RW_FUNCS(csselr, CSSELR)
+DEFINE_COPROCR_RW_FUNCS(hstr, HSTR)
 
 DEFINE_COPROCR_RW_FUNCS(icc_sre_el1, ICC_SRE)
 DEFINE_COPROCR_RW_FUNCS(icc_sre_el2, ICC_HSRE)
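
With `ERRATA_A57_813419` enabled, every generated TLBI helper issues the
maintenance operation twice with a `DSB ISH` in between. As a sketch,
`DEFINE_TLBIOP_FUNC(all, TLBIALL)` (where `TLBIALL` encodes as
`p15, 0, c8, c7, 0`) expands to roughly the following; the typedef stands in
for TF's own `u_register_t`:

    #include <stdint.h>

    typedef uintptr_t u_register_t;	/* stand-in for TF's typedef */

    static inline void tlbiall(void)
    {
    	u_register_t v = 0;

    	/* TLBI, barrier, then the repeated TLBI per erratum 813419. */
    	__asm__ volatile ("mcr p15, 0, %0, c8, c7, 0" : : "r" (v));
    	__asm__ volatile ("dsb ish");
    	__asm__ volatile ("mcr p15, 0, %0, c8, c7, 0" : : "r" (v));
    }
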
diff --git a/include/lib/aarch64/arch.h b/include/lib/aarch64/arch.h
index e84c888..7bceea7 100644
--- a/include/lib/aarch64/arch.h
+++ b/include/lib/aarch64/arch.h
@@ -110,6 +110,11 @@
 #define ID_AA64PFR0_EL3_SHIFT	U(12)
 #define ID_AA64PFR0_ELX_MASK	U(0xf)
 
+/* ID_AA64DFR0_EL1.PMS definitions (for ARMv8.2+) */
+#define ID_AA64DFR0_PMS_SHIFT	U(32)
+#define ID_AA64DFR0_PMS_LENGTH	U(4)
+#define ID_AA64DFR0_PMS_MASK	U(0xf)
+
 #define EL_IMPL_NONE		U(0)
 #define EL_IMPL_A64ONLY		U(1)
 #define EL_IMPL_A64_A32		U(2)
@@ -135,16 +140,20 @@
 				 & ID_PFR1_VIRTEXT_MASK)
 
 /* SCTLR definitions */
-#define SCTLR_EL2_RES1  ((U(1) << 29) | (U(1) << 28) | (U(1) << 23) | \
+#define SCTLR_EL2_RES1	((U(1) << 29) | (U(1) << 28) | (U(1) << 23) | \
 			 (U(1) << 22) | (U(1) << 18) | (U(1) << 16) | \
 			 (U(1) << 11) | (U(1) << 5) | (U(1) << 4))
 
-#define SCTLR_EL1_RES1  ((U(1) << 29) | (U(1) << 28) | (U(1) << 23) | \
+#define SCTLR_EL1_RES1	((U(1) << 29) | (U(1) << 28) | (U(1) << 23) | \
 			 (U(1) << 22) | (U(1) << 20) | (U(1) << 11))
 #define SCTLR_AARCH32_EL1_RES1 \
 			((U(1) << 23) | (U(1) << 22) | (U(1) << 11) | \
 			 (U(1) << 4) | (U(1) << 3))
 
+#define SCTLR_EL3_RES1	((U(1) << 29) | (U(1) << 28) | (U(1) << 23) | \
+			(U(1) << 22) | (U(1) << 18) | (U(1) << 16) | \
+			(U(1) << 11) | (U(1) << 5) | (U(1) << 4))
+
 #define SCTLR_M_BIT		(U(1) << 0)
 #define SCTLR_A_BIT		(U(1) << 1)
 #define SCTLR_C_BIT		(U(1) << 2)
@@ -155,6 +164,7 @@
 #define SCTLR_NTWE_BIT		(U(1) << 18)
 #define SCTLR_WXN_BIT		(U(1) << 19)
 #define SCTLR_EE_BIT		(U(1) << 25)
+#define SCTLR_RESET_VAL		SCTLR_EL3_RES1
 
 /* CPACR_El1 definitions */
 #define CPACR_EL1_FPEN(x)	((x) << 20)
@@ -176,15 +186,52 @@
 #define SCR_IRQ_BIT		(U(1) << 1)
 #define SCR_NS_BIT		(U(1) << 0)
 #define SCR_VALID_BIT_MASK	U(0x2f8f)
+#define SCR_RESET_VAL		SCR_RES1_BITS
 
-/* MDCR definitions */
+/* MDCR_EL3 definitions */
 #define MDCR_SPD32(x)		((x) << 14)
 #define MDCR_SPD32_LEGACY	U(0x0)
 #define MDCR_SPD32_DISABLE	U(0x2)
 #define MDCR_SPD32_ENABLE	U(0x3)
 #define MDCR_SDD_BIT		(U(1) << 16)
+#define MDCR_NSPB(x)		((x) << 12)
+#define MDCR_NSPB_EL1		U(0x3)
+#define MDCR_TDOSA_BIT		(U(1) << 10)
+#define MDCR_TDA_BIT		(U(1) << 9)
+#define MDCR_TPM_BIT		(U(1) << 6)
+#define MDCR_EL3_RESET_VAL	U(0x0)
 
+#if !ERROR_DEPRECATED
 #define MDCR_DEF_VAL		(MDCR_SDD_BIT | MDCR_SPD32(MDCR_SPD32_DISABLE))
+#endif
+
+/* MDCR_EL2 definitions */
+#define MDCR_EL2_TPMS		(U(1) << 14)
+#define MDCR_EL2_E2PB(x)	((x) << 12)
+#define MDCR_EL2_E2PB_EL1	U(0x3)
+#define MDCR_EL2_TDRA_BIT	(U(1) << 11)
+#define MDCR_EL2_TDOSA_BIT	(U(1) << 10)
+#define MDCR_EL2_TDA_BIT	(U(1) << 9)
+#define MDCR_EL2_TDE_BIT	(U(1) << 8)
+#define MDCR_EL2_HPME_BIT	(U(1) << 7)
+#define MDCR_EL2_TPM_BIT	(U(1) << 6)
+#define MDCR_EL2_TPMCR_BIT	(U(1) << 5)
+#define MDCR_EL2_RESET_VAL	U(0x0)
+
+/* HSTR_EL2 definitions */
+#define HSTR_EL2_RESET_VAL	U(0x0)
+#define HSTR_EL2_T_MASK		U(0xff)
+
+/* CNTHP_CTL_EL2 definitions */
+#define CNTHP_CTL_ENABLE_BIT	(U(1) << 0)
+#define CNTHP_CTL_RESET_VAL	U(0x0)
+
+/* VTTBR_EL2 definitions */
+#define VTTBR_RESET_VAL		ULL(0x0)
+#define VTTBR_VMID_MASK		ULL(0xff)
+#define VTTBR_VMID_SHIFT	U(48)
+#define VTTBR_BADDR_MASK	ULL(0xffffffffffff)
+#define VTTBR_BADDR_SHIFT	U(0)
 
 /* HCR definitions */
 #define HCR_RW_SHIFT		U(31)
@@ -199,6 +246,7 @@
 #define ISR_F_SHIFT		U(6)
 
 /* CNTHCTL_EL2 definitions */
+#define CNTHCTL_RESET_VAL	U(0x0)
 #define EVNTEN_BIT		(U(1) << 2)
 #define EL1PCEN_BIT		(U(1) << 1)
 #define EL1PCTEN_BIT		(U(1) << 0)
@@ -217,6 +265,14 @@
 #define TCPAC_BIT		(U(1) << 31)
 #define TTA_BIT			(U(1) << 20)
 #define TFP_BIT			(U(1) << 10)
+#define CPTR_EL3_RESET_VAL	U(0x0)
+
+/* CPTR_EL2 definitions */
+#define CPTR_EL2_RES1		((U(1) << 13) | (U(1) << 12) | (U(0x3ff)))
+#define CPTR_EL2_TCPAC_BIT	(U(1) << 31)
+#define CPTR_EL2_TTA_BIT	(U(1) << 20)
+#define CPTR_EL2_TFP_BIT	(U(1) << 10)
+#define CPTR_EL2_RESET_VAL	CPTR_EL2_RES1
 
 /* CPSR/SPSR definitions */
 #define DAIF_FIQ_BIT		(U(1) << 0)
diff --git a/include/lib/aarch64/arch_helpers.h b/include/lib/aarch64/arch_helpers.h
index 32290e2..0d0d7d3 100644
--- a/include/lib/aarch64/arch_helpers.h
+++ b/include/lib/aarch64/arch_helpers.h
@@ -184,6 +184,7 @@
 DEFINE_SYSREG_READ_FUNC(par_el1)
 DEFINE_SYSREG_READ_FUNC(id_pfr1_el1)
 DEFINE_SYSREG_READ_FUNC(id_aa64pfr0_el1)
+DEFINE_SYSREG_READ_FUNC(id_aa64dfr0_el1)
 DEFINE_SYSREG_READ_FUNC(CurrentEl)
 DEFINE_SYSREG_RW_FUNCS(daif)
 DEFINE_SYSREG_RW_FUNCS(spsr_el1)
diff --git a/include/lib/cpus/aarch32/cortex_a53.h b/include/lib/cpus/aarch32/cortex_a53.h
index 265cb15..24a9c6c 100644
--- a/include/lib/cpus/aarch32/cortex_a53.h
+++ b/include/lib/cpus/aarch32/cortex_a53.h
@@ -42,6 +42,8 @@
  ******************************************************************************/
 #define CORTEX_A53_ACTLR			p15, 0, c15
 
+#define CORTEX_A53_ACTLR_ENDCCASCI_SHIFT	44
+#define CORTEX_A53_ACTLR_ENDCCASCI		(1 << CORTEX_A53_ACTLR_ENDCCASCI_SHIFT)
 #define CORTEX_A53_ACTLR_DTAH			(1 << 24)
 
 /*******************************************************************************
diff --git a/include/lib/cpus/aarch32/cortex_a57.h b/include/lib/cpus/aarch32/cortex_a57.h
index 1c3fa25..1486b98 100644
--- a/include/lib/cpus/aarch32/cortex_a57.h
+++ b/include/lib/cpus/aarch32/cortex_a57.h
@@ -55,7 +55,7 @@
 /*******************************************************************************
  * L2 Control register specific definitions.
  ******************************************************************************/
-#define CORTEX_A57_L2CTLR			p15, 1, c9, c0, 3
+#define CORTEX_A57_L2CTLR			p15, 1, c9, c0, 2
 
 #define CORTEX_A57_L2CTLR_DATA_RAM_LATENCY_SHIFT 0
 #define CORTEX_A57_L2CTLR_TAG_RAM_LATENCY_SHIFT	6
diff --git a/include/lib/cpus/aarch32/cortex_a72.h b/include/lib/cpus/aarch32/cortex_a72.h
index a550192..59057bc 100644
--- a/include/lib/cpus/aarch32/cortex_a72.h
+++ b/include/lib/cpus/aarch32/cortex_a72.h
@@ -37,7 +37,7 @@
 /*******************************************************************************
  * L2 Control register specific definitions.
  ******************************************************************************/
-#define CORTEX_A72_L2CTLR			p15, 1, c9, c0, 3
+#define CORTEX_A72_L2CTLR			p15, 1, c9, c0, 2
 
 #define CORTEX_A72_L2CTLR_DATA_RAM_LATENCY_SHIFT 0
 #define CORTEX_A72_L2CTLR_TAG_RAM_LATENCY_SHIFT	6
diff --git a/include/lib/el3_runtime/aarch64/context.h b/include/lib/el3_runtime/aarch64/context.h
index dead971..dcbf1c9 100644
--- a/include/lib/el3_runtime/aarch64/context.h
+++ b/include/lib/el3_runtime/aarch64/context.h
@@ -308,6 +308,7 @@
  * Function prototypes
  ******************************************************************************/
 void el1_sysregs_context_save(el1_sys_regs_t *regs);
+void el1_sysregs_context_save_post_ops(void);
 void el1_sysregs_context_restore(el1_sys_regs_t *regs);
 #if CTX_INCLUDE_FPREGS
 void fpregs_context_save(fp_regs_t *regs);
diff --git a/include/plat/arm/board/common/board_css_def.h b/include/plat/arm/board/common/board_css_def.h
index 11c4b17..b0a6baf 100644
--- a/include/plat/arm/board/common/board_css_def.h
+++ b/include/plat/arm/board/common/board_css_def.h
@@ -54,6 +54,9 @@
 #define PLAT_ARM_BL31_RUN_UART_BASE		SOC_CSS_UART1_BASE
 #define PLAT_ARM_BL31_RUN_UART_CLK_IN_HZ	SOC_CSS_UART1_CLK_IN_HZ
 
+#define PLAT_ARM_SP_MIN_RUN_UART_BASE		SOC_CSS_UART1_BASE
+#define PLAT_ARM_SP_MIN_RUN_UART_CLK_IN_HZ	SOC_CSS_UART1_CLK_IN_HZ
+
 #define PLAT_ARM_CRASH_UART_BASE		PLAT_ARM_BL31_RUN_UART_BASE
 #define PLAT_ARM_CRASH_UART_CLK_IN_HZ		PLAT_ARM_BL31_RUN_UART_CLK_IN_HZ
 
diff --git a/include/plat/arm/common/plat_arm.h b/include/plat/arm/common/plat_arm.h
index 62c0ce7..3a73776 100644
--- a/include/plat/arm/common/plat_arm.h
+++ b/include/plat/arm/common/plat_arm.h
@@ -163,6 +163,7 @@
 /* SP_MIN utility functions */
 void arm_sp_min_early_platform_setup(void *from_bl2,
 		void *plat_params_from_bl2);
+void arm_sp_min_plat_runtime_setup(void);
 
 /* FIP TOC validity check */
 int arm_io_is_toc_valid(void);
@@ -218,4 +219,7 @@
 		uint32_t cookie_lo,
 		void *handle);
 
+/* Disable Statistical Profiling Extensions helper */
+void arm_disable_spe(void);
+
 #endif /* __PLAT_ARM_H__ */
diff --git a/lib/cpus/aarch32/cortex_a53.S b/lib/cpus/aarch32/cortex_a53.S
index 3d5f833..bc2c762 100644
--- a/lib/cpus/aarch32/cortex_a53.S
+++ b/lib/cpus/aarch32/cortex_a53.S
@@ -10,6 +10,11 @@
 #include <cpu_macros.S>
 #include <debug.h>
 
+#if A53_DISABLE_NON_TEMPORAL_HINT
+#undef ERRATA_A53_836870
+#define ERRATA_A53_836870	1
+#endif
+
 	/* ---------------------------------------------
 	 * Disable intra-cluster coherency
 	 * ---------------------------------------------
@@ -23,11 +28,133 @@
 	bx	lr
 endfunc cortex_a53_disable_smp
 
+	/* --------------------------------------------------
+	 * Errata Workaround for Cortex A53 Errata #826319.
+	 * This applies only to revision <= r0p2 of Cortex A53.
+	 * Inputs:
+	 * r0: variant[4:7] and revision[0:3] of current cpu.
+	 * Shall clobber: r0-r3
+	 * --------------------------------------------------
+	 */
+func errata_a53_826319_wa
+	/*
+	 * Compare r0 against revision r0p2
+	 */
+	mov	r2, lr
+	bl	check_errata_826319
+	mov	lr, r2
+	cmp	r0, #ERRATA_NOT_APPLIES
+	beq	1f
+	ldcopr	r0, CORTEX_A53_L2ACTLR
+	bic	r0, #CORTEX_A53_L2ACTLR_ENABLE_UNIQUECLEAN
+	orr	r0, #CORTEX_A53_L2ACTLR_DISABLE_CLEAN_PUSH
+	stcopr	r0, CORTEX_A53_L2ACTLR
+1:
+	bx	lr
+endfunc errata_a53_826319_wa
+
+func check_errata_826319
+	mov	r1, #0x02
+	b	cpu_rev_var_ls
+endfunc check_errata_826319
+
+	/* ---------------------------------------------------------------------
+	 * Disable the cache non-temporal hint.
+	 *
+	 * This ignores the Transient allocation hint in the MAIR and treats
+	 * allocations the same as non-transient allocation types. As a result,
+	 * the LDNP and STNP instructions in AArch64 behave the same as the
+	 * equivalent LDP and STP instructions.
+	 *
+	 * This is relevant only for revisions <= r0p3 of Cortex-A53.
+	 * From r0p4 and onwards, the bit to disable the hint is enabled by
+	 * default at reset.
+	 *
+	 * Inputs:
+	 * r0: variant[4:7] and revision[0:3] of current cpu.
+	 * Shall clobber: r0-r3
+	 * ---------------------------------------------------------------------
+	 */
+func a53_disable_non_temporal_hint
+	/*
+	 * Compare r0 against revision r0p3
+	 */
+	mov		r2, lr
+	bl		check_errata_disable_non_temporal_hint
+	mov		lr, r2
+	cmp		r0, #ERRATA_NOT_APPLIES
+	beq		1f
+	ldcopr16	r0, r1, CORTEX_A53_ACTLR
+	orr64_imm	r0, r1, CORTEX_A53_ACTLR_DTAH
+	stcopr16	r0, r1, CORTEX_A53_ACTLR
+1:
+	bx		lr
+endfunc a53_disable_non_temporal_hint
+
+func check_errata_disable_non_temporal_hint
+	mov	r1, #0x03
+	b	cpu_rev_var_ls
+endfunc check_errata_disable_non_temporal_hint
+
+	/* --------------------------------------------------
+	 * Errata Workaround for Cortex A53 Errata #855873.
+	 *
+	 * This applies only to revisions >= r0p3 of Cortex A53.
+	 * Earlier revisions of the core are affected as well, but don't
+	 * have the chicken bit in the CPUACTLR register. It is expected that
+	 * the rich OS takes care of that, especially as the workaround is
+	 * shared with other errata in those revisions of the CPU.
+	 * Inputs:
+	 * r0: variant[4:7] and revision[0:3] of current cpu.
+	 * Shall clobber: r0-r3
+	 * --------------------------------------------------
+	 */
+func errata_a53_855873_wa
+	/*
+	 * Compare r0 against revision r0p3 and higher
+	 */
+	mov		r2, lr
+	bl		check_errata_855873
+	mov		lr, r2
+	cmp		r0, #ERRATA_NOT_APPLIES
+	beq		1f
+	ldcopr16	r0, r1, CORTEX_A53_ACTLR
+	orr64_imm	r0, r1, CORTEX_A53_ACTLR_ENDCCASCI
+	stcopr16	r0, r1, CORTEX_A53_ACTLR
+1:
+	bx		lr
+endfunc errata_a53_855873_wa
+
+func check_errata_855873
+	mov	r1, #0x03
+	b	cpu_rev_var_hs
+endfunc check_errata_855873
+
 	/* -------------------------------------------------
 	 * The CPU Ops reset function for Cortex-A53.
+	 * Shall clobber: r0-r6
 	 * -------------------------------------------------
 	 */
 func cortex_a53_reset_func
+	mov	r5, lr
+	bl	cpu_get_rev_var
+	mov	r4, r0
+
+#if ERRATA_A53_826319
+	mov	r0, r4
+	bl	errata_a53_826319_wa
+#endif
+
+#if ERRATA_A53_836870
+	mov	r0, r4
+	bl	a53_disable_non_temporal_hint
+#endif
+
+#if ERRATA_A53_855873
+	mov	r0, r4
+	bl	errata_a53_855873_wa
+#endif
+
 	/* ---------------------------------------------
 	 * Enable the SMP bit.
 	 * ---------------------------------------------
@@ -36,7 +163,7 @@
 	orr64_imm	r0, r1, CORTEX_A53_ECTLR_SMP_BIT
 	stcopr16	r0, r1,	CORTEX_A53_ECTLR
 	isb
-	bx	lr
+	bx	r5
 endfunc cortex_a53_reset_func
 
 	/* ----------------------------------------------------
@@ -111,6 +238,29 @@
 	b	cortex_a53_disable_smp
 endfunc cortex_a53_cluster_pwr_dwn
 
+#if REPORT_ERRATA
+/*
+ * Errata printing function for Cortex A53. Must follow AAPCS.
+ */
+func cortex_a53_errata_report
+	push	{r12, lr}
+
+	bl	cpu_get_rev_var
+	mov	r4, r0
+
+	/*
+	 * Report all errata. The revision-variant information is passed to
+	 * checking functions of each errata.
+	 */
+	report_errata ERRATA_A53_826319, cortex_a53, 826319
+	report_errata ERRATA_A53_836870, cortex_a53, disable_non_temporal_hint
+	report_errata ERRATA_A53_855873, cortex_a53, 855873
+
+	pop	{r12, lr}
+	bx	lr
+endfunc cortex_a53_errata_report
+#endif
+
 declare_cpu_ops cortex_a53, CORTEX_A53_MIDR, \
 	cortex_a53_reset_func, \
 	cortex_a53_core_pwr_dwn, \
diff --git a/lib/cpus/aarch32/cortex_a57.S b/lib/cpus/aarch32/cortex_a57.S
index ed47846..a791e4e 100644
--- a/lib/cpus/aarch32/cortex_a57.S
+++ b/lib/cpus/aarch32/cortex_a57.S
@@ -50,11 +50,312 @@
 	bx	lr
 endfunc cortex_a57_disable_ext_debug
 
+	/* --------------------------------------------------
+	 * Errata Workaround for Cortex A57 Errata #806969.
+	 * This applies only to revision r0p0 of Cortex A57.
+	 * Inputs:
+	 * r0: variant[4:7] and revision[0:3] of current cpu.
+	 * Shall clobber: r0-r3
+	 * --------------------------------------------------
+	 */
+func errata_a57_806969_wa
+	/*
+	 * Compare r0 against revision r0p0
+	 */
+	mov		r2, lr
+	bl		check_errata_806969
+	mov		lr, r2
+	cmp		r0, #ERRATA_NOT_APPLIES
+	beq		1f
+	ldcopr16	r0, r1, CORTEX_A57_ACTLR
+	orr64_imm	r0, r1, CORTEX_A57_ACTLR_NO_ALLOC_WBWA
+	stcopr16	r0, r1, CORTEX_A57_ACTLR
+1:
+	bx	lr
+endfunc errata_a57_806969_wa
+
+func check_errata_806969
+	mov	r1, #0x00
+	b	cpu_rev_var_ls
+endfunc check_errata_806969
+
+	/* ---------------------------------------------------
+	 * Errata Workaround for Cortex A57 Errata #813419.
+	 * This applies only to revision r0p0 of Cortex A57.
+	 * ---------------------------------------------------
+	 */
+func check_errata_813419
+	/*
+	 * Even though this is only needed for revision r0p0, it
+	 * is always applied due to limitations of the current
+	 * errata framework.
+	 */
+	mov	r0, #ERRATA_APPLIES
+	bx	lr
+endfunc check_errata_813419
+
+	/* ---------------------------------------------------
+	 * Errata Workaround for Cortex A57 Errata #813420.
+	 * This applies only to revision r0p0 of Cortex A57.
+	 * Inputs:
+	 * r0: variant[4:7] and revision[0:3] of current cpu.
+	 * Shall clobber: r0-r3
+	 * ---------------------------------------------------
+	 */
+func errata_a57_813420_wa
+	/*
+	 * Compare r0 against revision r0p0
+	 */
+	mov		r2, lr
+	bl		check_errata_813420
+	mov		lr, r2
+	cmp		r0, #ERRATA_NOT_APPLIES
+	beq		1f
+	ldcopr16	r0, r1, CORTEX_A57_ACTLR
+	orr64_imm	r0, r1, CORTEX_A57_ACTLR_DCC_AS_DCCI
+	stcopr16	r0, r1, CORTEX_A57_ACTLR
+1:
+	bx		lr
+endfunc errata_a57_813420_wa
+
+func check_errata_813420
+	mov	r1, #0x00
+	b	cpu_rev_var_ls
+endfunc check_errata_813420
+
+	/* --------------------------------------------------------------------
+	 * Disable the over-read from the LDNP instruction.
+	 *
+	 * This applies to all revisions <= r1p2. The performance degradation
+	 * observed with LDNP/STNP has been fixed on r1p3 and onwards.
+	 *
+	 * Inputs:
+	 * r0: variant[4:7] and revision[0:3] of current cpu.
+	 * Shall clobber: r0-r3
+	 * ---------------------------------------------------------------------
+	 */
+func a57_disable_ldnp_overread
+	/*
+	 * Compare r0 against revision r1p2
+	 */
+	mov		r2, lr
+	bl		check_errata_disable_ldnp_overread
+	mov		lr, r2
+	cmp		r0, #ERRATA_NOT_APPLIES
+	beq		1f
+	ldcopr16	r0, r1, CORTEX_A57_ACTLR
+	orr64_imm	r0, r1, CORTEX_A57_ACTLR_DIS_OVERREAD
+	stcopr16	r0, r1, CORTEX_A57_ACTLR
+1:
+	bx		lr
+endfunc a57_disable_ldnp_overread
+
+func check_errata_disable_ldnp_overread
+	mov	r1, #0x12
+	b	cpu_rev_var_ls
+endfunc check_errata_disable_ldnp_overread
+
+	/* ---------------------------------------------------
+	 * Errata Workaround for Cortex A57 Errata #826974.
+	 * This applies only to revision <= r1p1 of Cortex A57.
+	 * Inputs:
+	 * r0: variant[4:7] and revision[0:3] of current cpu.
+	 * Shall clobber: r0-r3
+	 * ---------------------------------------------------
+	 */
+func errata_a57_826974_wa
+	/*
+	 * Compare r0 against revision r1p1
+	 */
+	mov		r2, lr
+	bl		check_errata_826974
+	mov		lr, r2
+	cmp		r0, #ERRATA_NOT_APPLIES
+	beq		1f
+	ldcopr16	r0, r1, CORTEX_A57_ACTLR
+	orr64_imm	r0, r1, CORTEX_A57_ACTLR_DIS_LOAD_PASS_DMB
+	stcopr16	r0, r1, CORTEX_A57_ACTLR
+1:
+	bx		lr
+endfunc errata_a57_826974_wa
+
+func check_errata_826974
+	mov	r1, #0x11
+	b	cpu_rev_var_ls
+endfunc check_errata_826974
+
+	/* ---------------------------------------------------
+	 * Errata Workaround for Cortex A57 Errata #826977.
+	 * This applies only to revision <= r1p1 of Cortex A57.
+	 * Inputs:
+	 * r0: variant[4:7] and revision[0:3] of current cpu.
+	 * Shall clobber: r0-r3
+	 * ---------------------------------------------------
+	 */
+func errata_a57_826977_wa
+	/*
+	 * Compare r0 against revision r1p1
+	 */
+	mov		r2, lr
+	bl		check_errata_826977
+	mov		lr, r2
+	cmp		r0, #ERRATA_NOT_APPLIES
+	beq		1f
+	ldcopr16	r0, r1, CORTEX_A57_ACTLR
+	orr64_imm	r0, r1, CORTEX_A57_ACTLR_GRE_NGRE_AS_NGNRE
+	stcopr16	r0, r1, CORTEX_A57_ACTLR
+1:
+	bx		lr
+endfunc errata_a57_826977_wa
+
+func check_errata_826977
+	mov	r1, #0x11
+	b	cpu_rev_var_ls
+endfunc check_errata_826977
+
+	/* ---------------------------------------------------
+	 * Errata Workaround for Cortex A57 Errata #828024.
+	 * This applies only to revision <= r1p1 of Cortex A57.
+	 * Inputs:
+	 * r0: variant[4:7] and revision[0:3] of current cpu.
+	 * Shall clobber: r0-r3
+	 * ---------------------------------------------------
+	 */
+func errata_a57_828024_wa
+	/*
+	 * Compare r0 against revision r1p1
+	 */
+	mov		r2, lr
+	bl		check_errata_828024
+	mov		lr, r2
+	cmp		r0, #ERRATA_NOT_APPLIES
+	beq		1f
+	ldcopr16	r0, r1, CORTEX_A57_ACTLR
+	/*
+	 * Setting the relevant bits in CORTEX_A57_ACTLR has to be done in 2
+	 * instructions here because the resulting bitmask doesn't fit in a
+	 * 16-bit value so it cannot be encoded in a single instruction.
+	 */
+	orr64_imm	r0, r1, CORTEX_A57_ACTLR_NO_ALLOC_WBWA
+	orr64_imm	r0, r1, (CORTEX_A57_ACTLR_DIS_L1_STREAMING | CORTEX_A57_ACTLR_DIS_STREAMING)
+	stcopr16	r0, r1, CORTEX_A57_ACTLR
+1:
+	bx		lr
+endfunc errata_a57_828024_wa
+
+func check_errata_828024
+	mov	r1, #0x11
+	b	cpu_rev_var_ls
+endfunc check_errata_828024
+
+	/* ---------------------------------------------------
+	 * Errata Workaround for Cortex A57 Errata #829520.
+	 * This applies only to revision <= r1p2 of Cortex A57.
+	 * Inputs:
+	 * r0: variant[4:7] and revision[0:3] of current cpu.
+	 * Shall clobber: r0-r3
+	 * ---------------------------------------------------
+	 */
+func errata_a57_829520_wa
+	/*
+	 * Compare r0 against revision r1p2
+	 */
+	mov		r2, lr
+	bl		check_errata_829520
+	mov		lr, r2
+	cmp		r0, #ERRATA_NOT_APPLIES
+	beq		1f
+	ldcopr16	r0, r1, CORTEX_A57_ACTLR
+	orr64_imm	r0, r1, CORTEX_A57_ACTLR_DIS_INDIRECT_PREDICTOR
+	stcopr16	r0, r1, CORTEX_A57_ACTLR
+1:
+	bx		lr
+endfunc errata_a57_829520_wa
+
+func check_errata_829520
+	mov	r1, #0x12
+	b	cpu_rev_var_ls
+endfunc check_errata_829520
+
+	/* ---------------------------------------------------
+	 * Errata Workaround for Cortex A57 Errata #833471.
+	 * This applies only to revision <= r1p2 of Cortex A57.
+	 * Inputs:
+	 * r0: variant[4:7] and revision[0:3] of current cpu.
+	 * Shall clobber: r0-r3
+	 * ---------------------------------------------------
+	 */
+func errata_a57_833471_wa
+	/*
+	 * Compare r0 against revision r1p2
+	 */
+	mov		r2, lr
+	bl		check_errata_833471
+	mov		lr, r2
+	cmp		r0, #ERRATA_NOT_APPLIES
+	beq		1f
+	ldcopr16	r0, r1, CORTEX_A57_ACTLR
+	orr64_imm	r1, r1, CORTEX_A57_ACTLR_FORCE_FPSCR_FLUSH
+	stcopr16	r0, r1, CORTEX_A57_ACTLR
+1:
+	bx		lr
+endfunc errata_a57_833471_wa
+
+func check_errata_833471
+	mov	r1, #0x12
+	b	cpu_rev_var_ls
+endfunc check_errata_833471
+
 	/* -------------------------------------------------
 	 * The CPU Ops reset function for Cortex-A57.
+	 * Shall clobber: r0-r6
 	 * -------------------------------------------------
 	 */
 func cortex_a57_reset_func
+	mov	r5, lr
+	bl	cpu_get_rev_var
+	mov	r4, r0
+
+#if ERRATA_A57_806969
+	mov	r0, r4
+	bl	errata_a57_806969_wa
+#endif
+
+#if ERRATA_A57_813420
+	mov	r0, r4
+	bl	errata_a57_813420_wa
+#endif
+
+#if A57_DISABLE_NON_TEMPORAL_HINT
+	mov	r0, r4
+	bl	a57_disable_ldnp_overread
+#endif
+
+#if ERRATA_A57_826974
+	mov	r0, r4
+	bl	errata_a57_826974_wa
+#endif
+
+#if ERRATA_A57_826977
+	mov	r0, r4
+	bl	errata_a57_826977_wa
+#endif
+
+#if ERRATA_A57_828024
+	mov	r0, r4
+	bl	errata_a57_828024_wa
+#endif
+
+#if ERRATA_A57_829520
+	mov	r0, r4
+	bl	errata_a57_829520_wa
+#endif
+
+#if ERRATA_A57_833471
+	mov	r0, r4
+	bl	errata_a57_833471_wa
+#endif
+
 	/* ---------------------------------------------
 	 * Enable the SMP bit.
 	 * ---------------------------------------------
@@ -63,7 +364,7 @@
 	orr64_imm	r0, r1, CORTEX_A57_ECTLR_SMP_BIT
 	stcopr16	r0, r1,	CORTEX_A57_ECTLR
 	isb
-	bx	lr
+	bx	r5
 endfunc cortex_a57_reset_func
 
 	/* ----------------------------------------------------
@@ -162,6 +463,36 @@
 	b	cortex_a57_disable_ext_debug
 endfunc cortex_a57_cluster_pwr_dwn
 
+#if REPORT_ERRATA
+/*
+ * Errata printing function for Cortex A57. Must follow AAPCS.
+ */
+func cortex_a57_errata_report
+	push	{r12, lr}
+
+	bl	cpu_get_rev_var
+	mov	r4, r0
+
+	/*
+	 * Report all errata. The revision-variant information is passed to
+	 * checking functions of each errata.
+	 */
+	report_errata ERRATA_A57_806969, cortex_a57, 806969
+	report_errata ERRATA_A57_813419, cortex_a57, 813419
+	report_errata ERRATA_A57_813420, cortex_a57, 813420
+	report_errata A57_DISABLE_NON_TEMPORAL_HINT, cortex_a57, \
+		disable_ldnp_overread
+	report_errata ERRATA_A57_826974, cortex_a57, 826974
+	report_errata ERRATA_A57_826977, cortex_a57, 826977
+	report_errata ERRATA_A57_828024, cortex_a57, 828024
+	report_errata ERRATA_A57_829520, cortex_a57, 829520
+	report_errata ERRATA_A57_833471, cortex_a57, 833471
+
+	pop	{r12, lr}
+	bx	lr
+endfunc cortex_a57_errata_report
+#endif
+
 declare_cpu_ops cortex_a57, CORTEX_A57_MIDR, \
 	cortex_a57_reset_func, \
 	cortex_a57_core_pwr_dwn, \
diff --git a/lib/cpus/aarch32/cpu_helpers.S b/lib/cpus/aarch32/cpu_helpers.S
index dc78f6e..bfdc1e4 100644
--- a/lib/cpus/aarch32/cpu_helpers.S
+++ b/lib/cpus/aarch32/cpu_helpers.S
@@ -182,6 +182,19 @@
 	bx	lr
 endfunc cpu_rev_var_ls
 
+/*
+ * Compare the CPU's revision-variant (r0) with a given value (r1), for errata
+ * application purposes. If the revision-variant is higher than or equal to the
+ * given value, this indicates that the errata applies; otherwise it does not.
+ */
+	.globl	cpu_rev_var_hs
+func cpu_rev_var_hs
+	cmp	r0, r1
+	movge	r0, #ERRATA_APPLIES
+	movlt	r0, #ERRATA_NOT_APPLIES
+	bx	lr
+endfunc cpu_rev_var_hs
+
 #if REPORT_ERRATA
 /*
  * void print_errata_status(void);
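
`cpu_rev_var_hs` complements the existing `cpu_rev_var_ls`: the packed
revision-variant value places the variant in bits [7:4] and the revision in
bits [3:0], so r1p2 encodes as 0x12 and plain unsigned comparison orders
silicon revisions. A sketch of both helpers in C:

    #include <stdint.h>

    #define ERRATA_APPLIES		1
    #define ERRATA_NOT_APPLIES	0

    /* rev_var packs variant[7:4] and revision[3:0], e.g. r1p2 -> 0x12. */
    static inline int cpu_rev_var_ls(uint8_t rev_var, uint8_t max)
    {
    	return (rev_var <= max) ? ERRATA_APPLIES : ERRATA_NOT_APPLIES;
    }

    static inline int cpu_rev_var_hs(uint8_t rev_var, uint8_t min)
    {
    	return (rev_var >= min) ? ERRATA_APPLIES : ERRATA_NOT_APPLIES;
    }
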
diff --git a/lib/el3_runtime/aarch32/context_mgmt.c b/lib/el3_runtime/aarch32/context_mgmt.c
index 020f3a3..3e7a5b7 100644
--- a/lib/el3_runtime/aarch32/context_mgmt.c
+++ b/lib/el3_runtime/aarch32/context_mgmt.c
@@ -75,36 +75,44 @@
 	if (security_state != SECURE)
 		scr |= SCR_NS_BIT;
 
-	/*
-	 * Set up SCTLR for the Non Secure context.
-	 * EE bit is taken from the entrypoint attributes
-	 * M, C and I bits must be zero (as required by PSCI specification)
-	 *
-	 * The target exception level is based on the spsr mode requested.
-	 * If execution is requested to hyp mode, HVC is enabled
-	 * via SCR.HCE.
-	 *
-	 * Always compute the SCTLR_EL1 value and save in the cpu_context
-	 * - the HYP registers are set up by cm_preapre_ns_entry() as they
-	 * are not part of the stored cpu_context
-	 *
-	 * TODO: In debug builds the spsr should be validated and checked
-	 * against the CPU support, security state, endianness and pc
-	 */
 	if (security_state != SECURE) {
-		sctlr = EP_GET_EE(ep->h.attr) ? SCTLR_EE_BIT : 0;
 		/*
-		 * In addition to SCTLR_RES1, set the CP15_BEN, nTWI & nTWE
-		 * bits that architecturally reset to 1.
+		 * Set up SCTLR for the Non-secure context.
+		 *
+		 * SCTLR.EE: Endianness is taken from the entrypoint attributes.
+		 *
+		 * SCTLR.M, SCTLR.C and SCTLR.I: These fields must be zero (as
+		 *  required by PSCI specification)
+		 *
+		 * Set remaining SCTLR fields to their architecturally defined
+		 * values. Some fields reset to an IMPLEMENTATION DEFINED value:
+		 *
+		 * SCTLR.TE: Set to zero so that exceptions to an Exception
+		 *  Level executing at PL1 are taken to A32 state.
+		 *
+		 * SCTLR.V: Set to zero to select the normal exception vectors
+		 *  with base address held in VBAR.
 		 */
-		sctlr |= SCTLR_RES1 | SCTLR_CP15BEN_BIT |
-				SCTLR_NTWI_BIT | SCTLR_NTWE_BIT;
+		assert(((ep->spsr >> SPSR_E_SHIFT) & SPSR_E_MASK) ==
+			(EP_GET_EE(ep->h.attr) >> EP_EE_SHIFT));
+
+		sctlr = EP_GET_EE(ep->h.attr) ? SCTLR_EE_BIT : 0;
+		sctlr |= (SCTLR_RESET_VAL & ~(SCTLR_TE_BIT | SCTLR_V_BIT));
 		write_ctx_reg(reg_ctx, CTX_NS_SCTLR, sctlr);
 	}
 
+	/*
+	 * The target exception level is based on the spsr mode requested. If
+	 * execution is requested to hyp mode, HVC is enabled via SCR.HCE.
+	 */
 	if (GET_M32(ep->spsr) == MODE32_hyp)
 		scr |= SCR_HCE_BIT;
 
+	/*
+	 * Store the initialised values for SCTLR and SCR in the cpu_context.
+	 * The Hyp mode registers are not part of the saved context and are
+	 * set up in cm_prepare_el3_exit().
+	 */
 	write_ctx_reg(reg_ctx, CTX_SCR, scr);
 	write_ctx_reg(reg_ctx, CTX_LR, ep->pc);
 	write_ctx_reg(reg_ctx, CTX_SPSR, ep->spsr);
@@ -151,7 +159,7 @@
  ******************************************************************************/
 void cm_prepare_el3_exit(uint32_t security_state)
 {
-	uint32_t sctlr, scr, hcptr;
+	uint32_t hsctlr, scr;
 	cpu_context_t *ctx = cm_get_context(security_state);
 
 	assert(ctx);
@@ -160,9 +168,9 @@
 		scr = read_ctx_reg(get_regs_ctx(ctx), CTX_SCR);
 		if (scr & SCR_HCE_BIT) {
 			/* Use SCTLR value to initialize HSCTLR */
-			sctlr = read_ctx_reg(get_regs_ctx(ctx),
+			hsctlr = read_ctx_reg(get_regs_ctx(ctx),
 						 CTX_NS_SCTLR);
-			sctlr |= HSCTLR_RES1;
+			hsctlr |= HSCTLR_RES1;
 			/* Temporarily set the NS bit to access HSCTLR */
 			write_scr(read_scr() | SCR_NS_BIT);
 			/*
@@ -170,7 +178,7 @@
 			 * we can access HSCTLR
 			 */
 			isb();
-			write_hsctlr(sctlr);
+			write_hsctlr(hsctlr);
 			isb();
 
 			write_scr(read_scr() & ~SCR_NS_BIT);
@@ -184,48 +192,92 @@
 			write_scr(read_scr() | SCR_NS_BIT);
 			isb();
 
-			/* PL2 present but unused, need to disable safely */
-			write_hcr(0);
+			/*
+			 * Hyp / PL2 present but unused, need to disable safely.
+			 * HSCTLR can be ignored in this case.
+			 *
+			 * Set HCR to its architectural reset value so that
+			 * Non-secure operations do not trap to Hyp mode.
+			 */
+			write_hcr(HCR_RESET_VAL);
 
-			/* HSCTLR : can be ignored when bypassing */
+			/*
+			 * Set HCPTR to its architectural reset value so that
+			 * Non-secure access from EL1 or EL0 to trace and to
+			 * Advanced SIMD and floating point functionality does
+			 * not trap to Hyp mode.
+			 */
+			write_hcptr(HCPTR_RESET_VAL);
 
-			/* HCPTR : disable all traps TCPAC, TTA, TCP */
-			hcptr = read_hcptr();
-			hcptr &= ~(TCPAC_BIT | TTA_BIT | TCP11_BIT | TCP10_BIT);
-			write_hcptr(hcptr);
+			/*
+			 * Initialise CNTHCTL. All fields are architecturally
+			 * UNKNOWN on reset and are set to zero except for
+			 * field(s) listed below.
+			 *
+			 * CNTHCTL.PL1PCEN: Disable traps to Hyp mode of
+			 *  Non-secure EL0 and EL1 accesses to the physical
+			 *  timer registers.
+			 *
+			 * CNTHCTL.PL1PCTEN: Disable traps to Hyp mode of
+			 *  Non-secure EL0 and EL1 accesses to the physical
+			 *  counter registers.
+			 */
+			write_cnthctl(CNTHCTL_RESET_VAL |
+					PL1PCEN_BIT | PL1PCTEN_BIT);
 
-			/* Enable EL1 access to timer */
-			write_cnthctl(PL1PCEN_BIT | PL1PCTEN_BIT);
-
-			/* Reset CNTVOFF_EL2 */
+			/*
+			 * Initialise CNTVOFF to zero as it resets to an
+			 * architecturally UNKNOWN value.
+			 */
 			write64_cntvoff(0);
 
-			/* Set VPIDR, VMPIDR to match MIDR, MPIDR */
+			/*
+			 * Set VPIDR and VMPIDR to match MIDR and MPIDR
+			 * respectively.
+			 */
 			write_vpidr(read_midr());
 			write_vmpidr(read_mpidr());
 
 			/*
-			 * Reset VTTBR.
-			 * Needed because cache maintenance operations depend on
-			 * the VMID even when non-secure EL1&0 stage 2 address
-			 * translation are disabled.
+			 * Initialise VTTBR, setting all fields rather than
+			 * relying on the hw. Some fields are architecturally
+			 * UNKNOWN at reset.
+			 *
+			 * VTTBR.VMID: Set to zero which is the architecturally
+			 *  defined reset value. Even though EL1&0 stage 2
+			 *  address translation is disabled, cache maintenance
+			 *  operations depend on the VMID.
+			 *
+			 * VTTBR.BADDR: Set to zero as EL1&0 stage 2 address
+			 *  translation is disabled.
 			 */
-			write64_vttbr(0);
+			write64_vttbr(VTTBR_RESET_VAL &
+				~((VTTBR_VMID_MASK << VTTBR_VMID_SHIFT)
+				| (VTTBR_BADDR_MASK << VTTBR_BADDR_SHIFT)));
 
 			/*
-			 * Avoid unexpected debug traps in case where HDCR
-			 * is not completely reset by the hardware - set
-			 * HDCR.HPMN to PMCR.N and zero the remaining bits.
-			 * The HDCR.HPMN and PMCR.N fields are the same size
-			 * (5 bits) and HPMN is at offset zero within HDCR.
+			 * Initialise HDCR, setting all the fields rather than
+			 * relying on hw.
+			 *
+			 * HDCR.HPMN: Set to the value of PMCR.N, which is the
+			 *  architecturally defined reset value.
 			 */
-			write_hdcr((read_pmcr() & PMCR_N_BITS) >> PMCR_N_SHIFT);
+			write_hdcr(HDCR_RESET_VAL |
+				((read_pmcr() & PMCR_N_BITS) >> PMCR_N_SHIFT));
 
 			/*
-			 * Reset CNTHP_CTL to disable the EL2 physical timer and
-			 * therefore prevent timer interrupts.
+			 * Set HSTR to its architectural reset value so that
+			 * accesses to system registers in the coproc=1111
+			 * encoding space do not trap to Hyp mode.
 			 */
-			write_cnthp_ctl(0);
+			write_hstr(HSTR_RESET_VAL);
+			/*
+			 * Set CNTHP_CTL to its architectural reset value to
+			 * disable the EL2 physical timer and prevent timer
+			 * interrupts. Some fields are architecturally UNKNOWN
+			 * on reset and are set to zero.
+			 */
+			write_cnthp_ctl(CNTHP_CTL_RESET_VAL);
 			isb();
 
 			write_scr(read_scr() & ~SCR_NS_BIT);
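The hunk above repeatedly applies one pattern: start from a register's architectural reset value, then explicitly clear or set any field whose reset state is UNKNOWN or IMPLEMENTATION DEFINED. A hedged C sketch of that pattern for VTTBR, with illustrative mask/shift values standing in for the real arch.h definitions:

```c
#include <stdint.h>

/* Illustrative stand-ins for the arch.h definitions used in the patch. */
#define VTTBR_RESET_VAL		0x0ULL
#define VTTBR_VMID_SHIFT	48
#define VTTBR_VMID_MASK		0xffULL
#define VTTBR_BADDR_SHIFT	0
#define VTTBR_BADDR_MASK	0xffffffffffffULL

static uint64_t vttbr_init_val(void)
{
	/*
	 * Start from the architectural reset value, then force the
	 * UNKNOWN-at-reset fields (VMID, BADDR) to zero explicitly
	 * rather than relying on the hardware.
	 */
	return VTTBR_RESET_VAL &
		~((VTTBR_VMID_MASK << VTTBR_VMID_SHIFT) |
		  (VTTBR_BADDR_MASK << VTTBR_BADDR_SHIFT));
}
```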
diff --git a/lib/el3_runtime/aarch64/context.S b/lib/el3_runtime/aarch64/context.S
index afe912a..8a6c11b 100644
--- a/lib/el3_runtime/aarch64/context.S
+++ b/lib/el3_runtime/aarch64/context.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -9,6 +9,7 @@
 #include <context.h>
 
 	.global	el1_sysregs_context_save
+	.global el1_sysregs_context_save_post_ops
 	.global	el1_sysregs_context_restore
 #if CTX_INCLUDE_FPREGS
 	.global	fpregs_context_save
@@ -111,6 +112,36 @@
 /* -----------------------------------------------------
  * The following function strictly follows the AArch64
  * PCS to use x9-x17 (temporary caller-saved registers)
+ * to do post operations after saving the EL1 system
+ * register context.
+ * -----------------------------------------------------
+ */
+func el1_sysregs_context_save_post_ops
+#if ENABLE_SPE_FOR_LOWER_ELS
+	/* Detect if SPE is implemented */
+	mrs	x9, id_aa64dfr0_el1
+	ubfx	x9, x9, #ID_AA64DFR0_PMS_SHIFT, #ID_AA64DFR0_PMS_LENGTH
+	cmp	x9, #0x1
+	b.ne	1f
+
+	/*
+	 * Before switching from the Normal world to the Secure world, the
+	 * profiling buffers need to be drained out to memory. This is
+	 * required to avoid an invalid memory access when TTBR is switched
+	 * for entry to S-EL1.
+	 */
+	.arch	armv8.2-a+profile
+	psb	csync
+	dsb	nsh
+	.arch	armv8-a
+1:
+#endif
+	ret
+endfunc el1_sysregs_context_save_post_ops
+
+/* -----------------------------------------------------
+ * The following function strictly follows the AArch64
+ * PCS to use x9-x17 (temporary caller-saved registers)
  * to restore EL1 system register context.  It assumes
  * that 'x0' is pointing to a 'el1_sys_regs' structure
  * from where the register context will be restored
@@ -343,7 +374,7 @@
 	ldp	x24, x25, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X24]
 	ldp	x26, x27, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X26]
 	ldp	x28, x29, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X28]
-	ldp	 x30, x17, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_LR]
+	ldp	x30, x17, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_LR]
 	msr	sp_el0, x17
 	ldp	x16, x17, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X16]
 	eret
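The PMSVer check above (read `id_aa64dfr0_el1`, extract the field with `ubfx`, compare against 1) reappears in C later in this patch. A hedged sketch of the same detection, assuming the shift/length values match the arch.h definitions referenced by the assembly:

```c
#include <stdbool.h>
#include <stdint.h>

#define ID_AA64DFR0_PMS_SHIFT	32	/* PMSVer field, bits [35:32] */
#define ID_AA64DFR0_PMS_LENGTH	4
#define ID_AA64DFR0_PMS_MASK	((1ULL << ID_AA64DFR0_PMS_LENGTH) - 1ULL)

/* Assumed accessor for the ID_AA64DFR0_EL1 system register. */
extern uint64_t read_id_aa64dfr0_el1(void);

static bool spe_implemented(void)
{
	/* PMSVer == 1 indicates the Statistical Profiling Extension. */
	return ((read_id_aa64dfr0_el1() >> ID_AA64DFR0_PMS_SHIFT) &
		ID_AA64DFR0_PMS_MASK) == 1ULL;
}
```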
diff --git a/lib/el3_runtime/aarch64/context_mgmt.c b/lib/el3_runtime/aarch64/context_mgmt.c
index 0104c4e..5257bf1 100644
--- a/lib/el3_runtime/aarch64/context_mgmt.c
+++ b/lib/el3_runtime/aarch64/context_mgmt.c
@@ -71,70 +71,58 @@
 	zeromem(ctx, sizeof(*ctx));
 
 	/*
-	 * Base the context SCR on the current value, adjust for entry point
-	 * specific requirements and set trap bits from the IMF
-	 * TODO: provide the base/global SCR bits using another mechanism?
+	 * SCR_EL3 was initialised during the reset sequence in the macro
+	 * el3_arch_init_common. This code modifies the SCR_EL3 fields that
+	 * affect the next EL.
+	 *
+	 * The following fields are initially set to zero and then updated to
+	 * the required value depending on the SPSR_EL3, the Security state and
+	 * the entrypoint attributes of the next EL.
 	 */
 	scr_el3 = read_scr();
 	scr_el3 &= ~(SCR_NS_BIT | SCR_RW_BIT | SCR_FIQ_BIT | SCR_IRQ_BIT |
 			SCR_ST_BIT | SCR_HCE_BIT);
-
+	/*
+	 * SCR_EL3.NS: Set the Security state of the next EL.
+	 */
 	if (security_state != SECURE)
 		scr_el3 |= SCR_NS_BIT;
-
+	/*
+	 * SCR_EL3.RW: Set the execution state, AArch32 or AArch64, for next
+	 *  Exception level as specified by SPSR.
+	 */
 	if (GET_RW(ep->spsr) == MODE_RW_64)
 		scr_el3 |= SCR_RW_BIT;
-
+	/*
+	 * SCR_EL3.ST: If specified by the entrypoint attributes, set this field
+	 *  to one so that Secure EL1 accesses (from AArch64 state only) to the
+	 *  Counter-timer Physical Secure timer registers are not trapped to
+	 *  EL3.
+	 */
 	if (EP_GET_ST(ep->h.attr))
 		scr_el3 |= SCR_ST_BIT;
 
 #ifndef HANDLE_EA_EL3_FIRST
-	/* Explicitly stop to trap aborts from lower exception levels. */
+	/*
+	 * SCR_EL3.EA: Do not route External Abort and SError Interrupt exceptions
+	 *  to EL3 when executing at a lower EL. When executing at EL3, External
+	 *  Aborts are taken to EL3.
+	 */
 	scr_el3 &= ~SCR_EA_BIT;
 #endif
 
 #ifdef IMAGE_BL31
 	/*
-	 * IRQ/FIQ bits only need setting if interrupt routing
-	 * model has been set up for BL31.
+	 * SCR_EL3.IRQ, SCR_EL3.FIQ: Enable physical FIQ and IRQ routing as
+	 *  indicated by the interrupt routing model for BL31.
 	 */
 	scr_el3 |= get_scr_el3_from_routing_model(security_state);
 #endif
 
 	/*
-	 * Set up SCTLR_ELx for the target exception level:
-	 * EE bit is taken from the entrypoint attributes
-	 * M, C and I bits must be zero (as required by PSCI specification)
-	 *
-	 * The target exception level is based on the spsr mode requested.
-	 * If execution is requested to EL2 or hyp mode, HVC is enabled
-	 * via SCR_EL3.HCE.
-	 *
-	 * Always compute the SCTLR_EL1 value and save in the cpu_context
-	 * - the EL2 registers are set up by cm_preapre_ns_entry() as they
-	 * are not part of the stored cpu_context
-	 *
-	 * TODO: In debug builds the spsr should be validated and checked
-	 * against the CPU support, security state, endianess and pc
+	 * SCR_EL3.HCE: Enable HVC instructions if next execution state is
+	 * AArch64 and next EL is EL2, or if next execution state is AArch32 and
+	 * next mode is Hyp.
 	 */
-	sctlr_elx = EP_GET_EE(ep->h.attr) ? SCTLR_EE_BIT : 0;
-	if (GET_RW(ep->spsr) == MODE_RW_64)
-		sctlr_elx |= SCTLR_EL1_RES1;
-	else {
-		sctlr_elx |= SCTLR_AARCH32_EL1_RES1;
-		/*
-		 * If lower non-secure EL is AArch32, enable the CP15BEN, nTWI
-		 * & nTWI bits. This aligns with SCTLR initialization on
-		 * systems with an AArch32 EL3, where these bits
-		 * architecturally reset to 1.
-		 */
-		if (security_state != SECURE)
-			sctlr_elx |= SCTLR_CP15BEN_BIT | SCTLR_NTWI_BIT
-						| SCTLR_NTWE_BIT;
-	}
-
-	write_ctx_reg(get_sysregs_ctx(ctx), CTX_SCTLR_EL1, sctlr_elx);
-
 	if ((GET_RW(ep->spsr) == MODE_RW_64
 	     && GET_EL(ep->spsr) == MODE_EL2)
 	    || (GET_RW(ep->spsr) != MODE_RW_64
@@ -142,6 +130,45 @@
 		scr_el3 |= SCR_HCE_BIT;
 	}
 
+	/*
+	 * Initialise SCTLR_EL1 to the reset value corresponding to the target
+	 * execution state, setting all fields rather than relying on the hw.
+	 * Some fields have architecturally UNKNOWN reset values and these are
+	 * set to zero.
+	 *
+	 * SCTLR.EE: Endianness is taken from the entrypoint attributes.
+	 *
+	 * SCTLR.M, SCTLR.C and SCTLR.I: These fields must be zero (as
+	 *  required by the PSCI specification).
+	 */
+	sctlr_elx = EP_GET_EE(ep->h.attr) ? SCTLR_EE_BIT : 0;
+	if (GET_RW(ep->spsr) == MODE_RW_64)
+		sctlr_elx |= SCTLR_EL1_RES1;
+	else {
+		/*
+		 * If the target execution state is AArch32 then the following
+		 * fields need to be set.
+		 *
+		 * SCTLR_EL1.nTWE: Set to one so that EL0 execution of WFE
+		 *  instructions is not trapped to EL1.
+		 *
+		 * SCTLR_EL1.nTWI: Set to one so that EL0 execution of WFI
+		 *  instructions is not trapped to EL1.
+		 *
+		 * SCTLR_EL1.CP15BEN: Set to one to enable EL0 execution of the
+		 *  CP15DMB, CP15DSB, and CP15ISB instructions.
+		 */
+		sctlr_elx |= SCTLR_AARCH32_EL1_RES1 | SCTLR_CP15BEN_BIT
+					| SCTLR_NTWI_BIT | SCTLR_NTWE_BIT;
+	}
+
+	/*
+	 * Store the initialised SCTLR_EL1 value in the cpu_context - SCTLR_EL2
+	 * and other EL2 registers are set up by cm_prepare_el3_exit() as they
+	 * are not part of the stored cpu_context.
+	 */
+	write_ctx_reg(get_sysregs_ctx(ctx), CTX_SCTLR_EL1, sctlr_elx);
+
 	/* Populate EL3 state so that we've the right context before doing ERET */
 	state = get_el3state_ctx(ctx);
 	write_ctx_reg(state, CTX_SCR_EL3, scr_el3);
@@ -191,7 +218,7 @@
  ******************************************************************************/
 void cm_prepare_el3_exit(uint32_t security_state)
 {
-	uint32_t sctlr_elx, scr_el3, cptr_el2;
+	uint32_t sctlr_elx, scr_el3, mdcr_el2;
 	cpu_context_t *ctx = cm_get_context(security_state);
 
 	assert(ctx);
@@ -206,57 +233,167 @@
 			sctlr_elx |= SCTLR_EL2_RES1;
 			write_sctlr_el2(sctlr_elx);
 		} else if (EL_IMPLEMENTED(2)) {
-			/* EL2 present but unused, need to disable safely */
-
-			/* HCR_EL2 = 0, except RW bit set to match SCR_EL3 */
+			/*
+			 * EL2 present but unused, need to disable safely.
+			 * SCTLR_EL2 can be ignored in this case.
+			 *
+			 * Initialise all fields in HCR_EL2, except HCR_EL2.RW,
+			 * to zero so that Non-secure operations do not trap to
+			 * EL2.
+			 *
+			 * HCR_EL2.RW: Set this field to match SCR_EL3.RW
+			 */
 			write_hcr_el2((scr_el3 & SCR_RW_BIT) ? HCR_RW_BIT : 0);
 
-			/* SCTLR_EL2 : can be ignored when bypassing */
+			/*
+			 * Initialise CPTR_EL2 setting all fields rather than
+			 * relying on the hw. All fields have architecturally
+			 * UNKNOWN reset values.
+			 *
+			 * CPTR_EL2.TCPAC: Set to zero so that Non-secure EL1
+			 *  accesses to the CPACR_EL1 or CPACR from both
+			 *  Execution states do not trap to EL2.
+			 *
+			 * CPTR_EL2.TTA: Set to zero so that Non-secure System
+			 *  register accesses to the trace registers from both
+			 *  Execution states do not trap to EL2.
+			 *
+			 * CPTR_EL2.TFP: Set to zero so that Non-secure accesses
+			 *  to SIMD and floating-point functionality from both
+			 *  Execution states do not trap to EL2.
+			 */
+			write_cptr_el2(CPTR_EL2_RESET_VAL &
+					~(CPTR_EL2_TCPAC_BIT | CPTR_EL2_TTA_BIT
+					| CPTR_EL2_TFP_BIT));
 
-			/* CPTR_EL2 : disable all traps TCPAC, TTA, TFP */
-			cptr_el2 = read_cptr_el2();
-			cptr_el2 &= ~(TCPAC_BIT | TTA_BIT | TFP_BIT);
-			write_cptr_el2(cptr_el2);
+			/*
+			 * Initialise CNTHCTL_EL2. All fields are
+			 * architecturally UNKNOWN on reset and are set to zero
+			 * except for field(s) listed below.
+			 *
+			 * CNTHCTL_EL2.EL1PCEN: Set to one to disable traps to
+			 *  EL2 of Non-secure EL0 and EL1 accesses to the
+			 *  physical timer registers.
+			 *
+			 * CNTHCTL_EL2.EL1PCTEN: Set to one to disable traps to
+			 *  EL2 of Non-secure EL0 and EL1 accesses to the
+			 *  physical counter registers.
+			 */
+			write_cnthctl_el2(CNTHCTL_RESET_VAL |
+						EL1PCEN_BIT | EL1PCTEN_BIT);
 
-			/* Enable EL1 access to timer */
-			write_cnthctl_el2(EL1PCEN_BIT | EL1PCTEN_BIT);
-
-			/* Reset CNTVOFF_EL2 */
+			/*
+			 * Initialise CNTVOFF_EL2 to zero as it resets to an
+			 * architecturally UNKNOWN value.
+			 */
 			write_cntvoff_el2(0);
 
-			/* Set VPIDR, VMPIDR to match MIDR, MPIDR */
+			/*
+			 * Set VPIDR_EL2 and VMPIDR_EL2 to match MIDR_EL1 and
+			 * MPIDR_EL1 respectively.
+			 */
 			write_vpidr_el2(read_midr_el1());
 			write_vmpidr_el2(read_mpidr_el1());
 
 			/*
-			 * Reset VTTBR_EL2.
-			 * Needed because cache maintenance operations depend on
-			 * the VMID even when non-secure EL1&0 stage 2 address
-			 * translation are disabled.
+			 * Initialise VTTBR_EL2. All fields are architecturally
+			 * UNKNOWN on reset.
+			 *
+			 * VTTBR_EL2.VMID: Set to zero. Even though EL1&0 stage
+			 *  2 address translation is disabled, cache maintenance
+			 *  operations depend on the VMID.
+			 *
+			 * VTTBR_EL2.BADDR: Set to zero as EL1&0 stage 2 address
+			 *  translation is disabled.
 			 */
-			write_vttbr_el2(0);
+			write_vttbr_el2(VTTBR_RESET_VAL &
+				~((VTTBR_VMID_MASK << VTTBR_VMID_SHIFT)
+				| (VTTBR_BADDR_MASK << VTTBR_BADDR_SHIFT)));
+
 			/*
-			 * Avoid unexpected debug traps in case where MDCR_EL2
-			 * is not completely reset by the hardware - set
-			 * MDCR_EL2.HPMN to PMCR_EL0.N and zero the remaining
-			 * bits.
-			 * MDCR_EL2.HPMN and PMCR_EL0.N fields are the same size
-			 * (5 bits) and HPMN is at offset zero within MDCR_EL2.
+			 * Initialise MDCR_EL2, setting all fields rather than
+			 * relying on hw. Some fields are architecturally
+			 * UNKNOWN on reset.
+			 *
+			 * MDCR_EL2.TPMS (ARM v8.2): Set to zero so that
+			 *  statistical profiling controls do not trap to EL2.
+			 *
+			 * MDCR_EL2.E2PB (ARM v8.2): SPE is enabled in
+			 *  Non-secure state; accesses to profiling buffer
+			 *  controls at Non-secure EL1 are not trapped to EL2.
+			 *
+			 * MDCR_EL2.TDRA: Set to zero so that Non-secure EL0 and
+			 *  EL1 System register accesses to the Debug ROM
+			 *  registers are not trapped to EL2.
+			 *
+			 * MDCR_EL2.TDOSA: Set to zero so that Non-secure EL1
+			 *  System register accesses to the powerdown debug
+			 *  registers are not trapped to EL2.
+			 *
+			 * MDCR_EL2.TDA: Set to zero so that System register
+			 *  accesses to the debug registers do not trap to EL2.
+			 *
+			 * MDCR_EL2.TDE: Set to zero so that debug exceptions
+			 *  are not routed to EL2.
+			 *
+			 * MDCR_EL2.HPME: Set to zero to disable EL2 Performance
+			 *  Monitors.
+			 *
+			 * MDCR_EL2.TPM: Set to zero so that Non-secure EL0 and
+			 *  EL1 accesses to all Performance Monitors registers
+			 *  are not trapped to EL2.
+			 *
+			 * MDCR_EL2.TPMCR: Set to zero so that Non-secure EL0
+			 *  and EL1 accesses to the PMCR_EL0 or PMCR are not
+			 *  trapped to EL2.
+			 *
+			 * MDCR_EL2.HPMN: Set to the value of PMCR_EL0.N, which
+			 *  is the architecturally defined reset value.
 			 */
-			write_mdcr_el2((read_pmcr_el0() & PMCR_EL0_N_BITS)
-					>> PMCR_EL0_N_SHIFT);
+			mdcr_el2 = ((MDCR_EL2_RESET_VAL |
+					((read_pmcr_el0() & PMCR_EL0_N_BITS)
+					>> PMCR_EL0_N_SHIFT)) &
+					~(MDCR_EL2_TDRA_BIT | MDCR_EL2_TDOSA_BIT
+					| MDCR_EL2_TDA_BIT | MDCR_EL2_TDE_BIT
+					| MDCR_EL2_HPME_BIT | MDCR_EL2_TPM_BIT
+					| MDCR_EL2_TPMCR_BIT));
+
+#if ENABLE_SPE_FOR_LOWER_ELS
+			uint64_t id_aa64dfr0_el1;
+
+			/* Detect if SPE is implemented */
+			id_aa64dfr0_el1 = read_id_aa64dfr0_el1() >>
+				ID_AA64DFR0_PMS_SHIFT;
+			if ((id_aa64dfr0_el1 & ID_AA64DFR0_PMS_MASK) == 1) {
+				/*
+				 * Make sure traps to EL2 are not generated if
+				 * EL2 is implemented but not used.
+				 */
+				mdcr_el2 &= ~MDCR_EL2_TPMS;
+				mdcr_el2 |= MDCR_EL2_E2PB(MDCR_EL2_E2PB_EL1);
+			}
+#endif
+
+			write_mdcr_el2(mdcr_el2);
+
 			/*
-			 * Avoid unexpected traps of non-secure access to
-			 * certain system registers at EL1 or lower where
-			 * HSTR_EL2 is not completely reset to zero by the
-			 * hardware - zero the entire register.
+			 * Initialise HSTR_EL2. All fields are architecturally
+			 * UNKNOWN on reset.
+			 *
+			 * HSTR_EL2.T<n>: Set all these fields to zero so that
+			 *  Non-secure EL0 or EL1 accesses to System registers
+			 *  do not trap to EL2.
 			 */
-			write_hstr_el2(0);
+			write_hstr_el2(HSTR_EL2_RESET_VAL & ~(HSTR_EL2_T_MASK));
 			/*
-			 * Reset CNTHP_CTL_EL2 to disable the EL2 physical timer
-			 * and therefore prevent timer interrupts.
+			 * Initialise CNTHP_CTL_EL2. All fields are
+			 * architecturally UNKNOWN on reset.
+			 *
+			 * CNTHP_CTL_EL2.ENABLE: Set to zero to disable the EL2
+			 *  physical timer and prevent timer interrupts.
 			 */
-			write_cnthp_ctl_el2(0);
+			write_cnthp_ctl_el2(CNTHP_CTL_RESET_VAL &
+						~(CNTHP_CTL_ENABLE_BIT));
 		}
 	}
 
@@ -278,6 +415,7 @@
 	assert(ctx);
 
 	el1_sysregs_context_save(get_sysregs_ctx(ctx));
+	el1_sysregs_context_save_post_ops();
 }
 
 void cm_el1_sysregs_context_restore(uint32_t security_state)
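One detail worth calling out in the MDCR_EL2 initialisation: PMCR_EL0.N and MDCR_EL2.HPMN are both five bits wide and HPMN occupies bits [4:0], so the extracted counter count can be ORed in without further shifting. A hedged sketch, with illustrative constants in place of the real arch.h values:

```c
#include <stdint.h>

#define PMCR_EL0_N_SHIFT	11		/* PMCR_EL0.N, bits [15:11] */
#define PMCR_EL0_N_BITS		(0x1fU << PMCR_EL0_N_SHIFT)
#define MDCR_EL2_RESET_VAL	0x0U		/* illustrative */

/* Assumed accessor for the PMCR_EL0 system register. */
extern uint32_t read_pmcr_el0(void);

static uint32_t mdcr_el2_init_val(void)
{
	/*
	 * Seed MDCR_EL2 from its reset value, then place PMCR_EL0.N into
	 * MDCR_EL2.HPMN (bits [4:0]) - the architecturally defined reset
	 * value for that field.
	 */
	return MDCR_EL2_RESET_VAL |
		((read_pmcr_el0() & PMCR_EL0_N_BITS) >> PMCR_EL0_N_SHIFT);
}
```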
diff --git a/lib/xlat_tables/aarch32/xlat_tables.c b/lib/xlat_tables/aarch32/xlat_tables.c
index 3c9051c..9c15624 100644
--- a/lib/xlat_tables/aarch32/xlat_tables.c
+++ b/lib/xlat_tables/aarch32/xlat_tables.c
@@ -149,7 +149,7 @@
 	 * and translation register writes are committed
 	 * before enabling the MMU
 	 */
-	dsb();
+	dsbish();
 	isb();
 
 	sctlr = read_sctlr();
diff --git a/lib/xlat_tables_v2/aarch32/xlat_tables_arch.c b/lib/xlat_tables_v2/aarch32/xlat_tables_arch.c
index afc65e7..40fd2d0 100644
--- a/lib/xlat_tables_v2/aarch32/xlat_tables_arch.c
+++ b/lib/xlat_tables_v2/aarch32/xlat_tables_arch.c
@@ -141,7 +141,7 @@
 	 * and translation register writes are committed
 	 * before enabling the MMU
 	 */
-	dsb();
+	dsbish();
 	isb();
 
 	sctlr = read_sctlr();
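Both xlat-table variants receive the same fix: the pre-MMU-enable barrier is widened from `dsb` to `dsbish`, so that the translation-table and translation-register writes are made visible across the Inner Shareable domain before the MMU is switched on. A hedged C sketch of the enable sequence (helper names assumed to match the TF-A style used above):

```c
#include <stdint.h>

/* Assumed barrier and system-register helpers, as used in the patch. */
extern void dsbish(void);
extern void isb(void);
extern uint32_t read_sctlr(void);
extern void write_sctlr(uint32_t val);

#define SCTLR_M_BIT	(1U << 0)	/* MMU enable */

static void enable_mmu_sketch(void)
{
	/* Commit table and translation-register writes first... */
	dsbish();
	isb();

	/* ...then turn the MMU on and synchronize the context. */
	write_sctlr(read_sctlr() | SCTLR_M_BIT);
	isb();
}
```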
diff --git a/make_helpers/defaults.mk b/make_helpers/defaults.mk
index 2c8f82a..9946fea 100644
--- a/make_helpers/defaults.mk
+++ b/make_helpers/defaults.mk
@@ -136,3 +136,20 @@
 # required to enable cache coherency after warm reset (eg: single cluster
 # platforms).
 WARMBOOT_ENABLE_DCACHE_EARLY	:= 0
+
+# By default, enable Statistical Profiling Extensions.
+# The logic below disables this feature depending on
+# the target architecture and version number.
+ENABLE_SPE_FOR_LOWER_ELS	:= 1
+
+# SPE is enabled by default but only supported on AArch64 from ARMv8.2 onwards.
+# Disable it in all other cases.
+ifeq (${ARCH},aarch32)
+    override ENABLE_SPE_FOR_LOWER_ELS := 0
+else
+    ifeq (${ARM_ARCH_MAJOR},8)
+        ifeq ($(ARM_ARCH_MINOR),$(filter $(ARM_ARCH_MINOR),0 1))
+            ENABLE_SPE_FOR_LOWER_ELS := 0
+        endif
+    endif
+endif
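Once routed through `assert_boolean` and `add_define` in the top-level Makefile, the option surfaces in C and assembly as a 0/1 preprocessor symbol. A hedged sketch of the consumption pattern (the hook name here is hypothetical; the real call site is in fvp_pm.c, below):

```c
/* Assumed SPE drain helper, added by this patch in arm_helpers.S. */
extern void arm_disable_spe(void);

/* Hypothetical platform power-down hook illustrating the guard. */
static void plat_power_down_prepare(void)
{
#if ENABLE_SPE_FOR_LOWER_ELS
	/* Only compiled in for AArch64 v8.2+ builds per defaults.mk. */
	arm_disable_spe();
#endif
}
```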
diff --git a/plat/arm/board/fvp/fvp_pm.c b/plat/arm/board/fvp/fvp_pm.c
index f4df658..e39a4d5 100644
--- a/plat/arm/board/fvp/fvp_pm.c
+++ b/plat/arm/board/fvp/fvp_pm.c
@@ -48,6 +48,14 @@
 {
 	uint64_t mpidr = read_mpidr_el1();
 
+#if ENABLE_SPE_FOR_LOWER_ELS
+	/*
+	 * On power down we need to disable statistical profiling extensions
+	 * before exiting coherency.
+	 */
+	arm_disable_spe();
+#endif
+
 	/* Disable coherency if this cluster is to be turned off */
 	fvp_interconnect_disable();
 
diff --git a/plat/arm/board/fvp/include/platform_def.h b/plat/arm/board/fvp/include/platform_def.h
index d9d6eb1..f13fc8e 100644
--- a/plat/arm/board/fvp/include/platform_def.h
+++ b/plat/arm/board/fvp/include/platform_def.h
@@ -56,6 +56,9 @@
 #define PLAT_ARM_BL31_RUN_UART_BASE		V2M_IOFPGA_UART1_BASE
 #define PLAT_ARM_BL31_RUN_UART_CLK_IN_HZ	V2M_IOFPGA_UART1_CLK_IN_HZ
 
+#define PLAT_ARM_SP_MIN_RUN_UART_BASE		V2M_IOFPGA_UART1_BASE
+#define PLAT_ARM_SP_MIN_RUN_UART_CLK_IN_HZ	V2M_IOFPGA_UART1_CLK_IN_HZ
+
 #define PLAT_ARM_CRASH_UART_BASE	PLAT_ARM_BL31_RUN_UART_BASE
 #define PLAT_ARM_CRASH_UART_CLK_IN_HZ	PLAT_ARM_BL31_RUN_UART_CLK_IN_HZ
 
diff --git a/plat/arm/board/juno/aarch32/juno_helpers.S b/plat/arm/board/juno/aarch32/juno_helpers.S
index 5044a24..824002a 100644
--- a/plat/arm/board/juno/aarch32/juno_helpers.S
+++ b/plat/arm/board/juno/aarch32/juno_helpers.S
@@ -81,9 +81,9 @@
 	 * Cortex-A57 specific settings
 	 * --------------------------------------------------------------------
 	 */
-	mov	r0, #((L2_DATA_RAM_LATENCY_3_CYCLES << L2CTLR_DATA_RAM_LATENCY_SHIFT) |	\
-		      (L2_TAG_RAM_LATENCY_3_CYCLES << L2CTLR_TAG_RAM_LATENCY_SHIFT))
-	stcopr	r0, L2CTLR
+	mov	r0, #((CORTEX_A57_L2_DATA_RAM_LATENCY_3_CYCLES << CORTEX_A57_L2CTLR_DATA_RAM_LATENCY_SHIFT) |	\
+		      (CORTEX_A57_L2_TAG_RAM_LATENCY_3_CYCLES << CORTEX_A57_L2CTLR_TAG_RAM_LATENCY_SHIFT))
+	stcopr	r0, CORTEX_A57_L2CTLR
 1:
 	isb
 	bx	lr
@@ -118,8 +118,8 @@
 	 * Cortex-A57 specific settings
 	 * --------------------------------------------------------------------
 	 */
-	mov	r0, #(L2_DATA_RAM_LATENCY_3_CYCLES << L2CTLR_DATA_RAM_LATENCY_SHIFT)
-	stcopr	r0, L2CTLR
+	mov	r0, #(CORTEX_A57_L2_DATA_RAM_LATENCY_3_CYCLES << CORTEX_A57_L2CTLR_DATA_RAM_LATENCY_SHIFT)
+	stcopr	r0, CORTEX_A57_L2CTLR
 	isb
 	bx	lr
 endfunc JUNO_HANDLER(1)
@@ -152,9 +152,9 @@
 	 * Cortex-A72 specific settings
 	 * --------------------------------------------------------------------
 	 */
-	mov	r0, #((L2_DATA_RAM_LATENCY_3_CYCLES << L2CTLR_DATA_RAM_LATENCY_SHIFT) |	\
-		      (L2_TAG_RAM_LATENCY_2_CYCLES << L2CTLR_TAG_RAM_LATENCY_SHIFT))
-	stcopr	r0, L2CTLR
+	mov	r0, #((CORTEX_A72_L2_DATA_RAM_LATENCY_3_CYCLES << CORTEX_A72_L2CTLR_DATA_RAM_LATENCY_SHIFT) |	\
+		      (CORTEX_A72_L2_TAG_RAM_LATENCY_2_CYCLES << CORTEX_A72_L2CTLR_TAG_RAM_LATENCY_SHIFT))
+	stcopr	r0, CORTEX_A72_L2CTLR
 	isb
 	bx	lr
 endfunc JUNO_HANDLER(2)
diff --git a/plat/arm/common/aarch64/arm_helpers.S b/plat/arm/common/aarch64/arm_helpers.S
index 1f20cb5..86565f5 100644
--- a/plat/arm/common/aarch64/arm_helpers.S
+++ b/plat/arm/common/aarch64/arm_helpers.S
@@ -12,6 +12,7 @@
 	.globl	plat_crash_console_putc
 	.globl	plat_crash_console_flush
 	.globl	platform_mem_init
+	.globl	arm_disable_spe
 
 
 	/* -----------------------------------------------------
@@ -86,3 +87,31 @@
 func platform_mem_init
 	ret
 endfunc platform_mem_init
+
+	/* -----------------------------------------------------
+	 * void arm_disable_spe (void);
+	 * -----------------------------------------------------
+	 */
+#if ENABLE_SPE_FOR_LOWER_ELS
+func arm_disable_spe
+	/* Detect if SPE is implemented */
+	mrs	x0, id_aa64dfr0_el1
+	ubfx	x0, x0, #ID_AA64DFR0_PMS_SHIFT, #ID_AA64DFR0_PMS_LENGTH
+	cmp	x0, #0x1
+	b.ne	1f
+
+	/* Drain buffered data */
+	.arch	armv8.2-a+profile
+	psb	csync
+	dsb	nsh
+
+	/* Disable Profiling Buffer */
+	mrs	x0, pmblimitr_el1
+	bic	x0, x0, #1
+	msr	pmblimitr_el1, x0
+	isb
+	.arch	armv8-a
+1:
+	ret
+endfunc arm_disable_spe
+#endif
diff --git a/plat/arm/common/sp_min/arm_sp_min_setup.c b/plat/arm/common/sp_min/arm_sp_min_setup.c
index 79a4b6b..c5408c8 100644
--- a/plat/arm/common/sp_min/arm_sp_min_setup.c
+++ b/plat/arm/common/sp_min/arm_sp_min_setup.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -128,6 +128,17 @@
 }
 
 /*******************************************************************************
+ * Perform any SP_MIN platform runtime setup prior to SP_MIN exit.
+ * Common to ARM standard platforms.
+ ******************************************************************************/
+void arm_sp_min_plat_runtime_setup(void)
+{
+	/* Initialize the runtime console */
+	console_init(PLAT_ARM_SP_MIN_RUN_UART_BASE,
+		PLAT_ARM_SP_MIN_RUN_UART_CLK_IN_HZ, ARM_CONSOLE_BAUDRATE);
+}
+
+/*******************************************************************************
  * Perform platform specific setup for SP_MIN
  ******************************************************************************/
 void sp_min_platform_setup(void)
@@ -155,6 +166,11 @@
 	plat_arm_pwrc_setup();
 }
 
+void sp_min_plat_runtime_setup(void)
+{
+	arm_sp_min_plat_runtime_setup();
+}
+
 /*******************************************************************************
  * Perform the very early platform specific architectural setup here. At the
  * moment this only initializes the MMU
diff --git a/plat/common/aarch32/plat_common.c b/plat/common/aarch32/plat_common.c
index f5cfee5..d3799d2 100644
--- a/plat/common/aarch32/plat_common.c
+++ b/plat/common/aarch32/plat_common.c
@@ -4,6 +4,7 @@
  * SPDX-License-Identifier: BSD-3-Clause
  */
 
+#include <console.h>
 #include <platform.h>
 #include <xlat_mmu_helpers.h>
 
@@ -13,8 +14,18 @@
  * platforms but may also be overridden by a platform if required.
  */
 #pragma weak bl32_plat_enable_mmu
+#pragma weak sp_min_plat_runtime_setup
 
 void bl32_plat_enable_mmu(uint32_t flags)
 {
 	enable_mmu_secure(flags);
 }
+
+void sp_min_plat_runtime_setup(void)
+{
+	/*
+	 * Finish using the console driver in SP_MIN so that any runtime logs
+	 * from SP_MIN are suppressed.
+	 */
+	console_uninit();
+}
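Since `sp_min_plat_runtime_setup` is declared weak, this console-uninit version is only the default; a platform that wants runtime logs provides its own strong definition, as the ARM platforms do above. A hedged sketch of such an override (the UART constants are hypothetical placeholders for platform_def.h values):

```c
#include <console.h>

/* Hypothetical platform constants standing in for platform_def.h. */
#define MY_PLAT_RUN_UART_BASE		0x1c0a0000
#define MY_PLAT_RUN_UART_CLK_IN_HZ	24000000
#define MY_PLAT_CONSOLE_BAUDRATE	115200

/* Strong definition: overrides the weak default in plat_common.c. */
void sp_min_plat_runtime_setup(void)
{
	/* Keep a runtime console alive instead of tearing it down. */
	console_init(MY_PLAT_RUN_UART_BASE, MY_PLAT_RUN_UART_CLK_IN_HZ,
		     MY_PLAT_CONSOLE_BAUDRATE);
}
```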
diff --git a/plat/hisilicon/hikey960/hikey960_pm.c b/plat/hisilicon/hikey960/hikey960_pm.c
index 257299e..3447c9f 100644
--- a/plat/hisilicon/hikey960/hikey960_pm.c
+++ b/plat/hisilicon/hikey960/hikey960_pm.c
@@ -102,7 +102,7 @@
 	hisi_powerdn_core(cluster, core);
 
 	/* check if any core is powered up */
-	if (hisi_test_pwrdn_allcores(cluster, core)) {
+	if (hisi_test_cpu_down(cluster, core)) {
 
 		cci_disable_snoop_dvm_reqs(MPIDR_AFFLVL1_VAL(read_mpidr_el1()));
 
diff --git a/plat/socionext/uniphier/platform.mk b/plat/socionext/uniphier/platform.mk
index af8e3ac..7ea0f10 100644
--- a/plat/socionext/uniphier/platform.mk
+++ b/plat/socionext/uniphier/platform.mk
@@ -86,11 +86,29 @@
 				drivers/auth/img_parser_mod.c		\
 				drivers/auth/tbbr/tbbr_cot.c		\
 				plat/common/tbbr/plat_tbbr.c		\
+				$(PLAT_PATH)/uniphier_rotpk.S		\
 				$(PLAT_PATH)/uniphier_tbbr.c
 
 BL1_SOURCES		+=	$(TBB_SOURCES)
 BL2_SOURCES		+=	$(TBB_SOURCES)
 
+ROT_KEY			= $(BUILD_PLAT)/rot_key.pem
+ROTPK_HASH		= $(BUILD_PLAT)/rotpk_sha256.bin
+
+$(eval $(call add_define_val,ROTPK_HASH,'"$(ROTPK_HASH)"'))
+$(BUILD_PLAT)/bl1/uniphier_rotpk.o: $(ROTPK_HASH)
+$(BUILD_PLAT)/bl2/uniphier_rotpk.o: $(ROTPK_HASH)
+
+certificates: $(ROT_KEY)
+$(ROT_KEY):
+	@echo "  OPENSSL $@"
+	$(Q)openssl genrsa 2048 > $@ 2>/dev/null
+
+$(ROTPK_HASH): $(ROT_KEY)
+	@echo "  OPENSSL $@"
+	$(Q)openssl rsa -in $< -pubout -outform DER 2>/dev/null |\
+	openssl dgst -sha256 -binary > $@ 2>/dev/null
+
 endif
 
 .PHONY: bl1_gzip
diff --git a/plat/socionext/uniphier/uniphier_rotpk.S b/plat/socionext/uniphier/uniphier_rotpk.S
new file mode 100644
index 0000000..0045a34
--- /dev/null
+++ b/plat/socionext/uniphier/uniphier_rotpk.S
@@ -0,0 +1,15 @@
+/*
+ * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+	.global uniphier_rotpk_hash
+	.global uniphier_rotpk_hash_end
+uniphier_rotpk_hash:
+	/* DER header */
+	.byte 0x30, 0x31, 0x30, 0x0D, 0x06, 0x09, 0x60, 0x86, 0x48
+	.byte 0x01, 0x65, 0x03, 0x04, 0x02, 0x01, 0x05, 0x00, 0x04, 0x20
+	/* SHA256 */
+	.incbin ROTPK_HASH
+uniphier_rotpk_hash_end:
diff --git a/plat/socionext/uniphier/uniphier_tbbr.c b/plat/socionext/uniphier/uniphier_tbbr.c
index cafe1a3..1c83411 100644
--- a/plat/socionext/uniphier/uniphier_tbbr.c
+++ b/plat/socionext/uniphier/uniphier_tbbr.c
@@ -6,10 +6,14 @@
 
 #include <platform.h>
 
+extern char uniphier_rotpk_hash[], uniphier_rotpk_hash_end[];
+
 int plat_get_rotpk_info(void *cookie, void **key_ptr, unsigned int *key_len,
 			unsigned int *flags)
 {
-	*flags = ROTPK_NOT_DEPLOYED;
+	*key_ptr = uniphier_rotpk_hash;
+	*key_len = uniphier_rotpk_hash_end - uniphier_rotpk_hash;
+	*flags = ROTPK_IS_HASH;
 
 	return 0;
 }
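With `ROTPK_IS_HASH`, the authentication framework treats the returned buffer as a SHA-256 digest of the ROT public key, prefixed by the 19-byte DER header emitted in uniphier_rotpk.S. A hedged sketch of the verification step a consumer would perform (`hash_calc` is a hypothetical stand-in for the crypto-module helper):

```c
#include <string.h>

#define ROTPK_IS_HASH	1	/* illustrative flag value */

extern int plat_get_rotpk_info(void *cookie, void **key_ptr,
			       unsigned int *key_len, unsigned int *flags);

/* Hypothetical helper producing the same DER-header + SHA-256 layout. */
extern int hash_calc(const void *pk, unsigned int pk_len,
		     void *hash_out, unsigned int hash_len);

static int verify_rotpk(const void *pk, unsigned int pk_len)
{
	unsigned char computed[51];	/* 19-byte DER header + 32-byte SHA-256 */
	void *stored;
	unsigned int stored_len, flags;

	if (plat_get_rotpk_info(NULL, &stored, &stored_len, &flags) != 0)
		return -1;
	if (flags != ROTPK_IS_HASH || stored_len != sizeof(computed))
		return -1;
	if (hash_calc(pk, pk_len, computed, sizeof(computed)) != 0)
		return -1;

	return (memcmp(stored, computed, stored_len) == 0) ? 0 : -1;
}
```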