Merge pull request #248 from jcastillo-arm/jc/tf-issues/212_1

Allow BL3-2 to be loaded into the secure region of DRAM
diff --git a/Makefile b/Makefile
index 691725f..c59cdb4 100644
--- a/Makefile
+++ b/Makefile
@@ -63,6 +63,8 @@
 # Flag used to indicate if ASM_ASSERTION should be enabled for the build.
 # This defaults to being present in DEBUG builds only.
 ASM_ASSERTION		:=	${DEBUG}
+# Build option to choose whether Trusted firmware uses Coherent memory or not.
+USE_COHERENT_MEM	:=	1
 # Default FIP file name
 FIP_NAME		:= fip.bin
 
@@ -230,6 +232,10 @@
 # Process LOG_LEVEL flag
 $(eval $(call add_define,LOG_LEVEL))
 
+# Process USE_COHERENT_MEM flag
+$(eval $(call assert_boolean,USE_COHERENT_MEM))
+$(eval $(call add_define,USE_COHERENT_MEM))
+
 ASFLAGS			+= 	-nostdinc -ffreestanding -Wa,--fatal-warnings	\
 				-Werror -Wmissing-include-dirs			\
 				-mgeneral-regs-only -D__ASSEMBLY__		\
diff --git a/bl1/aarch64/bl1_entrypoint.S b/bl1/aarch64/bl1_entrypoint.S
index 82330c1..cfc6292 100644
--- a/bl1/aarch64/bl1_entrypoint.S
+++ b/bl1/aarch64/bl1_entrypoint.S
@@ -131,9 +131,11 @@
 	ldr	x1, =__BSS_SIZE__
 	bl	zeromem16
 
+#if USE_COHERENT_MEM
 	ldr	x0, =__COHERENT_RAM_START__
 	ldr	x1, =__COHERENT_RAM_UNALIGNED_SIZE__
 	bl	zeromem16
+#endif
 
 	ldr	x0, =__DATA_RAM_START__
 	ldr	x1, =__DATA_ROM_START__
diff --git a/bl1/bl1.ld.S b/bl1/bl1.ld.S
index 007149b..d682384 100644
--- a/bl1/bl1.ld.S
+++ b/bl1/bl1.ld.S
@@ -107,6 +107,7 @@
         *(xlat_table)
     } >RAM
 
+#if USE_COHERENT_MEM
     /*
      * The base address of the coherent memory section must be page-aligned (4K)
      * to guarantee that the coherent data are stored on their own pages and
@@ -125,6 +126,7 @@
         . = NEXT(4096);
         __COHERENT_RAM_END__ = .;
     } >RAM
+#endif
 
     __BL1_RAM_START__ = ADDR(.data);
     __BL1_RAM_END__ = .;
@@ -140,8 +142,10 @@
 
     __BSS_SIZE__ = SIZEOF(.bss);
 
+#if USE_COHERENT_MEM
     __COHERENT_RAM_UNALIGNED_SIZE__ =
         __COHERENT_RAM_END_UNALIGNED__ - __COHERENT_RAM_START__;
+#endif
 
     ASSERT(. <= BL1_RW_LIMIT, "BL1's RW section has exceeded its limit.")
 }
diff --git a/bl2/aarch64/bl2_entrypoint.S b/bl2/aarch64/bl2_entrypoint.S
index 2f058da..499dc37 100644
--- a/bl2/aarch64/bl2_entrypoint.S
+++ b/bl2/aarch64/bl2_entrypoint.S
@@ -91,9 +91,11 @@
 	ldr	x1, =__BSS_SIZE__
 	bl	zeromem16
 
+#if USE_COHERENT_MEM
 	ldr	x0, =__COHERENT_RAM_START__
 	ldr	x1, =__COHERENT_RAM_UNALIGNED_SIZE__
 	bl	zeromem16
+#endif
 
 	/* --------------------------------------------
 	 * Allocate a stack whose memory will be marked
diff --git a/bl2/bl2.ld.S b/bl2/bl2.ld.S
index 65304de..9933339 100644
--- a/bl2/bl2.ld.S
+++ b/bl2/bl2.ld.S
@@ -93,6 +93,7 @@
         *(xlat_table)
     } >RAM
 
+#if USE_COHERENT_MEM
     /*
      * The base address of the coherent memory section must be page-aligned (4K)
      * to guarantee that the coherent data are stored on their own pages and
@@ -111,12 +112,16 @@
         . = NEXT(4096);
         __COHERENT_RAM_END__ = .;
     } >RAM
+#endif
 
     __BL2_END__ = .;
 
     __BSS_SIZE__ = SIZEOF(.bss);
+
+#if USE_COHERENT_MEM
     __COHERENT_RAM_UNALIGNED_SIZE__ =
         __COHERENT_RAM_END_UNALIGNED__ - __COHERENT_RAM_START__;
+#endif
 
     ASSERT(. <= BL2_LIMIT, "BL2 image has exceeded its limit.")
 }
diff --git a/bl31/aarch64/bl31_entrypoint.S b/bl31/aarch64/bl31_entrypoint.S
index 04063e1..01d7a7f 100644
--- a/bl31/aarch64/bl31_entrypoint.S
+++ b/bl31/aarch64/bl31_entrypoint.S
@@ -61,15 +61,21 @@
 	bic	x0, x0, #SCTLR_EE_BIT
 	msr	sctlr_el3, x0
 	isb
+#endif
 
-	/* -----------------------------------------------------
-	 * Perform any processor specific actions upon reset
-	 * e.g. cache, tlb invalidations etc. Override the
-	 * Boot ROM(BL0) programming sequence
-	 * -----------------------------------------------------
+	/* ---------------------------------------------
+	 * When RESET_TO_BL31 is true, perform any
+	 * processor specific actions upon reset e.g.
+	 * cache, tlb invalidations, errata workarounds
+	 * etc.
+	 * When RESET_TO_BL31 is false, perform any
+	 * processor specific actions which undo or are
+	 * in addition to the actions performed by the
+	 * reset handler in the Boot ROM (BL1).
+	 * ---------------------------------------------
 	 */
 	bl	reset_handler
-#endif
+
 	/* ---------------------------------------------
 	 * Enable the instruction cache, stack pointer
 	 * and data access alignment checks
@@ -149,9 +155,11 @@
 	ldr	x1, =__BSS_SIZE__
 	bl	zeromem16
 
+#if USE_COHERENT_MEM
 	ldr	x0, =__COHERENT_RAM_START__
 	ldr	x1, =__COHERENT_RAM_UNALIGNED_SIZE__
 	bl	zeromem16
+#endif
 
 	/* ---------------------------------------------
 	 * Initialize the cpu_ops pointer.
diff --git a/bl31/bl31.ld.S b/bl31/bl31.ld.S
index 124be85..3327f31 100644
--- a/bl31/bl31.ld.S
+++ b/bl31/bl31.ld.S
@@ -117,6 +117,7 @@
         *(xlat_table)
     } >RAM
 
+#if USE_COHERENT_MEM
     /*
      * The base address of the coherent memory section must be page-aligned (4K)
      * to guarantee that the coherent data are stored on their own pages and
@@ -135,12 +136,15 @@
         . = NEXT(4096);
         __COHERENT_RAM_END__ = .;
     } >RAM
+#endif
 
     __BL31_END__ = .;
 
     __BSS_SIZE__ = SIZEOF(.bss);
+#if USE_COHERENT_MEM
     __COHERENT_RAM_UNALIGNED_SIZE__ =
         __COHERENT_RAM_END_UNALIGNED__ - __COHERENT_RAM_START__;
+#endif
 
     ASSERT(. <= BL31_LIMIT, "BL3-1 image has exceeded its limit.")
 }
diff --git a/bl31/bl31.mk b/bl31/bl31.mk
index f53a41f..4c25a60 100644
--- a/bl31/bl31.mk
+++ b/bl31/bl31.mk
@@ -40,7 +40,6 @@
 				bl31/aarch64/runtime_exceptions.S		\
 				bl31/aarch64/crash_reporting.S			\
 				lib/cpus/aarch64/cpu_helpers.S			\
-				lib/locks/bakery/bakery_lock.c			\
 				lib/locks/exclusive/spinlock.S			\
 				services/std_svc/std_svc_setup.c		\
 				services/std_svc/psci/psci_afflvl_off.c		\
@@ -53,6 +52,12 @@
 				services/std_svc/psci/psci_setup.c		\
 				services/std_svc/psci/psci_system_off.c
 
+ifeq (${USE_COHERENT_MEM}, 1)
+BL31_SOURCES		+=	lib/locks/bakery/bakery_lock_coherent.c
+else
+BL31_SOURCES		+=	lib/locks/bakery/bakery_lock_normal.c
+endif
+
 BL31_LINKERFILE		:=	bl31/bl31.ld.S
 
 # Flag used by the generic interrupt management framework to  determine if
diff --git a/bl31/interrupt_mgmt.c b/bl31/interrupt_mgmt.c
index e595634e..5478902 100644
--- a/bl31/interrupt_mgmt.c
+++ b/bl31/interrupt_mgmt.c
@@ -158,6 +158,45 @@
 	return 0;
 }
 
+/******************************************************************************
+ * This function disables the routing model of interrupt 'type' from the
+ * specified 'security_state' on the local core. The disable is in effect
+ * till the core powers down or till the next enable for that interrupt
+ * type.
+ *****************************************************************************/
+int disable_intr_rm_local(uint32_t type, uint32_t security_state)
+{
+	uint32_t bit_pos, flag;
+
+	assert(intr_type_descs[type].handler);
+
+	flag = get_interrupt_rm_flag(INTR_DEFAULT_RM, security_state);
+
+	bit_pos = plat_interrupt_type_to_line(type, security_state);
+	cm_write_scr_el3_bit(security_state, bit_pos, flag);
+
+	return 0;
+}
+
+/******************************************************************************
+ * This function enables the routing model of interrupt 'type' from the
+ * specified 'security_state' on the local core.
+ *****************************************************************************/
+int enable_intr_rm_local(uint32_t type, uint32_t security_state)
+{
+	uint32_t bit_pos, flag;
+
+	assert(intr_type_descs[type].handler);
+
+	flag = get_interrupt_rm_flag(intr_type_descs[type].flags,
+				security_state);
+
+	bit_pos = plat_interrupt_type_to_line(type, security_state);
+	cm_write_scr_el3_bit(security_state, bit_pos, flag);
+
+	return 0;
+}
+
 /*******************************************************************************
  * This function registers a handler for the 'type' of interrupt specified. It
  * also validates the routing model specified in the 'flags' for this type of
diff --git a/bl32/tsp/aarch64/tsp_entrypoint.S b/bl32/tsp/aarch64/tsp_entrypoint.S
index 1cda165..2714282 100644
--- a/bl32/tsp/aarch64/tsp_entrypoint.S
+++ b/bl32/tsp/aarch64/tsp_entrypoint.S
@@ -108,9 +108,11 @@
 	ldr	x1, =__BSS_SIZE__
 	bl	zeromem16
 
+#if USE_COHERENT_MEM
 	ldr	x0, =__COHERENT_RAM_START__
 	ldr	x1, =__COHERENT_RAM_UNALIGNED_SIZE__
 	bl	zeromem16
+#endif
 
 	/* --------------------------------------------
 	 * Allocate a stack whose memory will be marked
diff --git a/bl32/tsp/tsp.ld.S b/bl32/tsp/tsp.ld.S
index 5d7ffa1..d411ad0 100644
--- a/bl32/tsp/tsp.ld.S
+++ b/bl32/tsp/tsp.ld.S
@@ -98,6 +98,7 @@
         *(xlat_table)
     } >RAM
 
+#if USE_COHERENT_MEM
     /*
      * The base address of the coherent memory section must be page-aligned (4K)
      * to guarantee that the coherent data are stored on their own pages and
@@ -116,12 +117,15 @@
         . = NEXT(4096);
         __COHERENT_RAM_END__ = .;
     } >RAM
+#endif
 
     __BL32_END__ = .;
 
     __BSS_SIZE__ = SIZEOF(.bss);
+#if USE_COHERENT_MEM
     __COHERENT_RAM_UNALIGNED_SIZE__ =
         __COHERENT_RAM_END_UNALIGNED__ - __COHERENT_RAM_START__;
+#endif
 
     ASSERT(. <= BL32_LIMIT, "BL3-2 image has exceeded its limit.")
 }
diff --git a/bl32/tsp/tsp_main.c b/bl32/tsp/tsp_main.c
index 193ba29..c6000e1 100644
--- a/bl32/tsp/tsp_main.c
+++ b/bl32/tsp/tsp_main.c
@@ -43,7 +43,7 @@
  * of trusted SRAM
  ******************************************************************************/
 extern unsigned long __RO_START__;
-extern unsigned long __COHERENT_RAM_END__;
+extern unsigned long __BL32_END__;
 
 /*******************************************************************************
  * Lock to control access to the console
@@ -63,11 +63,11 @@
 
 /*******************************************************************************
  * The BL32 memory footprint starts with an RO sections and ends
- * with a section for coherent RAM. Use it to find the memory size
+ * with the linker symbol __BL32_END__. Use it to find the memory size
  ******************************************************************************/
 #define BL32_TOTAL_BASE (unsigned long)(&__RO_START__)
 
-#define BL32_TOTAL_LIMIT (unsigned long)(&__COHERENT_RAM_END__)
+#define BL32_TOTAL_LIMIT (unsigned long)(&__BL32_END__)
 
 static tsp_args_t *set_smc_args(uint64_t arg0,
 			     uint64_t arg1,
@@ -216,7 +216,7 @@
  * this cpu's architectural state is saved in response to an earlier psci
  * cpu_suspend request.
  ******************************************************************************/
-tsp_args_t *tsp_cpu_suspend_main(uint64_t power_state,
+tsp_args_t *tsp_cpu_suspend_main(uint64_t arg0,
 			       uint64_t arg1,
 			       uint64_t arg2,
 			       uint64_t arg3,
@@ -242,8 +242,6 @@
 
 #if LOG_LEVEL >= LOG_LEVEL_INFO
 	spin_lock(&console_lock);
-	INFO("TSP: cpu 0x%x suspend request. power state: 0x%x\n",
-		mpidr, power_state);
 	INFO("TSP: cpu 0x%x: %d smcs, %d erets %d cpu suspend requests\n",
 		mpidr,
 		tsp_stats[linear_id].smc_count,
diff --git a/docs/firmware-design.md b/docs/firmware-design.md
index be1f5f3..4a50a7b 100644
--- a/docs/firmware-design.md
+++ b/docs/firmware-design.md
@@ -9,11 +9,13 @@
 4.  [Power State Coordination Interface](#4--power-state-coordination-interface)
 5.  [Secure-EL1 Payloads and Dispatchers](#5--secure-el1-payloads-and-dispatchers)
 6.  [Crash Reporting in BL3-1](#6--crash-reporting-in-bl3-1)
-7.  [CPU specific operations framework](#7--cpu-specific-operations-framework)
-8.  [Memory layout of BL images](#8-memory-layout-of-bl-images)
-9.  [Firmware Image Package (FIP)](#9--firmware-image-package-fip)
-10. [Code Structure](#10--code-structure)
-11. [References](#11--references)
+7.  [Guidelines for Reset Handlers](#7--guidelines-for-reset-handlers)
+8.  [CPU specific operations framework](#8--cpu-specific-operations-framework)
+9.  [Memory layout of BL images](#9-memory-layout-of-bl-images)
+10. [Firmware Image Package (FIP)](#10--firmware-image-package-fip)
+11. [Use of coherent memory in Trusted Firmware](#11--use-of-coherent-memory-in-trusted-firmware)
+12. [Code Structure](#12--code-structure)
+13. [References](#13--references)
 
 
 1.  Introduction
@@ -368,10 +370,10 @@
     `ON`; any other cluster is `OFF`. BL3-1 initializes the data structures that
     implement the state machine, including the locks that protect them. BL3-1
     accesses the state of a CPU or cluster immediately after reset and before
-    the MMU is enabled in the warm boot path. It is not currently possible to
-    use 'exclusive' based spinlocks, therefore BL3-1 uses locks based on
-    Lamport's Bakery algorithm instead. BL3-1 allocates these locks in device
-    memory. They are accessible irrespective of MMU state.
+    the data cache is enabled in the warm boot path. It is not currently
+    possible to use 'exclusive' based spinlocks, therefore BL3-1 uses locks
+    based on Lamport's Bakery algorithm instead. BL3-1 allocates these locks in
+    device memory by default.
 
 *   Runtime services initialization:
 
@@ -733,32 +735,43 @@
 
 TODO: Provide design walkthrough of PSCI implementation.
 
-The complete PSCI API is not yet implemented. The following functions are
-currently implemented:
+The PSCI v1.0 specification categorizes APIs as optional and mandatory. All the
+mandatory APIs in PSCI v1.0 and all the APIs in PSCI v0.2 draft specification
+[Power State Coordination Interface PDD] [PSCI] are implemented. The table lists
+the PSCI v1.0 APIs and their support in generic code.
 
--   `PSCI_VERSION`
--   `CPU_OFF`
--   `CPU_ON`
--   `CPU_SUSPEND`
--   `AFFINITY_INFO`
--   `SYSTEM_OFF`
--   `SYSTEM_RESET`
+An API implementation might have a dependency on platform code e.g. CPU_SUSPEND
+requires the platform to export a part of the implementation. Hence the level
+of support of the mandatory APIs depends upon the support exported by the
+platform port as well. The Juno and FVP (all variants) platforms export all the
+required support.
 
-The `CPU_ON`, `CPU_OFF` and `CPU_SUSPEND` functions implement the warm boot
-path in ARM Trusted Firmware. `CPU_ON` and `CPU_OFF` have undergone testing
-on all the supported FVPs. `CPU_SUSPEND` & `AFFINITY_INFO` have undergone
-testing only on the AEM v8 Base FVP. Support for `AFFINITY_INFO` is still
-experimental. Support for `CPU_SUSPEND` is stable for entry into power down
-states. Standby states are currently not supported. `PSCI_VERSION` is
-present but completely untested in this version of the software.
+| PSCI v1.0 API         |Supported| Comments                                  |
+|:----------------------|:--------|:------------------------------------------|
+|`PSCI_VERSION`         | Yes     | The version returned is 1.0               |
+|`CPU_SUSPEND`          | Yes*    | The original `power_state` format is used |
+|`CPU_OFF`              | Yes*    |                                           |
+|`CPU_ON`               | Yes*    |                                           |
+|`AFFINITY_INFO`        | Yes     |                                           |
+|`MIGRATE`              | Yes**   |                                           |
+|`MIGRATE_INFO_TYPE`    | Yes**   |                                           |
+|`MIGRATE_INFO_CPU`     | Yes**   |                                           |
+|`SYSTEM_OFF`           | Yes*    |                                           |
+|`SYSTEM_RESET`         | Yes*    |                                           |
+|`PSCI_FEATURES`        | Yes     |                                           |
+|`CPU_FREEZE`           | No      |                                           |
+|`CPU_DEFAULT_SUSPEND`  | No      |                                           |
+|`CPU_HW_STATE`         | No      |                                           |
+|`SYSTEM_SUSPEND`       | No      |                                           |
+|`PSCI_SET_SUSPEND_MODE`| No      |                                           |
+|`PSCI_STAT_RESIDENCY`  | No      |                                           |
+|`PSCI_STAT_COUNT`      | No      |                                           |
 
-The following unsupported functions return with a error code as documented in
-the [Power State Coordination Interface PDD] [PSCI].
+*Note : These PSCI APIs require platform power management hooks to be
+registered with the generic PSCI code to be supported.
 
--   `MIGRATE` : -1 (NOT_SUPPORTED)
--   `MIGRATE_INFO_TYPE` : 2 (Trusted OS is either not present or does not
-     require migration)
--   `MIGRATE_INFO_UP_CPU` : 0 (Return value is UNDEFINED)
+**Note : These PSCI APIs require appropriate Secure Payload Dispatcher
+hooks to be registered with the generic PSCI code to be supported.
 
 
 5.  Secure-EL1 Payloads and Dispatchers
@@ -948,8 +961,48 @@
     fpexc32_el2	:0x0000000004000700
     sp_el0	:0x0000000004010780
 
+7.  Guidelines for Reset Handlers
+---------------------------------
 
-7.  CPU specific operations framework
+Trusted Firmware implements a framework that allows CPU and platform ports to
+perform actions immediately after a CPU is released from reset in both the cold
+and warm boot paths. This is done by calling the `reset_handler()` function in
+both the BL1 and BL3-1 images. It in turn calls the platform and CPU specific
+reset handling functions.
+
+Details for implementing a CPU specific reset handler can be found in
+Section 8. Details for implementing a platform specific reset handler can be
+found in the [Porting Guide](see the `plat_reset_handler()` function).
+
+When adding functionality to a reset handler, the following points should be
+kept in mind.
+
+1.   The first reset handler in the system exists either in a ROM image
+     (e.g. BL1), or BL3-1 if `RESET_TO_BL31` is true. This may be detected at
+     compile time using the constant `FIRST_RESET_HANDLER_CALL`.
+
+2.   When considering ROM images, it's important to consider non TF-based ROMs
+     and ROMs based on previous versions of the TF code.
+
+3.   If the functionality should be applied to a ROM and there is no possibility
+     of a ROM being used that does not apply the functionality (or equivalent),
+     then the functionality should be applied within a `#if
+     FIRST_RESET_HANDLER_CALL` block.
+
+4.   If the functionality should execute in BL3-1 in order to override or
+     supplement a ROM version of the functionality, then the functionality
+     should be applied in the `#else` part of a `#if FIRST_RESET_HANDLER_CALL`
+     block.
+
+5.   If the functionality should be applied to a ROM but there is a possibility
+     of ROMs being used that do not apply the functionality, then the
+     functionality should be applied outside of a `FIRST_RESET_HANDLER_CALL`
+     block, so that BL3-1 has an opportunity to apply the functionality instead.
+     In this case, additional code may be needed to cope with different ROMs
+     that do or do not apply the functionality.
+
+
+8.  CPU specific operations framework
 -----------------------------
 
 Certain aspects of the ARMv8 architecture are implementation defined,
@@ -1014,6 +1067,9 @@
 the returned `cpu_ops` is then invoked which executes the required reset
 handling for that CPU and also any errata workarounds enabled by the platform.
 
+Refer to Section "Guidelines for Reset Handlers" for general guidelines
+regarding placement of code in a reset handler.
+
 ### CPU specific power down sequence
 
 During the BL3-1 initialization sequence, the pointer to the matching `cpu_ops`
@@ -1044,7 +1100,7 @@
 expected by the crash reporting framework.
 
 
-8. Memory layout of BL images
+9. Memory layout of BL images
 -----------------------------
 
 Each bootloader image can be divided in 2 parts:
@@ -1127,9 +1183,10 @@
 * `__BSS_START__` This address must be aligned on a 16-byte boundary.
 * `__BSS_SIZE__`
 
-Similarly, the coherent memory section must be zero-initialised. Also, the MMU
-setup code needs to know the extents of this section to set the right memory
-attributes for it. The following linker symbols are defined for this purpose:
+Similarly, the coherent memory section (if enabled) must be zero-initialised.
+Also, the MMU setup code needs to know the extents of this section to set the
+right memory attributes for it. The following linker symbols are defined for
+this purpose:
 
 * `__COHERENT_RAM_START__` This address must be aligned on a page-size boundary.
 * `__COHERENT_RAM_END__` This address must be aligned on a page-size boundary.
@@ -1399,7 +1456,7 @@
 images in Trusted SRAM.
 
 
-9.  Firmware Image Package (FIP)
+10.  Firmware Image Package (FIP)
 ---------------------------------
 
 Using a Firmware Image Package (FIP) allows for packing bootloader images (and
@@ -1477,7 +1534,208 @@
 platform policy can be modified to allow additional images.
 
 
-10.  Code Structure
+11. Use of coherent memory in Trusted Firmware
+----------------------------------------------
+
+There might be loss of coherency when physical memory with mismatched
+shareability, cacheability and memory attributes is accessed by multiple CPUs
+(refer to section B2.9 of [ARM ARM] for more details). This possibility occurs
+in Trusted Firmware during power up/down sequences when coherency, MMU and
+caches are turned on/off incrementally.
+
+Trusted Firmware defines coherent memory as a region of memory with Device
+nGnRE attributes in the translation tables. The translation granule size in
+Trusted Firmware is 4KB. This is the smallest possible size of the coherent
+memory region.
+
+By default, all data structures which are susceptible to accesses with
+mismatched attributes from various CPUs are allocated in a coherent memory
+region (refer to section 2.1 of [Porting Guide]). The coherent memory region
+accesses are Outer Shareable, non-cacheable and they can be accessed
+with the Device nGnRE attributes when the MMU is turned on. Hence, at the
+expense of at least an extra page of memory, Trusted Firmware is able to work
+around coherency issues due to mismatched memory attributes.
+
+The alternative to the above approach is to allocate the susceptible data
+structures in Normal WriteBack WriteAllocate Inner shareable memory. This
+approach requires the data structures to be designed so that it is possible to
+work around the issue of mismatched memory attributes by performing software
+cache maintenance on them.
+
+### Disabling the use of coherent memory in Trusted Firmware
+
+It might be desirable to avoid the cost of allocating coherent memory on
+platforms which are memory constrained. Trusted Firmware enables inclusion of
+coherent memory in firmware images through the build flag `USE_COHERENT_MEM`.
+This flag is enabled by default. It can be disabled to choose the second
+approach described above.
+
+The below sections analyze the data structures allocated in the coherent memory
+region and the changes required to allocate them in normal memory.
+
+### PSCI Affinity map nodes
+
+The `psci_aff_map` data structure stores the hierarchial node information for
+each affinity level in the system including the PSCI states associated with them.
+By default, this data structure is allocated in the coherent memory region in
+the Trusted Firmware because it can be accessed by multiple CPUs, either with
+their caches enabled or disabled.
+
+	typedef struct aff_map_node {
+		unsigned long mpidr;
+		unsigned char ref_count;
+		unsigned char state;
+		unsigned char level;
+	#if USE_COHERENT_MEM
+		bakery_lock_t lock;
+	#else
+		unsigned char aff_map_index;
+	#endif
+	} aff_map_node_t;
+
+In order to move this data structure to normal memory, the use of each of its
+fields must be analyzed. Fields like `mpidr` and `level` are only written once
+during cold boot. Hence removing them from coherent memory involves only doing
+a clean and invalidate of the cache lines after these fields are written.
+
+The fields `state` and `ref_count` can be concurrently accessed by multiple
+CPUs in different cache states. A Lamport's Bakery lock is used to ensure mutual
+exlusion to these fields. As a result, it is possible to move these fields out
+of coherent memory by performing software cache maintenance on them. The field
+`lock` is the bakery lock data structure when `USE_COHERENT_MEM` is enabled.
+The `aff_map_index` is used to identify the bakery lock when `USE_COHERENT_MEM`
+is disabled.
+
+### Bakery lock data
+
+The bakery lock data structure `bakery_lock_t` is allocated in coherent memory
+and is accessed by multiple CPUs with mismatched attributes. `bakery_lock_t` is
+defined as follows:
+
+    typedef struct bakery_lock {
+        int owner;
+        volatile char entering[BAKERY_LOCK_MAX_CPUS];
+        volatile unsigned number[BAKERY_LOCK_MAX_CPUS];
+    } bakery_lock_t;
+
+It is a characteristic of Lamport's Bakery algorithm that the volatile per-CPU
+fields can be read by all CPUs but only written to by the owning CPU.
+
+Depending upon the data cache line size, the per-CPU fields of the
+`bakery_lock_t` structure for multiple CPUs may exist on a single cache line.
+These per-CPU fields can be read and written during lock contention by multiple
+CPUs with mismatched memory attributes. Since these fields are a part of the
+lock implementation, they do not have access to any other locking primitive to
+safeguard against the resulting coherency issues. As a result, simple software
+cache maintenance is not enough to allocate them in coherent memory. Consider
+the following example.
+
+CPU0 updates its per-CPU field with data cache enabled. This write updates a
+local cache line which contains a copy of the fields for other CPUs as well. Now
+CPU1 updates its per-CPU field of the `bakery_lock_t` structure with data cache
+disabled. CPU1 then issues a DCIVAC operation to invalidate any stale copies of
+its field in any other cache line in the system. This operation will invalidate
+the update made by CPU0 as well.
+
+To use bakery locks when `USE_COHERENT_MEM` is disabled, the lock data structure
+has been redesigned. The changes utilise the characteristic of Lamport's Bakery
+algorithm mentioned earlier. The per-CPU fields of the new lock structure are
+aligned such that they are allocated on separate cache lines. The per-CPU data
+framework in Trusted Firmware is used to achieve this. This enables software to
+perform software cache maintenance on the lock data structure without running
+into coherency issues associated with mismatched attributes.
+
+The per-CPU data framework enables consolidation of data structures on the
+fewest cache lines possible. This saves memory as compared to the scenario where
+each data structure is separately aligned to the cache line boundary to achieve
+the same effect.
+
+The bakery lock data structure `bakery_info_t` is defined for use when
+`USE_COHERENT_MEM` is disabled as follows:
+
+    typedef struct bakery_info {
+        /*
+         * The lock_data is a bit-field of 2 members:
+         * Bit[0]       : choosing. This field is set when the CPU is
+         *                choosing its bakery number.
+         * Bits[1 - 15] : number. This is the bakery number allocated.
+         */
+         volatile uint16_t lock_data;
+    } bakery_info_t;
+
+The `bakery_info_t` represents a single per-CPU field of one lock and
+the combination of corresponding `bakery_info_t` structures for all CPUs in the
+system represents the complete bakery lock. It is embedded in the per-CPU
+data framework `cpu_data` as shown below:
+
+      CPU0 cpu_data
+    ------------------
+    | ....           |
+    |----------------|
+    | `bakery_info_t`| <-- Lock_0 per-CPU field
+    |    Lock_0      |     for CPU0
+    |----------------|
+    | `bakery_info_t`| <-- Lock_1 per-CPU field
+    |    Lock_1      |     for CPU0
+    |----------------|
+    | ....           |
+    |----------------|
+    | `bakery_info_t`| <-- Lock_N per-CPU field
+    |    Lock_N      |     for CPU0
+    ------------------
+
+
+      CPU1 cpu_data
+    ------------------
+    | ....           |
+    |----------------|
+    | `bakery_info_t`| <-- Lock_0 per-CPU field
+    |    Lock_0      |     for CPU1
+    |----------------|
+    | `bakery_info_t`| <-- Lock_1 per-CPU field
+    |    Lock_1      |     for CPU1
+    |----------------|
+    | ....           |
+    |----------------|
+    | `bakery_info_t`| <-- Lock_N per-CPU field
+    |    Lock_N      |     for CPU1
+    ------------------
+
+Consider a system of 2 CPUs with 'N' bakery locks as shown above.  For an
+operation on Lock_N, the corresponding `bakery_info_t` in both CPU0 and CPU1
+`cpu_data` need to be fetched and appropriate cache operations need to be
+performed for each access.
+
+For multiple bakery locks, an array of `bakery_info_t` is declared in `cpu_data`
+and each lock is given an `id` to identify it in the array.
+
+### Non Functional Impact of removing coherent memory
+
+Removal of the coherent memory region leads to the additional software overhead
+of performing cache maintenance for the affected data structures. However, since
+the memory where the data structures are allocated is cacheable, the overhead is
+mostly mitigated by an increase in performance.
+
+There is however a performance impact for bakery locks, due to:
+*   Additional cache maintenance operations, and
+*   Multiple cache line reads for each lock operation, since the bakery locks
+    for each CPU are distributed across different cache lines.
+
+The implementation has been optimized to mimimize this additional overhead.
+Measurements indicate that when bakery locks are allocated in Normal memory, the
+minimum latency of acquiring a lock is on an average 3-4 micro seconds whereas
+in Device memory the same is 2 micro seconds. The measurements were done on the
+Juno ARM development platform.
+
+As mentioned earlier, almost a page of memory can be saved by disabling
+`USE_COHERENT_MEM`. Each platform needs to consider these trade-offs to decide
+whether coherent memory should be used. If a platform disables
+`USE_COHERENT_MEM` and needs to use bakery locks in the porting layer, it should
+reserve memory in `cpu_data` by defining the macro `PLAT_PCPU_DATA_SIZE` (see
+the [Porting Guide]). Refer to the reference platform code for examples.
+
+
+12.  Code Structure
 -------------------
 
 Trusted Firmware code is logically divided between the three boot loader
@@ -1522,7 +1780,7 @@
 kernel at boot time. These can be found in the `fdts` directory.
 
 
-11.  References
+13.  References
 ---------------
 
 1.  Trusted Board Boot Requirements CLIENT PDD (ARM DEN 0006B-5). Available
@@ -1538,7 +1796,7 @@
 
 _Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved._
 
-
+[ARM ARM]:          http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0487a.e/index.html "ARMv8-A Reference Manual (ARM DDI0487A.E)"
 [PSCI]:             http://infocenter.arm.com/help/topic/com.arm.doc.den0022b/index.html "Power State Coordination Interface PDD (ARM DEN 0022B.b)"
 [SMCCC]:            http://infocenter.arm.com/help/topic/com.arm.doc.den0028a/index.html "SMC Calling Convention PDD (ARM DEN 0028A)"
 [UUID]:             https://tools.ietf.org/rfc/rfc4122.txt "A Universally Unique IDentifier (UUID) URN Namespace"
diff --git a/docs/porting-guide.md b/docs/porting-guide.md
index 3855ca7..747cb00 100644
--- a/docs/porting-guide.md
+++ b/docs/porting-guide.md
@@ -63,11 +63,11 @@
 stage. In the ARM FVP port, each BL stage configures the MMU in its platform-
 specific architecture setup function, for example `blX_plat_arch_setup()`.
 
-Each platform must allocate a block of identity mapped secure memory with
-Device-nGnRE attributes aligned to page boundary (4K) for each BL stage. This
-memory is identified by the section name `tzfw_coherent_mem` so that its
-possible for the firmware to place variables in it using the following C code
-directive:
+If the build option `USE_COHERENT_MEM` is enabled, each platform must allocate a
+block of identity mapped secure memory with Device-nGnRE attributes aligned to
+page boundary (4K) for each BL stage. This memory is identified by the section
+name `tzfw_coherent_mem` so that its possible for the firmware to place
+variables in it using the following C code directive:
 
     __attribute__ ((section("tzfw_coherent_mem")))
 
@@ -246,6 +246,17 @@
     entities than this value using `io_open()` will fail with
     IO_RESOURCES_EXHAUSTED.
 
+If the platform needs to allocate data within the per-cpu data framework in
+BL3-1, it should define the following macro. Currently this is only required if
+the platform decides not to use the coherent memory section by undefining the
+USE_COHERENT_MEM build flag. In this case, the framework allocates the required
+memory within the the per-cpu data to minimize wastage.
+
+*   **#define : PLAT_PCPU_DATA_SIZE**
+
+    Defines the memory (in bytes) to be reserved within the per-cpu data
+    structure for use by the platform layer.
+
 The following constants are optional. They should be defined when the platform
 memory layout implies some image overlaying like on FVP.
 
@@ -472,7 +483,9 @@
 preserve the value in x10 register as it is used by the caller to store the
 return address.
 
-The default implementation doesn't do anything.
+The default implementation doesn't do anything. If a platform needs to override
+the default implementation, refer to the [Firmware Design Guide] for general
+guidelines regarding placement of code in a reset handler.
 
 ### Function : plat_disable_acp()
 
@@ -1083,61 +1096,61 @@
 
 A description of each member of this structure is given below. Please refer to
 the ARM FVP specific implementation of these handlers in [plat/fvp/fvp_pm.c]
-as an example. A platform port may choose not implement some of the power
-management operations.
+as an example. A platform port is expected to implement these handlers if the
+corresponding PSCI operation is to be supported and these handlers are expected
+to succeed if the return type is `void`.
 
 #### plat_pm_ops.affinst_standby()
 
 Perform the platform-specific setup to enter the standby state indicated by the
-passed argument.
+passed argument. The generic code expects the handler to succeed.
 
 #### plat_pm_ops.affinst_on()
 
 Perform the platform specific setup to power on an affinity instance, specified
-by the `MPIDR` (first argument) and `affinity level` (fourth argument). The
-`state` (fifth argument) contains the current state of that affinity instance
+by the `MPIDR` (first argument) and `affinity level` (third argument). The
+`state` (fourth argument) contains the current state of that affinity instance
 (ON or OFF). This is useful to determine whether any action must be taken. For
 example, while powering on a CPU, the cluster that contains this CPU might
 already be in the ON state. The platform decides what actions must be taken to
 transition from the current state to the target state (indicated by the power
-management operation).
+management operation). The generic code expects the platform to return
+E_SUCCESS on success or E_INTERN_FAIL for any failure.
 
 #### plat_pm_ops.affinst_off()
 
-Perform the platform specific setup to power off an affinity instance in the
-`MPIDR` of the calling CPU. It is called by the PSCI `CPU_OFF` API
-implementation.
+Perform the platform specific setup to power off an affinity instance of the
+calling CPU. It is called by the PSCI `CPU_OFF` API implementation.
 
-The `MPIDR` (first argument), `affinity level` (second argument) and `state`
-(third argument) have a similar meaning as described in the `affinst_on()`
-operation. They are used to identify the affinity instance on which the call
-is made and its current state. This gives the platform port an indication of the
+The `affinity level` (first argument) and `state` (second argument) have
+a similar meaning as described in the `affinst_on()` operation. They are
+used to identify the affinity instance on which the call is made and its
+current state. This gives the platform port an indication of the
 state transition it must make to perform the requested action. For example, if
 the calling CPU is the last powered on CPU in the cluster, after powering down
 affinity level 0 (CPU), the platform port should power down affinity level 1
-(the cluster) as well.
+(the cluster) as well. The generic code expects the handler to succeed.
 
 #### plat_pm_ops.affinst_suspend()
 
-Perform the platform specific setup to power off an affinity instance in the
-`MPIDR` of the calling CPU. It is called by the PSCI `CPU_SUSPEND` API
+Perform the platform specific setup to power off an affinity instance of the
+calling CPU. It is called by the PSCI `CPU_SUSPEND` API
 implementation.
 
-The `MPIDR` (first argument), `affinity level` (third argument) and `state`
-(fifth argument) have a similar meaning as described in the `affinst_on()`
-operation. They are used to identify the affinity instance on which the call
-is made and its current state. This gives the platform port an indication of the
-state transition it must make to perform the requested action. For example, if
-the calling CPU is the last powered on CPU in the cluster, after powering down
-affinity level 0 (CPU), the platform port should power down affinity level 1
-(the cluster) as well.
+The `affinity level` (second argument) and `state` (third argument) have a
+similar meaning as described in the `affinst_on()` operation. They are used to
+identify the affinity instance on which the call is made and its current state.
+This gives the platform port an indication of the state transition it must
+make to perform the requested action. For example, if the calling CPU is the
+last powered on CPU in the cluster, after powering down affinity level 0 (CPU),
+the platform port should power down affinity level 1 (the cluster) as well.
 
 The difference between turning an affinity instance off versus suspending it
 is that in the former case, the affinity instance is expected to re-initialize
 its state when its next powered on (see `affinst_on_finish()`). In the latter
 case, the affinity instance is expected to save enough state so that it can
 resume execution by restoring this state when its powered on (see
-`affinst_suspend_finish()`).
+`affinst_suspend_finish()`).The generic code expects the handler to succeed.
 
 #### plat_pm_ops.affinst_on_finish()
 
@@ -1147,8 +1160,9 @@
 this CPU to enter the normal world and also provide secure runtime firmware
 services.
 
-The `MPIDR` (first argument), `affinity level` (second argument) and `state`
-(third argument) have a similar meaning as described in the previous operations.
+The `affinity level` (first argument) and `state` (second argument) have a
+similar meaning as described in the previous operations. The generic code
+expects the handler to succeed.
 
 #### plat_pm_ops.affinst_on_suspend()
 
@@ -1159,8 +1173,25 @@
 restore the saved state for this CPU to resume execution in the normal world
 and also provide secure runtime firmware services.
 
-The `MPIDR` (first argument), `affinity level` (second argument) and `state`
-(third argument) have a similar meaning as described in the previous operations.
+The `affinity level` (first argument) and `state` (second argument) have a
+similar meaning as described in the previous operations. The generic code
+expects the platform to succeed.
+
+#### plat_pm_ops.validate_power_state()
+
+This function is called by the PSCI implementation during the `CPU_SUSPEND`
+call to validate the `power_state` parameter of the PSCI API. If the
+`power_state` is known to be invalid, the platform must return
+PSCI_E_INVALID_PARAMS as error, which is propagated back to the normal
+world PSCI client.
+
+#### plat_pm_ops.validate_ns_entrypoint()
+
+This function is called by the PSCI implementation during the `CPU_SUSPEND`
+and `CPU_ON` calls to validate the non-secure `entry_point` parameter passed
+by the normal world. If the `entry_point` is known to be invalid, the platform
+must return PSCI_E_INVALID_PARAMS as error, which is propagated back to the
+normal world PSCI client.
 
 BL3-1 platform initialization code must also detect the system topology and
 the state of each affinity instance in the topology. This information is
@@ -1447,6 +1478,7 @@
 [IMF Design Guide]:                   interrupt-framework-design.md
 [User Guide]:                         user-guide.md
 [FreeBSD]:                            http://www.freebsd.org
+[Firmware Design Guide]:              firmware-design.md
 
 [plat/common/aarch64/platform_mp_stack.S]: ../plat/common/aarch64/platform_mp_stack.S
 [plat/common/aarch64/platform_up_stack.S]: ../plat/common/aarch64/platform_up_stack.S
diff --git a/docs/user-guide.md b/docs/user-guide.md
index f5a79e4..4209be7 100644
--- a/docs/user-guide.md
+++ b/docs/user-guide.md
@@ -245,6 +245,17 @@
     synchronous method) or 1 (BL3-2 is initialized using asynchronous method).
     Default is 0.
 
+*   `USE_COHERENT_MEM`: This flag determines whether to include the coherent
+    memory region in the BL memory map or not (see "Use of Coherent memory in
+    Trusted Firmware" section in [Firmware Design]). It can take the value 1
+    (Coherent memory region is included) or 0 (Coherent memory region is
+    excluded). Default is 1.
+
+*   `TSPD_ROUTE_IRQ_TO_EL3`: A non zero value enables the routing model
+    for non-secure interrupts in which they are routed to EL3 (TSPD). The
+    default model (when the value is 0) is to route non-secure interrupts
+    to S-EL1 (TSP).
+
 #### FVP specific build options
 
 *   `FVP_TSP_RAM_LOCATION`: location of the TSP binary. Options:
diff --git a/fdts/fvp-base-gicv2-psci.dtb b/fdts/fvp-base-gicv2-psci.dtb
index ae3b4c5..b8a31ce 100644
--- a/fdts/fvp-base-gicv2-psci.dtb
+++ b/fdts/fvp-base-gicv2-psci.dtb
Binary files differ
diff --git a/fdts/fvp-base-gicv2-psci.dts b/fdts/fvp-base-gicv2-psci.dts
index 43518a3..c1c9efb 100644
--- a/fdts/fvp-base-gicv2-psci.dts
+++ b/fdts/fvp-base-gicv2-psci.dts
@@ -52,7 +52,7 @@
 	};
 
 	psci {
-		compatible = "arm,psci";
+		compatible = "arm,psci-1.0", "arm,psci-0.2", "arm,psci";
 		method = "smc";
 		cpu_suspend = <0xc4000001>;
 		cpu_off = <0x84000002>;
diff --git a/fdts/fvp-base-gicv2legacy-psci.dtb b/fdts/fvp-base-gicv2legacy-psci.dtb
index 2d23968..4270623 100644
--- a/fdts/fvp-base-gicv2legacy-psci.dtb
+++ b/fdts/fvp-base-gicv2legacy-psci.dtb
Binary files differ
diff --git a/fdts/fvp-base-gicv2legacy-psci.dts b/fdts/fvp-base-gicv2legacy-psci.dts
index 06d33e7..7bd5ea2 100644
--- a/fdts/fvp-base-gicv2legacy-psci.dts
+++ b/fdts/fvp-base-gicv2legacy-psci.dts
@@ -52,7 +52,7 @@
 	};
 
 	psci {
-		compatible = "arm,psci";
+		compatible = "arm,psci-1.0", "arm,psci-0.2", "arm,psci";
 		method = "smc";
 		cpu_suspend = <0xc4000001>;
 		cpu_off = <0x84000002>;
diff --git a/fdts/fvp-base-gicv3-psci.dtb b/fdts/fvp-base-gicv3-psci.dtb
index c2f63eb..27c3f93 100644
--- a/fdts/fvp-base-gicv3-psci.dtb
+++ b/fdts/fvp-base-gicv3-psci.dtb
Binary files differ
diff --git a/fdts/fvp-base-gicv3-psci.dts b/fdts/fvp-base-gicv3-psci.dts
index 6afa44c..32e577a 100644
--- a/fdts/fvp-base-gicv3-psci.dts
+++ b/fdts/fvp-base-gicv3-psci.dts
@@ -52,7 +52,7 @@
 	};
 
 	psci {
-		compatible = "arm,psci";
+		compatible = "arm,psci-1.0", "arm,psci-0.2", "arm,psci";
 		method = "smc";
 		cpu_suspend = <0xc4000001>;
 		cpu_off = <0x84000002>;
diff --git a/fdts/fvp-foundation-gicv2-psci.dtb b/fdts/fvp-foundation-gicv2-psci.dtb
index df8e629..5b92e5e 100644
--- a/fdts/fvp-foundation-gicv2-psci.dtb
+++ b/fdts/fvp-foundation-gicv2-psci.dtb
Binary files differ
diff --git a/fdts/fvp-foundation-gicv2-psci.dts b/fdts/fvp-foundation-gicv2-psci.dts
index 15ff471..c04d535 100644
--- a/fdts/fvp-foundation-gicv2-psci.dts
+++ b/fdts/fvp-foundation-gicv2-psci.dts
@@ -52,7 +52,7 @@
 	};
 
 	psci {
-		compatible = "arm,psci";
+		compatible = "arm,psci-1.0", "arm,psci-0.2", "arm,psci";
 		method = "smc";
 		cpu_suspend = <0xc4000001>;
 		cpu_off = <0x84000002>;
diff --git a/fdts/fvp-foundation-gicv2legacy-psci.dtb b/fdts/fvp-foundation-gicv2legacy-psci.dtb
index ed43254..71f6ae2 100644
--- a/fdts/fvp-foundation-gicv2legacy-psci.dtb
+++ b/fdts/fvp-foundation-gicv2legacy-psci.dtb
Binary files differ
diff --git a/fdts/fvp-foundation-gicv2legacy-psci.dts b/fdts/fvp-foundation-gicv2legacy-psci.dts
index a923c34..8dba04c 100644
--- a/fdts/fvp-foundation-gicv2legacy-psci.dts
+++ b/fdts/fvp-foundation-gicv2legacy-psci.dts
@@ -83,7 +83,7 @@
 		};
 
 		idle-states {
-			entry-method = "arm,psci";
+			entry-method = "arm,psci-1.0", "arm,psci-0.2", "arm,psci";
 
 			CPU_SLEEP_0: cpu-sleep-0 {
 				compatible = "arm,idle-state";
diff --git a/fdts/fvp-foundation-gicv3-psci.dtb b/fdts/fvp-foundation-gicv3-psci.dtb
index 724847c..d7d9e14 100644
--- a/fdts/fvp-foundation-gicv3-psci.dtb
+++ b/fdts/fvp-foundation-gicv3-psci.dtb
Binary files differ
diff --git a/fdts/fvp-foundation-gicv3-psci.dts b/fdts/fvp-foundation-gicv3-psci.dts
index 49290e4..48a1afc 100644
--- a/fdts/fvp-foundation-gicv3-psci.dts
+++ b/fdts/fvp-foundation-gicv3-psci.dts
@@ -52,7 +52,7 @@
 	};
 
 	psci {
-		compatible = "arm,psci";
+		compatible = "arm,psci-1.0", "arm,psci-0.2", "arm,psci";
 		method = "smc";
 		cpu_suspend = <0xc4000001>;
 		cpu_off = <0x84000002>;
diff --git a/include/bl31/cpu_data.h b/include/bl31/cpu_data.h
index c886e2b..1926e29 100644
--- a/include/bl31/cpu_data.h
+++ b/include/bl31/cpu_data.h
@@ -32,7 +32,7 @@
 #define __CPU_DATA_H__
 
 /* Offsets for the cpu_data structure */
-#define CPU_DATA_CRASH_BUF_OFFSET	0x20
+#define CPU_DATA_CRASH_BUF_OFFSET	0x18
 #if CRASH_REPORTING
 #define CPU_DATA_LOG2SIZE		7
 #else
@@ -45,10 +45,20 @@
 #ifndef __ASSEMBLY__
 
 #include <arch_helpers.h>
+#include <cassert.h>
 #include <platform_def.h>
 #include <psci.h>
 #include <stdint.h>
 
+/* Offsets for the cpu_data structure */
+#define CPU_DATA_PSCI_LOCK_OFFSET	__builtin_offsetof\
+		(cpu_data_t, psci_svc_cpu_data.pcpu_bakery_info)
+
+#if PLAT_PCPU_DATA_SIZE
+#define CPU_DATA_PLAT_PCPU_OFFSET	__builtin_offsetof\
+		(cpu_data_t, platform_cpu_data)
+#endif
+
 /*******************************************************************************
  * Function & variable prototypes
  ******************************************************************************/
@@ -69,10 +79,13 @@
 typedef struct cpu_data {
 	void *cpu_context[2];
 	uint64_t cpu_ops_ptr;
-	struct psci_cpu_data psci_svc_cpu_data;
 #if CRASH_REPORTING
 	uint64_t crash_buf[CPU_DATA_CRASH_BUF_SIZE >> 3];
 #endif
+	struct psci_cpu_data psci_svc_cpu_data;
+#if PLAT_PCPU_DATA_SIZE
+	uint8_t platform_cpu_data[PLAT_PCPU_DATA_SIZE];
+#endif
 } __aligned(CACHE_WRITEBACK_GRANULE) cpu_data_t;
 
 #if CRASH_REPORTING
diff --git a/include/bl31/interrupt_mgmt.h b/include/bl31/interrupt_mgmt.h
index 3a2c00c..e07ddf8 100644
--- a/include/bl31/interrupt_mgmt.h
+++ b/include/bl31/interrupt_mgmt.h
@@ -63,7 +63,8 @@
 #define INTR_NS_VALID_RM0		0x0
 /* Routed to EL1/EL2 from NS and to EL3 from Secure */
 #define INTR_NS_VALID_RM1		0x1
-
+/* This is the default routing model */
+#define INTR_DEFAULT_RM		0x0
 
 /*******************************************************************************
  * Constants for the _individual_ routing model bits in the 'flags' field for
@@ -123,6 +124,8 @@
 					interrupt_type_handler_t handler,
 					uint32_t flags);
 interrupt_type_handler_t get_interrupt_type_handler(uint32_t interrupt_type);
+int disable_intr_rm_local(uint32_t type, uint32_t security_state);
+int enable_intr_rm_local(uint32_t type, uint32_t security_state);
 
 #endif /*__ASSEMBLY__*/
 #endif /* __INTERRUPT_MGMT_H__ */
diff --git a/include/bl31/runtime_svc.h b/include/bl31/runtime_svc.h
index 2d84986..f112418 100644
--- a/include/bl31/runtime_svc.h
+++ b/include/bl31/runtime_svc.h
@@ -176,6 +176,14 @@
 #define SMC_SET_EL3(_h, _e, _v) \
 	write_ctx_reg(get_el3state_ctx(_h), (_e), (_v));
 
+/* The macro below is used to identify a Standard Service SMC call */
+#define is_std_svc_call(_fid)		((((_fid) >> FUNCID_OEN_SHIFT) & \
+					   FUNCID_OEN_MASK) == OEN_STD_START)
+
+/* The macro below is used to identify a valid Fast SMC call */
+#define is_valid_fast_smc(_fid)		((!(((_fid) >> 16) & 0xff)) && \
+					   (GET_SMC_TYPE(_fid) == SMC_TYPE_FAST))
+
 /*
  * Prototype for runtime service SMC handler function. x0 (SMC Function ID) to
  * x4 are as passed by the caller. Rest of the arguments to SMC and the context
diff --git a/include/bl31/services/psci.h b/include/bl31/services/psci.h
index 6c23f1b..80bc53b 100644
--- a/include/bl31/services/psci.h
+++ b/include/bl31/services/psci.h
@@ -31,6 +31,17 @@
 #ifndef __PSCI_H__
 #define __PSCI_H__
 
+#include <bakery_lock.h>
+#include <platform_def.h>	/* for PLATFORM_NUM_AFFS */
+
+/*******************************************************************************
+ * Number of affinity instances whose state this psci imp. can track
+ ******************************************************************************/
+#ifdef PLATFORM_NUM_AFFS
+#define PSCI_NUM_AFFS		PLATFORM_NUM_AFFS
+#else
+#define PSCI_NUM_AFFS		(2 * PLATFORM_CORE_COUNT)
+#endif
 
 /*******************************************************************************
  * Defines for runtime services func ids
@@ -50,11 +61,15 @@
 #define PSCI_MIG_INFO_UP_CPU_AARCH64	0xc4000007
 #define PSCI_SYSTEM_OFF			0x84000008
 #define PSCI_SYSTEM_RESET		0x84000009
+#define PSCI_FEATURES			0x8400000A
+
+/* Macro to help build the psci capabilities bitfield */
+#define define_psci_cap(x)		(1 << (x & 0x1f))
 
 /*
  * Number of PSCI calls (above) implemented
  */
-#define PSCI_NUM_CALLS			15
+#define PSCI_NUM_CALLS			16
 
 /*******************************************************************************
  * PSCI Migrate and friends
@@ -78,18 +93,30 @@
 #define PSTATE_TYPE_STANDBY	0x0
 #define PSTATE_TYPE_POWERDOWN	0x1
 
-#define psci_get_pstate_id(pstate)	(pstate >> PSTATE_ID_SHIFT) & \
-					PSTATE_ID_MASK
-#define psci_get_pstate_type(pstate)	(pstate >> PSTATE_TYPE_SHIFT) & \
-					PSTATE_TYPE_MASK
-#define psci_get_pstate_afflvl(pstate)	(pstate >> PSTATE_AFF_LVL_SHIFT) & \
-					PSTATE_AFF_LVL_MASK
+#define psci_get_pstate_id(pstate)	((pstate >> PSTATE_ID_SHIFT) & \
+					PSTATE_ID_MASK)
+#define psci_get_pstate_type(pstate)	((pstate >> PSTATE_TYPE_SHIFT) & \
+					PSTATE_TYPE_MASK)
+#define psci_get_pstate_afflvl(pstate)	((pstate >> PSTATE_AFF_LVL_SHIFT) & \
+					PSTATE_AFF_LVL_MASK)
+
+/*******************************************************************************
+ * PSCI CPU_FEATURES feature flag specific defines
+ ******************************************************************************/
+/* Features flags for CPU SUSPEND power state parameter format. Bits [1:1] */
+#define FF_PSTATE_SHIFT		1
+#define FF_PSTATE_ORIG		0
+#define FF_PSTATE_EXTENDED	1
+
+/* Features flags for CPU SUSPEND OS Initiated mode support. Bits [0:0] */
+#define FF_MODE_SUPPORT_SHIFT		0
+#define FF_SUPPORTS_OS_INIT_MODE	1
 
 /*******************************************************************************
  * PSCI version
  ******************************************************************************/
-#define PSCI_MAJOR_VER		(0 << 16)
-#define PSCI_MINOR_VER		0x2
+#define PSCI_MAJOR_VER		(1 << 16)
+#define PSCI_MINOR_VER		0x0
 
 /*******************************************************************************
  * PSCI error codes
@@ -140,6 +167,9 @@
 	uint32_t power_state;
 	uint32_t max_phys_off_afflvl;	/* Highest affinity level in physically
 					   powered off state */
+#if !USE_COHERENT_MEM
+	bakery_info_t pcpu_bakery_info[PSCI_NUM_AFFS];
+#endif
 } psci_cpu_data_t;
 
 /*******************************************************************************
@@ -147,24 +177,22 @@
  * perform common low level pm functions
  ******************************************************************************/
 typedef struct plat_pm_ops {
-	int (*affinst_standby)(unsigned int);
-	int (*affinst_on)(unsigned long,
-			  unsigned long,
-			  unsigned long,
-			  unsigned int,
-			  unsigned int);
-	int (*affinst_off)(unsigned long, unsigned int, unsigned int);
-	int (*affinst_suspend)(unsigned long,
-			       unsigned long,
-			       unsigned long,
-			       unsigned int,
-			       unsigned int);
-	int (*affinst_on_finish)(unsigned long, unsigned int, unsigned int);
-	int (*affinst_suspend_finish)(unsigned long,
-				      unsigned int,
-				      unsigned int);
+	void (*affinst_standby)(unsigned int power_state);
+	int (*affinst_on)(unsigned long mpidr,
+			  unsigned long sec_entrypoint,
+			  unsigned int afflvl,
+			  unsigned int state);
+	void (*affinst_off)(unsigned int afflvl, unsigned int state);
+	void (*affinst_suspend)(unsigned long sec_entrypoint,
+			       unsigned int afflvl,
+			       unsigned int state);
+	void (*affinst_on_finish)(unsigned int afflvl, unsigned int state);
+	void (*affinst_suspend_finish)(unsigned int afflvl,
+				      unsigned int state);
 	void (*system_off)(void) __dead2;
 	void (*system_reset)(void) __dead2;
+	int (*validate_power_state)(unsigned int power_state);
+	int (*validate_ns_entrypoint)(unsigned long ns_entrypoint);
 } plat_pm_ops_t;
 
 /*******************************************************************************
@@ -176,11 +204,11 @@
 typedef struct spd_pm_ops {
 	void (*svc_on)(uint64_t target_cpu);
 	int32_t (*svc_off)(uint64_t __unused);
-	void (*svc_suspend)(uint64_t power_state);
+	void (*svc_suspend)(uint64_t __unused);
 	void (*svc_on_finish)(uint64_t __unused);
 	void (*svc_suspend_finish)(uint64_t suspend_level);
-	void (*svc_migrate)(uint64_t __unused1, uint64_t __unused2);
-	int32_t (*svc_migrate_info)(uint64_t *__unused);
+	int32_t (*svc_migrate)(uint64_t from_cpu, uint64_t to_cpu);
+	int32_t (*svc_migrate_info)(uint64_t *resident_cpu);
 	void (*svc_system_off)(void);
 	void (*svc_system_reset)(void);
 } spd_pm_ops_t;
@@ -190,9 +218,9 @@
  ******************************************************************************/
 unsigned int psci_version(void);
 int psci_affinity_info(unsigned long, unsigned int);
-int psci_migrate(unsigned int);
-unsigned int psci_migrate_info_type(void);
-unsigned long psci_migrate_info_up_cpu(void);
+int psci_migrate(unsigned long);
+int psci_migrate_info_type(void);
+long psci_migrate_info_up_cpu(void);
 int psci_cpu_on(unsigned long,
 		unsigned long,
 		unsigned long);
diff --git a/include/common/bl_common.h b/include/common/bl_common.h
index 9945e3a..0959c89 100644
--- a/include/common/bl_common.h
+++ b/include/common/bl_common.h
@@ -90,6 +90,18 @@
 	(_p)->h.attr = (uint32_t)(_attr) ; \
 	} while (0)
 
+/*******************************************************************************
+ * Constant that indicates if this is the first version of the reset handler
+ * contained in an image. This will be the case when the image is BL1 or when
+ * its BL3-1 and RESET_TO_BL31 is true. This constant enables a subsequent
+ * version of the reset handler to perform actions that override the ones
+ * performed in the first version of the code. This will be required when the
+ * first version exists in an un-modifiable image e.g. a BootROM image.
+ ******************************************************************************/
+#if IMAGE_BL1 || (IMAGE_BL31 && RESET_TO_BL31)
+#define FIRST_RESET_HANDLER_CALL
+#endif
+
 #ifndef __ASSEMBLY__
 #include <cdefs.h> /* For __dead2 */
 #include <cassert.h>
diff --git a/include/lib/aarch64/arch_helpers.h b/include/lib/aarch64/arch_helpers.h
index 7320a0a..65941e6 100644
--- a/include/lib/aarch64/arch_helpers.h
+++ b/include/lib/aarch64/arch_helpers.h
@@ -175,6 +175,9 @@
 DEFINE_SYSOP_FUNC(wfe)
 DEFINE_SYSOP_FUNC(sev)
 DEFINE_SYSOP_TYPE_FUNC(dsb, sy)
+DEFINE_SYSOP_TYPE_FUNC(dmb, sy)
+DEFINE_SYSOP_TYPE_FUNC(dsb, ish)
+DEFINE_SYSOP_TYPE_FUNC(dmb, ish)
 DEFINE_SYSOP_FUNC(isb)
 
 uint32_t get_afflvl_shift(uint32_t);
@@ -267,6 +270,8 @@
 DEFINE_SYSREG_RW_FUNCS(vpidr_el2)
 DEFINE_SYSREG_RW_FUNCS(vmpidr_el2)
 
+DEFINE_SYSREG_READ_FUNC(isr_el1)
+
 /* GICv3 System Registers */
 
 DEFINE_RENAME_SYSREG_RW_FUNCS(icc_sre_el1, ICC_SRE_EL1)
diff --git a/include/lib/bakery_lock.h b/include/lib/bakery_lock.h
index 95634cf..9736f85 100644
--- a/include/lib/bakery_lock.h
+++ b/include/lib/bakery_lock.h
@@ -35,6 +35,11 @@
 
 #define BAKERY_LOCK_MAX_CPUS		PLATFORM_CORE_COUNT
 
+#ifndef __ASSEMBLY__
+#include <stdint.h>
+
+#if USE_COHERENT_MEM
+
 typedef struct bakery_lock {
 	int owner;
 	volatile char entering[BAKERY_LOCK_MAX_CPUS];
@@ -48,4 +53,21 @@
 void bakery_lock_release(bakery_lock_t *bakery);
 int bakery_lock_try(bakery_lock_t *bakery);
 
+#else
+
+typedef struct bakery_info {
+	/*
+	 * The lock_data is a bit-field of 2 members:
+	 * Bit[0]       : choosing. This field is set when the CPU is
+	 *                choosing its bakery number.
+	 * Bits[1 - 15] : number. This is the bakery number allocated.
+	 */
+	volatile uint16_t lock_data;
+} bakery_info_t;
+
+void bakery_lock_get(unsigned int id, unsigned int offset);
+void bakery_lock_release(unsigned int id, unsigned int offset);
+
+#endif /* __USE_COHERENT_MEM__ */
+#endif /* __ASSEMBLY__ */
 #endif /* __BAKERY_LOCK_H__ */
diff --git a/include/lib/cpus/aarch64/cpu_macros.S b/include/lib/cpus/aarch64/cpu_macros.S
index 65fb82d..089f09c 100644
--- a/include/lib/cpus/aarch64/cpu_macros.S
+++ b/include/lib/cpus/aarch64/cpu_macros.S
@@ -40,7 +40,7 @@
 CPU_MIDR: /* cpu_ops midr */
 	.space  8
 /* Reset fn is needed in BL at reset vector */
-#if IMAGE_BL1 || (IMAGE_BL31 && RESET_TO_BL31)
+#if IMAGE_BL1 || IMAGE_BL31
 CPU_RESET_FUNC: /* cpu_ops reset_func */
 	.space  8
 #endif
@@ -65,7 +65,7 @@
 	.section cpu_ops, "a"; .align 3
 	.type cpu_ops_\_name, %object
 	.quad \_midr
-#if IMAGE_BL1 || (IMAGE_BL31 && RESET_TO_BL31)
+#if IMAGE_BL1 || IMAGE_BL31
 	.if \_noresetfunc
 	.quad 0
 	.else
diff --git a/lib/cpus/aarch64/cortex_a53.S b/lib/cpus/aarch64/cortex_a53.S
index ec18464..306b42e 100644
--- a/lib/cpus/aarch64/cortex_a53.S
+++ b/lib/cpus/aarch64/cortex_a53.S
@@ -29,6 +29,7 @@
  */
 #include <arch.h>
 #include <asm_macros.S>
+#include <bl_common.h>
 #include <cortex_a53.h>
 #include <cpu_macros.S>
 #include <plat_macros.S>
@@ -58,13 +59,17 @@
 
 func cortex_a53_reset_func
 	/* ---------------------------------------------
-	 * As a bare minimum enable the SMP bit.
+	 * As a bare minimum enable the SMP bit if it is
+	 * not already set.
 	 * ---------------------------------------------
 	 */
 	mrs	x0, CPUECTLR_EL1
+	tst	x0, #CPUECTLR_SMP_BIT
+	b.ne	skip_smp_setup
 	orr	x0, x0, #CPUECTLR_SMP_BIT
 	msr	CPUECTLR_EL1, x0
 	isb
+skip_smp_setup:
 	ret
 
 func cortex_a53_core_pwr_dwn
diff --git a/lib/cpus/aarch64/cortex_a57.S b/lib/cpus/aarch64/cortex_a57.S
index dab16d7..3334e68 100644
--- a/lib/cpus/aarch64/cortex_a57.S
+++ b/lib/cpus/aarch64/cortex_a57.S
@@ -30,6 +30,7 @@
 #include <arch.h>
 #include <asm_macros.S>
 #include <assert_macros.S>
+#include <bl_common.h>
 #include <cortex_a57.h>
 #include <cpu_macros.S>
 #include <plat_macros.S>
@@ -99,9 +100,17 @@
 	ret
 #endif
 apply_806969:
+	/*
+	 * Test if errata has already been applied in an earlier
+	 * invocation of the reset handler and does not need to
+	 * be applied again.
+	 */
 	mrs	x1, CPUACTLR_EL1
+	tst	x1, #CPUACTLR_NO_ALLOC_WBWA
+	b.ne	skip_806969
 	orr	x1, x1, #CPUACTLR_NO_ALLOC_WBWA
 	msr	CPUACTLR_EL1, x1
+skip_806969:
 	ret
 
 
@@ -123,9 +132,17 @@
 	ret
 #endif
 apply_813420:
+	/*
+	 * Test if errata has already been applied in an earlier
+	 * invocation of the reset handler and does not need to
+	 * be applied again.
+	 */
 	mrs	x1, CPUACTLR_EL1
+	tst	x1, #CPUACTLR_DCC_AS_DCCI
+	b.ne	skip_813420
 	orr	x1, x1, #CPUACTLR_DCC_AS_DCCI
 	msr	CPUACTLR_EL1, x1
+skip_813420:
 	ret
 
 	/* -------------------------------------------------
@@ -154,13 +171,18 @@
 	mov	x0, x20
 	bl	errata_a57_813420_wa
 #endif
+
 	/* ---------------------------------------------
-	 * As a bare minimum enable the SMP bit.
+	 * As a bare minimum enable the SMP bit if it is
+	 * not already set.
 	 * ---------------------------------------------
 	 */
 	mrs	x0, CPUECTLR_EL1
+	tst	x0, #CPUECTLR_SMP_BIT
+	b.ne	skip_smp_setup
 	orr	x0, x0, #CPUECTLR_SMP_BIT
 	msr	CPUECTLR_EL1, x0
+skip_smp_setup:
 	isb
 	ret	x19
 
diff --git a/lib/cpus/aarch64/cpu_helpers.S b/lib/cpus/aarch64/cpu_helpers.S
index 5680bce..d829f60 100644
--- a/lib/cpus/aarch64/cpu_helpers.S
+++ b/lib/cpus/aarch64/cpu_helpers.S
@@ -37,7 +37,7 @@
 #endif
 
  /* Reset fn is needed in BL at reset vector */
-#if IMAGE_BL1 || (IMAGE_BL31 && RESET_TO_BL31)
+#if IMAGE_BL1 || IMAGE_BL31
 	/*
 	 * The reset handler common to all platforms.  After a matching
 	 * cpu_ops structure entry is found, the correponding reset_handler
@@ -64,7 +64,7 @@
 1:
 	ret
 
-#endif /* IMAGE_BL1 || (IMAGE_BL31 && RESET_TO_BL31) */
+#endif /* IMAGE_BL1 || IMAGE_BL31 */
 
 #if IMAGE_BL31 /* The power down core and cluster is needed only in  BL31 */
 	/*
diff --git a/lib/locks/bakery/bakery_lock.c b/lib/locks/bakery/bakery_lock_coherent.c
similarity index 98%
rename from lib/locks/bakery/bakery_lock.c
rename to lib/locks/bakery/bakery_lock_coherent.c
index 7e71dec..5d538ce 100644
--- a/lib/locks/bakery/bakery_lock.c
+++ b/lib/locks/bakery/bakery_lock_coherent.c
@@ -31,11 +31,13 @@
 #include <arch_helpers.h>
 #include <assert.h>
 #include <bakery_lock.h>
+#include <cpu_data.h>
 #include <platform.h>
 #include <string.h>
 
 /*
- * Functions in this file implement Bakery Algorithm for mutual exclusion.
+ * Functions in this file implement Bakery Algorithm for mutual exclusion with the
+ * bakery lock data structures in coherent memory.
  *
  * ARM architecture offers a family of exclusive access instructions to
  * efficiently implement mutual exclusion with hardware support. However, as
@@ -107,8 +109,6 @@
 	++my_ticket;
 	bakery->number[me] = my_ticket;
 	bakery->entering[me] = 0;
-	dsb();
-	sev();
 
 	return my_ticket;
 }
@@ -151,7 +151,7 @@
 
 		/* Wait for the contender to get their ticket */
 		while (bakery->entering[they])
-			wfe();
+			;
 
 		/*
 		 * If the other party is a contender, they'll have non-zero
diff --git a/lib/locks/bakery/bakery_lock_normal.c b/lib/locks/bakery/bakery_lock_normal.c
new file mode 100644
index 0000000..a325fd4
--- /dev/null
+++ b/lib/locks/bakery/bakery_lock_normal.c
@@ -0,0 +1,217 @@
+/*
+ * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <arch_helpers.h>
+#include <assert.h>
+#include <bakery_lock.h>
+#include <cpu_data.h>
+#include <platform.h>
+#include <string.h>
+
+/*
+ * Functions in this file implement Bakery Algorithm for mutual exclusion with the
+ * bakery lock data structures in cacheable and Normal memory.
+ *
+ * ARM architecture offers a family of exclusive access instructions to
+ * efficiently implement mutual exclusion with hardware support. However, as
+ * well as depending on external hardware, these instructions have defined
+ * behavior only on certain memory types (cacheable and Normal memory in
+ * particular; see ARMv8 Architecture Reference Manual section B2.10). Use cases
+ * in trusted firmware are such that mutual exclusion implementation cannot
+ * expect that accesses to the lock have the specific type required by the
+ * architecture for these primitives to function (for example, not all
+ * contenders may have address translation enabled).
+ *
+ * This implementation does not use mutual exclusion primitives. It expects
+ * memory regions where the locks reside to be cacheable and Normal.
+ *
+ * Note that the ARM architecture guarantees single-copy atomicity for aligned
+ * accesses regardless of status of address translation.
+ */
+
+/* Convert a ticket to priority */
+#define PRIORITY(t, pos)	(((t) << 8) | (pos))
+
+#define CHOOSING_TICKET		0x1
+#define CHOOSING_DONE		0x0
+
+#define bakery_is_choosing(info)	(info & 0x1)
+#define bakery_ticket_number(info)	((info >> 1) & 0x7FFF)
+#define make_bakery_data(choosing, number) \
+		(((choosing & 0x1) | (number << 1)) & 0xFFFF)
+
+/* This macro assumes that the bakery_info array is located at the offset specified */
+#define get_my_bakery_info(offset, id)		\
+	(((bakery_info_t *) (((uint8_t *)_cpu_data()) + offset)) + id)
+
+#define get_bakery_info_by_index(offset, id, ix)	\
+	(((bakery_info_t *) (((uint8_t *)_cpu_data_by_index(ix)) + offset)) + id)
+
+#define write_cache_op(addr, cached)	\
+				do {	\
+					(cached ? dccvac((uint64_t)addr) :\
+						dcivac((uint64_t)addr));\
+						dsbish();\
+				} while (0)
+
+#define read_cache_op(addr, cached)	if (cached) \
+					    dccivac((uint64_t)addr)
+
+static unsigned int bakery_get_ticket(int id, unsigned int offset,
+						unsigned int me, int is_cached)
+{
+	unsigned int my_ticket, their_ticket;
+	unsigned int they;
+	bakery_info_t *my_bakery_info, *their_bakery_info;
+
+	/*
+	 * Obtain a reference to the bakery information for this cpu and ensure
+	 * it is not NULL.
+	 */
+	my_bakery_info = get_my_bakery_info(offset, id);
+	assert(my_bakery_info);
+
+	/*
+	 * Tell other contenders that we are through the bakery doorway i.e.
+	 * going to allocate a ticket for this cpu.
+	 */
+	my_ticket = 0;
+	my_bakery_info->lock_data = make_bakery_data(CHOOSING_TICKET, my_ticket);
+
+	write_cache_op(my_bakery_info, is_cached);
+
+	/*
+	 * Iterate through the bakery information of each contender to allocate
+	 * the highest ticket number for this cpu.
+	 */
+	for (they = 0; they < BAKERY_LOCK_MAX_CPUS; they++) {
+		if (me == they)
+			continue;
+
+		/*
+		 * Get a reference to the other contender's bakery info and
+		 * ensure that a stale copy is not read.
+		 */
+		their_bakery_info = get_bakery_info_by_index(offset, id, they);
+		assert(their_bakery_info);
+
+		read_cache_op(their_bakery_info, is_cached);
+
+		/*
+		 * Update this cpu's ticket number if a higher ticket number is
+		 * seen
+		 */
+		their_ticket = bakery_ticket_number(their_bakery_info->lock_data);
+		if (their_ticket > my_ticket)
+			my_ticket = their_ticket;
+	}
+
+	/*
+	 * Compute ticket; then signal to other contenders waiting for us to
+	 * finish calculating our ticket value that we're done
+	 */
+	++my_ticket;
+	my_bakery_info->lock_data = make_bakery_data(CHOOSING_DONE, my_ticket);
+
+	write_cache_op(my_bakery_info, is_cached);
+
+	return my_ticket;
+}
+
+void bakery_lock_get(unsigned int id, unsigned int offset)
+{
+	unsigned int they, me, is_cached;
+	unsigned int my_ticket, my_prio, their_ticket;
+	bakery_info_t *their_bakery_info;
+	uint16_t their_bakery_data;
+
+	me = platform_get_core_pos(read_mpidr_el1());
+
+	is_cached = read_sctlr_el3() & SCTLR_C_BIT;
+
+	/* Get a ticket */
+	my_ticket = bakery_get_ticket(id, offset, me, is_cached);
+
+	/*
+	 * Now that we got our ticket, compute our priority value, then compare
+	 * with that of others, and proceed to acquire the lock
+	 */
+	my_prio = PRIORITY(my_ticket, me);
+	for (they = 0; they < BAKERY_LOCK_MAX_CPUS; they++) {
+		if (me == they)
+			continue;
+
+		/*
+		 * Get a reference to the other contender's bakery info and
+		 * ensure that a stale copy is not read.
+		 */
+		their_bakery_info = get_bakery_info_by_index(offset, id, they);
+		assert(their_bakery_info);
+		read_cache_op(their_bakery_info, is_cached);
+
+		their_bakery_data = their_bakery_info->lock_data;
+
+		/* Wait for the contender to get their ticket */
+		while (bakery_is_choosing(their_bakery_data)) {
+			read_cache_op(their_bakery_info, is_cached);
+			their_bakery_data = their_bakery_info->lock_data;
+		}
+
+		/*
+		 * If the other party is a contender, they'll have non-zero
+		 * (valid) ticket value. If they do, compare priorities
+		 */
+		their_ticket = bakery_ticket_number(their_bakery_data);
+		if (their_ticket && (PRIORITY(their_ticket, they) < my_prio)) {
+			/*
+			 * They have higher priority (lower value). Wait for
+			 * their ticket value to change (either release the lock
+			 * to have it dropped to 0; or drop and probably content
+			 * again for the same lock to have an even higher value)
+			 */
+			do {
+				wfe();
+				read_cache_op(their_bakery_info, is_cached);
+			} while (their_ticket
+				== bakery_ticket_number(their_bakery_info->lock_data));
+		}
+	}
+}
+
+void bakery_lock_release(unsigned int id, unsigned int offset)
+{
+	bakery_info_t *my_bakery_info;
+	unsigned int is_cached = read_sctlr_el3() & SCTLR_C_BIT;
+
+	my_bakery_info = get_my_bakery_info(offset, id);
+	my_bakery_info->lock_data = 0;
+	write_cache_op(my_bakery_info, is_cached);
+	sev();
+}
diff --git a/plat/fvp/aarch64/fvp_common.c b/plat/fvp/aarch64/fvp_common.c
index 86b0596..55c2cbb 100644
--- a/plat/fvp/aarch64/fvp_common.c
+++ b/plat/fvp/aarch64/fvp_common.c
@@ -136,7 +136,8 @@
  * Macro generating the code for the function setting up the pagetables as per
  * the platform memory map & initialize the mmu, for the given exception level
  ******************************************************************************/
-#define DEFINE_CONFIGURE_MMU_EL(_el)					\
+#if USE_COHERENT_MEM
+#define DEFINE_CONFIGURE_MMU_EL(_el)				\
 	void fvp_configure_mmu_el##_el(unsigned long total_base,	\
 				   unsigned long total_size,		\
 				   unsigned long ro_start,		\
@@ -158,6 +159,25 @@
 									\
 		enable_mmu_el##_el(0);					\
 	}
+#else
+#define DEFINE_CONFIGURE_MMU_EL(_el)				\
+	void fvp_configure_mmu_el##_el(unsigned long total_base,	\
+				   unsigned long total_size,		\
+				   unsigned long ro_start,		\
+				   unsigned long ro_limit)		\
+	{								\
+		mmap_add_region(total_base, total_base,			\
+				total_size,				\
+				MT_MEMORY | MT_RW | MT_SECURE);		\
+		mmap_add_region(ro_start, ro_start,			\
+				ro_limit - ro_start,			\
+				MT_MEMORY | MT_RO | MT_SECURE);		\
+		mmap_add(fvp_mmap);					\
+		init_xlat_tables();					\
+									\
+		enable_mmu_el##_el(0);					\
+	}
+#endif
 
 /* Define EL1 and EL3 variants of the function initialising the MMU */
 DEFINE_CONFIGURE_MMU_EL(1)
diff --git a/plat/fvp/bl1_fvp_setup.c b/plat/fvp/bl1_fvp_setup.c
index b1205d4..4b421d7 100644
--- a/plat/fvp/bl1_fvp_setup.c
+++ b/plat/fvp/bl1_fvp_setup.c
@@ -40,6 +40,7 @@
 #include "fvp_def.h"
 #include "fvp_private.h"
 
+#if USE_COHERENT_MEM
 /*******************************************************************************
  * Declarations of linker defined symbols which will help us find the layout
  * of trusted SRAM
@@ -56,6 +57,7 @@
  */
 #define BL1_COHERENT_RAM_BASE (unsigned long)(&__COHERENT_RAM_START__)
 #define BL1_COHERENT_RAM_LIMIT (unsigned long)(&__COHERENT_RAM_END__)
+#endif
 
 /* Data structure which holds the extents of the trusted SRAM for BL1*/
 static meminfo_t bl1_tzram_layout;
@@ -116,9 +118,12 @@
 	fvp_configure_mmu_el3(bl1_tzram_layout.total_base,
 			      bl1_tzram_layout.total_size,
 			      BL1_RO_BASE,
-			      BL1_RO_LIMIT,
-			      BL1_COHERENT_RAM_BASE,
-			      BL1_COHERENT_RAM_LIMIT);
+			      BL1_RO_LIMIT
+#if USE_COHERENT_MEM
+			      , BL1_COHERENT_RAM_BASE,
+			      BL1_COHERENT_RAM_LIMIT
+#endif
+			     );
 }
 
 
diff --git a/plat/fvp/bl2_fvp_setup.c b/plat/fvp/bl2_fvp_setup.c
index 5eecff1..5764b6a 100644
--- a/plat/fvp/bl2_fvp_setup.c
+++ b/plat/fvp/bl2_fvp_setup.c
@@ -45,8 +45,10 @@
 extern unsigned long __RO_START__;
 extern unsigned long __RO_END__;
 
+#if USE_COHERENT_MEM
 extern unsigned long __COHERENT_RAM_START__;
 extern unsigned long __COHERENT_RAM_END__;
+#endif
 
 /*
  * The next 2 constants identify the extents of the code & RO data region.
@@ -57,6 +59,7 @@
 #define BL2_RO_BASE (unsigned long)(&__RO_START__)
 #define BL2_RO_LIMIT (unsigned long)(&__RO_END__)
 
+#if USE_COHERENT_MEM
 /*
  * The next 2 constants identify the extents of the coherent memory region.
  * These addresses are used by the MMU setup code and therefore they must be
@@ -66,11 +69,11 @@
  */
 #define BL2_COHERENT_RAM_BASE (unsigned long)(&__COHERENT_RAM_START__)
 #define BL2_COHERENT_RAM_LIMIT (unsigned long)(&__COHERENT_RAM_END__)
+#endif
 
 /* Data structure which holds the extents of the trusted SRAM for BL2 */
 static meminfo_t bl2_tzram_layout
-__attribute__ ((aligned(PLATFORM_CACHE_LINE_SIZE),
-		section("tzfw_coherent_mem")));
+__attribute__ ((aligned(PLATFORM_CACHE_LINE_SIZE)));
 
 /* Assert that BL3-1 parameters fit in shared memory */
 CASSERT((PARAMS_BASE + sizeof(bl2_to_bl31_params_mem_t)) <
@@ -209,9 +212,12 @@
 	fvp_configure_mmu_el1(bl2_tzram_layout.total_base,
 			      bl2_tzram_layout.total_size,
 			      BL2_RO_BASE,
-			      BL2_RO_LIMIT,
-			      BL2_COHERENT_RAM_BASE,
-			      BL2_COHERENT_RAM_LIMIT);
+			      BL2_RO_LIMIT
+#if USE_COHERENT_MEM
+			      , BL2_COHERENT_RAM_BASE,
+			      BL2_COHERENT_RAM_LIMIT
+#endif
+			      );
 }
 
 /*******************************************************************************
diff --git a/plat/fvp/bl31_fvp_setup.c b/plat/fvp/bl31_fvp_setup.c
index 69efc9c..3874413 100644
--- a/plat/fvp/bl31_fvp_setup.c
+++ b/plat/fvp/bl31_fvp_setup.c
@@ -48,19 +48,25 @@
  ******************************************************************************/
 extern unsigned long __RO_START__;
 extern unsigned long __RO_END__;
+extern unsigned long __BL31_END__;
 
+#if USE_COHERENT_MEM
 extern unsigned long __COHERENT_RAM_START__;
 extern unsigned long __COHERENT_RAM_END__;
+#endif
 
 /*
- * The next 2 constants identify the extents of the code & RO data region.
- * These addresses are used by the MMU setup code and therefore they must be
- * page-aligned.  It is the responsibility of the linker script to ensure that
- * __RO_START__ and __RO_END__ linker symbols refer to page-aligned addresses.
+ * The next 3 constants identify the extents of the code, RO data region and the
+ * limit of the BL3-1 image.  These addresses are used by the MMU setup code and
+ * therefore they must be page-aligned.  It is the responsibility of the linker
+ * script to ensure that __RO_START__, __RO_END__ & __BL31_END__ linker symbols
+ * refer to page-aligned addresses.
  */
 #define BL31_RO_BASE (unsigned long)(&__RO_START__)
 #define BL31_RO_LIMIT (unsigned long)(&__RO_END__)
+#define BL31_END (unsigned long)(&__BL31_END__)
 
+#if USE_COHERENT_MEM
 /*
  * The next 2 constants identify the extents of the coherent memory region.
  * These addresses are used by the MMU setup code and therefore they must be
@@ -70,7 +76,7 @@
  */
 #define BL31_COHERENT_RAM_BASE (unsigned long)(&__COHERENT_RAM_START__)
 #define BL31_COHERENT_RAM_LIMIT (unsigned long)(&__COHERENT_RAM_END__)
-
+#endif
 
 #if RESET_TO_BL31
 static entry_point_info_t bl32_image_ep_info;
@@ -235,9 +241,12 @@
 	fvp_cci_enable();
 #endif
 	fvp_configure_mmu_el3(BL31_RO_BASE,
-			      (BL31_COHERENT_RAM_LIMIT - BL31_RO_BASE),
+			      (BL31_END - BL31_RO_BASE),
 			      BL31_RO_BASE,
-			      BL31_RO_LIMIT,
-			      BL31_COHERENT_RAM_BASE,
-			      BL31_COHERENT_RAM_LIMIT);
+			      BL31_RO_LIMIT
+#if USE_COHERENT_MEM
+			      , BL31_COHERENT_RAM_BASE,
+			      BL31_COHERENT_RAM_LIMIT
+#endif
+			      );
 }
diff --git a/plat/fvp/drivers/pwrc/fvp_pwrc.c b/plat/fvp/drivers/pwrc/fvp_pwrc.c
index c32c322..0497c2b 100644
--- a/plat/fvp/drivers/pwrc/fvp_pwrc.c
+++ b/plat/fvp/drivers/pwrc/fvp_pwrc.c
@@ -31,13 +31,19 @@
 #include <bakery_lock.h>
 #include <mmio.h>
 #include "../../fvp_def.h"
+#include "../../fvp_private.h"
 #include "fvp_pwrc.h"
 
 /*
  * TODO: Someday there will be a generic power controller api. At the moment
  * each platform has its own pwrc so just exporting functions is fine.
  */
+#if USE_COHERENT_MEM
 static bakery_lock_t pwrc_lock __attribute__ ((section("tzfw_coherent_mem")));
+#define LOCK_ARG	&pwrc_lock
+#else
+#define LOCK_ARG	FVP_PWRC_BAKERY_ID
+#endif
 
 unsigned int fvp_pwrc_get_cpu_wkr(unsigned long mpidr)
 {
@@ -47,54 +53,55 @@
 unsigned int fvp_pwrc_read_psysr(unsigned long mpidr)
 {
 	unsigned int rc;
-	bakery_lock_get(&pwrc_lock);
+	fvp_lock_get(LOCK_ARG);
 	mmio_write_32(PWRC_BASE + PSYSR_OFF, (unsigned int) mpidr);
 	rc = mmio_read_32(PWRC_BASE + PSYSR_OFF);
-	bakery_lock_release(&pwrc_lock);
+	fvp_lock_release(LOCK_ARG);
 	return rc;
 }
 
 void fvp_pwrc_write_pponr(unsigned long mpidr)
 {
-	bakery_lock_get(&pwrc_lock);
+	fvp_lock_get(LOCK_ARG);
 	mmio_write_32(PWRC_BASE + PPONR_OFF, (unsigned int) mpidr);
-	bakery_lock_release(&pwrc_lock);
+	fvp_lock_release(LOCK_ARG);
 }
 
 void fvp_pwrc_write_ppoffr(unsigned long mpidr)
 {
-	bakery_lock_get(&pwrc_lock);
+	fvp_lock_get(LOCK_ARG);
 	mmio_write_32(PWRC_BASE + PPOFFR_OFF, (unsigned int) mpidr);
-	bakery_lock_release(&pwrc_lock);
+	fvp_lock_release(LOCK_ARG);
 }
 
 void fvp_pwrc_set_wen(unsigned long mpidr)
 {
-	bakery_lock_get(&pwrc_lock);
+	fvp_lock_get(LOCK_ARG);
 	mmio_write_32(PWRC_BASE + PWKUPR_OFF,
 		      (unsigned int) (PWKUPR_WEN | mpidr));
-	bakery_lock_release(&pwrc_lock);
+	fvp_lock_release(LOCK_ARG);
 }
 
 void fvp_pwrc_clr_wen(unsigned long mpidr)
 {
-	bakery_lock_get(&pwrc_lock);
+	fvp_lock_get(LOCK_ARG);
 	mmio_write_32(PWRC_BASE + PWKUPR_OFF,
 		      (unsigned int) mpidr);
-	bakery_lock_release(&pwrc_lock);
+	fvp_lock_release(LOCK_ARG);
 }
 
 void fvp_pwrc_write_pcoffr(unsigned long mpidr)
 {
-	bakery_lock_get(&pwrc_lock);
+	fvp_lock_get(LOCK_ARG);
 	mmio_write_32(PWRC_BASE + PCOFFR_OFF, (unsigned int) mpidr);
-	bakery_lock_release(&pwrc_lock);
+	fvp_lock_release(LOCK_ARG);
 }
 
 /* Nothing else to do here apart from initializing the lock */
 int fvp_pwrc_setup(void)
 {
-	bakery_lock_init(&pwrc_lock);
+	fvp_lock_init(LOCK_ARG);
+
 	return 0;
 }
 
diff --git a/plat/fvp/fvp_pm.c b/plat/fvp/fvp_pm.c
index 2038e87..9044e69 100644
--- a/plat/fvp/fvp_pm.c
+++ b/plat/fvp/fvp_pm.c
@@ -119,28 +119,14 @@
 /*******************************************************************************
  * FVP handler called when an affinity instance is about to enter standby.
  ******************************************************************************/
-int fvp_affinst_standby(unsigned int power_state)
+void fvp_affinst_standby(unsigned int power_state)
 {
-	unsigned int target_afflvl;
-
-	/* Sanity check the requested state */
-	target_afflvl = psci_get_pstate_afflvl(power_state);
-
-	/*
-	 * It's possible to enter standby only on affinity level 0 i.e. a cpu
-	 * on the FVP. Ignore any other affinity level.
-	 */
-	if (target_afflvl != MPIDR_AFFLVL0)
-		return PSCI_E_INVALID_PARAMS;
-
 	/*
 	 * Enter standby state
 	 * dsb is good practice before using wfi to enter low power states
 	 */
 	dsb();
 	wfi();
-
-	return PSCI_E_SUCCESS;
 }
 
 /*******************************************************************************
@@ -149,7 +135,6 @@
  ******************************************************************************/
 int fvp_affinst_on(unsigned long mpidr,
 		   unsigned long sec_entrypoint,
-		   unsigned long ns_entrypoint,
 		   unsigned int afflvl,
 		   unsigned int state)
 {
@@ -191,13 +176,12 @@
  * global variables across calls. It will be wise to do flush a write to the
  * global to prevent unpredictable results.
  ******************************************************************************/
-int fvp_affinst_off(unsigned long mpidr,
-		    unsigned int afflvl,
+void fvp_affinst_off(unsigned int afflvl,
 		    unsigned int state)
 {
 	/* Determine if any platform actions need to be executed */
 	if (fvp_do_plat_actions(afflvl, state) == -EAGAIN)
-		return PSCI_E_SUCCESS;
+		return;
 
 	/*
 	 * If execution reaches this stage then this affinity level will be
@@ -209,7 +193,6 @@
 	if (afflvl != MPIDR_AFFLVL0)
 		fvp_cluster_pwrdwn_common();
 
-	return PSCI_E_SUCCESS;
 }
 
 /*******************************************************************************
@@ -223,18 +206,21 @@
  * global variables across calls. It will be wise to do flush a write to the
  * global to prevent unpredictable results.
  ******************************************************************************/
-int fvp_affinst_suspend(unsigned long mpidr,
-			unsigned long sec_entrypoint,
-			unsigned long ns_entrypoint,
+void fvp_affinst_suspend(unsigned long sec_entrypoint,
 			unsigned int afflvl,
 			unsigned int state)
 {
+	unsigned long mpidr;
+
 	/* Determine if any platform actions need to be executed. */
 	if (fvp_do_plat_actions(afflvl, state) == -EAGAIN)
-		return PSCI_E_SUCCESS;
+		return;
 
-	/* Program the jump address for the target cpu */
-	fvp_program_mailbox(read_mpidr_el1(), sec_entrypoint);
+	/* Get the mpidr for this cpu */
+	mpidr = read_mpidr_el1();
+
+	/* Program the jump address for the this cpu */
+	fvp_program_mailbox(mpidr, sec_entrypoint);
 
 	/* Program the power controller to enable wakeup interrupts. */
 	fvp_pwrc_set_wen(mpidr);
@@ -245,8 +231,6 @@
 	/* Perform the common cluster specific operations */
 	if (afflvl != MPIDR_AFFLVL0)
 		fvp_cluster_pwrdwn_common();
-
-	return PSCI_E_SUCCESS;
 }
 
 /*******************************************************************************
@@ -256,15 +240,17 @@
  * was turned off prior to wakeup and do what's necessary to setup it up
  * correctly.
  ******************************************************************************/
-int fvp_affinst_on_finish(unsigned long mpidr,
-			  unsigned int afflvl,
+void fvp_affinst_on_finish(unsigned int afflvl,
 			  unsigned int state)
 {
-	int rc = PSCI_E_SUCCESS;
+	unsigned long mpidr;
 
 	/* Determine if any platform actions need to be executed. */
 	if (fvp_do_plat_actions(afflvl, state) == -EAGAIN)
-		return PSCI_E_SUCCESS;
+		return;
+
+	/* Get the mpidr for this cpu */
+	mpidr = read_mpidr_el1();
 
 	/* Perform the common cluster specific operations */
 	if (afflvl != MPIDR_AFFLVL0) {
@@ -290,15 +276,13 @@
 	fvp_pwrc_clr_wen(mpidr);
 
 	/* Zero the jump address in the mailbox for this cpu */
-	fvp_program_mailbox(read_mpidr_el1(), 0);
+	fvp_program_mailbox(mpidr, 0);
 
 	/* Enable the gic cpu interface */
 	arm_gic_cpuif_setup();
 
 	/* TODO: This setup is needed only after a cold boot */
 	arm_gic_pcpu_distif_setup();
-
-	return rc;
 }
 
 /*******************************************************************************
@@ -308,11 +292,10 @@
  * TODO: At the moment we reuse the on finisher and reinitialize the secure
  * context. Need to implement a separate suspend finisher.
  ******************************************************************************/
-int fvp_affinst_suspend_finish(unsigned long mpidr,
-			       unsigned int afflvl,
+void fvp_affinst_suspend_finish(unsigned int afflvl,
 			       unsigned int state)
 {
-	return fvp_affinst_on_finish(mpidr, afflvl, state);
+	fvp_affinst_on_finish(afflvl, state);
 }
 
 /*******************************************************************************
@@ -339,6 +322,30 @@
 }
 
 /*******************************************************************************
+ * FVP handler called to check the validity of the power state parameter.
+ ******************************************************************************/
+int fvp_validate_power_state(unsigned int power_state)
+{
+	/* Sanity check the requested state */
+	if (psci_get_pstate_type(power_state) == PSTATE_TYPE_STANDBY) {
+		/*
+		 * It's possible to enter standby only on affinity level 0
+		 * i.e. a cpu on the fvp. Ignore any other affinity level.
+		 */
+		if (psci_get_pstate_afflvl(power_state) != MPIDR_AFFLVL0)
+			return PSCI_E_INVALID_PARAMS;
+	}
+
+	/*
+	 * We expect the 'state id' to be zero.
+	 */
+	if (psci_get_pstate_id(power_state))
+		return PSCI_E_INVALID_PARAMS;
+
+	return PSCI_E_SUCCESS;
+}
+
+/*******************************************************************************
  * Export the platform handlers to enable psci to invoke them
  ******************************************************************************/
 static const plat_pm_ops_t fvp_plat_pm_ops = {
@@ -349,7 +356,8 @@
 	.affinst_on_finish = fvp_affinst_on_finish,
 	.affinst_suspend_finish = fvp_affinst_suspend_finish,
 	.system_off = fvp_system_off,
-	.system_reset = fvp_system_reset
+	.system_reset = fvp_system_reset,
+	.validate_power_state = fvp_validate_power_state
 };
 
 /*******************************************************************************
diff --git a/plat/fvp/fvp_private.h b/plat/fvp/fvp_private.h
index 2dcb327..3949754 100644
--- a/plat/fvp/fvp_private.h
+++ b/plat/fvp/fvp_private.h
@@ -31,7 +31,9 @@
 #ifndef __FVP_PRIVATE_H__
 #define __FVP_PRIVATE_H__
 
+#include <bakery_lock.h>
 #include <bl_common.h>
+#include <cpu_data.h>
 #include <platform_def.h>
 
 
@@ -55,10 +57,60 @@
 	entry_point_info_t bl31_ep_info;
 } bl2_to_bl31_params_mem_t;
 
+#if USE_COHERENT_MEM
+/*
+ * These are wrapper macros to the Coherent Memory Bakery Lock API.
+ */
+#define fvp_lock_init(_lock_arg)	bakery_lock_init(_lock_arg)
+#define fvp_lock_get(_lock_arg)		bakery_lock_get(_lock_arg)
+#define fvp_lock_release(_lock_arg)	bakery_lock_release(_lock_arg)
+
+#else
+
 /*******************************************************************************
- * Forward declarations
+ * Constants to specify how many bakery locks this platform implements. These
+ * are used if the platform chooses not to use coherent memory for bakery lock
+ * data structures.
  ******************************************************************************/
-struct meminfo;
+#define FVP_MAX_BAKERIES	1
+#define FVP_PWRC_BAKERY_ID	0
+
+/*******************************************************************************
+ * Definition of structure which holds platform specific per-cpu data. Currently
+ * it holds only the bakery lock information for each cpu. Constants to
+ * specify how many bakeries this platform implements and bakery ids are
+ * specified in fvp_def.h
+ ******************************************************************************/
+typedef struct fvp_cpu_data {
+	bakery_info_t pcpu_bakery_info[FVP_MAX_BAKERIES];
+} fvp_cpu_data_t;
+
+/* Macro to define the offset of bakery_info_t in fvp_cpu_data_t */
+#define FVP_CPU_DATA_LOCK_OFFSET	__builtin_offsetof\
+					    (fvp_cpu_data_t, pcpu_bakery_info)
+
+
+/*******************************************************************************
+ * Helper macros for bakery lock api when using the above fvp_cpu_data_t for
+ * bakery lock data structures. It assumes that the bakery_info is at the
+ * beginning of the platform specific per-cpu data.
+ ******************************************************************************/
+#define fvp_lock_init(_lock_arg)	/* No init required */
+#define fvp_lock_get(_lock_arg)		bakery_lock_get(_lock_arg,  	    \
+						CPU_DATA_PLAT_PCPU_OFFSET + \
+						FVP_CPU_DATA_LOCK_OFFSET)
+#define fvp_lock_release(_lock_arg)	bakery_lock_release(_lock_arg,	    \
+						CPU_DATA_PLAT_PCPU_OFFSET + \
+						FVP_CPU_DATA_LOCK_OFFSET)
+
+/*
+ * Ensure that the size of the FVP specific per-cpu data structure and the size
+ * of the memory allocated in generic per-cpu data for the platform are the same.
+ */
+CASSERT(PLAT_PCPU_DATA_SIZE == sizeof(fvp_cpu_data_t),	\
+	fvp_pcpu_data_size_mismatch);
+
+#endif /* __USE_COHERENT_MEM__ */
 
 /*******************************************************************************
  * Function and variable prototypes
@@ -66,15 +118,22 @@
 void fvp_configure_mmu_el1(unsigned long total_base,
 			   unsigned long total_size,
 			   unsigned long,
-			   unsigned long,
-			   unsigned long,
-			   unsigned long);
+			   unsigned long
+#if USE_COHERENT_MEM
+			   , unsigned long,
+			   unsigned long
+#endif
+			   );
 void fvp_configure_mmu_el3(unsigned long total_base,
 			   unsigned long total_size,
 			   unsigned long,
-			   unsigned long,
-			   unsigned long,
-			   unsigned long);
+			   unsigned long
+#if USE_COHERENT_MEM
+			   , unsigned long,
+			   unsigned long
+#endif
+			   );
+
 int fvp_config_setup(void);
 
 void fvp_cci_init(void);
diff --git a/plat/fvp/include/platform_def.h b/plat/fvp/include/platform_def.h
index 2925525..b0460e0 100644
--- a/plat/fvp/include/platform_def.h
+++ b/plat/fvp/include/platform_def.h
@@ -189,5 +189,12 @@
 #define CACHE_WRITEBACK_SHIFT   6
 #define CACHE_WRITEBACK_GRANULE (1 << CACHE_WRITEBACK_SHIFT)
 
+#if !USE_COHERENT_MEM
+/*******************************************************************************
+ * Size of the per-cpu data in bytes that should be reserved in the generic
+ * per-cpu data structure for the FVP port.
+ ******************************************************************************/
+#define PLAT_PCPU_DATA_SIZE	2
+#endif
 
 #endif /* __PLATFORM_DEF_H__ */
diff --git a/plat/fvp/tsp/tsp_fvp_setup.c b/plat/fvp/tsp/tsp_fvp_setup.c
index 301f669..d8f46bd 100644
--- a/plat/fvp/tsp/tsp_fvp_setup.c
+++ b/plat/fvp/tsp/tsp_fvp_setup.c
@@ -40,19 +40,25 @@
  ******************************************************************************/
 extern unsigned long __RO_START__;
 extern unsigned long __RO_END__;
+extern unsigned long __BL32_END__;
 
+#if USE_COHERENT_MEM
 extern unsigned long __COHERENT_RAM_START__;
 extern unsigned long __COHERENT_RAM_END__;
+#endif
 
 /*
- * The next 2 constants identify the extents of the code & RO data region.
- * These addresses are used by the MMU setup code and therefore they must be
- * page-aligned.  It is the responsibility of the linker script to ensure that
- * __RO_START__ and __RO_END__ linker symbols refer to page-aligned addresses.
+ * The next 3 constants identify the extents of the code & RO data region and
+ * the limit of the BL3-2 image. These addresses are used by the MMU setup code
+ * and therefore they must be page-aligned.  It is the responsibility of the
+ * linker script to ensure that __RO_START__, __RO_END__ & & __BL32_END__
+ * linker symbols refer to page-aligned addresses.
  */
 #define BL32_RO_BASE (unsigned long)(&__RO_START__)
 #define BL32_RO_LIMIT (unsigned long)(&__RO_END__)
+#define BL32_END (unsigned long)(&__BL32_END__)
 
+#if USE_COHERENT_MEM
 /*
  * The next 2 constants identify the extents of the coherent memory region.
  * These addresses are used by the MMU setup code and therefore they must be
@@ -62,6 +68,7 @@
  */
 #define BL32_COHERENT_RAM_BASE (unsigned long)(&__COHERENT_RAM_START__)
 #define BL32_COHERENT_RAM_LIMIT (unsigned long)(&__COHERENT_RAM_END__)
+#endif
 
 /*******************************************************************************
  * Initialize the UART
@@ -93,9 +100,12 @@
 void tsp_plat_arch_setup(void)
 {
 	fvp_configure_mmu_el1(BL32_RO_BASE,
-			      (BL32_COHERENT_RAM_LIMIT - BL32_RO_BASE),
+			      (BL32_END - BL32_RO_BASE),
 			      BL32_RO_BASE,
-			      BL32_RO_LIMIT,
-			      BL32_COHERENT_RAM_BASE,
-			      BL32_COHERENT_RAM_LIMIT);
+			      BL32_RO_LIMIT
+#if USE_COHERENT_MEM
+			      , BL32_COHERENT_RAM_BASE,
+			      BL32_COHERENT_RAM_LIMIT
+#endif
+			      );
 }
diff --git a/plat/juno/aarch64/juno_common.c b/plat/juno/aarch64/juno_common.c
index 8129b05..7ad40d0 100644
--- a/plat/juno/aarch64/juno_common.c
+++ b/plat/juno/aarch64/juno_common.c
@@ -140,6 +140,7 @@
  * Macro generating the code for the function setting up the pagetables as per
  * the platform memory map & initialize the mmu, for the given exception level
  ******************************************************************************/
+#if USE_COHERENT_MEM
 #define DEFINE_CONFIGURE_MMU_EL(_el)				\
 	void configure_mmu_el##_el(unsigned long total_base,	\
 				  unsigned long total_size,	\
@@ -162,7 +163,25 @@
 								\
 	       enable_mmu_el##_el(0);				\
 	}
-
+#else
+#define DEFINE_CONFIGURE_MMU_EL(_el)				\
+	void configure_mmu_el##_el(unsigned long total_base,	\
+				  unsigned long total_size,	\
+				  unsigned long ro_start,	\
+				  unsigned long ro_limit)	\
+	{							\
+	       mmap_add_region(total_base, total_base,		\
+			       total_size,			\
+			       MT_MEMORY | MT_RW | MT_SECURE);	\
+	       mmap_add_region(ro_start, ro_start,		\
+			       ro_limit - ro_start,		\
+			       MT_MEMORY | MT_RO | MT_SECURE);	\
+	       mmap_add(juno_mmap);				\
+	       init_xlat_tables();				\
+								\
+	       enable_mmu_el##_el(0);				\
+	}
+#endif
 /* Define EL1 and EL3 variants of the function initialising the MMU */
 DEFINE_CONFIGURE_MMU_EL(1)
 DEFINE_CONFIGURE_MMU_EL(3)
diff --git a/plat/juno/aarch64/plat_helpers.S b/plat/juno/aarch64/plat_helpers.S
index 028a1a5..37966a3 100644
--- a/plat/juno/aarch64/plat_helpers.S
+++ b/plat/juno/aarch64/plat_helpers.S
@@ -115,12 +115,20 @@
 	/* -----------------------------------------------------
 	 * void plat_reset_handler(void);
 	 *
+	 * Before adding code in this function, refer to the
+	 * guidelines in docs/firmware-design.md to determine
+	 * whether the code should reside within the
+	 * FIRST_RESET_HANDLER_CALL block or not.
+	 *
 	 * Implement workaround for defect id 831273 by enabling
 	 * an event stream every 65536 cycles and set the L2 RAM
-	 * latencies for Cortex-A57.
+	 * latencies for Cortex-A57. This code is included only
+	 * when FIRST_RESET_HANDLER_CALL is defined since it
+	 * should be executed only during BL1.
 	 * -----------------------------------------------------
 	 */
 func plat_reset_handler
+#ifdef FIRST_RESET_HANDLER_CALL
 	/* Read the MIDR_EL1 */
 	mrs	x0, midr_el1
 	ubfx	x1, x0, MIDR_PN_SHIFT, #12
@@ -135,11 +143,12 @@
 
 1:
 	/* ---------------------------------------------
-	* Enable the event stream every 65536 cycles
-	* ---------------------------------------------
-	*/
+	 * Enable the event stream every 65536 cycles
+	 * ---------------------------------------------
+	 */
 	mov     x0, #(0xf << EVNTI_SHIFT)
 	orr     x0, x0, #EVNTEN_BIT
 	msr     CNTKCTL_EL1, x0
 	isb
+#endif /* FIRST_RESET_HANDLER_CALL */
 	ret
diff --git a/plat/juno/bl1_plat_setup.c b/plat/juno/bl1_plat_setup.c
index e27e394..23e8592 100644
--- a/plat/juno/bl1_plat_setup.c
+++ b/plat/juno/bl1_plat_setup.c
@@ -41,6 +41,7 @@
 #include "juno_def.h"
 #include "juno_private.h"
 
+#if USE_COHERENT_MEM
 /*******************************************************************************
  * Declarations of linker defined symbols which will help us find the layout
  * of trusted RAM
@@ -57,6 +58,7 @@
  */
 #define BL1_COHERENT_RAM_BASE (unsigned long)(&__COHERENT_RAM_START__)
 #define BL1_COHERENT_RAM_LIMIT (unsigned long)(&__COHERENT_RAM_END__)
+#endif
 
 /* Data structure which holds the extents of the trusted RAM for BL1 */
 static meminfo_t bl1_tzram_layout;
@@ -189,9 +191,12 @@
 	configure_mmu_el3(bl1_tzram_layout.total_base,
 			  bl1_tzram_layout.total_size,
 			  TZROM_BASE,
-			  TZROM_BASE + TZROM_SIZE,
-			  BL1_COHERENT_RAM_BASE,
-			  BL1_COHERENT_RAM_LIMIT);
+			  TZROM_BASE + TZROM_SIZE
+#if USE_COHERENT_MEM
+			  , BL1_COHERENT_RAM_BASE,
+			  BL1_COHERENT_RAM_LIMIT
+#endif
+			  );
 }
 
 /*******************************************************************************
diff --git a/plat/juno/bl2_plat_setup.c b/plat/juno/bl2_plat_setup.c
index 900a587..8e7b2a0 100644
--- a/plat/juno/bl2_plat_setup.c
+++ b/plat/juno/bl2_plat_setup.c
@@ -47,8 +47,10 @@
 extern unsigned long __RO_START__;
 extern unsigned long __RO_END__;
 
+#if USE_COHERENT_MEM
 extern unsigned long __COHERENT_RAM_START__;
 extern unsigned long __COHERENT_RAM_END__;
+#endif
 
 /*
  * The next 2 constants identify the extents of the code & RO data region.
@@ -59,6 +61,7 @@
 #define BL2_RO_BASE (unsigned long)(&__RO_START__)
 #define BL2_RO_LIMIT (unsigned long)(&__RO_END__)
 
+#if USE_COHERENT_MEM
 /*
  * The next 2 constants identify the extents of the coherent memory region.
  * These addresses are used by the MMU setup code and therefore they must be
@@ -68,11 +71,11 @@
  */
 #define BL2_COHERENT_RAM_BASE (unsigned long)(&__COHERENT_RAM_START__)
 #define BL2_COHERENT_RAM_LIMIT (unsigned long)(&__COHERENT_RAM_END__)
+#endif
 
 /* Data structure which holds the extents of the trusted RAM for BL2 */
 static meminfo_t bl2_tzram_layout
-__attribute__ ((aligned(PLATFORM_CACHE_LINE_SIZE),
-		section("tzfw_coherent_mem")));
+__attribute__ ((aligned(PLATFORM_CACHE_LINE_SIZE)));
 
 /*******************************************************************************
  * Structure which holds the arguments which need to be passed to BL3-1
@@ -194,9 +197,12 @@
 	configure_mmu_el1(bl2_tzram_layout.total_base,
 			  bl2_tzram_layout.total_size,
 			  BL2_RO_BASE,
-			  BL2_RO_LIMIT,
-			  BL2_COHERENT_RAM_BASE,
-			  BL2_COHERENT_RAM_LIMIT);
+			  BL2_RO_LIMIT
+#if USE_COHERENT_MEM
+			  , BL2_COHERENT_RAM_BASE,
+			  BL2_COHERENT_RAM_LIMIT
+#endif
+			  );
 }
 
 /*******************************************************************************
diff --git a/plat/juno/bl31_plat_setup.c b/plat/juno/bl31_plat_setup.c
index c450462..ad8ea43 100644
--- a/plat/juno/bl31_plat_setup.c
+++ b/plat/juno/bl31_plat_setup.c
@@ -48,19 +48,25 @@
  ******************************************************************************/
 extern unsigned long __RO_START__;
 extern unsigned long __RO_END__;
+extern unsigned long __BL31_END__;
 
+#if USE_COHERENT_MEM
 extern unsigned long __COHERENT_RAM_START__;
 extern unsigned long __COHERENT_RAM_END__;
+#endif
 
 /*
- * The next 2 constants identify the extents of the code & RO data region.
- * These addresses are used by the MMU setup code and therefore they must be
- * page-aligned.  It is the responsibility of the linker script to ensure that
- * __RO_START__ and __RO_END__ linker symbols refer to page-aligned addresses.
+ * The next 3 constants identify the extents of the code, RO data region and the
+ * limit of the BL3-1 image.  These addresses are used by the MMU setup code and
+ * therefore they must be page-aligned.  It is the responsibility of the linker
+ * script to ensure that __RO_START__, __RO_END__ & __BL31_END__ linker symbols
+ * refer to page-aligned addresses.
  */
 #define BL31_RO_BASE (unsigned long)(&__RO_START__)
 #define BL31_RO_LIMIT (unsigned long)(&__RO_END__)
+#define BL31_END (unsigned long)(&__BL31_END__)
 
+#if USE_COHERENT_MEM
 /*
  * The next 2 constants identify the extents of the coherent memory region.
  * These addresses are used by the MMU setup code and therefore they must be
@@ -70,6 +76,7 @@
  */
 #define BL31_COHERENT_RAM_BASE (unsigned long)(&__COHERENT_RAM_START__)
 #define BL31_COHERENT_RAM_LIMIT (unsigned long)(&__COHERENT_RAM_END__)
+#endif
 
 /******************************************************************************
  * Placeholder variables for copying the arguments that have been passed to
@@ -178,9 +185,13 @@
 void bl31_plat_arch_setup()
 {
 	configure_mmu_el3(BL31_RO_BASE,
-			  BL31_COHERENT_RAM_LIMIT - BL31_RO_BASE,
+			  (BL31_END - BL31_RO_BASE),
 			  BL31_RO_BASE,
-			  BL31_RO_LIMIT,
+			  BL31_RO_LIMIT
+#if USE_COHERENT_MEM
+			  ,
 			  BL31_COHERENT_RAM_BASE,
-			  BL31_COHERENT_RAM_LIMIT);
+			  BL31_COHERENT_RAM_LIMIT
+#endif
+			  );
 }
diff --git a/plat/juno/include/platform_def.h b/plat/juno/include/platform_def.h
index ee77b83..cd07702 100644
--- a/plat/juno/include/platform_def.h
+++ b/plat/juno/include/platform_def.h
@@ -174,4 +174,12 @@
 #define CACHE_WRITEBACK_SHIFT   6
 #define CACHE_WRITEBACK_GRANULE (1 << CACHE_WRITEBACK_SHIFT)
 
+#if !USE_COHERENT_MEM
+/*******************************************************************************
+ * Size of the per-cpu data in bytes that should be reserved in the generic
+ * per-cpu data structure for the Juno port.
+ ******************************************************************************/
+#define PLAT_PCPU_DATA_SIZE	2
+#endif
+
 #endif /* __PLATFORM_DEF_H__ */
diff --git a/plat/juno/juno_private.h b/plat/juno/juno_private.h
index 14d7af4..70439e8 100644
--- a/plat/juno/juno_private.h
+++ b/plat/juno/juno_private.h
@@ -31,7 +31,9 @@
 #ifndef __JUNO_PRIVATE_H__
 #define __JUNO_PRIVATE_H__
 
+#include <bakery_lock.h>
 #include <bl_common.h>
+#include <cpu_data.h>
 #include <platform_def.h>
 #include <stdint.h>
 
@@ -59,6 +61,68 @@
 	struct entry_point_info bl31_ep_info;
 } bl2_to_bl31_params_mem_t;
 
+#if IMAGE_BL31
+#if USE_COHERENT_MEM
+/*
+ * These are wrapper macros to the Coherent Memory Bakery Lock API.
+ */
+#define juno_lock_init(_lock_arg)		bakery_lock_init(_lock_arg)
+#define juno_lock_get(_lock_arg)		bakery_lock_get(_lock_arg)
+#define juno_lock_release(_lock_arg)		bakery_lock_release(_lock_arg)
+
+#else
+
+/*******************************************************************************
+ * Constants that specify how many bakeries this platform implements and bakery
+ * ids.
+ ******************************************************************************/
+#define JUNO_MAX_BAKERIES	1
+#define JUNO_MHU_BAKERY_ID	0
+
+/*******************************************************************************
+ * Definition of structure which holds platform specific per-cpu data. Currently
+ * it holds only the bakery lock information for each cpu. Constants to specify
+ * how many bakeries this platform implements and bakery ids are specified in
+ * juno_def.h
+ ******************************************************************************/
+typedef struct juno_cpu_data {
+	bakery_info_t pcpu_bakery_info[JUNO_MAX_BAKERIES];
+} juno_cpu_data_t;
+
+/* Macro to define the offset of bakery_info_t in juno_cpu_data_t */
+#define JUNO_CPU_DATA_LOCK_OFFSET	__builtin_offsetof\
+					    (juno_cpu_data_t, pcpu_bakery_info)
+
+/*******************************************************************************
+ * Helper macros for bakery lock api when using the above juno_cpu_data_t for
+ * bakery lock data structures. It assumes that the bakery_info is at the
+ * beginning of the platform specific per-cpu data.
+ ******************************************************************************/
+#define juno_lock_init(_lock_arg)		/* No init required */
+#define juno_lock_get(_lock_arg)		bakery_lock_get(_lock_arg,	\
+						    CPU_DATA_PLAT_PCPU_OFFSET + \
+						    JUNO_CPU_DATA_LOCK_OFFSET)
+#define juno_lock_release(_lock_arg)		bakery_lock_release(_lock_arg,	\
+						    CPU_DATA_PLAT_PCPU_OFFSET + \
+						    JUNO_CPU_DATA_LOCK_OFFSET)
+
+/*
+ * Ensure that the size of the Juno specific per-cpu data structure and the size
+ * of the memory allocated in generic per-cpu data for the platform are the same.
+ */
+CASSERT(PLAT_PCPU_DATA_SIZE == sizeof(juno_cpu_data_t),	\
+	juno_pcpu_data_size_mismatch);
+#endif /* __USE_COHERENT_MEM__ */
+#else
+/*
+ * Dummy wrapper macros for all other BL stages other than BL3-1
+ */
+#define juno_lock_init(_lock_arg)
+#define juno_lock_get(_lock_arg)
+#define juno_lock_release(_lock_arg)
+
+#endif /* __IMAGE_BL31__ */
+
 /*******************************************************************************
  * Function and variable prototypes
  ******************************************************************************/
@@ -70,15 +134,21 @@
 void configure_mmu_el1(unsigned long total_base,
 		       unsigned long total_size,
 		       unsigned long ro_start,
-		       unsigned long ro_limit,
-		       unsigned long coh_start,
-		       unsigned long coh_limit);
+		       unsigned long ro_limit
+#if USE_COHERENT_MEM
+		       , unsigned long coh_start,
+		       unsigned long coh_limit
+#endif
+		       );
 void configure_mmu_el3(unsigned long total_base,
 		       unsigned long total_size,
 		       unsigned long ro_start,
-		       unsigned long ro_limit,
-		       unsigned long coh_start,
-		       unsigned long coh_limit);
+		       unsigned long ro_limit
+#if USE_COHERENT_MEM
+		       , unsigned long coh_start,
+		       unsigned long coh_limit
+#endif
+		       );
 void plat_report_exception(unsigned long type);
 unsigned long plat_get_ns_image_entrypoint(void);
 unsigned long platform_get_stack(unsigned long mpidr);
diff --git a/plat/juno/mhu.c b/plat/juno/mhu.c
index b6541a8..c1c414c 100644
--- a/plat/juno/mhu.c
+++ b/plat/juno/mhu.c
@@ -32,6 +32,7 @@
 #include <bakery_lock.h>
 #include <mmio.h>
 #include "juno_def.h"
+#include "juno_private.h"
 #include "mhu.h"
 
 /* SCP MHU secure channel registers */
@@ -44,13 +45,20 @@
 #define CPU_INTR_S_SET		0x308
 #define CPU_INTR_S_CLEAR	0x310
 
-
+#if IMAGE_BL31
+#if USE_COHERENT_MEM
 static bakery_lock_t mhu_secure_lock __attribute__ ((section("tzfw_coherent_mem")));
-
+#define LOCK_ARG		&mhu_secure_lock
+#else
+#define LOCK_ARG		JUNO_MHU_BAKERY_ID
+#endif /*__USE_COHERENT_MEM__ */
+#else
+#define LOCK_ARG	/* Locks required only for BL3-1 images */
+#endif /* __IMAGE_BL31__ */
 
 void mhu_secure_message_start(void)
 {
-	bakery_lock_get(&mhu_secure_lock);
+	juno_lock_get(LOCK_ARG);
 
 	/* Make sure any previous command has finished */
 	while (mmio_read_32(MHU_BASE + CPU_INTR_S_STAT) != 0)
@@ -80,12 +88,12 @@
 	/* Clear any response we got by writing all ones to the CLEAR register */
 	mmio_write_32(MHU_BASE + SCP_INTR_S_CLEAR, 0xffffffffu);
 
-	bakery_lock_release(&mhu_secure_lock);
+	juno_lock_release(LOCK_ARG);
 }
 
 void mhu_secure_init(void)
 {
-	bakery_lock_init(&mhu_secure_lock);
+	juno_lock_init(LOCK_ARG);
 
 	/*
 	 * Clear the CPU's INTR register to make sure we don't see a stale
diff --git a/plat/juno/plat_pm.c b/plat/juno/plat_pm.c
index adf599f..47338cf 100644
--- a/plat/juno/plat_pm.c
+++ b/plat/juno/plat_pm.c
@@ -85,12 +85,36 @@
 }
 
 /*******************************************************************************
+ * Juno handler called to check the validity of the power state parameter.
+ ******************************************************************************/
+int32_t juno_validate_power_state(unsigned int power_state)
+{
+	/* Sanity check the requested state */
+	if (psci_get_pstate_type(power_state) == PSTATE_TYPE_STANDBY) {
+		/*
+		 * It's possible to enter standby only on affinity level 0 i.e.
+		 * a cpu on the Juno. Ignore any other affinity level.
+		 */
+		if (psci_get_pstate_afflvl(power_state) != MPIDR_AFFLVL0)
+			return PSCI_E_INVALID_PARAMS;
+	}
+
+	/*
+	 * We expect the 'state id' to be zero.
+	 */
+	if (psci_get_pstate_id(power_state))
+		return PSCI_E_INVALID_PARAMS;
+
+	return PSCI_E_SUCCESS;
+}
+
+
+/*******************************************************************************
  * Juno handler called when an affinity instance is about to be turned on. The
  * level and mpidr determine the affinity instance.
  ******************************************************************************/
 int32_t juno_affinst_on(uint64_t mpidr,
 			uint64_t sec_entrypoint,
-			uint64_t ns_entrypoint,
 			uint32_t afflvl,
 			uint32_t state)
 {
@@ -119,11 +143,16 @@
  * was turned off prior to wakeup and do what's necessary to setup it up
  * correctly.
  ******************************************************************************/
-int32_t juno_affinst_on_finish(uint64_t mpidr, uint32_t afflvl, uint32_t state)
+void juno_affinst_on_finish(uint32_t afflvl, uint32_t state)
 {
+	unsigned long mpidr;
+
 	/* Determine if any platform actions need to be executed. */
 	if (juno_do_plat_actions(afflvl, state) == -EAGAIN)
-		return PSCI_E_SUCCESS;
+		return;
+
+	/* Get the mpidr for this cpu */
+	mpidr = read_mpidr_el1();
 
 	/*
 	 * Perform the common cluster specific operations i.e enable coherency
@@ -141,8 +170,6 @@
 
 	/* Clear the mailbox for this cpu. */
 	juno_program_mailbox(mpidr, 0);
-
-	return PSCI_E_SUCCESS;
 }
 
 /*******************************************************************************
@@ -151,7 +178,7 @@
  * the highest affinity level which will be powered down. It performs the
  * actions common to the OFF and SUSPEND calls.
  ******************************************************************************/
-static int32_t juno_power_down_common(uint32_t afflvl)
+static void juno_power_down_common(uint32_t afflvl)
 {
 	uint32_t cluster_state = scpi_power_on;
 
@@ -172,8 +199,6 @@
 				 scpi_power_off,
 				 cluster_state,
 				 scpi_power_on);
-
-	return PSCI_E_SUCCESS;
 }
 
 /*******************************************************************************
@@ -187,13 +212,13 @@
  * global variables across calls. It will be wise to do flush a write to the
  * global to prevent unpredictable results.
  ******************************************************************************/
-static int32_t juno_affinst_off(uint64_t mpidr, uint32_t afflvl, uint32_t state)
+static void juno_affinst_off(uint32_t afflvl, uint32_t state)
 {
 	/* Determine if any platform actions need to be executed */
 	if (juno_do_plat_actions(afflvl, state) == -EAGAIN)
-		return PSCI_E_SUCCESS;
+		return;
 
-	return juno_power_down_common(afflvl);
+	juno_power_down_common(afflvl);
 }
 
 /*******************************************************************************
@@ -208,22 +233,20 @@
  * global variables across calls. It will be wise to do flush a write to the
  * global to prevent unpredictable results.
  ******************************************************************************/
-static int32_t juno_affinst_suspend(uint64_t mpidr,
-				    uint64_t sec_entrypoint,
-				    uint64_t ns_entrypoint,
+static void juno_affinst_suspend(uint64_t sec_entrypoint,
 				    uint32_t afflvl,
 				    uint32_t state)
 {
 	/* Determine if any platform actions need to be executed */
 	if (juno_do_plat_actions(afflvl, state) == -EAGAIN)
-		return PSCI_E_SUCCESS;
+		return;
 
 	/*
 	 * Setup mailbox with address for CPU entrypoint when it next powers up.
 	 */
-	juno_program_mailbox(mpidr, sec_entrypoint);
+	juno_program_mailbox(read_mpidr_el1(), sec_entrypoint);
 
-	return juno_power_down_common(afflvl);
+	juno_power_down_common(afflvl);
 }
 
 /*******************************************************************************
@@ -233,11 +256,10 @@
  * TODO: At the moment we reuse the on finisher and reinitialize the secure
  * context. Need to implement a separate suspend finisher.
  ******************************************************************************/
-static int32_t juno_affinst_suspend_finish(uint64_t mpidr,
-					   uint32_t afflvl,
+static void juno_affinst_suspend_finish(uint32_t afflvl,
 					   uint32_t state)
 {
-	return juno_affinst_on_finish(mpidr, afflvl, state);
+	juno_affinst_on_finish(afflvl, state);
 }
 
 /*******************************************************************************
@@ -278,21 +300,10 @@
 /*******************************************************************************
  * Handler called when an affinity instance is about to enter standby.
  ******************************************************************************/
-int32_t juno_affinst_standby(unsigned int power_state)
+void juno_affinst_standby(unsigned int power_state)
 {
-	unsigned int target_afflvl;
 	unsigned int scr;
 
-	/* Sanity check the requested state */
-	target_afflvl = psci_get_pstate_afflvl(power_state);
-
-	/*
-	 * It's possible to enter standby only on affinity level 0 i.e. a cpu
-	 * on the Juno. Ignore any other affinity level.
-	 */
-	if (target_afflvl != MPIDR_AFFLVL0)
-		return PSCI_E_INVALID_PARAMS;
-
 	scr = read_scr_el3();
 	/* Enable PhysicalIRQ bit for NS world to wake the CPU */
 	write_scr_el3(scr | SCR_IRQ_BIT);
@@ -305,8 +316,6 @@
 	 * done by eret while el3_exit to save some execution cycles.
 	 */
 	write_scr_el3(scr);
-
-	return PSCI_E_SUCCESS;
 }
 
 /*******************************************************************************
@@ -320,7 +329,8 @@
 	.affinst_suspend	= juno_affinst_suspend,
 	.affinst_suspend_finish	= juno_affinst_suspend_finish,
 	.system_off		= juno_system_off,
-	.system_reset		= juno_system_reset
+	.system_reset		= juno_system_reset,
+	.validate_power_state	= juno_validate_power_state
 };
 
 /*******************************************************************************
diff --git a/plat/juno/platform.mk b/plat/juno/platform.mk
index 6ca219d..158e3ac 100644
--- a/plat/juno/platform.mk
+++ b/plat/juno/platform.mk
@@ -66,7 +66,6 @@
 				plat/juno/aarch64/juno_common.c
 
 BL2_SOURCES		+=	drivers/arm/tzc400/tzc400.c		\
-				lib/locks/bakery/bakery_lock.c		\
 				plat/common/aarch64/platform_up_stack.S	\
 				plat/juno/bl2_plat_setup.c		\
 				plat/juno/mhu.c				\
diff --git a/plat/juno/tsp/tsp_plat_setup.c b/plat/juno/tsp/tsp_plat_setup.c
index 0a9d4cb..8293a13 100644
--- a/plat/juno/tsp/tsp_plat_setup.c
+++ b/plat/juno/tsp/tsp_plat_setup.c
@@ -40,19 +40,25 @@
  ******************************************************************************/
 extern unsigned long __RO_START__;
 extern unsigned long __RO_END__;
+extern unsigned long __BL32_END__;
 
+#if USE_COHERENT_MEM
 extern unsigned long __COHERENT_RAM_START__;
 extern unsigned long __COHERENT_RAM_END__;
+#endif
 
 /*
- * The next 2 constants identify the extents of the code & RO data region.
- * These addresses are used by the MMU setup code and therefore they must be
- * page-aligned.  It is the responsibility of the linker script to ensure that
- * __RO_START__ and __RO_END__ linker symbols refer to page-aligned addresses.
+ * The next 3 constants identify the extents of the code, RO data region and the
+ * limit of the BL3-2 image.  These addresses are used by the MMU setup code and
+ * therefore they must be page-aligned.  It is the responsibility of the linker
+ * script to ensure that __RO_START__, __RO_END__ & __BL32_END__ linker symbols
+ * refer to page-aligned addresses.
  */
 #define BL32_RO_BASE (unsigned long)(&__RO_START__)
 #define BL32_RO_LIMIT (unsigned long)(&__RO_END__)
+#define BL32_END (unsigned long)(&__BL32_END__)
 
+#if USE_COHERENT_MEM
 /*
  * The next 2 constants identify the extents of the coherent memory region.
  * These addresses are used by the MMU setup code and therefore they must be
@@ -62,6 +68,7 @@
  */
 #define BL32_COHERENT_RAM_BASE (unsigned long)(&__COHERENT_RAM_START__)
 #define BL32_COHERENT_RAM_LIMIT (unsigned long)(&__COHERENT_RAM_END__)
+#endif
 
 /*******************************************************************************
  * Initialize the UART
@@ -90,9 +97,12 @@
 void tsp_plat_arch_setup(void)
 {
 	configure_mmu_el1(BL32_RO_BASE,
-			  BL32_COHERENT_RAM_LIMIT - BL32_RO_BASE,
+			  (BL32_END - BL32_RO_BASE),
 			  BL32_RO_BASE,
-			  BL32_RO_LIMIT,
-			  BL32_COHERENT_RAM_BASE,
-			  BL32_COHERENT_RAM_LIMIT);
+			  BL32_RO_LIMIT
+#if USE_COHERENT_MEM
+			  , BL32_COHERENT_RAM_BASE,
+			  BL32_COHERENT_RAM_LIMIT
+#endif
+			  );
 }
diff --git a/services/spd/opteed/opteed_pm.c b/services/spd/opteed/opteed_pm.c
index 552d7a0c..37419ec 100644
--- a/services/spd/opteed/opteed_pm.c
+++ b/services/spd/opteed/opteed_pm.c
@@ -48,7 +48,7 @@
  * This cpu is being turned off. Allow the OPTEED/OPTEE to perform any actions
  * needed
  ******************************************************************************/
-static int32_t opteed_cpu_off_handler(uint64_t cookie)
+static int32_t opteed_cpu_off_handler(uint64_t unused)
 {
 	int32_t rc = 0;
 	uint64_t mpidr = read_mpidr();
@@ -82,7 +82,7 @@
  * This cpu is being suspended. S-EL1 state must have been saved in the
  * resident cpu (mpidr format) if it is a UP/UP migratable OPTEE.
  ******************************************************************************/
-static void opteed_cpu_suspend_handler(uint64_t power_state)
+static void opteed_cpu_suspend_handler(uint64_t unused)
 {
 	int32_t rc = 0;
 	uint64_t mpidr = read_mpidr();
@@ -92,10 +92,7 @@
 	assert(optee_vectors);
 	assert(get_optee_pstate(optee_ctx->state) == OPTEE_PSTATE_ON);
 
-	/* Program the entry point, power_state parameter and enter OPTEE */
-	write_ctx_reg(get_gpregs_ctx(&optee_ctx->cpu_ctx),
-		      CTX_GPREG_X0,
-		      power_state);
+	/* Program the entry point and enter OPTEE */
 	cm_set_elr_el3(SECURE, (uint64_t) &optee_vectors->cpu_suspend_entry);
 	rc = opteed_synchronous_sp_entry(optee_ctx);
 
@@ -116,7 +113,7 @@
  * after initialising minimal architectural state that guarantees safe
  * execution.
  ******************************************************************************/
-static void opteed_cpu_on_finish_handler(uint64_t cookie)
+static void opteed_cpu_on_finish_handler(uint64_t unused)
 {
 	int32_t rc = 0;
 	uint64_t mpidr = read_mpidr();
diff --git a/services/spd/tspd/tspd.mk b/services/spd/tspd/tspd.mk
index cd4b45a..139c7d7 100644
--- a/services/spd/tspd/tspd.mk
+++ b/services/spd/tspd/tspd.mk
@@ -52,3 +52,10 @@
 
 # Let the top-level Makefile know that we intend to build the SP from source
 NEED_BL32		:=	yes
+
+# Flag used to enable routing of non-secure interrupts to EL3 when they are
+# generated while the code is executing in S-EL1/0.
+TSPD_ROUTE_IRQ_TO_EL3	:=	0
+
+$(eval $(call assert_boolean,TSPD_ROUTE_IRQ_TO_EL3))
+$(eval $(call add_define,TSPD_ROUTE_IRQ_TO_EL3))
diff --git a/services/spd/tspd/tspd_main.c b/services/spd/tspd/tspd_main.c
index b8d4569..ee17483 100644
--- a/services/spd/tspd/tspd_main.c
+++ b/services/spd/tspd/tspd_main.c
@@ -48,6 +48,7 @@
 #include <platform.h>
 #include <runtime_svc.h>
 #include <stddef.h>
+#include <string.h>
 #include <tsp.h>
 #include <uuid.h>
 #include "tspd_private.h"
@@ -71,6 +72,24 @@
 
 int32_t tspd_init(void);
 
+uint64_t tspd_handle_sp_preemption(void *handle)
+{
+	cpu_context_t *ns_cpu_context;
+	assert(handle == cm_get_context(SECURE));
+	cm_el1_sysregs_context_save(SECURE);
+	/* Get a reference to the non-secure context */
+	ns_cpu_context = cm_get_context(NON_SECURE);
+	assert(ns_cpu_context);
+
+	/*
+	 * Restore non-secure state. The secure system
+	 * register context will be saved when required.
+	 */
+	cm_el1_sysregs_context_restore(NON_SECURE);
+	cm_set_next_eret_context(NON_SECURE);
+
+	SMC_RET1(ns_cpu_context, SMC_PREEMPTED);
+}
 /*******************************************************************************
  * This function is the handler registered for S-EL1 interrupts by the TSPD. It
  * validates the interrupt and upon success arranges entry into the TSP at
@@ -120,11 +139,16 @@
 						      CTX_SPSR_EL3);
 		tsp_ctx->saved_elr_el3 = SMC_GET_EL3(&tsp_ctx->cpu_ctx,
 						     CTX_ELR_EL3);
+#if TSPD_ROUTE_IRQ_TO_EL3
+		/*Need to save the previously interrupted secure context */
+		memcpy(&tsp_ctx->sp_ctx, &tsp_ctx->cpu_ctx, TSPD_SP_CTX_SIZE);
+#endif
 	}
 
 	cm_el1_sysregs_context_restore(SECURE);
 	cm_set_elr_spsr_el3(SECURE, (uint64_t) &tsp_vectors->fiq_entry,
 		    SPSR_64(MODE_EL1, MODE_SP_ELX, DISABLE_ALL_EXCEPTIONS));
+
 	cm_set_next_eret_context(SECURE);
 
 	/*
@@ -137,6 +161,34 @@
 	SMC_RET2(&tsp_ctx->cpu_ctx, TSP_HANDLE_FIQ_AND_RETURN, read_elr_el3());
 }
 
+#if TSPD_ROUTE_IRQ_TO_EL3
+/*******************************************************************************
+ * This function is the handler registered for S-EL1 interrupts by the TSPD. It
+ * validates the interrupt and upon success arranges entry into the TSP at
+ * 'tsp_fiq_entry()' for handling the interrupt.
+ ******************************************************************************/
+static uint64_t tspd_ns_interrupt_handler(uint32_t id,
+					    uint32_t flags,
+					    void *handle,
+					    void *cookie)
+{
+	/* Check the security state when the exception was generated */
+	assert(get_interrupt_src_ss(flags) == SECURE);
+
+#if IMF_READ_INTERRUPT_ID
+	/* Check the security status of the interrupt */
+	assert(plat_ic_get_interrupt_type(id) == INTR_TYPE_NS);
+#endif
+	/*
+	 * Disable the routing of NS interrupts from secure world to EL3 while
+	 * interrupted on this core.
+	 */
+	disable_intr_rm_local(INTR_TYPE_NS, SECURE);
+
+	return tspd_handle_sp_preemption(handle);
+}
+#endif
+
 /*******************************************************************************
  * Secure Payload Dispatcher setup. The SPD finds out the SP entrypoint and type
  * (aarch32/aarch64) if not already known and initialises the context for entry
@@ -270,21 +322,7 @@
 		if (ns)
 			SMC_RET1(handle, SMC_UNK);
 
-		assert(handle == cm_get_context(SECURE));
-		cm_el1_sysregs_context_save(SECURE);
-		/* Get a reference to the non-secure context */
-		ns_cpu_context = cm_get_context(NON_SECURE);
-		assert(ns_cpu_context);
-
-		/*
-		 * Restore non-secure state. There is no need to save the
-		 * secure system register context since the TSP was supposed
-		 * to preserve it during S-EL1 interrupt handling.
-		 */
-		cm_el1_sysregs_context_restore(NON_SECURE);
-		cm_set_next_eret_context(NON_SECURE);
-
-		SMC_RET1(ns_cpu_context, SMC_PREEMPTED);
+		return tspd_handle_sp_preemption(handle);
 
 	/*
 	 * This function ID is used only by the TSP to indicate that it has
@@ -308,6 +346,14 @@
 			SMC_SET_EL3(&tsp_ctx->cpu_ctx,
 				    CTX_ELR_EL3,
 				    tsp_ctx->saved_elr_el3);
+#if TSPD_ROUTE_IRQ_TO_EL3
+			/*
+			 * Need to restore the previously interrupted
+			 * secure context.
+			 */
+			memcpy(&tsp_ctx->cpu_ctx, &tsp_ctx->sp_ctx,
+				TSPD_SP_CTX_SIZE);
+#endif
 		}
 
 		/* Get a reference to the non-secure context */
@@ -389,6 +435,28 @@
 						flags);
 			if (rc)
 				panic();
+
+#if TSPD_ROUTE_IRQ_TO_EL3
+			/*
+			 * Register an interrupt handler for NS interrupts when
+			 * generated during code executing in secure state are
+			 * routed to EL3.
+			 */
+			flags = 0;
+			set_interrupt_rm_flag(flags, SECURE);
+
+			rc = register_interrupt_type_handler(INTR_TYPE_NS,
+						tspd_ns_interrupt_handler,
+						flags);
+			if (rc)
+				panic();
+
+			/*
+			 * Disable the interrupt NS locally since it will be enabled globally
+			 * within cm_init_context.
+			 */
+			disable_intr_rm_local(INTR_TYPE_NS, SECURE);
+#endif
 		}
 
 
@@ -507,6 +575,13 @@
 				set_std_smc_active_flag(tsp_ctx->state);
 				cm_set_elr_el3(SECURE, (uint64_t)
 						&tsp_vectors->std_smc_entry);
+#if TSPD_ROUTE_IRQ_TO_EL3
+				/*
+				 * Enable the routing of NS interrupts to EL3
+				 * during STD SMC processing on this core.
+				 */
+				enable_intr_rm_local(INTR_TYPE_NS, SECURE);
+#endif
 			}
 
 			cm_el1_sysregs_context_restore(SECURE);
@@ -529,8 +604,18 @@
 			/* Restore non-secure state */
 			cm_el1_sysregs_context_restore(NON_SECURE);
 			cm_set_next_eret_context(NON_SECURE);
-			if (GET_SMC_TYPE(smc_fid) == SMC_TYPE_STD)
+			if (GET_SMC_TYPE(smc_fid) == SMC_TYPE_STD) {
 				clr_std_smc_active_flag(tsp_ctx->state);
+#if TSPD_ROUTE_IRQ_TO_EL3
+				/*
+				 * Disable the routing of NS interrupts to EL3
+				 * after STD SMC processing is finished on this
+				 * core.
+				 */
+				disable_intr_rm_local(INTR_TYPE_NS, SECURE);
+#endif
+			}
+
 			SMC_RET3(ns_cpu_context, x1, x2, x3);
 		}
 
@@ -564,6 +649,15 @@
 		 * We are done stashing the non-secure context. Ask the
 		 * secure payload to do the work now.
 		 */
+#if TSPD_ROUTE_IRQ_TO_EL3
+		/*
+		 * Enable the routing of NS interrupts to EL3 during resumption
+		 * of STD SMC call on this core.
+		 */
+		enable_intr_rm_local(INTR_TYPE_NS, SECURE);
+#endif
+
+
 
 		/* We just need to return to the preempted point in
 		 * TSP and the execution will resume as normal.
diff --git a/services/spd/tspd/tspd_pm.c b/services/spd/tspd/tspd_pm.c
index 1655285..009ff5f 100644
--- a/services/spd/tspd/tspd_pm.c
+++ b/services/spd/tspd/tspd_pm.c
@@ -49,7 +49,7 @@
  * This cpu is being turned off. Allow the TSPD/TSP to perform any actions
  * needed
  ******************************************************************************/
-static int32_t tspd_cpu_off_handler(uint64_t cookie)
+static int32_t tspd_cpu_off_handler(uint64_t unused)
 {
 	int32_t rc = 0;
 	uint64_t mpidr = read_mpidr();
@@ -83,7 +83,7 @@
  * This cpu is being suspended. S-EL1 state must have been saved in the
  * resident cpu (mpidr format) if it is a UP/UP migratable TSP.
  ******************************************************************************/
-static void tspd_cpu_suspend_handler(uint64_t power_state)
+static void tspd_cpu_suspend_handler(uint64_t unused)
 {
 	int32_t rc = 0;
 	uint64_t mpidr = read_mpidr();
@@ -93,10 +93,7 @@
 	assert(tsp_vectors);
 	assert(get_tsp_pstate(tsp_ctx->state) == TSP_PSTATE_ON);
 
-	/* Program the entry point, power_state parameter and enter the TSP */
-	write_ctx_reg(get_gpregs_ctx(&tsp_ctx->cpu_ctx),
-		      CTX_GPREG_X0,
-		      power_state);
+	/* Program the entry point and enter the TSP */
 	cm_set_elr_el3(SECURE, (uint64_t) &tsp_vectors->cpu_suspend_entry);
 	rc = tspd_synchronous_sp_entry(tsp_ctx);
 
@@ -117,7 +114,7 @@
  * after initialising minimal architectural state that guarantees safe
  * execution.
  ******************************************************************************/
-static void tspd_cpu_on_finish_handler(uint64_t cookie)
+static void tspd_cpu_on_finish_handler(uint64_t unused)
 {
 	int32_t rc = 0;
 	uint64_t mpidr = read_mpidr();
@@ -136,6 +133,14 @@
 	/* Initialise this cpu's secure context */
 	cm_init_context(mpidr, &tsp_on_entrypoint);
 
+#if TSPD_ROUTE_IRQ_TO_EL3
+	/*
+	 * Disable the NS interrupt locally since it will be enabled globally
+	 * within cm_init_context.
+	 */
+	disable_intr_rm_local(INTR_TYPE_NS, SECURE);
+#endif
+
 	/* Enter the TSP */
 	rc = tspd_synchronous_sp_entry(tsp_ctx);
 
diff --git a/services/spd/tspd/tspd_private.h b/services/spd/tspd/tspd_private.h
index 4d48dbd..5f6fb2b 100644
--- a/services/spd/tspd/tspd_private.h
+++ b/services/spd/tspd/tspd_private.h
@@ -120,6 +120,34 @@
 #define TSPD_C_RT_CTX_SIZE		0x60
 #define TSPD_C_RT_CTX_ENTRIES		(TSPD_C_RT_CTX_SIZE >> DWORD_SHIFT)
 
+/*******************************************************************************
+ * Constants that allow assembler code to preserve caller-saved registers of the
+ * SP context while performing a TSP preemption.
+ * Note: These offsets have to match with the offsets for the corresponding
+ * registers in cpu_context as we are using memcpy to copy the values from
+ * cpu_context to sp_ctx.
+ ******************************************************************************/
+#define TSPD_SP_CTX_X0		0x0
+#define TSPD_SP_CTX_X1		0x8
+#define TSPD_SP_CTX_X2		0x10
+#define TSPD_SP_CTX_X3		0x18
+#define TSPD_SP_CTX_X4		0x20
+#define TSPD_SP_CTX_X5		0x28
+#define TSPD_SP_CTX_X6		0x30
+#define TSPD_SP_CTX_X7		0x38
+#define TSPD_SP_CTX_X8		0x40
+#define TSPD_SP_CTX_X9		0x48
+#define TSPD_SP_CTX_X10		0x50
+#define TSPD_SP_CTX_X11		0x58
+#define TSPD_SP_CTX_X12		0x60
+#define TSPD_SP_CTX_X13		0x68
+#define TSPD_SP_CTX_X14		0x70
+#define TSPD_SP_CTX_X15		0x78
+#define TSPD_SP_CTX_X16		0x80
+#define TSPD_SP_CTX_X17		0x88
+#define TSPD_SP_CTX_SIZE	0x90
+#define TSPD_SP_CTX_ENTRIES		(TSPD_SP_CTX_SIZE >> DWORD_SHIFT)
+
 #ifndef __ASSEMBLY__
 
 #include <cassert.h>
@@ -142,6 +170,17 @@
 CASSERT(TSPD_C_RT_CTX_SIZE == sizeof(c_rt_regs_t),	\
 	assert_spd_c_rt_regs_size_mismatch);
 
+/* SEL1 Secure payload (SP) caller saved register context structure. */
+DEFINE_REG_STRUCT(sp_ctx_regs, TSPD_SP_CTX_ENTRIES);
+
+/*
+ * Compile time assertion to ensure that both the compiler and linker
+ * have the same double word aligned view of the size of the C runtime
+ * register context.
+ */
+CASSERT(TSPD_SP_CTX_SIZE == sizeof(sp_ctx_regs_t),	\
+	assert_spd_sp_regs_size_mismatch);
+
 /*******************************************************************************
  * Structure which helps the SPD to maintain the per-cpu state of the SP.
  * 'saved_spsr_el3' - temporary copy to allow FIQ handling when the TSP has been
@@ -155,6 +194,10 @@
  * 'cpu_ctx'        - space to maintain SP architectural state
  * 'saved_tsp_args' - space to store arguments for TSP arithmetic operations
  *                    which will queried using the TSP_GET_ARGS SMC by TSP.
+ * 'sp_ctx'         - space to save the SEL1 Secure Payload(SP) caller saved
+ *                    register context after it has been preempted by an EL3
+ *                    routed NS interrupt and when a Secure Interrupt is taken
+ *                    to SP.
  ******************************************************************************/
 typedef struct tsp_context {
 	uint64_t saved_elr_el3;
@@ -164,6 +207,9 @@
 	uint64_t c_rt_ctx;
 	cpu_context_t cpu_ctx;
 	uint64_t saved_tsp_args[TSP_NUM_ARGS];
+#if TSPD_ROUTE_IRQ_TO_EL3
+	sp_ctx_regs_t sp_ctx;
+#endif
 } tsp_context_t;
 
 /* Helper macros to store and retrieve tsp args from tsp_context */
diff --git a/services/std_svc/psci/psci_afflvl_off.c b/services/std_svc/psci/psci_afflvl_off.c
index 7e05789..7eb9688 100644
--- a/services/std_svc/psci/psci_afflvl_off.c
+++ b/services/std_svc/psci/psci_afflvl_off.c
@@ -31,56 +31,35 @@
 #include <arch.h>
 #include <arch_helpers.h>
 #include <assert.h>
+#include <debug.h>
 #include <string.h>
 #include "psci_private.h"
 
-typedef int (*afflvl_off_handler_t)(aff_map_node_t *);
+typedef void (*afflvl_off_handler_t)(aff_map_node_t *node);
 
 /*******************************************************************************
  * The next three functions implement a handler for each supported affinity
  * level which is called when that affinity level is turned off.
  ******************************************************************************/
-static int psci_afflvl0_off(aff_map_node_t *cpu_node)
+static void psci_afflvl0_off(aff_map_node_t *cpu_node)
 {
-	int rc;
-
 	assert(cpu_node->level == MPIDR_AFFLVL0);
 
 	/*
-	 * Generic management: Get the index for clearing any lingering re-entry
-	 * information and allow the secure world to switch itself off
-	 */
-
-	/*
-	 * Call the cpu off handler registered by the Secure Payload Dispatcher
-	 * to let it do any bookeeping. Assume that the SPD always reports an
-	 * E_DENIED error if SP refuse to power down
-	 */
-	if (psci_spd_pm && psci_spd_pm->svc_off) {
-		rc = psci_spd_pm->svc_off(0);
-		if (rc)
-			return rc;
-	}
-
-	/*
 	 * Arch. management. Perform the necessary steps to flush all
 	 * cpu caches.
 	 */
 	psci_do_pwrdown_cache_maintenance(MPIDR_AFFLVL0);
 
-	if (!psci_plat_pm_ops->affinst_off)
-		return PSCI_E_SUCCESS;
-
 	/*
 	 * Plat. management: Perform platform specific actions to turn this
 	 * cpu off e.g. exit cpu coherency, program the power controller etc.
 	 */
-	return psci_plat_pm_ops->affinst_off(read_mpidr_el1(),
-					     cpu_node->level,
-					     psci_get_phys_state(cpu_node));
+	psci_plat_pm_ops->affinst_off(cpu_node->level,
+				     psci_get_phys_state(cpu_node));
 }
 
-static int psci_afflvl1_off(aff_map_node_t *cluster_node)
+static void psci_afflvl1_off(aff_map_node_t *cluster_node)
 {
 	/* Sanity check the cluster level */
 	assert(cluster_node->level == MPIDR_AFFLVL1);
@@ -91,20 +70,16 @@
 	 */
 	psci_do_pwrdown_cache_maintenance(MPIDR_AFFLVL1);
 
-	if (!psci_plat_pm_ops->affinst_off)
-		return PSCI_E_SUCCESS;
-
 	/*
 	 * Plat. Management. Allow the platform to do its cluster
 	 * specific bookeeping e.g. turn off interconnect coherency,
 	 * program the power controller etc.
 	 */
-	return psci_plat_pm_ops->affinst_off(read_mpidr_el1(),
-					     cluster_node->level,
+	psci_plat_pm_ops->affinst_off(cluster_node->level,
 					     psci_get_phys_state(cluster_node));
 }
 
-static int psci_afflvl2_off(aff_map_node_t *system_node)
+static void psci_afflvl2_off(aff_map_node_t *system_node)
 {
 	/* Cannot go beyond this level */
 	assert(system_node->level == MPIDR_AFFLVL2);
@@ -120,15 +95,11 @@
 	 */
 	psci_do_pwrdown_cache_maintenance(MPIDR_AFFLVL2);
 
-	if (!psci_plat_pm_ops->affinst_off)
-		return PSCI_E_SUCCESS;
-
 	/*
 	 * Plat. Management : Allow the platform to do its bookeeping
 	 * at this affinity level
 	 */
-	return psci_plat_pm_ops->affinst_off(read_mpidr_el1(),
-					     system_node->level,
+	psci_plat_pm_ops->affinst_off(system_node->level,
 					     psci_get_phys_state(system_node));
 }
 
@@ -143,11 +114,11 @@
  * topology tree and calls the off handler for the corresponding affinity
  * levels
  ******************************************************************************/
-static int psci_call_off_handlers(aff_map_node_t *mpidr_nodes[],
+static void psci_call_off_handlers(aff_map_node_t *mpidr_nodes[],
 				  int start_afflvl,
 				  int end_afflvl)
 {
-	int rc = PSCI_E_INVALID_PARAMS, level;
+	int level;
 	aff_map_node_t *node;
 
 	for (level = start_afflvl; level <= end_afflvl; level++) {
@@ -155,17 +126,8 @@
 		if (node == NULL)
 			continue;
 
-		/*
-		 * TODO: In case of an error should there be a way
-		 * of restoring what we might have torn down at
-		 * lower affinity levels.
-		 */
-		rc = psci_afflvl_off_handlers[level](node);
-		if (rc != PSCI_E_SUCCESS)
-			break;
+		psci_afflvl_off_handlers[level](node);
 	}
-
-	return rc;
 }
 
 /*******************************************************************************
@@ -190,22 +152,28 @@
 int psci_afflvl_off(int start_afflvl,
 		    int end_afflvl)
 {
-	int rc = PSCI_E_SUCCESS;
+	int rc;
 	mpidr_aff_map_nodes_t mpidr_nodes;
 	unsigned int max_phys_off_afflvl;
 
 	/*
+	 * This function must only be called on platforms where the
+	 * CPU_OFF platform hooks have been implemented.
+	 */
+	assert(psci_plat_pm_ops->affinst_off);
+
+	/*
 	 * Collect the pointers to the nodes in the topology tree for
 	 * each affinity instance in the mpidr. If this function does
 	 * not return successfully then either the mpidr or the affinity
-	 * levels are incorrect. In either case, we cannot return back
-	 * to the caller as it would not know what to do.
+	 * levels are incorrect. Either way, this an internal TF error
+	 * therefore assert.
 	 */
 	rc = psci_get_aff_map_nodes(read_mpidr_el1() & MPIDR_AFFINITY_MASK,
 				    start_afflvl,
 				    end_afflvl,
 				    mpidr_nodes);
-	assert (rc == PSCI_E_SUCCESS);
+	assert(rc == PSCI_E_SUCCESS);
 
 	/*
 	 * This function acquires the lock corresponding to each affinity
@@ -216,6 +184,18 @@
 				  end_afflvl,
 				  mpidr_nodes);
 
+
+	/*
+	 * Call the cpu off handler registered by the Secure Payload Dispatcher
+	 * to let it do any bookkeeping. Assume that the SPD always reports an
+	 * E_DENIED error if SP refuse to power down
+	 */
+	if (psci_spd_pm && psci_spd_pm->svc_off) {
+		rc = psci_spd_pm->svc_off(0);
+		if (rc)
+			goto exit;
+	}
+
 	/*
 	 * This function updates the state of each affinity instance
 	 * corresponding to the mpidr in the range of affinity levels
@@ -235,7 +215,7 @@
 	psci_set_max_phys_off_afflvl(max_phys_off_afflvl);
 
 	/* Perform generic, architecture and platform specific handling */
-	rc = psci_call_off_handlers(mpidr_nodes,
+	psci_call_off_handlers(mpidr_nodes,
 				    start_afflvl,
 				    end_afflvl);
 
@@ -247,6 +227,7 @@
 	 */
 	psci_set_max_phys_off_afflvl(PSCI_INVALID_DATA);
 
+exit:
 	/*
 	 * Release the locks corresponding to each affinity level in the
 	 * reverse order to which they were acquired.
@@ -255,5 +236,13 @@
 				  end_afflvl,
 				  mpidr_nodes);
 
+	/*
+	 * Check if all actions needed to safely power down this cpu have
+	 * successfully completed. Enter a wfi loop which will allow the
+	 * power controller to physically power down this cpu.
+	 */
+	if (rc == PSCI_E_SUCCESS)
+		psci_power_down_wfi();
+
 	return rc;
 }
diff --git a/services/std_svc/psci/psci_afflvl_on.c b/services/std_svc/psci/psci_afflvl_on.c
index f1d30c9..0ee03cb 100644
--- a/services/std_svc/psci/psci_afflvl_on.c
+++ b/services/std_svc/psci/psci_afflvl_on.c
@@ -33,28 +33,22 @@
 #include <assert.h>
 #include <bl_common.h>
 #include <bl31.h>
+#include <debug.h>
 #include <context_mgmt.h>
 #include <platform.h>
 #include <runtime_svc.h>
 #include <stddef.h>
 #include "psci_private.h"
 
-typedef int (*afflvl_on_handler_t)(unsigned long,
-				 aff_map_node_t *,
-				 unsigned long,
-				 unsigned long);
+typedef int (*afflvl_on_handler_t)(unsigned long target_cpu,
+				 aff_map_node_t *node);
 
 /*******************************************************************************
  * This function checks whether a cpu which has been requested to be turned on
  * is OFF to begin with.
  ******************************************************************************/
-static int cpu_on_validate_state(aff_map_node_t *node)
+static int cpu_on_validate_state(unsigned int psci_state)
 {
-	unsigned int psci_state;
-
-	/* Get the raw psci state */
-	psci_state = psci_get_state(node);
-
 	if (psci_state == PSCI_STATE_ON || psci_state == PSCI_STATE_SUSPEND)
 		return PSCI_E_ALREADY_ON;
 
@@ -71,50 +65,16 @@
  * TODO: Split this code across separate handlers for each type of setup?
  ******************************************************************************/
 static int psci_afflvl0_on(unsigned long target_cpu,
-			   aff_map_node_t *cpu_node,
-			   unsigned long ns_entrypoint,
-			   unsigned long context_id)
+			   aff_map_node_t *cpu_node)
 {
 	unsigned long psci_entrypoint;
-	uint32_t ns_scr_el3 = read_scr_el3();
-	uint32_t ns_sctlr_el1 = read_sctlr_el1();
-	int rc;
 
 	/* Sanity check to safeguard against data corruption */
 	assert(cpu_node->level == MPIDR_AFFLVL0);
 
-	/*
-	 * Generic management: Ensure that the cpu is off to be
-	 * turned on
-	 */
-	rc = cpu_on_validate_state(cpu_node);
-	if (rc != PSCI_E_SUCCESS)
-		return rc;
-
-	/*
-	 * Call the cpu on handler registered by the Secure Payload Dispatcher
-	 * to let it do any bookeeping. If the handler encounters an error, it's
-	 * expected to assert within
-	 */
-	if (psci_spd_pm && psci_spd_pm->svc_on)
-		psci_spd_pm->svc_on(target_cpu);
-
-	/*
-	 * Arch. management: Derive the re-entry information for
-	 * the non-secure world from the non-secure state from
-	 * where this call originated.
-	 */
-	rc = psci_save_ns_entry(target_cpu, ns_entrypoint, context_id,
-				ns_scr_el3, ns_sctlr_el1);
-	if (rc != PSCI_E_SUCCESS)
-		return rc;
-
 	/* Set the secure world (EL3) re-entry point after BL1 */
 	psci_entrypoint = (unsigned long) psci_aff_on_finish_entry;
 
-	if (!psci_plat_pm_ops->affinst_on)
-		return PSCI_E_SUCCESS;
-
 	/*
 	 * Plat. management: Give the platform the current state
 	 * of the target cpu to allow it to perform the necessary
@@ -122,7 +82,6 @@
 	 */
 	return psci_plat_pm_ops->affinst_on(target_cpu,
 					    psci_entrypoint,
-					    ns_entrypoint,
 					    cpu_node->level,
 					    psci_get_phys_state(cpu_node));
 }
@@ -133,9 +92,7 @@
  * TODO: Split this code across separate handlers for each type of setup?
  ******************************************************************************/
 static int psci_afflvl1_on(unsigned long target_cpu,
-			   aff_map_node_t *cluster_node,
-			   unsigned long ns_entrypoint,
-			   unsigned long context_id)
+			   aff_map_node_t *cluster_node)
 {
 	unsigned long psci_entrypoint;
 
@@ -148,9 +105,6 @@
 
 	/* State management: Is not required while turning a cluster on */
 
-	if (!psci_plat_pm_ops->affinst_on)
-		return PSCI_E_SUCCESS;
-
 	/*
 	 * Plat. management: Give the platform the current state
 	 * of the target cpu to allow it to perform the necessary
@@ -159,7 +113,6 @@
 	psci_entrypoint = (unsigned long) psci_aff_on_finish_entry;
 	return psci_plat_pm_ops->affinst_on(target_cpu,
 					    psci_entrypoint,
-					    ns_entrypoint,
 					    cluster_node->level,
 					    psci_get_phys_state(cluster_node));
 }
@@ -170,9 +123,7 @@
  * TODO: Split this code across separate handlers for each type of setup?
  ******************************************************************************/
 static int psci_afflvl2_on(unsigned long target_cpu,
-			   aff_map_node_t *system_node,
-			   unsigned long ns_entrypoint,
-			   unsigned long context_id)
+			   aff_map_node_t *system_node)
 {
 	unsigned long psci_entrypoint;
 
@@ -186,9 +137,6 @@
 
 	/* State management: Is not required while turning a system on */
 
-	if (!psci_plat_pm_ops->affinst_on)
-		return PSCI_E_SUCCESS;
-
 	/*
 	 * Plat. management: Give the platform the current state
 	 * of the target cpu to allow it to perform the necessary
@@ -197,7 +145,6 @@
 	psci_entrypoint = (unsigned long) psci_aff_on_finish_entry;
 	return psci_plat_pm_ops->affinst_on(target_cpu,
 					    psci_entrypoint,
-					    ns_entrypoint,
 					    system_node->level,
 					    psci_get_phys_state(system_node));
 }
@@ -217,9 +164,7 @@
 static int psci_call_on_handlers(aff_map_node_t *target_cpu_nodes[],
 				 int start_afflvl,
 				 int end_afflvl,
-				 unsigned long target_cpu,
-				 unsigned long entrypoint,
-				 unsigned long context_id)
+				 unsigned long target_cpu)
 {
 	int rc = PSCI_E_INVALID_PARAMS, level;
 	aff_map_node_t *node;
@@ -235,9 +180,7 @@
 		 * affinity levels.
 		 */
 		rc = psci_afflvl_on_handlers[level](target_cpu,
-						    node,
-						    entrypoint,
-						    context_id);
+						    node);
 		if (rc != PSCI_E_SUCCESS)
 			break;
 	}
@@ -258,19 +201,25 @@
  *
  * The affinity level specific handlers are called in descending order i.e. from
  * the highest to the lowest affinity level implemented by the platform because
- * to turn on affinity level X it is neccesary to turn on affinity level X + 1
+ * to turn on affinity level X it is necessary to turn on affinity level X + 1
  * first.
  ******************************************************************************/
 int psci_afflvl_on(unsigned long target_cpu,
-		   unsigned long entrypoint,
-		   unsigned long context_id,
+		   entry_point_info_t *ep,
 		   int start_afflvl,
 		   int end_afflvl)
 {
-	int rc = PSCI_E_SUCCESS;
+	int rc;
 	mpidr_aff_map_nodes_t target_cpu_nodes;
 
 	/*
+	 * This function must only be called on platforms where the
+	 * CPU_ON platform hooks have been implemented.
+	 */
+	assert(psci_plat_pm_ops->affinst_on &&
+			psci_plat_pm_ops->affinst_on_finish);
+
+	/*
 	 * Collect the pointers to the nodes in the topology tree for
 	 * each affinity instance in the mpidr. If this function does
 	 * not return successfully then either the mpidr or the affinity
@@ -280,9 +229,7 @@
 				    start_afflvl,
 				    end_afflvl,
 				    target_cpu_nodes);
-	if (rc != PSCI_E_SUCCESS)
-		return rc;
-
+	assert(rc == PSCI_E_SUCCESS);
 
 	/*
 	 * This function acquires the lock corresponding to each affinity
@@ -293,25 +240,49 @@
 				  end_afflvl,
 				  target_cpu_nodes);
 
+	/*
+	 * Generic management: Ensure that the cpu is off to be
+	 * turned on.
+	 */
+	rc = cpu_on_validate_state(psci_get_state(
+				    target_cpu_nodes[MPIDR_AFFLVL0]));
+	if (rc != PSCI_E_SUCCESS)
+		goto exit;
+
+	/*
+	 * Call the cpu on handler registered by the Secure Payload Dispatcher
+	 * to let it do any bookeeping. If the handler encounters an error, it's
+	 * expected to assert within
+	 */
+	if (psci_spd_pm && psci_spd_pm->svc_on)
+		psci_spd_pm->svc_on(target_cpu);
+
 	/* Perform generic, architecture and platform specific handling. */
 	rc = psci_call_on_handlers(target_cpu_nodes,
 				   start_afflvl,
 				   end_afflvl,
-				   target_cpu,
-				   entrypoint,
-				   context_id);
+				   target_cpu);
+
+	assert(rc == PSCI_E_SUCCESS || rc == PSCI_E_INTERN_FAIL);
 
 	/*
 	 * This function updates the state of each affinity instance
 	 * corresponding to the mpidr in the range of affinity levels
 	 * specified.
 	 */
-	if (rc == PSCI_E_SUCCESS)
+	if (rc == PSCI_E_SUCCESS) {
 		psci_do_afflvl_state_mgmt(start_afflvl,
 					  end_afflvl,
 					  target_cpu_nodes,
 					  PSCI_STATE_ON_PENDING);
 
+		/*
+		 * Store the re-entry information for the non-secure world.
+		 */
+		cm_init_context(target_cpu, ep);
+	}
+
+exit:
 	/*
 	 * This loop releases the lock corresponding to each affinity level
 	 * in the reverse order to which they were acquired.
@@ -327,9 +298,9 @@
  * The following functions finish an earlier affinity power on request. They
  * are called by the common finisher routine in psci_common.c.
  ******************************************************************************/
-static unsigned int psci_afflvl0_on_finish(aff_map_node_t *cpu_node)
+static void psci_afflvl0_on_finish(aff_map_node_t *cpu_node)
 {
-	unsigned int plat_state, state, rc;
+	unsigned int plat_state, state;
 
 	assert(cpu_node->level == MPIDR_AFFLVL0);
 
@@ -343,15 +314,11 @@
 	 * register. The actual state of this cpu has already been
 	 * changed.
 	 */
-	if (psci_plat_pm_ops->affinst_on_finish) {
 
-		/* Get the physical state of this cpu */
-		plat_state = get_phys_state(state);
-		rc = psci_plat_pm_ops->affinst_on_finish(read_mpidr_el1(),
-							 cpu_node->level,
+	/* Get the physical state of this cpu */
+	plat_state = get_phys_state(state);
+	psci_plat_pm_ops->affinst_on_finish(cpu_node->level,
 							 plat_state);
-		assert(rc == PSCI_E_SUCCESS);
-	}
 
 	/*
 	 * Arch. management: Enable data cache and manage stack memory
@@ -382,20 +349,14 @@
 
 	/* Clean caches before re-entering normal world */
 	dcsw_op_louis(DCCSW);
-
-	rc = PSCI_E_SUCCESS;
-	return rc;
 }
 
-static unsigned int psci_afflvl1_on_finish(aff_map_node_t *cluster_node)
+static void psci_afflvl1_on_finish(aff_map_node_t *cluster_node)
 {
 	unsigned int plat_state;
 
 	assert(cluster_node->level == MPIDR_AFFLVL1);
 
-	if (!psci_plat_pm_ops->affinst_on_finish)
-		return PSCI_E_SUCCESS;
-
 	/*
 	 * Plat. management: Perform the platform specific actions
 	 * as per the old state of the cluster e.g. enabling
@@ -405,22 +366,18 @@
 	 * situation.
 	 */
 	plat_state = psci_get_phys_state(cluster_node);
-	return psci_plat_pm_ops->affinst_on_finish(read_mpidr_el1(),
-						 cluster_node->level,
+	psci_plat_pm_ops->affinst_on_finish(cluster_node->level,
 						 plat_state);
 }
 
 
-static unsigned int psci_afflvl2_on_finish(aff_map_node_t *system_node)
+static void psci_afflvl2_on_finish(aff_map_node_t *system_node)
 {
 	unsigned int plat_state;
 
 	/* Cannot go beyond this affinity level */
 	assert(system_node->level == MPIDR_AFFLVL2);
 
-	if (!psci_plat_pm_ops->affinst_on_finish)
-		return PSCI_E_SUCCESS;
-
 	/*
 	 * Currently, there are no architectural actions to perform
 	 * at the system level.
@@ -435,8 +392,7 @@
 	 * situation.
 	 */
 	plat_state = psci_get_phys_state(system_node);
-	return psci_plat_pm_ops->affinst_on_finish(read_mpidr_el1(),
-						   system_node->level,
+	psci_plat_pm_ops->affinst_on_finish(system_node->level,
 						   plat_state);
 }
 
diff --git a/services/std_svc/psci/psci_afflvl_suspend.c b/services/std_svc/psci/psci_afflvl_suspend.c
index 4fcabfc..dad0cef 100644
--- a/services/std_svc/psci/psci_afflvl_suspend.c
+++ b/services/std_svc/psci/psci_afflvl_suspend.c
@@ -35,15 +35,13 @@
 #include <context.h>
 #include <context_mgmt.h>
 #include <cpu_data.h>
+#include <debug.h>
 #include <platform.h>
 #include <runtime_svc.h>
 #include <stddef.h>
 #include "psci_private.h"
 
-typedef int (*afflvl_suspend_handler_t)(aff_map_node_t *,
-				      unsigned long,
-				      unsigned long,
-				      unsigned int);
+typedef void (*afflvl_suspend_handler_t)(aff_map_node_t *node);
 
 /*******************************************************************************
  * This function saves the power state parameter passed in the current PSCI
@@ -105,44 +103,13 @@
  * The next three functions implement a handler for each supported affinity
  * level which is called when that affinity level is about to be suspended.
  ******************************************************************************/
-static int psci_afflvl0_suspend(aff_map_node_t *cpu_node,
-				unsigned long ns_entrypoint,
-				unsigned long context_id,
-				unsigned int power_state)
+static void psci_afflvl0_suspend(aff_map_node_t *cpu_node)
 {
 	unsigned long psci_entrypoint;
-	uint32_t ns_scr_el3 = read_scr_el3();
-	uint32_t ns_sctlr_el1 = read_sctlr_el1();
-	int rc;
 
 	/* Sanity check to safeguard against data corruption */
 	assert(cpu_node->level == MPIDR_AFFLVL0);
 
-	/* Save PSCI power state parameter for the core in suspend context */
-	psci_set_suspend_power_state(power_state);
-
-	/*
-	 * Generic management: Store the re-entry information for the non-secure
-	 * world and allow the secure world to suspend itself
-	 */
-
-	/*
-	 * Call the cpu suspend handler registered by the Secure Payload
-	 * Dispatcher to let it do any bookeeping. If the handler encounters an
-	 * error, it's expected to assert within
-	 */
-	if (psci_spd_pm && psci_spd_pm->svc_suspend)
-		psci_spd_pm->svc_suspend(power_state);
-
-	/*
-	 * Generic management: Store the re-entry information for the
-	 * non-secure world
-	 */
-	rc = psci_save_ns_entry(read_mpidr_el1(), ns_entrypoint, context_id,
-				ns_scr_el3, ns_sctlr_el1);
-	if (rc != PSCI_E_SUCCESS)
-		return rc;
-
 	/* Set the secure world (EL3) re-entry point after BL1 */
 	psci_entrypoint = (unsigned long) psci_aff_suspend_finish_entry;
 
@@ -152,26 +119,18 @@
 	 */
 	psci_do_pwrdown_cache_maintenance(MPIDR_AFFLVL0);
 
-	if (!psci_plat_pm_ops->affinst_suspend)
-		return PSCI_E_SUCCESS;
-
 	/*
 	 * Plat. management: Allow the platform to perform the
 	 * necessary actions to turn off this cpu e.g. set the
 	 * platform defined mailbox with the psci entrypoint,
 	 * program the power controller etc.
 	 */
-	return psci_plat_pm_ops->affinst_suspend(read_mpidr_el1(),
-						 psci_entrypoint,
-						 ns_entrypoint,
+	psci_plat_pm_ops->affinst_suspend(psci_entrypoint,
 						 cpu_node->level,
 						 psci_get_phys_state(cpu_node));
 }
 
-static int psci_afflvl1_suspend(aff_map_node_t *cluster_node,
-				unsigned long ns_entrypoint,
-				unsigned long context_id,
-				unsigned int power_state)
+static void psci_afflvl1_suspend(aff_map_node_t *cluster_node)
 {
 	unsigned int plat_state;
 	unsigned long psci_entrypoint;
@@ -185,9 +144,6 @@
 	 */
 	psci_do_pwrdown_cache_maintenance(MPIDR_AFFLVL1);
 
-	if (!psci_plat_pm_ops->affinst_suspend)
-		return PSCI_E_SUCCESS;
-
 	/*
 	 * Plat. Management. Allow the platform to do its cluster specific
 	 * bookeeping e.g. turn off interconnect coherency, program the power
@@ -198,18 +154,13 @@
 	 */
 	plat_state = psci_get_phys_state(cluster_node);
 	psci_entrypoint = (unsigned long) psci_aff_suspend_finish_entry;
-	return psci_plat_pm_ops->affinst_suspend(read_mpidr_el1(),
-						 psci_entrypoint,
-						 ns_entrypoint,
+	psci_plat_pm_ops->affinst_suspend(psci_entrypoint,
 						 cluster_node->level,
 						 plat_state);
 }
 
 
-static int psci_afflvl2_suspend(aff_map_node_t *system_node,
-				unsigned long ns_entrypoint,
-				unsigned long context_id,
-				unsigned int power_state)
+static void psci_afflvl2_suspend(aff_map_node_t *system_node)
 {
 	unsigned int plat_state;
 	unsigned long psci_entrypoint;
@@ -233,8 +184,6 @@
 	 * Plat. Management : Allow the platform to do its bookeeping
 	 * at this affinity level
 	 */
-	if (!psci_plat_pm_ops->affinst_suspend)
-		return PSCI_E_SUCCESS;
 
 	/*
 	 * Sending the psci entrypoint is currently redundant
@@ -244,9 +193,7 @@
 	 */
 	plat_state = psci_get_phys_state(system_node);
 	psci_entrypoint = (unsigned long) psci_aff_suspend_finish_entry;
-	return psci_plat_pm_ops->affinst_suspend(read_mpidr_el1(),
-						 psci_entrypoint,
-						 ns_entrypoint,
+	psci_plat_pm_ops->affinst_suspend(psci_entrypoint,
 						 system_node->level,
 						 plat_state);
 }
@@ -262,14 +209,11 @@
  * topology tree and calls the suspend handler for the corresponding affinity
  * levels
  ******************************************************************************/
-static int psci_call_suspend_handlers(aff_map_node_t *mpidr_nodes[],
+static void psci_call_suspend_handlers(aff_map_node_t *mpidr_nodes[],
 				      int start_afflvl,
-				      int end_afflvl,
-				      unsigned long entrypoint,
-				      unsigned long context_id,
-				      unsigned int power_state)
+				      int end_afflvl)
 {
-	int rc = PSCI_E_INVALID_PARAMS, level;
+	int level;
 	aff_map_node_t *node;
 
 	for (level = start_afflvl; level <= end_afflvl; level++) {
@@ -277,20 +221,8 @@
 		if (node == NULL)
 			continue;
 
-		/*
-		 * TODO: In case of an error should there be a way
-		 * of restoring what we might have torn down at
-		 * lower affinity levels.
-		 */
-		rc = psci_afflvl_suspend_handlers[level](node,
-							 entrypoint,
-							 context_id,
-							 power_state);
-		if (rc != PSCI_E_SUCCESS)
-			break;
+		psci_afflvl_suspend_handlers[level](node);
 	}
-
-	return rc;
 }
 
 /*******************************************************************************
@@ -311,29 +243,36 @@
  * the lowest to the highest affinity level implemented by the platform because
  * to turn off affinity level X it is neccesary to turn off affinity level X - 1
  * first.
+ *
+ * All the required parameter checks are performed at the beginning and after
+ * the state transition has been done, no further error is expected and it
+ * is not possible to undo any of the actions taken beyond that point.
  ******************************************************************************/
-int psci_afflvl_suspend(unsigned long entrypoint,
-			unsigned long context_id,
-			unsigned int power_state,
+void psci_afflvl_suspend(entry_point_info_t *ep,
 			int start_afflvl,
 			int end_afflvl)
 {
-	int rc = PSCI_E_SUCCESS;
+	int skip_wfi = 0;
 	mpidr_aff_map_nodes_t mpidr_nodes;
 	unsigned int max_phys_off_afflvl;
 
 	/*
+	 * This function must only be called on platforms where the
+	 * CPU_SUSPEND platform hooks have been implemented.
+	 */
+	assert(psci_plat_pm_ops->affinst_suspend &&
+			psci_plat_pm_ops->affinst_suspend_finish);
+
+	/*
 	 * Collect the pointers to the nodes in the topology tree for
 	 * each affinity instance in the mpidr. If this function does
 	 * not return successfully then either the mpidr or the affinity
-	 * levels are incorrect.
+	 * levels are incorrect. Either way, this an internal TF error
+	 * therefore assert.
 	 */
-	rc = psci_get_aff_map_nodes(read_mpidr_el1() & MPIDR_AFFINITY_MASK,
-				    start_afflvl,
-				    end_afflvl,
-				    mpidr_nodes);
-	if (rc != PSCI_E_SUCCESS)
-		return rc;
+	if (psci_get_aff_map_nodes(read_mpidr_el1() & MPIDR_AFFINITY_MASK,
+		   start_afflvl, end_afflvl, mpidr_nodes) != PSCI_E_SUCCESS)
+		assert(0);
 
 	/*
 	 * This function acquires the lock corresponding to each affinity
@@ -345,6 +284,24 @@
 				  mpidr_nodes);
 
 	/*
+	 * We check if there are any pending interrupts after the delay
+	 * introduced by lock contention to increase the chances of early
+	 * detection that a wake-up interrupt has fired.
+	 */
+	if (read_isr_el1()) {
+		skip_wfi = 1;
+		goto exit;
+	}
+
+	/*
+	 * Call the cpu suspend handler registered by the Secure Payload
+	 * Dispatcher to let it do any bookeeping. If the handler encounters an
+	 * error, it's expected to assert within
+	 */
+	if (psci_spd_pm && psci_spd_pm->svc_suspend)
+		psci_spd_pm->svc_suspend(0);
+
+	/*
 	 * This function updates the state of each affinity instance
 	 * corresponding to the mpidr in the range of affinity levels
 	 * specified.
@@ -362,13 +319,15 @@
 	/* Stash the highest affinity level that will be turned off */
 	psci_set_max_phys_off_afflvl(max_phys_off_afflvl);
 
+	/*
+	 * Store the re-entry information for the non-secure world.
+	 */
+	cm_init_context(read_mpidr_el1(), ep);
+
 	/* Perform generic, architecture and platform specific handling */
-	rc = psci_call_suspend_handlers(mpidr_nodes,
+	psci_call_suspend_handlers(mpidr_nodes,
 					start_afflvl,
-					end_afflvl,
-					entrypoint,
-					context_id,
-					power_state);
+					end_afflvl);
 
 	/*
 	 * Invalidate the entry for the highest affinity level stashed earlier.
@@ -377,6 +336,7 @@
 	 */
 	psci_set_max_phys_off_afflvl(PSCI_INVALID_DATA);
 
+exit:
 	/*
 	 * Release the locks corresponding to each affinity level in the
 	 * reverse order to which they were acquired.
@@ -384,17 +344,17 @@
 	psci_release_afflvl_locks(start_afflvl,
 				  end_afflvl,
 				  mpidr_nodes);
-
-	return rc;
+	if (!skip_wfi)
+		psci_power_down_wfi();
 }
 
 /*******************************************************************************
  * The following functions finish an earlier affinity suspend request. They
  * are called by the common finisher routine in psci_common.c.
  ******************************************************************************/
-static unsigned int psci_afflvl0_suspend_finish(aff_map_node_t *cpu_node)
+static void psci_afflvl0_suspend_finish(aff_map_node_t *cpu_node)
 {
-	unsigned int plat_state, state, rc;
+	unsigned int plat_state, state;
 	int32_t suspend_level;
 	uint64_t counter_freq;
 
@@ -411,17 +371,12 @@
 	 * wrong then assert as there is no way to recover from this
 	 * situation.
 	 */
-	if (psci_plat_pm_ops->affinst_suspend_finish) {
 
-		/* Get the physical state of this cpu */
-		plat_state = get_phys_state(state);
-		rc = psci_plat_pm_ops->affinst_suspend_finish(read_mpidr_el1(),
-							      cpu_node->level,
+	/* Get the physical state of this cpu */
+	plat_state = get_phys_state(state);
+	psci_plat_pm_ops->affinst_suspend_finish(cpu_node->level,
 							      plat_state);
-		assert(rc == PSCI_E_SUCCESS);
-	}
 
-	/* Get the index for restoring the re-entry information */
 	/*
 	 * Arch. management: Enable the data cache, manage stack memory and
 	 * restore the stashed EL3 architectural context from the 'cpu_context'
@@ -456,14 +411,11 @@
 
 	/* Clean caches before re-entering normal world */
 	dcsw_op_louis(DCCSW);
-
-	rc = PSCI_E_SUCCESS;
-	return rc;
 }
 
-static unsigned int psci_afflvl1_suspend_finish(aff_map_node_t *cluster_node)
+static void psci_afflvl1_suspend_finish(aff_map_node_t *cluster_node)
 {
-	unsigned int plat_state, rc = PSCI_E_SUCCESS;
+	unsigned int plat_state;
 
 	assert(cluster_node->level == MPIDR_AFFLVL1);
 
@@ -475,23 +427,17 @@
 	 * then assert as there is no way to recover from this
 	 * situation.
 	 */
-	if (psci_plat_pm_ops->affinst_suspend_finish) {
 
-		/* Get the physical state of this cpu */
-		plat_state = psci_get_phys_state(cluster_node);
-		rc = psci_plat_pm_ops->affinst_suspend_finish(read_mpidr_el1(),
-							      cluster_node->level,
-							      plat_state);
-		assert(rc == PSCI_E_SUCCESS);
-	}
-
-	return rc;
+	/* Get the physical state of this cpu */
+	plat_state = psci_get_phys_state(cluster_node);
+	psci_plat_pm_ops->affinst_suspend_finish(cluster_node->level,
+						      plat_state);
 }
 
 
-static unsigned int psci_afflvl2_suspend_finish(aff_map_node_t *system_node)
+static void psci_afflvl2_suspend_finish(aff_map_node_t *system_node)
 {
-	unsigned int plat_state, rc = PSCI_E_SUCCESS;;
+	unsigned int plat_state;
 
 	/* Cannot go beyond this affinity level */
 	assert(system_node->level == MPIDR_AFFLVL2);
@@ -509,17 +455,11 @@
 	 * then assert as there is no way to recover from this
 	 * situation.
 	 */
-	if (psci_plat_pm_ops->affinst_suspend_finish) {
 
-		/* Get the physical state of the system */
-		plat_state = psci_get_phys_state(system_node);
-		rc = psci_plat_pm_ops->affinst_suspend_finish(read_mpidr_el1(),
-							      system_node->level,
-							      plat_state);
-		assert(rc == PSCI_E_SUCCESS);
-	}
-
-	return rc;
+	/* Get the physical state of the system */
+	plat_state = psci_get_phys_state(system_node);
+	psci_plat_pm_ops->affinst_suspend_finish(system_node->level,
+						      plat_state);
 }
 
 const afflvl_power_on_finisher_t psci_afflvl_suspend_finishers[] = {
diff --git a/services/std_svc/psci/psci_common.c b/services/std_svc/psci/psci_common.c
index 155f842..a31643e 100644
--- a/services/std_svc/psci/psci_common.c
+++ b/services/std_svc/psci/psci_common.c
@@ -51,7 +51,10 @@
  * corresponds to an affinity instance e.g. cluster, cpu within an mpidr
  ******************************************************************************/
 aff_map_node_t psci_aff_map[PSCI_NUM_AFFS]
-__attribute__ ((section("tzfw_coherent_mem")));
+#if USE_COHERENT_MEM
+__attribute__ ((section("tzfw_coherent_mem")))
+#endif
+;
 
 /*******************************************************************************
  * Pointer to functions exported by the platform to complete power mgmt. ops
@@ -246,7 +249,8 @@
 	for (level = start_afflvl; level <= end_afflvl; level++) {
 		if (mpidr_nodes[level] == NULL)
 			continue;
-		bakery_lock_get(&mpidr_nodes[level]->lock);
+
+		psci_lock_get(mpidr_nodes[level]);
 	}
 }
 
@@ -264,7 +268,8 @@
 	for (level = end_afflvl; level >= start_afflvl; level--) {
 		if (mpidr_nodes[level] == NULL)
 			continue;
-		bakery_lock_release(&mpidr_nodes[level]->lock);
+
+		psci_lock_release(mpidr_nodes[level]);
 	}
 }
 
@@ -285,15 +290,14 @@
 
 /*******************************************************************************
  * This function determines the full entrypoint information for the requested
- * PSCI entrypoint on power on/resume and saves this in the non-secure CPU
- * cpu_context, ready for when the core boots.
+ * PSCI entrypoint on power on/resume and returns it.
  ******************************************************************************/
-int psci_save_ns_entry(uint64_t mpidr,
-		       uint64_t entrypoint, uint64_t context_id,
-		       uint32_t ns_scr_el3, uint32_t ns_sctlr_el1)
+int psci_get_ns_ep_info(entry_point_info_t *ep,
+		       uint64_t entrypoint, uint64_t context_id)
 {
 	uint32_t ep_attr, mode, sctlr, daif, ee;
-	entry_point_info_t ep;
+	uint32_t ns_scr_el3 = read_scr_el3();
+	uint32_t ns_sctlr_el1 = read_sctlr_el1();
 
 	sctlr = ns_scr_el3 & SCR_HCE_BIT ? read_sctlr_el2() : ns_sctlr_el1;
 	ee = 0;
@@ -303,11 +307,11 @@
 		ep_attr |= EP_EE_BIG;
 		ee = 1;
 	}
-	SET_PARAM_HEAD(&ep, PARAM_EP, VERSION_1, ep_attr);
+	SET_PARAM_HEAD(ep, PARAM_EP, VERSION_1, ep_attr);
 
-	ep.pc = entrypoint;
-	memset(&ep.args, 0, sizeof(ep.args));
-	ep.args.arg0 = context_id;
+	ep->pc = entrypoint;
+	memset(&ep->args, 0, sizeof(ep->args));
+	ep->args.arg0 = context_id;
 
 	/*
 	 * Figure out whether the cpu enters the non-secure address space
@@ -324,7 +328,7 @@
 
 		mode = ns_scr_el3 & SCR_HCE_BIT ? MODE_EL2 : MODE_EL1;
 
-		ep.spsr = SPSR_64(mode, MODE_SP_ELX, DISABLE_ALL_EXCEPTIONS);
+		ep->spsr = SPSR_64(mode, MODE_SP_ELX, DISABLE_ALL_EXCEPTIONS);
 	} else {
 
 		mode = ns_scr_el3 & SCR_HCE_BIT ? MODE32_hyp : MODE32_svc;
@@ -335,12 +339,9 @@
 		 */
 		daif = DAIF_ABT_BIT | DAIF_IRQ_BIT | DAIF_FIQ_BIT;
 
-		ep.spsr = SPSR_MODE32(mode, entrypoint & 0x1, ee, daif);
+		ep->spsr = SPSR_MODE32(mode, entrypoint & 0x1, ee, daif);
 	}
 
-	/* initialise an entrypoint to set up the CPU context */
-	cm_init_context(mpidr, &ep);
-
 	return PSCI_E_SUCCESS;
 }
 
@@ -350,6 +351,10 @@
  ******************************************************************************/
 unsigned short psci_get_state(aff_map_node_t *node)
 {
+#if !USE_COHERENT_MEM
+	flush_dcache_range((uint64_t) node, sizeof(*node));
+#endif
+
 	assert(node->level >= MPIDR_AFFLVL0 && node->level <= MPIDR_MAX_AFFLVL);
 
 	/* A cpu node just contains the state which can be directly returned */
@@ -407,6 +412,10 @@
 		node->state &= ~(PSCI_STATE_MASK << PSCI_STATE_SHIFT);
 		node->state |= (state & PSCI_STATE_MASK) << PSCI_STATE_SHIFT;
 	}
+
+#if !USE_COHERENT_MEM
+	flush_dcache_range((uint64_t) node, sizeof(*node));
+#endif
 }
 
 /*******************************************************************************
@@ -429,12 +438,12 @@
  * topology tree and calls the physical power on handler for the corresponding
  * affinity levels
  ******************************************************************************/
-static int psci_call_power_on_handlers(aff_map_node_t *mpidr_nodes[],
+static void psci_call_power_on_handlers(aff_map_node_t *mpidr_nodes[],
 				       int start_afflvl,
 				       int end_afflvl,
 				       afflvl_power_on_finisher_t *pon_handlers)
 {
-	int rc = PSCI_E_INVALID_PARAMS, level;
+	int level;
 	aff_map_node_t *node;
 
 	for (level = end_afflvl; level >= start_afflvl; level--) {
@@ -448,12 +457,8 @@
 		 * so simply return an error and let the caller take
 		 * care of the situation.
 		 */
-		rc = pon_handlers[level](node);
-		if (rc != PSCI_E_SUCCESS)
-			break;
+		pon_handlers[level](node);
 	}
-
-	return rc;
 }
 
 /*******************************************************************************
@@ -515,12 +520,10 @@
 	psci_set_max_phys_off_afflvl(max_phys_off_afflvl);
 
 	/* Perform generic, architecture and platform specific handling */
-	rc = psci_call_power_on_handlers(mpidr_nodes,
+	psci_call_power_on_handlers(mpidr_nodes,
 					 start_afflvl,
 					 end_afflvl,
 					 pon_handlers);
-	if (rc != PSCI_E_SUCCESS)
-		panic();
 
 	/*
 	 * This function updates the state of each affinity instance
@@ -555,10 +558,41 @@
  ******************************************************************************/
 void psci_register_spd_pm_hook(const spd_pm_ops_t *pm)
 {
+	assert(pm);
 	psci_spd_pm = pm;
+
+	if (pm->svc_migrate)
+		psci_caps |= define_psci_cap(PSCI_MIG_AARCH64);
+
+	if (pm->svc_migrate_info)
+		psci_caps |= define_psci_cap(PSCI_MIG_INFO_UP_CPU_AARCH64)
+				| define_psci_cap(PSCI_MIG_INFO_TYPE);
 }
 
 /*******************************************************************************
+ * This function invokes the migrate info hook in the spd_pm_ops. It performs
+ * the necessary return value validation. If the Secure Payload is UP and
+ * migrate capable, it returns the mpidr of the CPU on which the Secure payload
+ * is resident through the mpidr parameter. Else the value of the parameter on
+ * return is undefined.
+ ******************************************************************************/
+int psci_spd_migrate_info(uint64_t *mpidr)
+{
+	int rc;
+
+	if (!psci_spd_pm || !psci_spd_pm->svc_migrate_info)
+		return PSCI_E_NOT_SUPPORTED;
+
+	rc = psci_spd_pm->svc_migrate_info(mpidr);
+
+	assert(rc == PSCI_TOS_UP_MIG_CAP || rc == PSCI_TOS_NOT_UP_MIG_CAP \
+		|| rc == PSCI_TOS_NOT_PRESENT_MP || rc == PSCI_E_NOT_SUPPORTED);
+
+	return rc;
+}
+
+
+/*******************************************************************************
  * This function prints the state of all affinity instances present in the
  * system
  ******************************************************************************/
diff --git a/services/std_svc/psci/psci_entry.S b/services/std_svc/psci/psci_entry.S
index 8145012..3e67d34 100644
--- a/services/std_svc/psci/psci_entry.S
+++ b/services/std_svc/psci/psci_entry.S
@@ -54,9 +54,18 @@
 psci_aff_common_finish_entry:
 #if !RESET_TO_BL31
 	/* ---------------------------------------------
+	 * Perform any processor specific actions which
+	 * undo or are in addition to the actions
+	 * performed by the reset handler in the BootROM
+	 * (BL1) e.g. cache, tlb invalidations, errata
+	 * workarounds etc.
+	 * ---------------------------------------------
+	 */
+	bl      reset_handler
+
+	/* ---------------------------------------------
 	 * Enable the instruction cache, stack pointer
-	 * and data access alignment checks. Also, set
-	 * the EL3 exception endianess to little-endian.
+	 * and data access alignment checks.
 	 * It can be assumed that BL3-1 entrypoint code
 	 * will do this when RESET_TO_BL31 is set. The
 	 * same  assumption cannot be made when another
diff --git a/services/std_svc/psci/psci_main.c b/services/std_svc/psci/psci_main.c
index 2e700e8..d8a0009 100644
--- a/services/std_svc/psci/psci_main.c
+++ b/services/std_svc/psci/psci_main.c
@@ -32,6 +32,7 @@
 #include <arch_helpers.h>
 #include <assert.h>
 #include <runtime_svc.h>
+#include <std_svc.h>
 #include <debug.h>
 #include "psci_private.h"
 
@@ -45,13 +46,33 @@
 {
 	int rc;
 	unsigned int start_afflvl, end_afflvl;
+	entry_point_info_t ep;
 
 	/* Determine if the cpu exists of not */
 	rc = psci_validate_mpidr(target_cpu, MPIDR_AFFLVL0);
 	if (rc != PSCI_E_SUCCESS) {
-		goto exit;
+		return PSCI_E_INVALID_PARAMS;
 	}
 
+	/* Validate the entrypoint using platform pm_ops */
+	if (psci_plat_pm_ops->validate_ns_entrypoint) {
+		rc = psci_plat_pm_ops->validate_ns_entrypoint(entrypoint);
+		if (rc != PSCI_E_SUCCESS) {
+			assert(rc == PSCI_E_INVALID_PARAMS);
+			return PSCI_E_INVALID_PARAMS;
+		}
+	}
+
+	/*
+	 * Verify and derive the re-entry information for
+	 * the non-secure world from the non-secure state from
+	 * where this call originated.
+	 */
+	rc = psci_get_ns_ep_info(&ep, entrypoint, context_id);
+	if (rc != PSCI_E_SUCCESS)
+		return rc;
+
+
 	/*
 	 * To turn this cpu on, specify which affinity
 	 * levels need to be turned on
@@ -59,12 +80,10 @@
 	start_afflvl = MPIDR_AFFLVL0;
 	end_afflvl = get_max_afflvl();
 	rc = psci_afflvl_on(target_cpu,
-			    entrypoint,
-			    context_id,
+			    &ep,
 			    start_afflvl,
 			    end_afflvl);
 
-exit:
 	return rc;
 }
 
@@ -79,6 +98,7 @@
 {
 	int rc;
 	unsigned int target_afflvl, pstate_type;
+	entry_point_info_t ep;
 
 	/* Check SBZ bits in power state are zero */
 	if (psci_validate_power_state(power_state))
@@ -89,6 +109,24 @@
 	if (target_afflvl > get_max_afflvl())
 		return PSCI_E_INVALID_PARAMS;
 
+	/* Validate the power_state using platform pm_ops */
+	if (psci_plat_pm_ops->validate_power_state) {
+		rc = psci_plat_pm_ops->validate_power_state(power_state);
+		if (rc != PSCI_E_SUCCESS) {
+			assert(rc == PSCI_E_INVALID_PARAMS);
+			return PSCI_E_INVALID_PARAMS;
+		}
+	}
+
+	/* Validate the entrypoint using platform pm_ops */
+	if (psci_plat_pm_ops->validate_ns_entrypoint) {
+		rc = psci_plat_pm_ops->validate_ns_entrypoint(entrypoint);
+		if (rc != PSCI_E_SUCCESS) {
+			assert(rc == PSCI_E_INVALID_PARAMS);
+			return PSCI_E_INVALID_PARAMS;
+		}
+	}
+
 	/* Determine the 'state type' in the 'power_state' parameter */
 	pstate_type = psci_get_pstate_type(power_state);
 
@@ -100,25 +138,33 @@
 		if  (!psci_plat_pm_ops->affinst_standby)
 			return PSCI_E_INVALID_PARAMS;
 
-		rc = psci_plat_pm_ops->affinst_standby(power_state);
-		assert(rc == PSCI_E_INVALID_PARAMS || rc == PSCI_E_SUCCESS);
-		return rc;
+		psci_plat_pm_ops->affinst_standby(power_state);
+		return PSCI_E_SUCCESS;
 	}
 
 	/*
-	 * Do what is needed to enter the power down state. Upon success,
-	 * enter the final wfi which will power down this cpu else return
-	 * an error.
+	 * Verify and derive the re-entry information for
+	 * the non-secure world from the non-secure state from
+	 * where this call originated.
 	 */
-	rc = psci_afflvl_suspend(entrypoint,
-				 context_id,
-				 power_state,
-				 MPIDR_AFFLVL0,
-				 target_afflvl);
-	if (rc == PSCI_E_SUCCESS)
-		psci_power_down_wfi();
-	assert(rc == PSCI_E_INVALID_PARAMS);
-	return rc;
+	rc = psci_get_ns_ep_info(&ep, entrypoint, context_id);
+	if (rc != PSCI_E_SUCCESS)
+		return rc;
+
+	/* Save PSCI power state parameter for the core in suspend context */
+	psci_set_suspend_power_state(power_state);
+
+	/*
+	 * Do what is needed to enter the power down state. Upon success,
+	 * enter the final wfi which will power down this CPU.
+	 */
+	psci_afflvl_suspend(&ep,
+			    MPIDR_AFFLVL0,
+			    target_afflvl);
+
+	/* Reset PSCI power state parameter for the core. */
+	psci_set_suspend_power_state(PSCI_INVALID_DATA);
+	return PSCI_E_SUCCESS;
 }
 
 int psci_cpu_off(void)
@@ -135,14 +181,6 @@
 	rc = psci_afflvl_off(MPIDR_AFFLVL0, target_afflvl);
 
 	/*
-	 * Check if all actions needed to safely power down this cpu have
-	 * successfully completed. Enter a wfi loop which will allow the
-	 * power controller to physically power down this cpu.
-	 */
-	if (rc == PSCI_E_SUCCESS)
-		psci_power_down_wfi();
-
-	/*
 	 * The only error cpu_off can return is E_DENIED. So check if that's
 	 * indeed the case.
 	 */
@@ -182,24 +220,89 @@
 	return rc;
 }
 
-/* Unimplemented */
-int psci_migrate(unsigned int target_cpu)
+int psci_migrate(unsigned long target_cpu)
 {
-	return PSCI_E_NOT_SUPPORTED;
-}
+	int rc;
+	unsigned long resident_cpu_mpidr;
 
-/* Unimplemented */
-unsigned int psci_migrate_info_type(void)
-{
-	return PSCI_TOS_NOT_PRESENT_MP;
-}
+	rc = psci_spd_migrate_info(&resident_cpu_mpidr);
+	if (rc != PSCI_TOS_UP_MIG_CAP)
+		return (rc == PSCI_TOS_NOT_UP_MIG_CAP) ?
+			  PSCI_E_DENIED : PSCI_E_NOT_SUPPORTED;
 
-unsigned long psci_migrate_info_up_cpu(void)
-{
 	/*
-	 * Return value of this currently unsupported call depends upon
-	 * what psci_migrate_info_type() returns.
+	 * Migrate should only be invoked on the CPU where
+	 * the Secure OS is resident.
 	 */
+	if (resident_cpu_mpidr != read_mpidr_el1())
+		return PSCI_E_NOT_PRESENT;
+
+	/* Check the validity of the specified target cpu */
+	rc = psci_validate_mpidr(target_cpu, MPIDR_AFFLVL0);
+	if (rc != PSCI_E_SUCCESS)
+		return PSCI_E_INVALID_PARAMS;
+
+	assert(psci_spd_pm && psci_spd_pm->svc_migrate);
+
+	rc = psci_spd_pm->svc_migrate(read_mpidr_el1(), target_cpu);
+	assert(rc == PSCI_E_SUCCESS || rc == PSCI_E_INTERN_FAIL);
+
+	return rc;
+}
+
+int psci_migrate_info_type(void)
+{
+	unsigned long resident_cpu_mpidr;
+
+	return psci_spd_migrate_info(&resident_cpu_mpidr);
+}
+
+long psci_migrate_info_up_cpu(void)
+{
+	unsigned long resident_cpu_mpidr;
+	int rc;
+
+	/*
+	 * Return value of this depends upon what
+	 * psci_spd_migrate_info() returns.
+	 */
+	rc = psci_spd_migrate_info(&resident_cpu_mpidr);
+	if (rc != PSCI_TOS_NOT_UP_MIG_CAP && rc != PSCI_TOS_UP_MIG_CAP)
+		return PSCI_E_INVALID_PARAMS;
+
+	return resident_cpu_mpidr;
+}
+
+int psci_features(unsigned int psci_fid)
+{
+	uint32_t local_caps = psci_caps;
+
+	/* Check if it is a 64 bit function */
+	if (((psci_fid >> FUNCID_CC_SHIFT) & FUNCID_CC_MASK) == SMC_64)
+		local_caps &= PSCI_CAP_64BIT_MASK;
+
+	/* Check for invalid fid */
+	if (!(is_std_svc_call(psci_fid) && is_valid_fast_smc(psci_fid)
+			&& is_psci_fid(psci_fid)))
+		return PSCI_E_NOT_SUPPORTED;
+
+
+	/* Check if the psci fid is supported or not */
+	if (!(local_caps & define_psci_cap(psci_fid)))
+		return PSCI_E_NOT_SUPPORTED;
+
+	/* Format the feature flags */
+	if (psci_fid == PSCI_CPU_SUSPEND_AARCH32 ||
+			psci_fid == PSCI_CPU_SUSPEND_AARCH64) {
+		/*
+		 * The trusted firmware uses the original power state format
+		 * and does not support OS Initiated Mode.
+		 */
+		return (FF_PSTATE_ORIG << FF_PSTATE_SHIFT) |
+			((!FF_SUPPORTS_OS_INIT_MODE) << FF_MODE_SUPPORT_SHIFT);
+	}
+
+	/* Return 0 for all other fid's */
 	return PSCI_E_SUCCESS;
 }
 
@@ -218,6 +321,10 @@
 	if (is_caller_secure(flags))
 		SMC_RET1(handle, SMC_UNK);
 
+	/* Check the fid against the capabilities */
+	if (!(psci_caps & define_psci_cap(smc_fid)))
+		SMC_RET1(handle, SMC_UNK);
+
 	if (((smc_fid >> FUNCID_CC_SHIFT) & FUNCID_CC_MASK) == SMC_32) {
 		/* 32-bit PSCI function, clear top parameter bits */
 
@@ -258,6 +365,9 @@
 			psci_system_reset();
 			/* We should never return from psci_system_reset() */
 
+		case PSCI_FEATURES:
+			SMC_RET1(handle, psci_features(x1));
+
 		default:
 			break;
 		}
diff --git a/services/std_svc/psci/psci_private.h b/services/std_svc/psci/psci_private.h
index 24a5604..5484665 100644
--- a/services/std_svc/psci/psci_private.h
+++ b/services/std_svc/psci/psci_private.h
@@ -33,26 +33,60 @@
 
 #include <arch.h>
 #include <bakery_lock.h>
-#include <platform_def.h>	/* for PLATFORM_NUM_AFFS */
+#include <bl_common.h>
 #include <psci.h>
 
-/* Number of affinity instances whose state this psci imp. can track */
-#ifdef PLATFORM_NUM_AFFS
-#define PSCI_NUM_AFFS		PLATFORM_NUM_AFFS
+/*
+ * The following helper macros abstract the interface to the Bakery
+ * Lock API.
+ */
+#if USE_COHERENT_MEM
+#define psci_lock_init(aff_map, idx)	bakery_lock_init(&(aff_map)[(idx)].lock)
+#define psci_lock_get(node)		bakery_lock_get(&((node)->lock))
+#define psci_lock_release(node)		bakery_lock_release(&((node)->lock))
 #else
-#define PSCI_NUM_AFFS		(2 * PLATFORM_CORE_COUNT)
+#define psci_lock_init(aff_map, idx)	((aff_map)[(idx)].aff_map_index = (idx))
+#define psci_lock_get(node)		bakery_lock_get((node)->aff_map_index,	  \
+						CPU_DATA_PSCI_LOCK_OFFSET)
+#define psci_lock_release(node)		bakery_lock_release((node)->aff_map_index,\
+						CPU_DATA_PSCI_LOCK_OFFSET)
 #endif
 
+/*
+ * The PSCI capability which are provided by the generic code but does not
+ * depend on the platform or spd capabilities.
+ */
+#define PSCI_GENERIC_CAP	\
+			(define_psci_cap(PSCI_VERSION) |		\
+			define_psci_cap(PSCI_AFFINITY_INFO_AARCH64) |	\
+			define_psci_cap(PSCI_FEATURES))
+
+/*
+ * The PSCI capabilities mask for 64 bit functions.
+ */
+#define PSCI_CAP_64BIT_MASK	\
+			(define_psci_cap(PSCI_CPU_SUSPEND_AARCH64) |	\
+			define_psci_cap(PSCI_CPU_ON_AARCH64) |		\
+			define_psci_cap(PSCI_AFFINITY_INFO_AARCH64) |	\
+			define_psci_cap(PSCI_MIG_AARCH64) |		\
+			define_psci_cap(PSCI_MIG_INFO_UP_CPU_AARCH64))
+
+
 /*******************************************************************************
  * The following two data structures hold the topology tree which in turn tracks
  * the state of the all the affinity instances supported by the platform.
  ******************************************************************************/
 typedef struct aff_map_node {
 	unsigned long mpidr;
-	unsigned short ref_count;
+	unsigned char ref_count;
 	unsigned char state;
 	unsigned char level;
+#if USE_COHERENT_MEM
 	bakery_lock_t lock;
+#else
+	/* For indexing the bakery_info array in per CPU data */
+	unsigned char aff_map_index;
+#endif
 } aff_map_node_t;
 
 typedef struct aff_limits_node {
@@ -61,13 +95,14 @@
 } aff_limits_node_t;
 
 typedef aff_map_node_t (*mpidr_aff_map_nodes_t[MPIDR_MAX_AFFLVL + 1]);
-typedef unsigned int (*afflvl_power_on_finisher_t)(aff_map_node_t *);
+typedef void (*afflvl_power_on_finisher_t)(aff_map_node_t *);
 
 /*******************************************************************************
  * Data prototypes
  ******************************************************************************/
 extern const plat_pm_ops_t *psci_plat_pm_ops;
 extern aff_map_node_t psci_aff_map[PSCI_NUM_AFFS];
+extern uint32_t psci_caps;
 
 /*******************************************************************************
  * SPD's power management hooks registered with PSCI
@@ -88,9 +123,8 @@
 void psci_afflvl_power_on_finish(int,
 				int,
 				afflvl_power_on_finisher_t *);
-int psci_save_ns_entry(uint64_t mpidr,
-		       uint64_t entrypoint, uint64_t context_id,
-		       uint32_t caller_scr_el3, uint32_t caller_sctlr_el1);
+int psci_get_ns_ep_info(entry_point_info_t *ep,
+		       uint64_t entrypoint, uint64_t context_id);
 int psci_check_afflvl_range(int start_afflvl, int end_afflvl);
 void psci_do_afflvl_state_mgmt(uint32_t start_afflvl,
 			       uint32_t end_afflvl,
@@ -107,6 +141,7 @@
 uint32_t psci_find_max_phys_off_afflvl(uint32_t start_afflvl,
 				       uint32_t end_afflvl,
 				       aff_map_node_t *mpidr_nodes[]);
+int psci_spd_migrate_info(uint64_t *mpidr);
 
 /* Private exported functions from psci_setup.c */
 int psci_get_aff_map_nodes(unsigned long mpidr,
@@ -116,21 +151,19 @@
 aff_map_node_t *psci_get_aff_map_node(unsigned long, int);
 
 /* Private exported functions from psci_affinity_on.c */
-int psci_afflvl_on(unsigned long,
-			unsigned long,
-			unsigned long,
-			int,
-			int);
+int psci_afflvl_on(unsigned long target_cpu,
+		   entry_point_info_t *ep,
+		   int start_afflvl,
+		   int end_afflvl);
 
 /* Private exported functions from psci_affinity_off.c */
 int psci_afflvl_off(int, int);
 
 /* Private exported functions from psci_affinity_suspend.c */
-int psci_afflvl_suspend(unsigned long,
-			unsigned long,
-			unsigned int,
-			int,
-			int);
+void psci_afflvl_suspend(entry_point_info_t *ep,
+			int start_afflvl,
+			int end_afflvl);
+
 unsigned int psci_afflvl_suspend_finish(int, int);
 void psci_set_suspend_power_state(unsigned int power_state);
 
diff --git a/services/std_svc/psci/psci_setup.c b/services/std_svc/psci/psci_setup.c
index e0bc833..02a8786 100644
--- a/services/std_svc/psci/psci_setup.c
+++ b/services/std_svc/psci/psci_setup.c
@@ -57,6 +57,12 @@
  ******************************************************************************/
 static aff_limits_node_t psci_aff_limits[MPIDR_MAX_AFFLVL + 1];
 
+/******************************************************************************
+ * Define the psci capability variable.
+ *****************************************************************************/
+uint32_t psci_caps;
+
+
 /*******************************************************************************
  * Routines for retrieving the node corresponding to an affinity level instance
  * in the mpidr. The first one uses binary search to find the node corresponding
@@ -181,7 +187,7 @@
 	uint32_t linear_id;
 	psci_aff_map[idx].mpidr = mpidr;
 	psci_aff_map[idx].level = level;
-	bakery_lock_init(&psci_aff_map[idx].lock);
+	psci_lock_init(psci_aff_map, idx);
 
 	/*
 	 * If an affinity instance is present then mark it as OFF to begin with.
@@ -331,13 +337,20 @@
 					       afflvl);
 	}
 
+#if !USE_COHERENT_MEM
+	/*
+	 * The psci_aff_map only needs flushing when it's not allocated in
+	 * coherent memory.
+	 */
+	flush_dcache_range((uint64_t) &psci_aff_map, sizeof(psci_aff_map));
+#endif
+
 	/*
 	 * Set the bounds for the affinity counts of each level in the map. Also
 	 * flush out the entire array so that it's visible to subsequent power
-	 * management operations. The 'psci_aff_map' array is allocated in
-	 * coherent memory so does not need flushing. The 'psci_aff_limits'
-	 * array is allocated in normal memory. It will be accessed when the mmu
-	 * is off e.g. after reset. Hence it needs to be flushed.
+	 * management operations. The 'psci_aff_limits' array is allocated in
+	 * normal memory. It will be accessed when the mmu is off e.g. after
+	 * reset. Hence it needs to be flushed.
 	 */
 	for (afflvl = MPIDR_AFFLVL0; afflvl < max_afflvl; afflvl++) {
 		psci_aff_limits[afflvl].min =
@@ -365,5 +378,19 @@
 	platform_setup_pm(&psci_plat_pm_ops);
 	assert(psci_plat_pm_ops);
 
+	/* Initialize the psci capability */
+	psci_caps = PSCI_GENERIC_CAP;
+
+	if (psci_plat_pm_ops->affinst_off)
+		psci_caps |=  define_psci_cap(PSCI_CPU_OFF);
+	if (psci_plat_pm_ops->affinst_on && psci_plat_pm_ops->affinst_on_finish)
+		psci_caps |=  define_psci_cap(PSCI_CPU_ON_AARCH64);
+	if (psci_plat_pm_ops->affinst_suspend && psci_plat_pm_ops->affinst_suspend_finish)
+		psci_caps |=  define_psci_cap(PSCI_CPU_SUSPEND_AARCH64);
+	if (psci_plat_pm_ops->system_off)
+		psci_caps |=  define_psci_cap(PSCI_SYSTEM_OFF);
+	if (psci_plat_pm_ops->system_reset)
+		psci_caps |=  define_psci_cap(PSCI_SYSTEM_RESET);
+
 	return 0;
 }
diff --git a/services/std_svc/psci/psci_system_off.c b/services/std_svc/psci/psci_system_off.c
index f2520b6..970d4bb 100644
--- a/services/std_svc/psci/psci_system_off.c
+++ b/services/std_svc/psci/psci_system_off.c
@@ -30,20 +30,17 @@
 
 #include <stddef.h>
 #include <arch_helpers.h>
+#include <assert.h>
 #include <debug.h>
 #include <platform.h>
 #include "psci_private.h"
 
 void psci_system_off(void)
 {
-	/* Check platform support */
-	if (!psci_plat_pm_ops->system_off) {
-		ERROR("Platform has not exported a PSCI System Off hook.\n");
-		panic();
-	}
-
 	psci_print_affinity_map();
 
+	assert(psci_plat_pm_ops->system_off);
+
 	/* Notify the Secure Payload Dispatcher */
 	if (psci_spd_pm && psci_spd_pm->svc_system_off) {
 		psci_spd_pm->svc_system_off();
@@ -57,14 +54,10 @@
 
 void psci_system_reset(void)
 {
-	/* Check platform support */
-	if (!psci_plat_pm_ops->system_reset) {
-		ERROR("Platform has not exported a PSCI System Reset hook.\n");
-		panic();
-	}
-
 	psci_print_affinity_map();
 
+	assert(psci_plat_pm_ops->system_reset);
+
 	/* Notify the Secure Payload Dispatcher */
 	if (psci_spd_pm && psci_spd_pm->svc_system_reset) {
 		psci_spd_pm->svc_system_reset();