AArch32: Fix the stack alignment issue

The AArch32 Procedure Call Standard mandates that the stack must be aligned
to an 8-byte boundary at external interfaces. This patch makes the required
changes.

This problem was detected when a crash was encountered in
`psci_print_power_domain_map()` while printing 64-bit values. Aligning
the stack to an 8-byte boundary resolved the problem.

Fixes ARM-Software/tf-issues#437

Change-Id: I517bd8203601bb88e9311bd36d477fb7b3efb292
Signed-off-by: Soby Mathew <soby.mathew@arm.com>
diff --git a/lib/cpus/aarch32/cortex_a32.S b/lib/cpus/aarch32/cortex_a32.S
index b51f997..f2b85a3 100644
--- a/lib/cpus/aarch32/cortex_a32.S
+++ b/lib/cpus/aarch32/cortex_a32.S
@@ -72,7 +72,8 @@
 	 * ----------------------------------------------------
 	 */
 func cortex_a32_core_pwr_dwn
-	push	{lr}
+	/* r12 is pushed to meet the 8 byte stack alignment requirement */
+	push	{r12, lr}
 
 	/* Assert if cache is enabled */
 #if ASM_ASSERTION
@@ -92,7 +93,7 @@
 	 * Come out of intra cluster coherency
 	 * ---------------------------------------------
 	 */
-	pop	{lr}
+	pop	{r12, lr}
 	b	cortex_a32_disable_smp
 endfunc cortex_a32_core_pwr_dwn
 
@@ -102,7 +103,8 @@
 	 * -------------------------------------------------------
 	 */
 func cortex_a32_cluster_pwr_dwn
-	push	{lr}
+	/* r12 is pushed to meet the 8 byte stack alignment requirement */
+	push	{r12, lr}
 
 	/* Assert if cache is enabled */
 #if ASM_ASSERTION
@@ -135,7 +137,7 @@
 	 * Come out of intra cluster coherency
 	 * ---------------------------------------------
 	 */
-	pop	{lr}
+	pop	{r12, lr}
 	b	cortex_a32_disable_smp
 endfunc cortex_a32_cluster_pwr_dwn
 
diff --git a/lib/cpus/aarch32/cpu_helpers.S b/lib/cpus/aarch32/cpu_helpers.S
index 042ffbd..a4dfe5f 100644
--- a/lib/cpus/aarch32/cpu_helpers.S
+++ b/lib/cpus/aarch32/cpu_helpers.S
@@ -76,9 +76,10 @@
 	 */
 	.globl	prepare_core_pwr_dwn
 func prepare_core_pwr_dwn
-	push	{lr}
+	/* r12 is pushed to meet the 8 byte stack alignment requirement */
+	push	{r12, lr}
 	bl	_cpu_data
-	pop	{lr}
+	pop	{r12, lr}
 
 	ldr	r1, [r0, #CPU_DATA_CPU_OPS_PTR]
 #if ASM_ASSERTION
@@ -98,9 +99,10 @@
 	 */
 	.globl	prepare_cluster_pwr_dwn
 func prepare_cluster_pwr_dwn
-	push	{lr}
+	/* r12 is pushed to meet the 8 byte stack alignment requirement */
+	push	{r12, lr}
 	bl	_cpu_data
-	pop	{lr}
+	pop	{r12, lr}
 
 	ldr	r1, [r0, #CPU_DATA_CPU_OPS_PTR]
 #if ASM_ASSERTION
diff --git a/lib/el3_runtime/aarch32/cpu_data.S b/lib/el3_runtime/aarch32/cpu_data.S
index b97911f..2de9029 100644
--- a/lib/el3_runtime/aarch32/cpu_data.S
+++ b/lib/el3_runtime/aarch32/cpu_data.S
@@ -41,9 +41,10 @@
  * -----------------------------------------------------------------
  */
 func _cpu_data
-	push	{lr}
+	/* r12 is pushed to meet the 8 byte stack alignment requirement */
+	push	{r12, lr}
 	bl	plat_my_core_pos
-	pop	{lr}
+	pop	{r12, lr}
 	b	_cpu_data_by_index
 endfunc _cpu_data
 
diff --git a/lib/psci/aarch32/psci_helpers.S b/lib/psci/aarch32/psci_helpers.S
index 36d5d7d..373c184 100644
--- a/lib/psci/aarch32/psci_helpers.S
+++ b/lib/psci/aarch32/psci_helpers.S
@@ -93,7 +93,8 @@
  * -----------------------------------------------------------------------
  */
 func psci_do_pwrup_cache_maintenance
-	push	{lr}
+	/* r12 is pushed to meet the 8 byte stack alignment requirement */
+	push	{r12, lr}
 
 	/* ---------------------------------------------
 	 * Ensure any inflight stack writes have made it
@@ -123,7 +124,7 @@
 	stcopr	r0, SCTLR
 	isb
 
-	pop	{pc}
+	pop	{r12, pc}
 endfunc psci_do_pwrup_cache_maintenance
 
 	/* ---------------------------------------------