refactor(psci): unify coherency exit between AArch64 and AArch32

The procedure is fairly simple: if we have hardware assisted coherency,
call into the cpu driver and let it do its thing. If we don't, then we
must turn data caches off, handle the confusion that causes with the
stack, and call into the cpu driver which will flush the caches that
need flushing.

On AArch32 the above happens in common code. On AArch64, however, the
turning off of the caches happens in the cpu driver. Since we're dealing
with the stack, we must exercise control over it and implement this in
assembly. But as the two implementations are nominally different (in the
ordering of operations), the part that is in assembly is quite large as
jumping back to C to handle the difference might involve the stack.

Presumably, the AArch difference was introduced in order to cater for a
possible implementation where turning off the caches requires an IMP DEF
sequence. Well, Arm no longer makes cores without hardware assisted
coherency, so this eventuality is not possible.

So take this part out of the cpu driver and put it into common code,
just like in AArch32. With this, there is no longer a need to call
prepare_cpu_pwr_dwn() in a different order either - we can delay it a
bit to happen after the stack management. So the AArch32 and AArch64
flows become identical. We can convert prepare_cpu_pwr_dwn() to C and
leave psci_do_pwrdown_cache_maintenance() only to exercise control over
the stack.

Change-Id: Ie4759ebe20bb74b60533c6a47dbc2b101875900f
Signed-off-by: Boyan Karatotev <boyan.karatotev@arm.com>
diff --git a/lib/psci/aarch32/psci_helpers.S b/lib/psci/aarch32/psci_helpers.S
index 929af8c..493715a 100644
--- a/lib/psci/aarch32/psci_helpers.S
+++ b/lib/psci/aarch32/psci_helpers.S
@@ -12,41 +12,45 @@
 	.globl	psci_do_pwrup_cache_maintenance
 
 /* -----------------------------------------------------------------------
- * void psci_do_pwrdown_cache_maintenance(unsigned int power level);
+ * void psci_do_pwrdown_cache_maintenance(void);
  *
- * This function performs cache maintenance for the specified power
- * level. The levels of cache affected are determined by the power
- * level which is passed as the argument i.e. level 0 results
- * in a flush of the L1 cache. Both the L1 and L2 caches are flushed
- * for a higher power level.
- *
- * Additionally, this function also ensures that stack memory is correctly
- * flushed out to avoid coherency issues due to a change in its memory
- * attributes after the data cache is disabled.
+ * This function turns off data caches and also ensures that stack memory
+ * is correctly flushed out to avoid coherency issues due to a change in
+ * its memory attributes.
  * -----------------------------------------------------------------------
  */
 func psci_do_pwrdown_cache_maintenance
 	push	{r4, lr}
+	bl	plat_get_my_stack
 
-	/* ----------------------------------------------
-	 * Turn OFF cache and do stack maintenance
-	 * prior to cpu operations . This sequence is
-	 * different from AArch64 because in AArch32 the
-	 * assembler routines for cpu operations utilize
-	 * the stack whereas in AArch64 it doesn't.
-	 * ----------------------------------------------
-	 */
-	mov	r4, r0
-	bl	do_stack_maintenance
+	/* Turn off the D-cache */
+	ldcopr	r1, SCTLR
+	bic	r1, #SCTLR_C_BIT
+	stcopr	r1, SCTLR
+	isb
 
 	/* ---------------------------------------------
-	 * Invoke CPU-specifc power down operations for
-	 * the appropriate level
+	 * Calculate and store the size of the used
+	 * stack memory in r1.
 	 * ---------------------------------------------
 	 */
-	mov	r0, r4
-	pop	{r4, lr}
-	b	prepare_cpu_pwr_dwn
+	mov	r4, r0
+	mov	r1, sp
+	sub	r1, r0, r1
+	mov	r0, sp
+	bl	flush_dcache_range
+
+	/* ---------------------------------------------
+	 * Calculate and store the size of the unused
+	 * stack memory in r1. Calculate and store the
+	 * stack base address in r0.
+	 * ---------------------------------------------
+	 */
+	sub	r0, r4, #PLATFORM_STACK_SIZE
+	sub	r1, sp, r0
+	bl	inv_dcache_range
+
+	pop	{r4, pc}
 endfunc psci_do_pwrdown_cache_maintenance
 
 
@@ -92,44 +96,3 @@
 
 	pop	{r12, pc}
 endfunc psci_do_pwrup_cache_maintenance
-
-	/* ---------------------------------------------
-	 * void do_stack_maintenance(void)
-	 * Do stack maintenance by flushing the used
-	 * stack to the main memory and invalidating the
-	 * remainder.
-	 * ---------------------------------------------
-	 */
-func do_stack_maintenance
-	push	{r4, lr}
-	bl	plat_get_my_stack
-
-	/* Turn off the D-cache */
-	ldcopr	r1, SCTLR
-	bic	r1, #SCTLR_C_BIT
-	stcopr	r1, SCTLR
-	isb
-
-	/* ---------------------------------------------
-	 * Calculate and store the size of the used
-	 * stack memory in r1.
-	 * ---------------------------------------------
-	 */
-	mov	r4, r0
-	mov	r1, sp
-	sub	r1, r0, r1
-	mov	r0, sp
-	bl	flush_dcache_range
-
-	/* ---------------------------------------------
-	 * Calculate and store the size of the unused
-	 * stack memory in r1. Calculate and store the
-	 * stack base address in r0.
-	 * ---------------------------------------------
-	 */
-	sub	r0, r4, #PLATFORM_STACK_SIZE
-	sub	r1, sp, r0
-	bl	inv_dcache_range
-
-	pop	{r4, pc}
-endfunc do_stack_maintenance
diff --git a/lib/psci/aarch64/psci_helpers.S b/lib/psci/aarch64/psci_helpers.S
index 4da2f69..ce8adc2 100644
--- a/lib/psci/aarch64/psci_helpers.S
+++ b/lib/psci/aarch64/psci_helpers.S
@@ -14,29 +14,22 @@
 	.globl	psci_do_pwrup_cache_maintenance
 
 /* -----------------------------------------------------------------------
- * void psci_do_pwrdown_cache_maintenance(unsigned int power level);
+ * void psci_do_pwrdown_cache_maintenance(void);
  *
- * This function performs cache maintenance for the specified power
- * level. The levels of cache affected are determined by the power
- * level which is passed as the argument i.e. level 0 results
- * in a flush of the L1 cache. Both the L1 and L2 caches are flushed
- * for a higher power level.
- *
- * Additionally, this function also ensures that stack memory is correctly
- * flushed out to avoid coherency issues due to a change in its memory
- * attributes after the data cache is disabled.
+ * This function turns off data caches and also ensures that stack memory
+ * is correctly flushed out to avoid coherency issues due to a change in
+ * its memory attributes.
  * -----------------------------------------------------------------------
  */
 func psci_do_pwrdown_cache_maintenance
 	stp     x29, x30, [sp,#-16]!
 	stp     x19, x20, [sp,#-16]!
 
-	/* ---------------------------------------------
-	 * Invoke CPU-specific power down operations for
-	 * the appropriate level
-	 * ---------------------------------------------
-	 */
-	bl	prepare_cpu_pwr_dwn
+	/* Disable L1 data cache and unified L2 cache */
+	mrs	x1, sctlr_el3
+	bic	x1, x1, #SCTLR_C_BIT
+	msr	sctlr_el3, x1
+	isb
 
 	/* ---------------------------------------------
 	 * Do stack maintenance by flushing the used
diff --git a/lib/psci/psci_common.c b/lib/psci/psci_common.c
index 7c83a79..15a32f1 100644
--- a/lib/psci/psci_common.c
+++ b/lib/psci/psci_common.c
@@ -1196,6 +1196,26 @@
 	return (n_valid > 1U) ? 1 : 0;
 }
 
+/* Call this CPU's power down handler for the given (clamped) power level. */
+static void call_cpu_pwr_dwn(unsigned int power_level)
+{
+	struct cpu_ops *ops = get_cpu_data(cpu_ops_ptr);
+
+	/* Call the last available power down handler */
+	if (power_level > CPU_MAX_PWR_DWN_OPS - 1) {
+		power_level = CPU_MAX_PWR_DWN_OPS - 1;
+	}
+
+	assert(ops != NULL);
+	assert(ops->pwr_dwn_ops[power_level] != NULL);
+	ops->pwr_dwn_ops[power_level]();
+}
+
+static void prepare_cpu_pwr_dwn(unsigned int power_level)
+{
+	call_cpu_pwr_dwn(power_level); /* forwards power_level unchanged */
+}
+
 /*******************************************************************************
  * Initiate power down sequence, by calling power down operations registered for
  * this CPU.
@@ -1213,26 +1233,24 @@
 		PMF_CACHE_MAINT);
 #endif
 
-#if HW_ASSISTED_COHERENCY
+#if !HW_ASSISTED_COHERENCY
 	/*
-	 * With hardware-assisted coherency, the CPU drivers only initiate the
-	 * power down sequence, without performing cache-maintenance operations
-	 * in software. Data caches enabled both before and after this call.
-	 */
-	prepare_cpu_pwr_dwn(power_level);
-#else
-	/*
-	 * Without hardware-assisted coherency, the CPU drivers disable data
-	 * caches, then perform cache-maintenance operations in software.
+	 * Disable data caching and handle the stack's cache maintenance.
 	 *
-	 * This also calls prepare_cpu_pwr_dwn() to initiate power down
-	 * sequence, but that function will return with data caches disabled.
-	 * We must ensure that the stack memory is flushed out to memory before
-	 * we start popping from it again.
+	 * If the core can't automatically exit coherency, the cpu driver needs
+	 * to flush caches and exit coherency. We can't do this with data caches
+	 * enabled. The cpu driver will decide which caches to flush based on
+	 * the power level.
+	 *
+	 * If automatic coherency management is possible, we can keep data
+	 * caches on until the very end and let hardware do cache maintenance.
 	 */
-	psci_do_pwrdown_cache_maintenance(power_level);
+	psci_do_pwrdown_cache_maintenance();
 #endif
 
+	/* Initiate the power down sequence by calling into the cpu driver. */
+	prepare_cpu_pwr_dwn(power_level);
+
 #if ENABLE_RUNTIME_INSTRUMENTATION
 	PMF_CAPTURE_TIMESTAMP(rt_instr_svc,
 		RT_INSTR_EXIT_CFLUSH,
diff --git a/lib/psci/psci_private.h b/lib/psci/psci_private.h
index f3f5a5c..d4c6415 100644
--- a/lib/psci/psci_private.h
+++ b/lib/psci/psci_private.h
@@ -323,13 +323,6 @@
 bool psci_is_last_on_cpu(unsigned int my_idx);
 int psci_spd_migrate_info(u_register_t *mpidr);
 
-/*
- * CPU power down is directly called only when HW_ASSISTED_COHERENCY is
- * available. Otherwise, this needs post-call stack maintenance, which is
- * handled in assembly.
- */
-void prepare_cpu_pwr_dwn(unsigned int power_level);
-
 /* This function applies various CPU errata during power down. */
 void apply_cpu_pwr_dwn_errata(void);
 
@@ -351,7 +344,7 @@
 void psci_cpu_suspend_to_powerdown_finish(unsigned int cpu_idx, unsigned int max_off_lvl, const psci_power_state_t *state_info);
 
 /* Private exported functions from psci_helpers.S */
-void psci_do_pwrdown_cache_maintenance(unsigned int pwr_level);
+void psci_do_pwrdown_cache_maintenance(void);
 void psci_do_pwrup_cache_maintenance(void);
 
 /* Private exported functions from psci_system_off.c */