refactor(psci): unify coherency exit between AArch64 and AArch32

The procedure is fairly simple: if we have hardware assisted coherency,
call into the cpu driver and let it do its thing. If we don't, then we
must turn data caches off, handle the confusion that causes with the
stack, and call into the cpu driver which will flush the caches that
need flushing.

On AArch32 the above happens in common code. On AArch64, however, the
turning off of the caches happens in the cpu driver. Since we're dealing
with the stack, we must exercise control over it and implement this in
assembly. But as the two implementations are nominally different (in the
ordering of operations), the part that is in assembly is quite large as
jumping back to C to handle the difference might involve the stack.

Presumably, the AArch difference was introduced in order to cater for a
possible implementation where turning off the caches requires an IMP DEF
sequence. Well, Arm no longer makes cores without hardware assisted
coherency, so this eventuality is no longer possible.

So take this part out of the cpu driver and put it into common code,
just like in AArch32. With this, there is no longer a need to call
prepare_cpu_pwr_dwn() in a different order either - we can delay it a
bit to happen after the stack management. So the two AArch-s flows
become identical. We can convert prepare_cpu_pwr_dwn() to C and leave
psci_do_pwrdown_cache_maintenance() only to exercise control over the
stack.

Change-Id: Ie4759ebe20bb74b60533c6a47dbc2b101875900f
Signed-off-by: Boyan Karatotev <boyan.karatotev@arm.com>
diff --git a/lib/cpus/aarch32/cpu_helpers.S b/lib/cpus/aarch32/cpu_helpers.S
index 83e3e49..863448c 100644
--- a/lib/cpus/aarch32/cpu_helpers.S
+++ b/lib/cpus/aarch32/cpu_helpers.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2023, Arm Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2025, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -47,46 +47,7 @@
 
 #endif
 
-#ifdef IMAGE_BL32 /* The power down core and cluster is needed only in  BL32 */
-	/*
-	 * void prepare_cpu_pwr_dwn(unsigned int power_level)
-	 *
-	 * Prepare CPU power down function for all platforms. The function takes
-	 * a domain level to be powered down as its parameter. After the cpu_ops
-	 * pointer is retrieved from cpu_data, the handler for requested power
-	 * level is called.
-	 */
-	.globl	prepare_cpu_pwr_dwn
-func prepare_cpu_pwr_dwn
-	/*
-	 * If the given power level exceeds CPU_MAX_PWR_DWN_OPS, we call the
-	 * power down handler for the last power level
-	 */
-	mov	r2, #(CPU_MAX_PWR_DWN_OPS - 1)
-	cmp	r0, r2
-	movhi	r0, r2
-
-	push	{r0, lr}
-	bl	_cpu_data
-	pop	{r2, lr}
-
-	ldr	r0, [r0, #CPU_DATA_CPU_OPS_PTR]
-#if ENABLE_ASSERTIONS
-	cmp	r0, #0
-	ASM_ASSERT(ne)
-#endif
-
-	/* Get the appropriate power down handler */
-	mov	r1, #CPU_PWR_DWN_OPS
-	add	r1, r1, r2, lsl #2
-	ldr	r1, [r0, r1]
-#if ENABLE_ASSERTIONS
-	cmp	r1, #0
-	ASM_ASSERT(ne)
-#endif
-	bx	r1
-endfunc prepare_cpu_pwr_dwn
-
+#ifdef IMAGE_BL32
 	/*
 	 * Initializes the cpu_ops_ptr if not already initialized
 	 * in cpu_data. This must only be called after the data cache
diff --git a/lib/cpus/aarch64/aem_generic.S b/lib/cpus/aarch64/aem_generic.S
index 9843943..243f657 100644
--- a/lib/cpus/aarch64/aem_generic.S
+++ b/lib/cpus/aarch64/aem_generic.S
@@ -12,15 +12,6 @@
 
 func aem_generic_core_pwr_dwn
 	/* ---------------------------------------------
-	 * Disable the Data Cache.
-	 * ---------------------------------------------
-	 */
-	mrs	x1, sctlr_el3
-	bic	x1, x1, #SCTLR_C_BIT
-	msr	sctlr_el3, x1
-	isb
-
-	/* ---------------------------------------------
 	 * AEM model supports L3 caches in which case L2
 	 * will be private per core caches and flush
 	 * from L1 to L2 is not sufficient.
@@ -60,15 +51,6 @@
 
 func aem_generic_cluster_pwr_dwn
 	/* ---------------------------------------------
-	 * Disable the Data Cache.
-	 * ---------------------------------------------
-	 */
-	mrs	x1, sctlr_el3
-	bic	x1, x1, #SCTLR_C_BIT
-	msr	sctlr_el3, x1
-	isb
-
-	/* ---------------------------------------------
 	 * Flush all caches to PoC.
 	 * ---------------------------------------------
 	 */
diff --git a/lib/cpus/aarch64/cortex_a35.S b/lib/cpus/aarch64/cortex_a35.S
index 40e6200..bb354df 100644
--- a/lib/cpus/aarch64/cortex_a35.S
+++ b/lib/cpus/aarch64/cortex_a35.S
@@ -13,16 +13,6 @@
 
 cpu_reset_prologue cortex_a35
 	/* ---------------------------------------------
-	 * Disable L1 data cache and unified L2 cache
-	 * ---------------------------------------------
-	 */
-func cortex_a35_disable_dcache
-	sysreg_bit_clear sctlr_el3, SCTLR_C_BIT
-	isb
-	ret
-endfunc cortex_a35_disable_dcache
-
-	/* ---------------------------------------------
 	 * Disable intra-cluster coherency
 	 * ---------------------------------------------
 	 */
@@ -55,12 +45,6 @@
 	mov	x18, x30
 
 	/* ---------------------------------------------
-	 * Turn off caches.
-	 * ---------------------------------------------
-	 */
-	bl	cortex_a35_disable_dcache
-
-	/* ---------------------------------------------
 	 * Flush L1 caches.
 	 * ---------------------------------------------
 	 */
@@ -79,12 +63,6 @@
 	mov	x18, x30
 
 	/* ---------------------------------------------
-	 * Turn off caches.
-	 * ---------------------------------------------
-	 */
-	bl	cortex_a35_disable_dcache
-
-	/* ---------------------------------------------
 	 * Flush L1 caches.
 	 * ---------------------------------------------
 	 */
diff --git a/lib/cpus/aarch64/cortex_a53.S b/lib/cpus/aarch64/cortex_a53.S
index dbfff87..e3b69ab 100644
--- a/lib/cpus/aarch64/cortex_a53.S
+++ b/lib/cpus/aarch64/cortex_a53.S
@@ -15,16 +15,6 @@
 cpu_reset_prologue cortex_a53
 
 	/* ---------------------------------------------
-	 * Disable L1 data cache and unified L2 cache
-	 * ---------------------------------------------
-	 */
-func cortex_a53_disable_dcache
-	sysreg_bit_clear sctlr_el3, SCTLR_C_BIT
-	isb
-	ret
-endfunc cortex_a53_disable_dcache
-
-	/* ---------------------------------------------
 	 * Disable intra-cluster coherency
 	 * ---------------------------------------------
 	 */
@@ -144,12 +134,6 @@
 	mov	x18, x30
 
 	/* ---------------------------------------------
-	 * Turn off caches.
-	 * ---------------------------------------------
-	 */
-	bl	cortex_a53_disable_dcache
-
-	/* ---------------------------------------------
 	 * Flush L1 caches.
 	 * ---------------------------------------------
 	 */
@@ -168,12 +152,6 @@
 	mov	x18, x30
 
 	/* ---------------------------------------------
-	 * Turn off caches.
-	 * ---------------------------------------------
-	 */
-	bl	cortex_a53_disable_dcache
-
-	/* ---------------------------------------------
 	 * Flush L1 caches.
 	 * ---------------------------------------------
 	 */
diff --git a/lib/cpus/aarch64/cortex_a57.S b/lib/cpus/aarch64/cortex_a57.S
index 4a61187..18521a2 100644
--- a/lib/cpus/aarch64/cortex_a57.S
+++ b/lib/cpus/aarch64/cortex_a57.S
@@ -16,16 +16,6 @@
 cpu_reset_prologue cortex_a57
 
 	/* ---------------------------------------------
-	 * Disable L1 data cache and unified L2 cache
-	 * ---------------------------------------------
-	 */
-func cortex_a57_disable_dcache
-	sysreg_bit_clear sctlr_el3, SCTLR_C_BIT
-	isb
-	ret
-endfunc cortex_a57_disable_dcache
-
-	/* ---------------------------------------------
 	 * Disable all types of L2 prefetches.
 	 * ---------------------------------------------
 	 */
@@ -200,12 +190,6 @@
 	mov	x18, x30
 
 	/* ---------------------------------------------
-	 * Turn off caches.
-	 * ---------------------------------------------
-	 */
-	bl	cortex_a57_disable_dcache
-
-	/* ---------------------------------------------
 	 * Disable the L2 prefetches.
 	 * ---------------------------------------------
 	 */
@@ -240,12 +224,6 @@
 	mov	x18, x30
 
 	/* ---------------------------------------------
-	 * Turn off caches.
-	 * ---------------------------------------------
-	 */
-	bl	cortex_a57_disable_dcache
-
-	/* ---------------------------------------------
 	 * Disable the L2 prefetches.
 	 * ---------------------------------------------
 	 */
diff --git a/lib/cpus/aarch64/cortex_a72.S b/lib/cpus/aarch64/cortex_a72.S
index 23b27ab..f35f867 100644
--- a/lib/cpus/aarch64/cortex_a72.S
+++ b/lib/cpus/aarch64/cortex_a72.S
@@ -18,18 +18,6 @@
 cpu_reset_prologue cortex_a72
 
 	/* ---------------------------------------------
-	 * Disable L1 data cache and unified L2 cache
-	 * ---------------------------------------------
-	 */
-func cortex_a72_disable_dcache
-	mrs	x1, sctlr_el3
-	bic	x1, x1, #SCTLR_C_BIT
-	msr	sctlr_el3, x1
-	isb
-	ret
-endfunc cortex_a72_disable_dcache
-
-	/* ---------------------------------------------
 	 * Disable all types of L2 prefetches.
 	 * ---------------------------------------------
 	 */
@@ -177,12 +165,6 @@
 	mov	x18, x30
 
 	/* ---------------------------------------------
-	 * Turn off caches.
-	 * ---------------------------------------------
-	 */
-	bl	cortex_a72_disable_dcache
-
-	/* ---------------------------------------------
 	 * Disable the L2 prefetches.
 	 * ---------------------------------------------
 	 */
@@ -223,12 +205,6 @@
 	mov	x18, x30
 
 	/* ---------------------------------------------
-	 * Turn off caches.
-	 * ---------------------------------------------
-	 */
-	bl	cortex_a72_disable_dcache
-
-	/* ---------------------------------------------
 	 * Disable the L2 prefetches.
 	 * ---------------------------------------------
 	 */
diff --git a/lib/cpus/aarch64/cortex_a73.S b/lib/cpus/aarch64/cortex_a73.S
index 9cc6fdb..14f1ef8 100644
--- a/lib/cpus/aarch64/cortex_a73.S
+++ b/lib/cpus/aarch64/cortex_a73.S
@@ -13,16 +13,6 @@
 cpu_reset_prologue cortex_a73
 
 	/* ---------------------------------------------
-	 * Disable L1 data cache
-	 * ---------------------------------------------
-	 */
-func cortex_a73_disable_dcache
-	sysreg_bit_clear sctlr_el3, SCTLR_C_BIT
-	isb
-	ret
-endfunc cortex_a73_disable_dcache
-
-	/* ---------------------------------------------
 	 * Disable intra-cluster coherency
 	 * ---------------------------------------------
 	 */
@@ -123,12 +113,6 @@
 	mov	x18, x30
 
 	/* ---------------------------------------------
-	 * Turn off caches.
-	 * ---------------------------------------------
-	 */
-	bl	cortex_a73_disable_dcache
-
-	/* ---------------------------------------------
 	 * Flush L1 caches.
 	 * ---------------------------------------------
 	 */
@@ -147,12 +131,6 @@
 	mov	x18, x30
 
 	/* ---------------------------------------------
-	 * Turn off caches.
-	 * ---------------------------------------------
-	 */
-	bl	cortex_a73_disable_dcache
-
-	/* ---------------------------------------------
 	 * Flush L1 caches.
 	 * ---------------------------------------------
 	 */
diff --git a/lib/cpus/aarch64/cpu_helpers.S b/lib/cpus/aarch64/cpu_helpers.S
index 105da5c..1b20d5c 100644
--- a/lib/cpus/aarch64/cpu_helpers.S
+++ b/lib/cpus/aarch64/cpu_helpers.S
@@ -14,44 +14,6 @@
 #include <lib/cpus/errata.h>
 #include <lib/el3_runtime/cpu_data.h>
 
-#ifdef IMAGE_BL31 /* The power down core and cluster is needed only in  BL31 */
-	/*
-	 * void prepare_cpu_pwr_dwn(unsigned int power_level)
-	 *
-	 * Prepare CPU power down function for all platforms. The function takes
-	 * a domain level to be powered down as its parameter. After the cpu_ops
-	 * pointer is retrieved from cpu_data, the handler for requested power
-	 * level is called.
-	 */
-	.globl	prepare_cpu_pwr_dwn
-func prepare_cpu_pwr_dwn
-	/*
-	 * If the given power level exceeds CPU_MAX_PWR_DWN_OPS, we call the
-	 * power down handler for the last power level
-	 */
-	mov_imm	x2, (CPU_MAX_PWR_DWN_OPS - 1)
-	cmp	x0, x2
-	csel	x2, x2, x0, hi
-
-	mrs	x1, tpidr_el3
-	ldr	x0, [x1, #CPU_DATA_CPU_OPS_PTR]
-#if ENABLE_ASSERTIONS
-	cmp	x0, #0
-	ASM_ASSERT(ne)
-#endif
-
-	/* Get the appropriate power down handler */
-	mov	x1, #CPU_PWR_DWN_OPS
-	add	x1, x1, x2, lsl #3
-	ldr	x1, [x0, x1]
-#if ENABLE_ASSERTIONS
-	cmp	x1, #0
-	ASM_ASSERT(ne)
-#endif
-	br	x1
-endfunc prepare_cpu_pwr_dwn
-
-
 	/*
 	 * Initializes the cpu_ops_ptr if not already initialized
 	 * in cpu_data. This can be called without a runtime stack, but may
@@ -70,7 +32,6 @@
 1:
 	ret
 endfunc init_cpu_ops
-#endif /* IMAGE_BL31 */
 
 #if defined(IMAGE_BL31) && CRASH_REPORTING
 	/*
diff --git a/lib/cpus/aarch64/generic.S b/lib/cpus/aarch64/generic.S
index 0a10eed..c59575c 100644
--- a/lib/cpus/aarch64/generic.S
+++ b/lib/cpus/aarch64/generic.S
@@ -13,28 +13,10 @@
 
 cpu_reset_prologue generic
 
-	/* ---------------------------------------------
-	 * Disable L1 data cache and unified L2 cache
-	 * ---------------------------------------------
-	 */
-func generic_disable_dcache
-	mrs	x1, sctlr_el3
-	bic	x1, x1, #SCTLR_C_BIT
-	msr	sctlr_el3, x1
-	isb
-	ret
-endfunc generic_disable_dcache
-
 func generic_core_pwr_dwn
 	mov	x18, x30
 
 	/* ---------------------------------------------
-	 * Turn off caches.
-	 * ---------------------------------------------
-	 */
-	bl	generic_disable_dcache
-
-	/* ---------------------------------------------
 	 * Flush L1 caches.
 	 * ---------------------------------------------
 	 */
@@ -48,12 +30,6 @@
 	mov	x18, x30
 
 	/* ---------------------------------------------
-	 * Turn off caches.
-	 * ---------------------------------------------
-	 */
-	bl	generic_disable_dcache
-
-	/* ---------------------------------------------
 	 * Flush L1 caches.
 	 * ---------------------------------------------
 	 */
diff --git a/lib/cpus/aarch64/qemu_max.S b/lib/cpus/aarch64/qemu_max.S
index a727379..7980066 100644
--- a/lib/cpus/aarch64/qemu_max.S
+++ b/lib/cpus/aarch64/qemu_max.S
@@ -12,15 +12,6 @@
 
 func qemu_max_core_pwr_dwn
 	/* ---------------------------------------------
-	 * Disable the Data Cache.
-	 * ---------------------------------------------
-	 */
-	mrs	x1, sctlr_el3
-	bic	x1, x1, #SCTLR_C_BIT
-	msr	sctlr_el3, x1
-	isb
-
-	/* ---------------------------------------------
 	 * Flush L1 cache to L2.
 	 * ---------------------------------------------
 	 */
@@ -33,15 +24,6 @@
 
 func qemu_max_cluster_pwr_dwn
 	/* ---------------------------------------------
-	 * Disable the Data Cache.
-	 * ---------------------------------------------
-	 */
-	mrs	x1, sctlr_el3
-	bic	x1, x1, #SCTLR_C_BIT
-	msr	sctlr_el3, x1
-	isb
-
-	/* ---------------------------------------------
 	 * Flush all caches to PoC.
 	 * ---------------------------------------------
 	 */