Import data cache clean and invalidation helpers

As a follow-up to 8077591cc5154a, replace the earlier cache maintenance
functions with data cache clean and invalidation helpers imported from
the TF-A project, taken from the following files:
https://git.trustedfirmware.org/TF-A/trusted-firmware-a.git/tree/lib/aarch64/cache_helpers.S?h=v2.4
https://git.trustedfirmware.org/TF-A/trusted-firmware-a.git/tree/include/arch/aarch64/asm_macros.S?h=v2.4

Change-Id: Iab55bd2cfa006507f811c6f396db3a8fe3160580
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/src/arch/aarch64/BUILD.gn b/src/arch/aarch64/BUILD.gn
index fb21d78..b88b755 100644
--- a/src/arch/aarch64/BUILD.gn
+++ b/src/arch/aarch64/BUILD.gn
@@ -20,7 +20,7 @@
 source_set("arch") {
   public_configs = [ "//src/arch/aarch64:arch_config" ]
   sources = [
-    "cache.c",
+    "cache_helpers.S",
     "irq.c",
     "mm.c",
     "sysregs.c",
diff --git a/src/arch/aarch64/cache.c b/src/arch/aarch64/cache.c
deleted file mode 100644
index 279161b..0000000
--- a/src/arch/aarch64/cache.c
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright 2021 The Hafnium Authors.
- *
- * Use of this source code is governed by a BSD-style
- * license that can be found in the LICENSE file or at
- * https://opensource.org/licenses/BSD-3-Clause.
- */
-
-#include "hf/arch/cache.h"
-
-#include "hf/arch/barriers.h"
-
-#include "hf/addr.h"
-
-#include "msr.h"
-
-/**
- * Return the cache line size for the cache level at the point of coherency.
- */
-static size_t arch_cache_line_size_get(void)
-{
-	uint64_t level_of_coherency;
-	uint64_t line_size;
-
-	/* Get the level of coherence for the cache hierarchy. */
-	level_of_coherency = read_msr(clidr_el1);
-	level_of_coherency = (level_of_coherency >> 24) & 3;
-
-	/* Select required level of cache. */
-	write_msr(csselr_el1, (level_of_coherency - 1) << 1);
-
-	/* Get line size such that cache_line_size = 2^(line_size + 4). */
-	line_size = read_msr(ccsidr_el1) & 3;
-
-	return (1 << (line_size + 4));
-}
-
-/**
- * Clean the cache to the point of coherency for the range qualified by the
- * start address and size arguments.
- */
-void arch_cache_clean_range(vaddr_t start, size_t size)
-{
-	size_t cache_line_size = arch_cache_line_size_get();
-	uintvaddr_t begin = va_addr(start);
-	uintvaddr_t end = begin + size;
-	uintvaddr_t address;
-
-	for (address = begin; address < end; address += cache_line_size) {
-		/* Cache clean by VA to PoC */
-		__asm__ volatile("dc cvac, %0" : : "r"(address));
-	}
-
-	memory_ordering_barrier();
-}
diff --git a/src/arch/aarch64/cache_helpers.S b/src/arch/aarch64/cache_helpers.S
new file mode 100644
index 0000000..896da19
--- /dev/null
+++ b/src/arch/aarch64/cache_helpers.S
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2021 The Hafnium Authors.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file or at
+ * https://opensource.org/licenses/BSD-3-Clause.
+ */
+
+/**
+ * Macros and functions imported from TF-A project:
+ * https://git.trustedfirmware.org/TF-A/trusted-firmware-a.git/tree/include/arch/aarch64/asm_macros.S?h=v2.4
+ * https://git.trustedfirmware.org/TF-A/trusted-firmware-a.git/tree/lib/aarch64/cache_helpers.S?h=v2.4
+ */
+/* Set reg to the smallest data cache line size in bytes; clobbers tmp. */
+.macro	dcache_line_size  reg, tmp
+	mrs	\tmp, ctr_el0		/* Read the Cache Type Register. */
+	ubfx	\tmp, \tmp, #16, #4	/* tmp = CTR_EL0[19:16], DminLine */
+	mov	\reg, #4		/* Bytes per 32-bit word. */
+	lsl	\reg, \reg, \tmp	/* reg = 4 << DminLine bytes */
+.endm
+
+/*
+ * Run `dc <op>` by VA on every line overlapping [x0, x0 + x1); clobbers x0-x3.
+ */
+.macro do_dcache_maintenance_by_mva op
+	/* Exit early if size is zero */
+	cbz	x1, exit_loop_\op
+	dcache_line_size x2, x3
+	add	x1, x0, x1		/* x1 = end address (exclusive). */
+	sub	x3, x2, #1		/* x3 = line size - 1 (offset mask). */
+	bic	x0, x0, x3		/* Align start down to a line boundary. */
+loop_\op:
+	dc	\op, x0			/* Maintain the line containing x0. */
+	add	x0, x0, x2		/* Step to the next line (x2 = line size). */
+	cmp	x0, x1
+	b.lo	loop_\op		/* Unsigned: repeat while x0 < end. */
+	dsb	sy			/* Wait for the maintenance to complete. */
+exit_loop_\op:
+	ret				/* The expansion ends the enclosing function. */
+.endm
+
+/**
+ * Invalidate the data cache by VA (dc ivac). 'x0' = addr, 'x1' = size.
+ * The start is rounded down to a cache line boundary and whole lines are
+ * invalidated, so data sharing the first or last line with the range is
+ * invalidated too. Returns at once if size is zero. Clobbers x0-x3, flags.
+ */
+.globl arch_cache_data_invalidate_range
+arch_cache_data_invalidate_range:
+	do_dcache_maintenance_by_mva ivac
+
+/**
+ * Clean the data cache by VA (dc cvac). 'x0' = addr, 'x1' = size.
+ * The start is rounded down to a cache line boundary and every line
+ * overlapping [addr, addr + size) is cleaned, followed by a 'dsb sy'.
+ * Returns at once if size is zero. Clobbers x0-x3 and the flags.
+ */
+.globl arch_cache_data_clean_range
+arch_cache_data_clean_range:
+	do_dcache_maintenance_by_mva cvac
diff --git a/src/arch/fake/cache.c b/src/arch/fake/cache.c
index c4e7800..5c45203 100644
--- a/src/arch/fake/cache.c
+++ b/src/arch/fake/cache.c
@@ -8,7 +8,13 @@
 
 #include "hf/arch/cache.h"
 
-void arch_cache_clean_range(vaddr_t start, size_t size)
+void arch_cache_data_clean_range(vaddr_t start, size_t size)
+{
+	(void)start; /* No-op stub: both arguments are discarded. */
+	(void)size;
+}
+
+void arch_cache_data_invalidate_range(vaddr_t start, size_t size)
 {
 	(void)start;
 	(void)size;
diff --git a/src/cpu.c b/src/cpu.c
index 42c2428..bc26801 100644
--- a/src/cpu.c
+++ b/src/cpu.c
@@ -119,9 +119,9 @@
 	 * hitting the entry point can read the cpus array consistently
 	 * with MMU off (hence data cache off).
 	 */
-	arch_cache_clean_range(va_from_ptr(cpus), sizeof(cpus));
+	arch_cache_data_clean_range(va_from_ptr(cpus), sizeof(cpus));
 
-	arch_cache_clean_range(va_from_ptr(&cpu_count), sizeof(cpu_count));
+	arch_cache_data_clean_range(va_from_ptr(&cpu_count), sizeof(cpu_count));
 }
 
 size_t cpu_index(struct cpu *c)