feat(mte): move stacks to a separate memory region

Separates the CPU stack memory region from the data section so that a
different memory attribute can be applied to the stack region
specifically. For now, this region is marked as "Normal Inner
Write-Back, Outer Write-Back, Read-Allocate, Write-Allocate
Non-transient memory".
Also zeroes the stack section on image entry.

Signed-off-by: Maksims Svecovs <maksims.svecovs@arm.com>
Change-Id: Iaa68c7d16cae73c31e3741f94d2c6c85ca457b19
diff --git a/build/image/image.ld b/build/image/image.ld
index c026974..7b21f25 100644
--- a/build/image/image.ld
+++ b/build/image/image.ld
@@ -126,7 +126,7 @@
 	. = ALIGN(4096);
 	data_begin = .;
 	.data : {
-		*(.data)
+		*(.data*)
 	}
 	/*
 	 * Global offset table used for relocations. This is where relocation
@@ -157,6 +157,12 @@
 	. = ALIGN(4096);
 	data_end = .;
 
+	.stacks : {
+		stacks_begin = .;
+		*(.stacks)
+		stacks_end = .;
+	}
+
 	/*
 	 * Remove unused sections from the image.
 	 */
diff --git a/inc/hf/arch/init.h b/inc/hf/arch/init.h
index 5205e4e..6063975 100644
--- a/inc/hf/arch/init.h
+++ b/inc/hf/arch/init.h
@@ -8,6 +8,9 @@
 
 #pragma once
 
+#include "hf/mm.h"
+#include "hf/mpool.h"
+
 /**
  * Performs arch specific boot time initialization.
  *
@@ -15,3 +18,11 @@
  * possible.
  */
 void arch_one_time_init(void);
+
+/**
+ * Updates the hypervisor page table such that the stack address range
+ * is mapped into the address space at the corresponding address range in the
+ * architecture-specific mode.
+ */
+bool arch_stack_mm_init(struct mm_stage1_locked stage1_locked,
+			struct mpool *ppool);
diff --git a/inc/hf/layout.h b/inc/hf/layout.h
index 8ad05a9..ee32b35 100644
--- a/inc/hf/layout.h
+++ b/inc/hf/layout.h
@@ -22,6 +22,9 @@
 paddr_t layout_data_begin(void);
 paddr_t layout_data_end(void);
 
+paddr_t layout_stacks_begin(void);
+paddr_t layout_stacks_end(void);
+
 paddr_t layout_initrd_begin(void);
 paddr_t layout_initrd_end(void);
 
diff --git a/inc/system/sys/cdefs.h b/inc/system/sys/cdefs.h
index f1eeb86..b0a02ed 100644
--- a/inc/system/sys/cdefs.h
+++ b/inc/system/sys/cdefs.h
@@ -8,11 +8,4 @@
 
 #pragma once
 
-/*
- * Empty file to make Android Clang stdatomic.h happy. It includes this internal
- * glibc header which we don't have, but doesn't actually need it.
- * TODO: Investigate why Android have replaced the upstream Clang version of
- * stdatomic.h with one that appears to be from FreeBSD, possibly via Bionic, in
- * their prebuilt version of Clang. If we can just use the upstream Clang we can
- * probably remove this workaround.
- */
+#define __section(S) __attribute__((__section__(#S)))
diff --git a/src/arch/aarch64/entry.S b/src/arch/aarch64/entry.S
index 9194c5f..5688628 100644
--- a/src/arch/aarch64/entry.S
+++ b/src/arch/aarch64/entry.S
@@ -72,5 +72,18 @@
 	stp xzr, xzr, [x29], #16
 	b 3b
 
+	/* Zero out the stack section. */
+4:	adrp x29, stacks_begin
+	add x29, x29, :lo12:stacks_begin
+
+	adrp x30, stacks_end
+	add x30, x30, :lo12:stacks_end
+
+5:	cmp x29, x30
+	b.hs 6f
+
+	stp xzr, xzr, [x29], #16
+	b 5b
+
 	/* Branch to the entry point for the specific image. */
-4:	b image_entry
+6:	b image_entry
diff --git a/src/arch/aarch64/hypervisor/arch_init.c b/src/arch/aarch64/hypervisor/arch_init.c
index c2dedbf..4777d60 100644
--- a/src/arch/aarch64/hypervisor/arch_init.c
+++ b/src/arch/aarch64/hypervisor/arch_init.c
@@ -6,8 +6,12 @@
  * https://opensource.org/licenses/BSD-3-Clause.
  */
 
+#include "hf/arch/init.h"
+#include "hf/arch/mmu.h"
 #include "hf/arch/plat/psci.h"
 
+#include "hf/layout.h"
+
 /**
  * Performs arch specific boot time initialization.
  */
@@ -15,3 +19,16 @@
 {
 	plat_psci_init();
 }
+
+/**
+ * Updates the hypervisor page table such that the stack address range
+ * is mapped into the address space at the corresponding address range in the
+ * architecture-specific mode.
+ */
+bool arch_stack_mm_init(struct mm_stage1_locked stage1_locked,
+			struct mpool *ppool)
+{
+	return mm_identity_map(stage1_locked, layout_stacks_begin(),
+			       layout_stacks_end(),
+			       MM_MODE_R | MM_MODE_W | MM_MODE_T,
+}
diff --git a/src/arch/aarch64/inc/hf/arch/mmu.h b/src/arch/aarch64/inc/hf/arch/mmu.h
index 45bb2b5..8c45cdb 100644
--- a/src/arch/aarch64/inc/hf/arch/mmu.h
+++ b/src/arch/aarch64/inc/hf/arch/mmu.h
@@ -12,3 +12,6 @@
 
 /** Mapping mode defining MMU Stage-1 block/page non-secure bit */
 #define MM_MODE_NS UINT32_C(0x0080)
+
+/** Page mapping mode for tagged normal memory. */
+#define MM_MODE_T UINT32_C(0x0400)
diff --git a/src/arch/aarch64/mm.c b/src/arch/aarch64/mm.c
index d2307e2..35ba58e 100644
--- a/src/arch/aarch64/mm.c
+++ b/src/arch/aarch64/mm.c
@@ -50,6 +50,7 @@
 
 #define STAGE1_DEVICEINDX UINT64_C(0)
 #define STAGE1_NORMALINDX UINT64_C(1)
+#define STAGE1_STACKINDX UINT64_C(2)
 
 #define STAGE2_XN(x)      ((x) << 53)
 #define STAGE2_CONTIGUOUS (UINT64_C(1) << 52)
@@ -506,6 +507,8 @@
 	/* Define the memory attribute bits. */
 	if (mode & MM_MODE_D) {
 		attrs |= STAGE1_ATTRINDX(STAGE1_DEVICEINDX);
+	} else if (mode & MM_MODE_T) {
+		attrs |= STAGE1_ATTRINDX(STAGE1_STACKINDX);
 	} else {
 		attrs |= STAGE1_ATTRINDX(STAGE1_NORMALINDX);
 	}
diff --git a/src/arch/fake/mm.c b/src/arch/fake/mm.c
index 2403926..b2eaf49 100644
--- a/src/arch/fake/mm.c
+++ b/src/arch/fake/mm.c
@@ -167,6 +167,14 @@
 	return attrs >> PTE_ATTR_MODE_SHIFT;
 }
 
+bool arch_stack_mm_init(struct mm_stage1_locked stage1_locked,
+			struct mpool *ppool)
+{
+	(void)stage1_locked;
+	(void)ppool;
+	return true;
+}
+
 bool arch_mm_init(paddr_t table)
 {
 	/* No initialization required. */
diff --git a/src/cpu.c b/src/cpu.c
index bc26801..ea65f15 100644
--- a/src/cpu.c
+++ b/src/cpu.c
@@ -18,6 +18,8 @@
 
 #include "vmapi/hf/call.h"
 
+#include "system/sys/cdefs.h"
+
 #define STACK_SIZE PAGE_SIZE
 
 /**
@@ -28,7 +30,8 @@
  * happen, there may be coherency problems when the stack is being used before
  * caching is enabled.
  */
-alignas(PAGE_SIZE) static char callstacks[MAX_CPUS][STACK_SIZE];
+alignas(PAGE_SIZE) static char callstacks[MAX_CPUS][STACK_SIZE] __section(
+		.stacks);
 
 /* NOLINTNEXTLINE(misc-redundant-expression) */
 static_assert((STACK_SIZE % PAGE_SIZE) == 0, "Keep each stack page aligned.");
diff --git a/src/fdt_patch.c b/src/fdt_patch.c
index 4e7f535..d08c1cf 100644
--- a/src/fdt_patch.c
+++ b/src/fdt_patch.c
@@ -121,6 +121,8 @@
 	rsv &= add_mem_reservation(fdt, layout_rodata_begin(),
 				   layout_rodata_end());
 	rsv &= add_mem_reservation(fdt, layout_data_begin(), layout_data_end());
+	rsv &= add_mem_reservation(fdt, layout_stacks_begin(),
+				   layout_stacks_end());
 
 	/* Patch FDT to reserve memory for secondary VMs. */
 	for (i = 0; i < p->reserved_ranges_count; ++i) {
diff --git a/src/layout.c b/src/layout.c
index a92ec82..93f18a8 100644
--- a/src/layout.c
+++ b/src/layout.c
@@ -71,6 +71,26 @@
 }
 
 /**
+ * Get the address the .stacks section begins at.
+ */
+paddr_t layout_stacks_begin(void)
+{
+	extern uint8_t stacks_begin[];
+
+	return pa_init((uintpaddr_t)stacks_begin);
+}
+
+/**
+ * Get the address the .stacks section ends at.
+ */
+paddr_t layout_stacks_end(void)
+{
+	extern uint8_t stacks_end[];
+
+	return pa_init((uintpaddr_t)stacks_end);
+}
+
+/**
  * Get the address the .initrd section begins at.
  */
 paddr_t layout_initrd_begin(void)
diff --git a/src/layout_fake.c b/src/layout_fake.c
index d080e24..eab3492 100644
--- a/src/layout_fake.c
+++ b/src/layout_fake.c
@@ -43,6 +43,16 @@
 	return pa_init(600);
 }
 
+paddr_t layout_stacks_begin(void)
+{
+	return pa_init(700);
+}
+
+paddr_t layout_stacks_end(void)
+{
+	return pa_init(700);
+}
+
 paddr_t layout_primary_begin(void)
 {
 	return pa_init(0x80000);
diff --git a/src/load.c b/src/load.c
index 367c380..3056769 100644
--- a/src/load.c
+++ b/src/load.c
@@ -10,6 +10,7 @@
 
 #include <stdbool.h>
 
+#include "hf/arch/init.h"
 #include "hf/arch/other_world.h"
 #include "hf/arch/plat/ffa.h"
 #include "hf/arch/vm.h"
@@ -690,6 +691,10 @@
 		CHECK(vm_identity_map(vm_locked, layout_data_begin(),
 				      layout_data_end(), MM_MODE_R | MM_MODE_W,
 				      ppool, NULL));
+
+		CHECK(arch_stack_mm_init(mm_lock_ptable_unsafe(&vm->ptable),
+					 ppool));
+
 		plat_console_mm_init(mm_lock_ptable_unsafe(&vm->ptable), ppool);
 	}
 
diff --git a/src/mm.c b/src/mm.c
index 2f07d2f..8180be9 100644
--- a/src/mm.c
+++ b/src/mm.c
@@ -11,6 +11,8 @@
 #include <stdatomic.h>
 #include <stdint.h>
 
+#include "hf/arch/init.h"
+
 #include "hf/assert.h"
 #include "hf/check.h"
 #include "hf/dlog.h"
@@ -1110,6 +1112,8 @@
 		  pa_addr(layout_rodata_end()));
 	dlog_info("data: %#x - %#x\n", pa_addr(layout_data_begin()),
 		  pa_addr(layout_data_end()));
+	dlog_info("stacks: %#x - %#x\n", pa_addr(layout_stacks_begin()),
+		  pa_addr(layout_stacks_end()));
 
 	/* ASID 0 is reserved for use by the hypervisor. */
 	if (!mm_ptable_init(&ptable, 0, MM_FLAG_STAGE1, ppool)) {
@@ -1135,5 +1139,8 @@
 	mm_identity_map(stage1_locked, layout_data_begin(), layout_data_end(),
 			MM_MODE_R | MM_MODE_W, ppool);
 
+	/* Arch-specific stack mapping. */
+	arch_stack_mm_init(stage1_locked, ppool);
+
 	return true;
 }
diff --git a/src/vm.c b/src/vm.c
index 9e25d7d..aa25fb8 100644
--- a/src/vm.c
+++ b/src/vm.c
@@ -368,6 +368,8 @@
 	       vm_unmap(vm_locked, layout_rodata_begin(), layout_rodata_end(),
 			ppool) &&
 	       vm_unmap(vm_locked, layout_data_begin(), layout_data_end(),
+			ppool) &&
+	       vm_unmap(vm_locked, layout_stacks_begin(), layout_stacks_end(),
 			ppool);
 }
 
diff --git a/test/vmapi/el0_partitions/services/boot.c b/test/vmapi/el0_partitions/services/boot.c
index 6fcd9d8..ed20d2a 100644
--- a/test/vmapi/el0_partitions/services/boot.c
+++ b/test/vmapi/el0_partitions/services/boot.c
@@ -27,6 +27,8 @@
 extern uint8_t volatile rodata_end[];
 extern uint8_t volatile data_begin[];
 extern uint8_t volatile data_end[];
+extern uint8_t volatile stacks_begin[];
+extern uint8_t volatile stacks_end[];
 extern uint8_t volatile image_end[];
 
 TEST_SERVICE(boot_memory)
@@ -126,6 +128,18 @@
 	}
 	EXPECT_EQ(num_ro_pages_in_data, 1);
 
+	num_pages =
+		align_up((stacks_end - stacks_begin), PAGE_SIZE) / PAGE_SIZE;
+	base_va = (uint64_t)align_down(stacks_begin, PAGE_SIZE);
+
+	while (num_pages) {
+		struct ffa_value res = ffa_mem_perm_get(base_va);
+		EXPECT_EQ(res.func, FFA_SUCCESS_32);
+		EXPECT_EQ(res.arg2, FFA_MEM_PERM_RW);
+		base_va += PAGE_SIZE;
+		num_pages--;
+	}
+
 	num_pages = align_up((image_end - data_end), PAGE_SIZE) / PAGE_SIZE;
 	base_va = (uint64_t)align_down(data_end, PAGE_SIZE);