fix(mm): extend Stage 1 mapping limit

As Stage 1 mappings are limited to 512GB, registering FF-A RxTx buffers
fails when the physical address of these buffers exceeds 512GB. This
fix removes the limitation and allows Stage 1 mapping up to the
supported PA range.

Make the necessary changes in hftest to initialize the stage-1 page
table maximum level for tests running in both the VM and non-VM cases.
Also retain the stage-1 PA range of 512GB for tests running in a VM.

Signed-off-by: Arunachalam Ganapathy <arunachalam.ganapathy@arm.com>
Change-Id: I4cb8d68fc18e0edf4a7ee06ae636849d552d72a9
diff --git a/inc/hf/arch/mm.h b/inc/hf/arch/mm.h
index b629e8b..043c564 100644
--- a/inc/hf/arch/mm.h
+++ b/inc/hf/arch/mm.h
@@ -115,6 +115,11 @@
 void arch_mm_flush_dcache(void *base, size_t size);
 
 /**
+ * Sets the maximum level allowed in the page table for stage-1.
+ */
+void arch_mm_stage1_max_level_set(uint32_t pa_bits);
+
+/**
  * Gets the maximum level allowed in the page table for stage-1.
  */
 uint8_t arch_mm_stage1_max_level(void);
diff --git a/src/arch/aarch64/hftest/mm.c b/src/arch/aarch64/hftest/mm.c
index 121a32c..3b4b54e 100644
--- a/src/arch/aarch64/hftest/mm.c
+++ b/src/arch/aarch64/hftest/mm.c
@@ -27,6 +27,9 @@
 static uintreg_t mm_reset_tcr_el1;
 static uintreg_t mm_reset_sctlr_el1;
 
+/* For hftest, limit Stage1 PA range to 512GB (1 << 39) */
+#define HFTEST_S1_PA_BITS (39)
+
 /**
  * Initialize MMU for a test running in EL1.
  */
@@ -50,6 +53,12 @@
 	}
 
 	/*
+	 * Limit PA bits to HFTEST_S1_PA_BITS. Using the pa_bits reported by
+	 * arch_mm_get_pa_range requires an increase in page pool size.
+	 */
+	arch_mm_stage1_max_level_set(HFTEST_S1_PA_BITS);
+
+	/*
 	 * Preserve initial values of the system registers in case we want to
 	 * reset them.
 	 */
@@ -72,7 +81,7 @@
 		     (3 << 12) |		/* SH0, inner shareable. */
 		     (1 << 10) | /* ORGN0, normal mem, WB RA WA Cacheable. */
 		     (1 << 8) |	 /* IRGN0, normal mem, WB RA WA Cacheable. */
-		     (25 << 0) | /* T0SZ, input address is 2^39 bytes. */
+		     (64 - HFTEST_S1_PA_BITS) | /* T0SZ, 2^HFTEST_S1_PA_BITS */
 		     0;
 
 	mm_sctlr_el1 = (1 << 0) |  /* M, enable stage 1 EL2 MMU. */
diff --git a/src/arch/aarch64/mm.c b/src/arch/aarch64/mm.c
index ab9026d..0834a48 100644
--- a/src/arch/aarch64/mm.c
+++ b/src/arch/aarch64/mm.c
@@ -142,6 +142,7 @@
 	uintreg_t hcr_el2;
 } arch_mm_config;
 
+static uint8_t mm_s1_max_level;
 static uint8_t mm_s2_max_level;
 static uint8_t mm_s2_root_table_count;
 
@@ -689,14 +690,22 @@
 	return mode;
 }
 
+void arch_mm_stage1_max_level_set(uint32_t pa_bits)
+{
+	/* Maximum supported PA range in bits is 48 */
+	CHECK(pa_bits <= 48);
+
+	if (pa_bits >= 40) {
+		mm_s1_max_level = 3;
+	} else {
+		/* Setting to 2 covers physical memory up to 512GB */
+		mm_s1_max_level = 2;
+	}
+}
+
 uint8_t arch_mm_stage1_max_level(void)
 {
-	/*
-	 * For stage 1 we hard-code this to 2 for now so that we can
-	 * save one page table level at the expense of limiting the
-	 * physical memory to 512GB.
-	 */
-	return 2;
+	return mm_s1_max_level;
 }
 
 uint8_t arch_mm_stage2_max_level(void)
@@ -806,6 +815,8 @@
 		mm_s2_max_level = 1;
 	}
 
+	arch_mm_stage1_max_level_set(pa_bits);
+
 	/*
 	 * Since the shallowest possible tree is used, the maximum number of
 	 * concatenated tables must be used. This means if no more than 4 bits
@@ -822,6 +833,10 @@
 		"Stage 2 has %d page table levels with %d pages at the root.\n",
 		mm_s2_max_level + 1, mm_s2_root_table_count);
 
+	dlog_info(
+		"Stage 1 has %d page table levels with %d pages at the root.\n",
+		mm_s1_max_level + 1, arch_mm_stage1_root_table_count());
+
 	/*
 	 * If the PE implements S-EL2 then VTCR_EL2.NSA/NSW bits are significant
 	 * in secure state. In non-secure state, NSA/NSW behave as if set to
@@ -901,14 +916,16 @@
 			 << 24) | /* IRGN1, normal mem, WB RA WA Cacheable. */
 			(1UL << 23) | /* EPD1 - Disable TTBR1_EL2 translation */
 			(0UL << 22) | /* TTBR0_EL2.ASID defines ASID */
-			(25UL << 16) | /* T1SZ, input address is 2^39 bytes. */
-			(0UL << 14) |  /* TG0, granule size, 4KB. */
-			(3UL << 12) |  /* SH0, inner shareable. */
+			((64 - pa_bits)
+			 << 16) | /* T1SZ, input address is 2^pa_bits bytes. */
+			(0UL << 14) | /* TG0, granule size, 4KB. */
+			(3UL << 12) | /* SH0, inner shareable. */
 			(1UL
 			 << 10) | /* ORGN0, normal mem, WB RA WA Cacheable. */
 			(1UL
 			 << 8) | /* IRGN0, normal mem, WB RA WA Cacheable. */
-			(25UL << 0) | /* T0SZ, input address is 2^39 bytes. */
+			((64 - pa_bits)
+			 << 0) | /* T0SZ, input address is 2^pa_bits bytes. */
 			0;
 	} else {
 		arch_mm_config.tcr_el2 =
@@ -918,7 +935,8 @@
 			(3 << 12) |		   /* SH0, inner shareable. */
 			(1 << 10) | /* ORGN0, normal mem, WB RA WA Cacheable. */
 			(1 << 8) |  /* IRGN0, normal mem, WB RA WA Cacheable. */
-			(25 << 0) | /* T0SZ, input address is 2^39 bytes. */
+			((64 - pa_bits)
+			 << 0) | /* T0SZ, input address is 2^pa_bits bytes. */
 			0;
 	}
 	return true;
diff --git a/src/arch/fake/mm.c b/src/arch/fake/mm.c
index c1f6e98..2403926 100644
--- a/src/arch/fake/mm.c
+++ b/src/arch/fake/mm.c
@@ -120,6 +120,12 @@
 	/* There's no modelling of the cache. */
 }
 
+void arch_mm_stage1_max_level_set(uint32_t pa_bits)
+{
+	/* Not required to set this value as it's hardcoded to 2 */
+	(void)pa_bits;
+}
+
 uint8_t arch_mm_stage1_max_level(void)
 {
 	return 2;
diff --git a/src/mm.c b/src/mm.c
index 8075d02..2f07d2f 100644
--- a/src/mm.c
+++ b/src/mm.c
@@ -1117,6 +1117,11 @@
 		return false;
 	}
 
+	/* Initialize arch_mm before calling the mapping routines below. */
+	if (!arch_mm_init(ptable.root)) {
+		return false;
+	}
+
 	/* Let console driver map pages for itself. */
 	plat_console_mm_init(stage1_locked, ppool);
 
@@ -1130,5 +1135,5 @@
 	mm_identity_map(stage1_locked, layout_data_begin(), layout_data_end(),
 			MM_MODE_R | MM_MODE_W, ppool);
 
-	return arch_mm_init(ptable.root);
+	return true;
 }
diff --git a/test/arch/mm_test.c b/test/arch/mm_test.c
index 353513d..8c54448 100644
--- a/test/arch/mm_test.c
+++ b/test/arch/mm_test.c
@@ -44,7 +44,11 @@
  */
 TEST(arch_mm, max_level_stage1)
 {
-	uint8_t max_level = arch_mm_stage1_max_level();
+	uint8_t max_level;
+
+	arch_mm_stage1_max_level_set(arch_mm_get_pa_range());
+	max_level = arch_mm_stage1_max_level();
+
 	EXPECT_GE(max_level, MAX_LEVEL_LOWER_BOUND);
 	EXPECT_LE(max_level, MAX_LEVEL_UPPER_BOUND);
 }
diff --git a/test/hftest/mm.c b/test/hftest/mm.c
index aa46365..8f77429 100644
--- a/test/hftest/mm.c
+++ b/test/hftest/mm.c
@@ -43,6 +43,11 @@
 {
 	struct mm_stage1_locked stage1_locked;
 
+	/* Initialize the arch MM state before the mapping calls below. */
+	if (!arch_vm_mm_init()) {
+		return false;
+	}
+
 	mpool_init(&ppool, sizeof(struct mm_page_table));
 	if (!mpool_add_chunk(&ppool, ptable_buf, sizeof(ptable_buf))) {
 		HFTEST_FAIL(true, "Failed to add buffer to page-table pool.");
@@ -58,10 +63,6 @@
 			pa_init(mm_ptable_addr_space_end(MM_FLAG_STAGE1)),
 			MM_MODE_R | MM_MODE_W | MM_MODE_X, &ppool);
 
-	if (!arch_vm_mm_init()) {
-		return false;
-	}
-
 	arch_vm_mm_enable(ptable.root);
 
 	return true;