Introduce new states to support memory sharing.
Stage-1 memory remains either valid or invalid, but stage-2 memory now
also tracks whether it is owned by the VM and whether access to it is
shared with another VM. The previous concept of unmapping now means
making the memory absent and unowned.
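
As an illustration only (not part of this change), the sketch below
derives the state descriptions from the new mode bits; the helper
stage2_state_name() is hypothetical and the #defines copy the values
added to inc/hf/mm.h:

    #include <stdbool.h>
    #include <stdio.h>

    /* Values added to inc/hf/mm.h by this change. */
    #define MM_MODE_INVALID 0x0010
    #define MM_MODE_UNOWNED 0x0020
    #define MM_MODE_SHARED  0x0040

    /* Hypothetical helper: names the stage-2 state a mode encodes.
     * The default mode (no bits set) is valid, owned and exclusive. */
    static const char *stage2_state_name(int mode)
    {
            bool v = !(mode & MM_MODE_INVALID);
            bool o = !(mode & MM_MODE_UNOWNED);
            bool x = !(mode & MM_MODE_SHARED);

            if (v && o && x) {
                    return "owner with exclusive access";
            }
            if (v && o && !x) {
                    return "owner sharing with at most one other VM";
            }
            if (v && !o && x) {
                    return "borrower with exclusive access";
            }
            if (v && !o && !x) {
                    return "borrower sharing with the owner";
            }
            if (!v && o && x) {
                    return "owner of memory lent with exclusive access";
            }
            if (!v && o && !x) {
                    return "unused combination";
            }
            return "invalid memory, unrelated to the VM";
    }

    int main(void)
    {
            /* The lender's view of memory lent exclusively to another VM. */
            printf("%s\n", stage2_state_name(MM_MODE_INVALID));
            return 0;
    }
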
Change-Id: I516abfe098fa6a7a072a47e89691687a302dd725
diff --git a/inc/hf/mm.h b/inc/hf/mm.h
index 912b5d3..11a7753 100644
--- a/inc/hf/mm.h
+++ b/inc/hf/mm.h
@@ -25,9 +25,73 @@
#include "hf/addr.h"
+/* Keep macro alignment */
+/* clang-format off */
+
#define PAGE_SIZE (1 << PAGE_BITS)
#define MM_PTE_PER_PAGE (PAGE_SIZE / sizeof(pte_t))
+
+/* The following are arch-independent page mapping modes. */
+#define MM_MODE_R 0x0001 /* read */
+#define MM_MODE_W 0x0002 /* write */
+#define MM_MODE_X 0x0004 /* execute */
+#define MM_MODE_D 0x0008 /* device */
+
+/*
+ * Memory in stage-1 is either valid (present) or invalid (absent).
+ *
+ * Memory in stage-2 has more states to track sharing, borrowing and giving of
+ * memory. The states are made up of three parts:
+ *
+ * 1. V = valid/invalid : Whether the memory is part of the VM's address
+ * space. A fault will be generated if accessed when
+ * invalid.
+ * 2. O = owned/unowned : Whether the memory is owned by the VM.
+ * 3. X = exclusive/shared : Whether access is exclusive to the VM or shared
+ * with at most one other.
+ *
+ * These parts compose to form the following states:
+ *
+ * - V O X : Owner of memory with exclusive access.
+ * - V O !X : Owner of memory with access shared with at most one other VM.
+ * - V !O X : Borrower of memory with exclusive access.
+ * - V !O !X : Borrower of memory where access is shared with the owner.
+ * - !V O X : Owner of memory lent to a VM that has exclusive access.
+ *
+ * - !V O !X : Unused. Owner of shared memory always has access.
+ *
+ * - !V !O X : Invalid memory. Memory is unrelated to the VM.
+ * - !V !O !X : Invalid memory. Memory is unrelated to the VM.
+ *
+ * Modes are selected so that the default is owned, exclusive, valid memory.
+ */
+#define MM_MODE_INVALID 0x0010
+#define MM_MODE_UNOWNED 0x0020
+#define MM_MODE_SHARED 0x0040
+
+/**
+ * This flag indicates that memory allocation must not use locks. This is
+ * relevant in systems where interlocked operations are only available after
+ * virtual memory is enabled.
+ */
+#define MM_MODE_NOSYNC 0x0080
+
+/**
+ * This flag indicates that the mapping is intended to be used in a first
+ * stage translation table, which might have different encodings for the
+ * attribute bits than the second stage table.
+ */
+#define MM_MODE_STAGE1 0x0100
+
+/**
+ * This flag indicates that no TLB invalidations should be issued for the
+ * changes in the page table.
+ */
+#define MM_MODE_NOINVALIDATE 0x0200
+
+/* clang-format on */
+
struct mm_page_table {
alignas(PAGE_SIZE) pte_t entries[MM_PTE_PER_PAGE];
};
@@ -41,32 +105,6 @@
paddr_t root;
};
-/* The following are arch-independent page mapping modes. */
-#define MM_MODE_R 0x01 /* read */
-#define MM_MODE_W 0x02 /* write */
-#define MM_MODE_X 0x04 /* execute */
-#define MM_MODE_D 0x08 /* device */
-
-/**
- * This flag indicates that memory allocation must not use locks. This is
- * relevant in systems where interlocked operations are only available after
- * virtual memory is enabled.
- */
-#define MM_MODE_NOSYNC 0x10
-
-/**
- * This flag indicates that the mapping is intended to be used in a first
- * stage translation table, which might have different encodings for the
- * attribute bits than the second stage table.
- */
-#define MM_MODE_STAGE1 0x20
-
-/**
- * This flag indicates that no TLB invalidations should be issued for the
- * changes in the page table.
- */
-#define MM_MODE_NOINVALIDATE 0x40
-
bool mm_ptable_init(struct mm_ptable *t, int mode);
void mm_ptable_fini(struct mm_ptable *t, int mode);
void mm_ptable_dump(struct mm_ptable *t, int mode);
diff --git a/src/arch/aarch64/inc/hf/arch/mm.h b/src/arch/aarch64/inc/hf/arch/mm.h
index dd9c829..8423c97 100644
--- a/src/arch/aarch64/inc/hf/arch/mm.h
+++ b/src/arch/aarch64/inc/hf/arch/mm.h
@@ -21,7 +21,16 @@
#include "hf/addr.h"
-/** A page table entry. */
+/**
+ * A page table entry (PTE).
+ *
+ * It will take one of the following forms:
+ *
+ * 1. absent : There is no mapping.
+ * 2. invalid block : Represents a block that is not in the address space.
+ * 3. valid block : Represents a block that is in the address space.
+ * 4. table : Represents a reference to a table of PTEs.
+ */
typedef uint64_t pte_t;
#define PAGE_LEVEL_BITS 9
@@ -35,9 +44,31 @@
pte_t arch_mm_table_pte(int level, paddr_t pa);
pte_t arch_mm_block_pte(int level, paddr_t pa, uint64_t attrs);
bool arch_mm_is_block_allowed(int level);
+
+/**
+ * Determines if a PTE is present, i.e., it contains information and therefore
+ * needs to exist in the page table. Any non-absent PTE is present.
+ */
bool arch_mm_pte_is_present(pte_t pte, int level);
-bool arch_mm_pte_is_table(pte_t pte, int level);
+
+/**
+ * Determines if a PTE is valid, i.e., it can affect the address space. Tables
+ * and valid blocks fall into this category. Invalid blocks do not as they hold
+ * information about blocks that are not in the address space.
+ */
+bool arch_mm_pte_is_valid(pte_t pte, int level);
+
+/**
+ * Determines if a PTE is a block and represents an address range, valid or
+ * invalid.
+ */
bool arch_mm_pte_is_block(pte_t pte, int level);
+
+/**
+ * Determines if a PTE represents a reference to a table of PTEs.
+ */
+bool arch_mm_pte_is_table(pte_t pte, int level);
+
paddr_t arch_mm_clear_pa(paddr_t pa);
paddr_t arch_mm_block_from_pte(pte_t pte);
paddr_t arch_mm_table_from_pte(pte_t pte);
diff --git a/src/arch/aarch64/mm.c b/src/arch/aarch64/mm.c
index 1745122..a3809e0 100644
--- a/src/arch/aarch64/mm.c
+++ b/src/arch/aarch64/mm.c
@@ -29,6 +29,10 @@
#define OUTER_SHAREABLE UINT64_C(2)
#define INNER_SHAREABLE UINT64_C(3)
+#define PTE_VALID (UINT64_C(1) << 0)
+#define PTE_LEVEL0_BLOCK (UINT64_C(1) << 1)
+#define PTE_TABLE (UINT64_C(1) << 1)
+
#define STAGE1_XN (UINT64_C(1) << 54)
#define STAGE1_PXN (UINT64_C(1) << 53)
#define STAGE1_CONTIGUOUS (UINT64_C(1) << 52)
@@ -68,6 +72,10 @@
#define TABLE_XNTABLE (UINT64_C(1) << 60)
#define TABLE_PXNTABLE (UINT64_C(1) << 59)
+/* The following are stage-2 software defined attributes. */
+#define STAGE2_SW_OWNED (UINT64_C(1) << 55)
+#define STAGE2_SW_EXCLUSIVE (UINT64_C(1) << 56)
+
/* The following are stage-2 memory attributes for normal memory. */
#define STAGE2_NONCACHEABLE UINT64_C(1)
#define STAGE2_WRITETHROUGH UINT64_C(2)
@@ -91,7 +99,7 @@
(((UINT64_C(1) << 48) - 1) & ~((UINT64_C(1) << PAGE_BITS) - 1))
/** Mask for the attribute bits of the pte. */
-#define PTE_ATTR_MASK (~(PTE_ADDR_MASK | UINT64_C(0x3)))
+#define PTE_ATTR_MASK (~(PTE_ADDR_MASK | (UINT64_C(1) << 1)))
static uint8_t mm_s2_max_level;
static uint8_t mm_s2_root_table_count;
@@ -115,7 +123,7 @@
{
/* This is the same for all levels on aarch64. */
(void)level;
- return pa_addr(pa) | 0x3;
+ return pa_addr(pa) | PTE_TABLE | PTE_VALID;
}
/**
@@ -125,10 +133,10 @@
*/
pte_t arch_mm_block_pte(int level, paddr_t pa, uint64_t attrs)
{
- pte_t pte = pa_addr(pa) | attrs | 0x1;
+ pte_t pte = pa_addr(pa) | attrs;
if (level == 0) {
/* A level 0 'block' is actually a page entry. */
- pte |= 0x2;
+ pte |= PTE_LEVEL0_BLOCK;
}
return pte;
}
@@ -144,21 +152,22 @@
}
/**
- * Determines if the given pte is present, i.e., if it points to another table,
- * to a page, or a block of pages.
+ * Determines if the given pte is present, i.e., if it is valid or it is invalid
+ * but still holds state about the memory so needs to be present in the table.
*/
bool arch_mm_pte_is_present(pte_t pte, int level)
{
- (void)level;
- return (pte & 0x1) != 0;
+ return arch_mm_pte_is_valid(pte, level) || (pte & STAGE2_SW_OWNED) != 0;
}
/**
- * Determines if the given pte references another table.
+ * Determines if the given pte is valid, i.e., if it points to another table,
+ * to a page, or a block of pages that can be accessed.
*/
-bool arch_mm_pte_is_table(pte_t pte, int level)
+bool arch_mm_pte_is_valid(pte_t pte, int level)
{
- return level != 0 && (pte & 0x3) == 0x3;
+ (void)level;
+ return (pte & PTE_VALID) != 0;
}
/**
@@ -168,7 +177,18 @@
{
/* We count pages at level 0 as blocks. */
return arch_mm_is_block_allowed(level) &&
- (pte & 0x3) == (level == 0 ? 0x3 : 0x1);
+ (level == 0 ? (pte & PTE_LEVEL0_BLOCK) != 0
+ : arch_mm_pte_is_present(pte, level) &&
+ !arch_mm_pte_is_table(pte, level));
+}
+
+/**
+ * Determines if the given pte references another table.
+ */
+bool arch_mm_pte_is_table(pte_t pte, int level)
+{
+ return level != 0 && arch_mm_pte_is_valid(pte, level) &&
+ (pte & PTE_TABLE) != 0;
}
static uint64_t pte_addr(pte_t pte)
@@ -302,6 +322,11 @@
} else {
attrs |= STAGE1_ATTRINDX(STAGE1_NORMALINDX);
}
+
+ /* Define the valid bit. */
+ if (!(mode & MM_MODE_INVALID)) {
+ attrs |= PTE_VALID;
+ }
} else {
uint64_t access = 0;
@@ -340,6 +365,21 @@
attrs |= STAGE2_MEMATTR_NORMAL(STAGE2_WRITEBACK,
STAGE2_WRITEBACK);
}
+
+ /* Define the ownership bit. */
+ if (!(mode & MM_MODE_UNOWNED)) {
+ attrs |= STAGE2_SW_OWNED;
+ }
+
+ /* Define the exclusivity bit. */
+ if (!(mode & MM_MODE_SHARED)) {
+ attrs |= STAGE2_SW_EXCLUSIVE;
+ }
+
+ /* Define the validity bit. */
+ if (!(mode & MM_MODE_INVALID)) {
+ attrs |= PTE_VALID;
+ }
}
return attrs;
diff --git a/src/arch/fake/inc/hf/arch/mm.h b/src/arch/fake/inc/hf/arch/mm.h
index 9e8fe08..face8cf 100644
--- a/src/arch/fake/inc/hf/arch/mm.h
+++ b/src/arch/fake/inc/hf/arch/mm.h
@@ -35,6 +35,7 @@
pte_t arch_mm_block_pte(int level, paddr_t pa, uint64_t attrs);
bool arch_mm_is_block_allowed(int level);
bool arch_mm_pte_is_present(pte_t pte, int level);
+bool arch_mm_pte_is_valid(pte_t pte, int level);
bool arch_mm_pte_is_table(pte_t pte, int level);
bool arch_mm_pte_is_block(pte_t pte, int level);
paddr_t arch_mm_clear_pa(paddr_t pa);
diff --git a/src/arch/fake/mm.c b/src/arch/fake/mm.c
index 81e106f..721b230 100644
--- a/src/arch/fake/mm.c
+++ b/src/arch/fake/mm.c
@@ -50,7 +50,7 @@
pte_t arch_mm_block_pte(int level, paddr_t pa, uint64_t attrs)
{
/* Single pages are encoded differently to larger blocks. */
- pte_t pte = pa_addr(pa) | attrs | 0x1;
+ pte_t pte = pa_addr(pa) | attrs;
if (level == 0) {
pte |= 0x2;
}
@@ -66,6 +66,12 @@
bool arch_mm_pte_is_present(pte_t pte, int level)
{
+ /* TODO: model attributes. */
+ return arch_mm_pte_is_valid(pte, level);
+}
+
+bool arch_mm_pte_is_valid(pte_t pte, int level)
+{
(void)level;
return (pte & 0x1) != 0;
}
@@ -79,8 +85,9 @@
bool arch_mm_pte_is_block(pte_t pte, int level)
{
/* Single pages are encoded differently to larger blocks. */
- return arch_mm_is_block_allowed(level) &&
- (pte & 0x3) == (level == 0 ? 0x3 : 0x1);
+ return (level == 0 ? (pte & 0x2) != 0
+ : arch_mm_pte_is_present(pte, level) &&
+ !arch_mm_pte_is_table(pte, level));
}
static uint64_t hf_arch_fake_mm_clear_pte_attrs(pte_t pte)
@@ -107,9 +114,8 @@
uint64_t arch_mm_pte_attrs(pte_t pte)
{
- /* Attributes are not modelled. */
- (void)pte;
- return 0;
+ /* Attributes are not modelled fully. */
+ return pte & 0x1;
}
uint64_t arch_mm_combine_table_entry_attrs(uint64_t table_attrs,
@@ -143,9 +149,8 @@
uint64_t arch_mm_mode_to_attrs(int mode)
{
- /* Attributes are not modelled. */
- (void)mode;
- return 0;
+ /* Attributes are not modelled fully. */
+ return mode & MM_MODE_INVALID ? 0 : 0x1;
}
bool arch_mm_init(paddr_t table, bool first)
diff --git a/src/mm.c b/src/mm.c
index 161fbeb..18a2e78 100644
--- a/src/mm.c
+++ b/src/mm.c
@@ -162,10 +162,10 @@
/**
* Replaces a page table entry with the given value. If both old and new values
- * are present, it performs a break-before-make sequence where it first writes
- * an absent value to the PTE, flushes the TLB, then writes the actual new
- * value. This is to prevent cases where CPUs have different 'present' values in
- * their TLBs, which may result in issues for example in cache coherency.
+ * are valid, it performs a break-before-make sequence where it first writes an
+ * invalid value to the PTE, flushes the TLB, then writes the actual new value.
+ * This is to prevent cases where CPUs have different 'valid' values in their
+ * TLBs, which may result in issues for example in cache coherency.
*/
static void mm_replace_entry(ptable_addr_t begin, pte_t *pte, pte_t new_pte,
uint8_t level, int flags)
@@ -176,8 +176,8 @@
* We need to do the break-before-make sequence if both values are
* present, and if it hasn't been inhibited by the NOBBM flag.
*/
- if (!(flags & MAP_FLAG_NOBBM) && arch_mm_pte_is_present(v, level) &&
- arch_mm_pte_is_present(new_pte, level)) {
+ if (!(flags & MAP_FLAG_NOBBM) && arch_mm_pte_is_valid(v, level) &&
+ arch_mm_pte_is_valid(new_pte, level)) {
*pte = arch_mm_absent_pte(level);
mm_invalidate_tlb(begin, begin + mm_entry_size(level),
flags & MAP_FLAG_STAGE1);
@@ -386,13 +386,15 @@
* provided.
*/
static bool mm_ptable_identity_update(struct mm_ptable *t, paddr_t pa_begin,
- paddr_t pa_end, int mode, bool unmap)
+ paddr_t pa_end, int mode)
{
- uint64_t attrs = unmap ? 0 : arch_mm_mode_to_attrs(mode);
+ uint64_t attrs = arch_mm_mode_to_attrs(mode);
int flags = (mode & MM_MODE_NOSYNC ? MAP_FLAG_NOSYNC : 0) |
(mode & MM_MODE_NOINVALIDATE ? MAP_FLAG_NOBBM : 0) |
(mode & MM_MODE_STAGE1 ? MAP_FLAG_STAGE1 : 0) |
- (unmap ? MAP_FLAG_UNMAP : 0);
+ (mode & MM_MODE_INVALID && mode & MM_MODE_UNOWNED
+ ? MAP_FLAG_UNMAP
+ : 0);
uint8_t root_level = arch_mm_max_level(mode) + 1;
ptable_addr_t ptable_end =
arch_mm_root_table_count(mode) * mm_entry_size(root_level);
@@ -439,7 +441,7 @@
static bool mm_ptable_identity_map(struct mm_ptable *t, paddr_t pa_begin,
paddr_t pa_end, int mode)
{
- return mm_ptable_identity_update(t, pa_begin, pa_end, mode, false);
+ return mm_ptable_identity_update(t, pa_begin, pa_end, mode);
}
/**
@@ -449,7 +451,8 @@
static bool mm_ptable_unmap(struct mm_ptable *t, paddr_t pa_begin,
paddr_t pa_end, int mode)
{
- return mm_ptable_identity_update(t, pa_begin, pa_end, mode, true);
+ return mm_ptable_identity_update(
+ t, pa_begin, pa_end, mode | MM_MODE_UNOWNED | MM_MODE_INVALID);
}
/**
@@ -628,8 +631,8 @@
}
/**
- * Determines if the given address is mapped in the given page table by
- * recursively traversing all levels of the page table.
+ * Determines if the given address is valid in the address space of the given
+ * page table by recursively traversing all levels of the page table.
*/
static bool mm_is_mapped_recursive(struct mm_page_table *table,
ptable_addr_t addr, uint8_t level)
@@ -644,8 +647,8 @@
pte = table->entries[mm_index(addr, level)];
- if (arch_mm_pte_is_block(pte, level)) {
- return true;
+ if (!arch_mm_pte_is_valid(pte, level)) {
+ return false;
}
if (arch_mm_pte_is_table(pte, level)) {
@@ -654,12 +657,13 @@
addr, level - 1);
}
- /* The entry is not present. */
- return false;
+ /* The entry is a valid block. */
+ return true;
}
/**
- * Determines if the given address is mapped in the given page table.
+ * Determines if the given address is valid in the address space of the given
+ * page table.
*/
static bool mm_ptable_is_mapped(struct mm_ptable *t, ptable_addr_t addr,
int mode)
diff --git a/test/arch/mm_test.c b/test/arch/mm_test.c
index 3498086..9bc947f 100644
--- a/test/arch/mm_test.c
+++ b/test/arch/mm_test.c
@@ -61,26 +61,97 @@
/* TODO: initialize arch_mm and check max level of stage-2. */
/**
- * A block is present and mutually exclusive from a table.
+ * An absent entry is not present, valid, a block nor a table.
*/
-#define LEVEL_TEST(lvl) \
- TEST(arch_mm, block_properties_level##lvl) \
- { \
- uint8_t level = lvl; \
- uint64_t attrs = arch_mm_mode_to_attrs(0); \
- pte_t block_pte; \
- \
- /* Test doesn't apply if a block is not allowed. */ \
- if (!arch_mm_is_block_allowed(level)) { \
- return; \
- } \
- \
- block_pte = arch_mm_block_pte(level, pa_init(0x12345678000), \
- attrs); \
- \
- EXPECT_TRUE(arch_mm_pte_is_present(block_pte, level)); \
- EXPECT_TRUE(arch_mm_pte_is_block(block_pte, level)); \
- EXPECT_FALSE(arch_mm_pte_is_table(block_pte, level)); \
+#define LEVEL_TEST(lvl) \
+ TEST(arch_mm, absent_properties_level##lvl) \
+ { \
+ uint8_t level = lvl; \
+ pte_t absent_pte; \
+ \
+ absent_pte = arch_mm_absent_pte(level); \
+ \
+ EXPECT_FALSE(arch_mm_pte_is_present(absent_pte, level)); \
+ EXPECT_FALSE(arch_mm_pte_is_valid(absent_pte, level)); \
+ EXPECT_FALSE(arch_mm_pte_is_block(absent_pte, level)); \
+ EXPECT_FALSE(arch_mm_pte_is_table(absent_pte, level)); \
+ }
+EXPAND_LEVEL_TESTS
+#undef LEVEL_TEST
+
+/**
+ * An invalid block is present and mutually exclusive from a table.
+ */
+#define LEVEL_TEST(lvl) \
+ TEST(arch_mm, invalid_block_properties_level##lvl) \
+ { \
+ uint8_t level = lvl; \
+ uint64_t attrs = arch_mm_mode_to_attrs(MM_MODE_INVALID); \
+ pte_t block_pte; \
+ \
+ /* Test doesn't apply if a block is not allowed. */ \
+ if (!arch_mm_is_block_allowed(level)) { \
+ return; \
+ } \
+ \
+ block_pte = arch_mm_block_pte(level, pa_init(PAGE_SIZE * 19), \
+ attrs); \
+ \
+ EXPECT_TRUE(arch_mm_pte_is_present(block_pte, level)); \
+ EXPECT_FALSE(arch_mm_pte_is_valid(block_pte, level)); \
+ EXPECT_TRUE(arch_mm_pte_is_block(block_pte, level)); \
+ EXPECT_FALSE(arch_mm_pte_is_table(block_pte, level)); \
+ }
+EXPAND_LEVEL_TESTS
+#undef LEVEL_TEST
+
+/**
+ * A valid block is present and mutually exclusive from a table.
+ */
+#define LEVEL_TEST(lvl) \
+ TEST(arch_mm, valid_block_properties_level##lvl) \
+ { \
+ uint8_t level = lvl; \
+ uint64_t attrs = arch_mm_mode_to_attrs(0); \
+ pte_t block_pte; \
+ \
+ /* Test doesn't apply if a block is not allowed. */ \
+ if (!arch_mm_is_block_allowed(level)) { \
+ return; \
+ } \
+ \
+ block_pte = arch_mm_block_pte( \
+ level, pa_init(PAGE_SIZE * 12345678), attrs); \
+ \
+ EXPECT_TRUE(arch_mm_pte_is_present(block_pte, level)); \
+ EXPECT_TRUE(arch_mm_pte_is_valid(block_pte, level)); \
+ EXPECT_TRUE(arch_mm_pte_is_block(block_pte, level)); \
+ EXPECT_FALSE(arch_mm_pte_is_table(block_pte, level)); \
+ }
+EXPAND_LEVEL_TESTS
+#undef LEVEL_TEST
+
+/**
+ * A table is present, valid and mutually exclusive from a block.
+ */
+#define LEVEL_TEST(lvl) \
+ TEST(arch_mm, table_properties_level##lvl) \
+ { \
+ uint8_t level = lvl; \
+ pte_t table_pte; \
+ \
+ /* Test doesn't apply to level 0 as there can't be a table. */ \
+ if (level == 0) { \
+ return; \
+ } \
+ \
+ table_pte = arch_mm_table_pte(level, \
+ pa_init(PAGE_SIZE * 999999999)); \
+ \
+ EXPECT_TRUE(arch_mm_pte_is_present(table_pte, level)); \
+ EXPECT_TRUE(arch_mm_pte_is_valid(table_pte, level)); \
+ EXPECT_FALSE(arch_mm_pte_is_block(table_pte, level)); \
+ EXPECT_TRUE(arch_mm_pte_is_table(table_pte, level)); \
}
EXPAND_LEVEL_TESTS
#undef LEVEL_TEST
@@ -109,6 +180,13 @@
EXPECT_EQ(pa_addr(arch_mm_block_from_pte(block_pte)), \
pa_addr(addr)); \
\
+ addr = pa_init(PAGE_SIZE * 17); \
+ attrs = arch_mm_mode_to_attrs(MM_MODE_INVALID); \
+ block_pte = arch_mm_block_pte(level, addr, attrs); \
+ EXPECT_EQ(arch_mm_pte_attrs(block_pte), attrs); \
+ EXPECT_EQ(pa_addr(arch_mm_block_from_pte(block_pte)), \
+ pa_addr(addr)); \
+ \
addr = pa_init(PAGE_SIZE * 500); \
attrs = arch_mm_mode_to_attrs(MM_MODE_R | MM_MODE_W); \
block_pte = arch_mm_block_pte(level, addr, attrs); \