feat(lib/realm): reduce the size of struct granule

This patch makes the following changes:
- a new 8-bit 'byte_spinlock_t' type is added and used for the
  granule lock in place of the 32-bit 'spinlock_t';
- 'enum granule_state' is removed and replaced with macro
  definitions;
- the type of the 'struct granule' member 'state' is changed to
  unsigned char;
- the type of the 'struct granule' member 'refcount' is changed
  from unsigned long to unsigned short, as its maximum value
  cannot exceed the number of entries in an RTT (512);
- the 'refcount' access functions are modified to use 16-bit
  values.
These modifications reduce the size of 'struct granule' from
16 to 4 bytes and the total size of the 'granules[]' array from
16MB to 4MB for the default RMM_MAX_GRANULES = 0x100000.
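
As an illustration of the size arithmetic (a minimal stand-alone
sketch; 'old_granule'/'new_granule' are hypothetical names used only
here, the member types mirror the patch, and the 16-byte figure
assumes an LP64/AArch64 build):

  struct old_granule {            /* layout before this patch */
      unsigned int lock;          /* spinlock_t: 4 bytes */
      int state;                  /* enum granule_state: 4 bytes */
      unsigned long refcount;     /* 8 bytes */
  };                              /* sizeof == 16 */

  struct new_granule {            /* layout after this patch */
      unsigned char lock;         /* byte_spinlock_t: 1 byte */
      unsigned char state;        /* 1 byte */
      unsigned short refcount;    /* 2 bytes */
  };                              /* sizeof == 4, no padding */

  /* Compile-time check that no padding is inserted */
  _Static_assert(sizeof(struct new_granule) == 4, "unexpected padding");

The COMPILER_ASSERT added to runtime/rmi/granule.c below performs the
equivalent no-padding check on the real 'struct granule'.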

Change-Id: I611ed3f349806b033e03c52c5f60a0f903f93e11
Signed-off-by: AlexeiFedorov <Alexei.Fedorov@arm.com>
diff --git a/lib/arch/include/aarch64/atomics.h b/lib/arch/include/aarch64/atomics.h
index 8c80503..b68c761 100644
--- a/lib/arch/include/aarch64/atomics.h
+++ b/lib/arch/include/aarch64/atomics.h
@@ -12,7 +12,7 @@
 /*
  * Atomically adds @val to the 64-bit value stored at memory location @loc.
  */
-static inline void atomic_add_64(uint64_t *loc, long val)
+static inline void atomic_add_64(uint64_t *loc, uint64_t val)
 {
 	asm volatile(
 	"	stadd %[val], %[loc]\n"
@@ -26,9 +26,9 @@
  * Stores to memory with release semantics.
  * Returns the old value.
  */
-static inline unsigned long atomic_load_add_release_64(uint64_t *loc, long val)
+static inline uint64_t atomic_load_add_release_64(uint64_t *loc, uint64_t val)
 {
-	unsigned long old_val;
+	uint64_t old_val;
 
 	asm volatile(
 	"	ldaddl %[val], %[old_val], %[loc]\n"
@@ -41,6 +41,37 @@
 }
 
 /*
+ * Atomically adds @val to the 16-bit value stored at memory location @loc.
+ */
+static inline void atomic_add_16(uint16_t *loc, uint16_t val)
+{
+	asm volatile(
+	"	staddh %w[val], %[loc]\n"
+	: [loc] "+Q" (*loc)
+	: [val] "r" (val)
+	: "memory");
+}
+
+/*
+ * Atomically adds @val to the 16-bit value stored at memory location @loc.
+ * Stores to memory with release semantics.
+ * Returns the old value.
+ */
+static inline uint16_t atomic_load_add_release_16(uint16_t *loc, uint16_t val)
+{
+	uint16_t old_val;
+
+	asm volatile(
+	"	ldaddlh %w[val], %w[old_val], %[loc]\n"
+	: [loc] "+Q" (*loc),
+	  [old_val] "=r" (old_val)
+	: [val] "r" (val)
+	: "memory");
+
+	return old_val;
+}
+
+/*
  * Atomically set bit @bit in value pointed to by @loc with release semantics.
  */
 static inline void atomic_bit_set_release_64(uint64_t *loc, unsigned int bit)
diff --git a/lib/arch/include/aarch64/memory.h b/lib/arch/include/aarch64/memory.h
index 34fe271..2de3b03 100644
--- a/lib/arch/include/aarch64/memory.h
+++ b/lib/arch/include/aarch64/memory.h
@@ -71,4 +71,40 @@
 }
 #define SCA_READ64_ACQUIRE(_p) ((typeof(*(_p)))__sca_read64_acquire((void *)(_p)))
 
+/* Single-Copy Atomic 16-bit read */
+static inline uint16_t __sca_read16(uint16_t *ptr)
+{
+	uint16_t val;
+
+	/* To avoid misra-c2012-2.7 warnings */
+	(void)ptr;
+
+	asm volatile(
+	"	ldrh	%w[val], %[ptr]\n"
+	: [val] "=r" (val)
+	: [ptr] "m" (*ptr)
+	);
+
+	return val;
+}
+#define SCA_READ16(_p) ((typeof(*(_p)))__sca_read16((void *)(_p)))
+
+/* Single-Copy Atomic 16-bit read with ACQUIRE memory ordering semantics */
+static inline uint16_t __sca_read16_acquire(uint16_t *ptr)
+{
+	uint16_t val;
+
+	/* To avoid misra-c2012-2.7 warnings */
+	(void)ptr;
+
+	asm volatile(
+	"	ldarh	%w[val], %[ptr]\n"
+	: [val] "=r" (val)
+	: [ptr] "Q" (*ptr)
+	);
+
+	return val;
+}
+#define SCA_READ16_ACQUIRE(_p) ((typeof(*(_p)))__sca_read16_acquire((void *)(_p)))
+
 #endif /* MEMORY_H */
diff --git a/lib/arch/include/aarch64/spinlock.h b/lib/arch/include/aarch64/spinlock.h
index 37cab6e..4e2a0cd 100644
--- a/lib/arch/include/aarch64/spinlock.h
+++ b/lib/arch/include/aarch64/spinlock.h
@@ -7,9 +7,10 @@
 #define SPINLOCK_H
 
 /*
- * A trivial spinlock implementation, per ARM DDI 0487D.a, section K11.3.4.
+ * Trivial spinlock implementations, per ARM DDI 0487J.a, section K13.3.1
  */
 
+/* 32-bit spinlock */
 typedef struct {
 	unsigned int val;
 } spinlock_t;
@@ -44,4 +45,39 @@
 	);
 }
 
+/* 8-bit spinlock */
+typedef struct {
+	unsigned char val;
+} byte_spinlock_t;
+
+static inline void byte_spinlock_acquire(byte_spinlock_t *l)
+{
+	unsigned int tmp;
+
+	asm volatile(
+	"	sevl\n"
+	"	prfm	pstl1keep, %[lock]\n"
+	"1:\n"
+	"	wfe\n"
+	"	ldaxrb	%w[tmp], %[lock]\n"
+	"	cbnz	%w[tmp], 1b\n"
+	"	stxrb	%w[tmp], %w[one], %[lock]\n"
+	"	cbnz	%w[tmp], 1b\n"
+	: [lock] "+Q" (l->val),
+	  [tmp] "=&r" (tmp)
+	: [one] "r" (1)
+	: "memory"
+	);
+}
+
+static inline void byte_spinlock_release(byte_spinlock_t *l)
+{
+	asm volatile(
+	"	stlrb	wzr, %[lock]\n"
+	: [lock] "+Q" (l->val)
+	:
+	: "memory"
+	);
+}
+
 #endif /* SPINLOCK_H */
diff --git a/lib/arch/include/fake_host/atomics.h b/lib/arch/include/fake_host/atomics.h
index fd555e3..b923e38 100644
--- a/lib/arch/include/fake_host/atomics.h
+++ b/lib/arch/include/fake_host/atomics.h
@@ -12,7 +12,7 @@
 /*
  * Atomically adds @val to the 64-bit value stored at memory location @loc.
  */
-static inline void atomic_add_64(uint64_t *loc, long val)
+static inline void atomic_add_64(uint64_t *loc, uint64_t val)
 {
 	*loc = *loc + val;
 }
@@ -22,9 +22,30 @@
  * Stores to memory with release semantics.
  * Returns the old value.
  */
-static inline unsigned long atomic_load_add_release_64(uint64_t *loc, long val)
+static inline uint64_t atomic_load_add_release_64(uint64_t *loc, uint64_t val)
 {
-	unsigned long old_val = *loc;
+	uint64_t old_val = *loc;
+
+	*loc = *loc + val;
+	return old_val;
+}
+
+/*
+ * Atomically adds @val to the 16-bit value stored at memory location @loc.
+ */
+static inline void atomic_add_16(uint16_t *loc, uint16_t val)
+{
+	*loc = *loc + val;
+}
+
+/*
+ * Atomically adds @val to the 16-bit value stored at memory location @loc.
+ * Stores to memory with release semantics.
+ * Returns the old value.
+ */
+static inline uint16_t atomic_load_add_release_16(uint16_t *loc, uint16_t val)
+{
+	uint16_t old_val = *loc;
 
 	*loc = *loc + val;
 	return old_val;
@@ -69,7 +90,7 @@
 static inline bool atomic_bit_set_acquire_release_64(uint64_t *loc, unsigned int bit)
 {
 	uint64_t mask = (1UL << bit);
-	unsigned long old_val = *loc & mask;
+	uint64_t old_val = *loc & mask;
 
 	*loc |= mask;
 	return (old_val != 0UL);
diff --git a/lib/arch/include/fake_host/memory.h b/lib/arch/include/fake_host/memory.h
index 3f182a4..645f5d2 100644
--- a/lib/arch/include/fake_host/memory.h
+++ b/lib/arch/include/fake_host/memory.h
@@ -37,4 +37,18 @@
 }
 #define SCA_READ64_ACQUIRE(_p) ((typeof(*(_p)))__sca_read64_acquire((uint64_t *)(_p)))
 
+/* Single-Copy Atomic 16-bit read */
+static inline uint16_t __sca_read16(uint16_t *ptr)
+{
+	return *ptr;
+}
+#define SCA_READ16(_p) ((typeof(*(_p)))__sca_read16((uint16_t *)(_p)))
+
+/* Single-Copy Atomic 16-bit read with ACQUIRE memory ordering semantics */
+static inline uint16_t __sca_read16_acquire(uint16_t *ptr)
+{
+	return *ptr;
+}
+#define SCA_READ16_ACQUIRE(_p) ((typeof(*(_p)))__sca_read16_acquire((uint16_t *)(_p)))
+
 #endif /* MEMORY_H */
diff --git a/lib/arch/include/fake_host/spinlock.h b/lib/arch/include/fake_host/spinlock.h
index 7aa4b29..e279825 100644
--- a/lib/arch/include/fake_host/spinlock.h
+++ b/lib/arch/include/fake_host/spinlock.h
@@ -22,4 +22,18 @@
 	host_spinlock_release(l);
 }
 
+typedef struct byte_spinlock_s {
+	unsigned char val;
+} byte_spinlock_t;
+
+static inline void byte_spinlock_acquire(byte_spinlock_t *l)
+{
+	host_byte_spinlock_acquire(l);
+}
+
+static inline void byte_spinlock_release(byte_spinlock_t *l)
+{
+	host_byte_spinlock_release(l);
+}
+
 #endif /* SPINLOCK_H */
diff --git a/lib/common/include/fake_host/host_harness.h b/lib/common/include/fake_host/host_harness.h
index c99c900..b439b94 100644
--- a/lib/common/include/fake_host/host_harness.h
+++ b/lib/common/include/fake_host/host_harness.h
@@ -15,9 +15,13 @@
 void host_write_sysreg(char *reg_name, u_register_t v);
 
 struct spinlock_s;
+struct byte_spinlock_s;
+
 /* Fake host harness to lock and release spin lock */
 void host_spinlock_acquire(struct spinlock_s *l);
 void host_spinlock_release(struct spinlock_s *l);
+void host_byte_spinlock_acquire(struct byte_spinlock_s *l);
+void host_byte_spinlock_release(struct byte_spinlock_s *l);
 
 /*
  * Fake host Wrapper to copy data from NS into Realm memory. The function
diff --git a/lib/common/include/utils_def.h b/lib/common/include/utils_def.h
index bd763cc..129249f 100644
--- a/lib/common/include/utils_def.h
+++ b/lib/common/include/utils_def.h
@@ -63,6 +63,9 @@
 #define round_down(value, boundary)		\
 	((value) & ~round_boundary(value, boundary))
 
+/* Size of the 'm_' member of the 's_' structure */
+#define SIZE_OF(s_, m_)		sizeof(((struct s_ *)NULL)->m_)
+
 /* Compute the number of elements in the given array */
 #define ARRAY_SIZE(a)	\
 	(sizeof(a) / sizeof((a)[0]))
diff --git a/lib/realm/include/granule.h b/lib/realm/include/granule.h
index 6173d24..8114449 100644
--- a/lib/realm/include/granule.h
+++ b/lib/realm/include/granule.h
@@ -15,14 +15,14 @@
 #include <spinlock.h>
 #include <status.h>
 
-static inline unsigned long granule_refcount_read_relaxed(struct granule *g)
+static inline unsigned short granule_refcount_read_relaxed(struct granule *g)
 {
-	return __sca_read64(&g->refcount);
+	return __sca_read16(&g->refcount);
 }
 
-static inline unsigned long granule_refcount_read_acquire(struct granule *g)
+static inline unsigned short granule_refcount_read_acquire(struct granule *g)
 {
-	return __sca_read64_acquire(&g->refcount);
+	return __sca_read16_acquire(&g->refcount);
 }
 
 /*
@@ -37,16 +37,16 @@
  * intended as a mechanism to ensure correctness.
  */
 static inline void __granule_assert_unlocked_invariants(struct granule *g,
-							enum granule_state state)
+							unsigned char state)
 {
 	(void)g;
 
 	switch (state) {
 	case GRANULE_STATE_NS:
-		assert(granule_refcount_read_relaxed(g) == 0UL);
+		assert(granule_refcount_read_relaxed(g) == 0U);
 		break;
 	case GRANULE_STATE_DELEGATED:
-		assert(g->refcount == 0UL);
+		assert(g->refcount == 0U);
 		break;
 	case GRANULE_STATE_RD:
 		/*
@@ -56,17 +56,17 @@
 		 */
 		break;
 	case GRANULE_STATE_REC:
-		assert(granule_refcount_read_relaxed(g) <= 1UL);
+		assert(granule_refcount_read_relaxed(g) <= 1U);
 		break;
 	case GRANULE_STATE_DATA:
-		assert(g->refcount == 0UL);
+		assert(g->refcount == 0U);
 		break;
 	case GRANULE_STATE_RTT:
 		/* Refcount cannot be greater that number of entries in an RTT */
-		assert(g->refcount <= (GRANULE_SIZE / sizeof(uint64_t)));
+		assert(g->refcount <= (unsigned short)(GRANULE_SIZE / sizeof(uint64_t)));
 		break;
 	case GRANULE_STATE_REC_AUX:
-		assert(g->refcount == 0UL);
+		assert(g->refcount == 0U);
 		break;
 	default:
 		/* Unknown granule type */
@@ -75,7 +75,7 @@
 }
 
 /* Must be called with g->lock held */
-static inline enum granule_state granule_get_state(struct granule *g)
+static inline unsigned char granule_get_state(struct granule *g)
 {
 	assert(g != NULL);
 
@@ -84,8 +84,7 @@
 }
 
 /* Must be called with g->lock held */
-static inline void granule_set_state(struct granule *g,
-				     enum granule_state state)
+static inline void granule_set_state(struct granule *g, unsigned char state)
 {
 	assert(g != NULL);
 
@@ -99,12 +98,12 @@
  * Also asserts if invariant conditions are met.
  */
 static inline bool granule_lock_on_state_match(struct granule *g,
-				    enum granule_state expected_state)
+						unsigned char expected_state)
 {
-	spinlock_acquire(&g->lock);
+	byte_spinlock_acquire(&g->lock);
 
 	if (granule_get_state(g) != expected_state) {
-		spinlock_release(&g->lock);
+		byte_spinlock_release(&g->lock);
 		return false;
 	}
 
@@ -117,7 +116,7 @@
  * reference to it). In these cases we should never fail to acquire the lock.
  */
 static inline void granule_lock(struct granule *g,
-				enum granule_state expected_state)
+				unsigned char expected_state)
 {
 	__unused bool locked = granule_lock_on_state_match(g, expected_state);
 
@@ -127,12 +126,12 @@
 static inline void granule_unlock(struct granule *g)
 {
 	__granule_assert_unlocked_invariants(g, granule_get_state(g));
-	spinlock_release(&g->lock);
+	byte_spinlock_release(&g->lock);
 }
 
 /* Transtion state to @new_state and unlock the granule */
 static inline void granule_unlock_transition(struct granule *g,
-					     enum granule_state new_state)
+						unsigned char new_state)
 {
 	granule_set_state(g, new_state);
 	granule_unlock(g);
@@ -142,13 +141,13 @@
 struct granule *addr_to_granule(unsigned long addr);
 struct granule *find_granule(unsigned long addr);
 struct granule *find_lock_granule(unsigned long addr,
-				  enum granule_state expected_state);
+				  unsigned char expected_state);
 
 bool find_lock_two_granules(unsigned long addr1,
-			    enum granule_state expected_state1,
+			    unsigned char expected_state1,
 			    struct granule **g1,
 			    unsigned long addr2,
-			    enum granule_state expected_state2,
+			    unsigned char expected_state2,
 			    struct granule **g2);
 
 void granule_memzero(struct granule *g, enum buffer_slot slot);
@@ -169,19 +168,19 @@
 static inline void __granule_put(struct granule *g)
 {
 	assert(g->lock.val != 0U);
-	assert(g->refcount > 0UL);
+	assert(g->refcount != 0U);
 	g->refcount--;
 }
 
 /* Must be called with g->lock held */
-static inline void __granule_refcount_inc(struct granule *g, unsigned long val)
+static inline void __granule_refcount_inc(struct granule *g, unsigned short val)
 {
 	assert(g->lock.val != 0U);
 	g->refcount += val;
 }
 
 /* Must be called with g->lock held */
-static inline void __granule_refcount_dec(struct granule *g, unsigned long val)
+static inline void __granule_refcount_dec(struct granule *g, unsigned short val)
 {
 	assert(g->lock.val != 0U);
 	assert(g->refcount >= val);
@@ -193,7 +192,7 @@
  */
 static inline void atomic_granule_get(struct granule *g)
 {
-	atomic_add_64(&g->refcount, 1L);
+	atomic_add_16(&g->refcount, 1);
 }
 
 /*
@@ -201,7 +200,7 @@
  */
 static inline void atomic_granule_put(struct granule *g)
 {
-	atomic_add_64(&g->refcount, -1L);
+	atomic_add_16(&g->refcount, (uint16_t)(-1));
 }
 
 /*
@@ -210,10 +209,10 @@
  */
 static inline void atomic_granule_put_release(struct granule *g)
 {
-	unsigned long old_refcount __unused;
+	unsigned short old_refcount __unused;
 
-	old_refcount = atomic_load_add_release_64(&g->refcount, -1L);
-	assert(old_refcount > 0UL);
+	old_refcount = atomic_load_add_release_16(&g->refcount, (uint16_t)(-1));
+	assert(old_refcount != 0U);
 }
 
 /*
@@ -232,7 +231,7 @@
  *	if the granule at @addr has a non-zero reference count.
  */
 static inline int find_lock_unused_granule(unsigned long addr,
-					   enum granule_state expected_state,
+					   unsigned char expected_state,
 					   struct granule **g)
 {
 	*g = find_lock_granule(addr, expected_state);
@@ -244,7 +243,7 @@
 	 * Granules can have lock-free access (e.g. REC), thus using acquire
 	 * semantics to avoid race conditions.
 	 */
-	if (granule_refcount_read_acquire(*g) != 0UL) {
+	if (granule_refcount_read_acquire(*g) != 0U) {
 		granule_unlock(*g);
 		*g = NULL;
 		return -EBUSY;
diff --git a/lib/realm/include/granule_types.h b/lib/realm/include/granule_types.h
index 6aaac4b..cc42c01 100644
--- a/lib/realm/include/granule_types.h
+++ b/lib/realm/include/granule_types.h
@@ -65,134 +65,137 @@
  * locked these may contain non-zero values.
  */
 
-enum granule_state {
-	/*
-	 * Non-Secure granule (external)
-	 *
-	 * Granule content is not protected by granule::lock, as it is always
-	 * subject to reads and writes from the NS world.
-	 */
-	GRANULE_STATE_NS,
-	/*
-	 * Delegated Granule (external)
-	 *
-	 * Granule content is protected by granule::lock.
-	 *
-	 * No references are held on this granule type.
-	 */
-	GRANULE_STATE_DELEGATED,
-	/*
-	 * Realm Descriptor Granule (external)
-	 *
-	 * Granule content is protected by granule::lock.
-	 *
-	 * A reference is held on this granule:
-	 * - For each associated REC granule.
-	 *
-	 * The RD may only be destroyed when the following objects
-	 * have a reference count of zero:
-	 * - The root-level RTT
-	 */
-	GRANULE_STATE_RD,
-	/*
-	 * Realm Execution Context Granule (external)
-	 *
-	 * Granule content (see struct rec) comprises execution
-	 * context state and cached realm information copied from the RD.
-	 *
-	 * Execution context is not protected by granule::lock, because we can't
-	 * enter a Realm while holding the lock.
-	 *
-	 * The following rules with respect to the granule's reference apply:
-	 * - A reference is held on this granule when a REC is running.
-	 * - As REC cannot be run on two PEs at the same time, the maximum
-	 *   value of the reference count is one.
-	 * - When the REC in entered, the reference count is incremented
-	 *   (set to 1) atomically while granule::lock is held.
-	 * - When the REC exits, the reference counter is released (set to 0)
-	 *   atomically with store-release semantics without granule::lock being
-	 *   held.
-	 * - The RMM can access the granule's content on the entry and exit path
-	 *   from the REC while the reference is held.
-	 */
-	GRANULE_STATE_REC,
-	/*
-	 * Realm Execution Context auxiliary granule (internal)
-	 *
-	 * Granule auxiliary content is used to store any state that cannot
-	 * fit in the main REC page. This is typically used for context
-	 * save/restore of PE features like SVE, SME, etc.
-	 *
-	 * Granule content is not protected by granule::lock nor the reference
-	 * count. The RMM can access the content of the auxiliary granules
-	 * only while holding a lock or reference to the parent REC granule.
-	 *
-	 * The granule::lock is held during a state change to
-	 * GRANULE_STATE_REC_AUX and from GRANULE_STATE_REC_AUX.
-	 *
-	 * The complete internal locking order when changing REC_AUX
-	 * granule's state is:
-	 *
-	 * REC -> REC_AUX[0] -> REC_AUX[1] -> ... -> REC_AUX[n-1]
-	 */
-	GRANULE_STATE_REC_AUX,
+/*
+ * Non-Secure granule (external)
+ *
+ * Granule content is not protected by granule::lock, as it is always
+ * subject to reads and writes from the NS world.
+ */
+#define GRANULE_STATE_NS		0U
 
-	/*
-	 * Data Granule (internal)
-	 *
-	 * Granule content is not protected by granule::lock, as it is always
-	 * subject to reads and writes from within a Realm.
-	 *
-	 * A granule in this state is always referenced from exactly one entry
-	 * in an RTT granule which must be locked before locking this granule.
-	 * Only a single DATA granule can be locked at a time.
-	 * The complete internal locking order for DATA granules is:
-	 *
-	 * RD -> RTT -> RTT -> ... -> DATA
-	 *
-	 * No references are held on this granule type.
-	 */
-	GRANULE_STATE_DATA,
-	/*
-	 * RTT Granule (internal)
-	 *
-	 * Granule content is protected by granule::lock.
-	 *
-	 * Granule content is protected by granule::lock, but hardware
-	 * translation table walks may read the RTT at any point in time.
-	 * TODO: do we wish/need to use hardware access flag management?
-	 *
-	 * Multiple granules in this state can only be locked at the same time
-	 * if they are part of the same tree, and only in topological order
-	 * from root to leaf. The topological order of concatenated root level
-	 * RTTs is from lowest address to highest address.
-	 *
-	 * The complete internal locking order for RTT granules is:
-	 *
-	 * RD -> [RTT] -> ... -> RTT
-	 *
-	 * A reference is held on this granule for each entry in the RTT that
-	 * refers to a granule:
-	 *   - Table s2tte.
-	 *   - Assigned_RAM s2tte.
-	 *   - Assigned_NS s2tte.
-	 *   - Assigned s2tte.
-	 */
-	GRANULE_STATE_RTT,
-	GRANULE_STATE_LAST = GRANULE_STATE_RTT
-};
+/*
+ * Delegated Granule (external)
+ *
+ * Granule content is protected by granule::lock.
+ *
+ * No references are held on this granule type.
+ */
+#define GRANULE_STATE_DELEGATED		1U
+
+/*
+ * Realm Descriptor Granule (external)
+ *
+ * Granule content is protected by granule::lock.
+ *
+ * A reference is held on this granule:
+ * - For each associated REC granule.
+ *
+ * The RD may only be destroyed when the following objects
+ * have a reference count of zero:
+ * - The root-level RTT
+ */
+#define GRANULE_STATE_RD		2U
+
+/*
+ * Realm Execution Context Granule (external)
+ *
+ * Granule content (see struct rec) comprises execution
+ * context state and cached realm information copied from the RD.
+ *
+ * Execution context is not protected by granule::lock, because we can't
+ * enter a Realm while holding the lock.
+ *
+ * The following rules with respect to the granule's reference apply:
+ * - A reference is held on this granule when a REC is running.
+ * - As REC cannot be run on two PEs at the same time, the maximum
+ *   value of the reference count is one.
+ * - When the REC in entered, the reference count is incremented
+ *   (set to 1) atomically while granule::lock is held.
+ * - When the REC exits, the reference counter is released (set to 0)
+ *   atomically with store-release semantics without granule::lock being
+ *   held.
+ * - The RMM can access the granule's content on the entry and exit path
+ *   from the REC while the reference is held.
+ */
+#define GRANULE_STATE_REC		3U
+
+/*
+ * Realm Execution Context auxiliary granule (internal)
+ *
+ * Granule auxiliary content is used to store any state that cannot
+ * fit in the main REC page. This is typically used for context
+ * save/restore of PE features like SVE, SME, etc.
+ *
+ * Granule content is not protected by granule::lock nor the reference
+ * count. The RMM can access the content of the auxiliary granules
+ * only while holding a lock or reference to the parent REC granule.
+ *
+ * The granule::lock is held during a state change to
+ * GRANULE_STATE_REC_AUX and from GRANULE_STATE_REC_AUX.
+ *
+ * The complete internal locking order when changing REC_AUX
+ * granule's state is:
+ *
+ * REC -> REC_AUX[0] -> REC_AUX[1] -> ... -> REC_AUX[n-1]
+ */
+#define GRANULE_STATE_REC_AUX		4U
+
+/*
+ * Data Granule (internal)
+ *
+ * Granule content is not protected by granule::lock, as it is always
+ * subject to reads and writes from within a Realm.
+ *
+ * A granule in this state is always referenced from exactly one entry
+ * in an RTT granule which must be locked before locking this granule.
+ * Only a single DATA granule can be locked at a time.
+ * The complete internal locking order for DATA granules is:
+ *
+ * RD -> RTT -> RTT -> ... -> DATA
+ *
+ * No references are held on this granule type.
+ */
+#define GRANULE_STATE_DATA		5U
+
+/*
+ * RTT Granule (internal)
+ *
+ * Granule content is protected by granule::lock.
+ *
+ * Granule content is protected by granule::lock, but hardware
+ * translation table walks may read the RTT at any point in time.
+ * TODO: do we wish/need to use hardware access flag management?
+ *
+ * Multiple granules in this state can only be locked at the same time
+ * if they are part of the same tree, and only in topological order
+ * from root to leaf. The topological order of concatenated root level
+ * RTTs is from lowest address to highest address.
+ *
+ * The complete internal locking order for RTT granules is:
+ *
+ * RD -> [RTT] -> ... -> RTT
+ *
+ * A reference is held on this granule for each entry in the RTT that
+ * refers to a granule:
+ *   - Table s2tte.
+ *   - Assigned_RAM s2tte.
+ *   - Assigned_NS s2tte.
+ *   - Assigned s2tte.
+ */
+#define GRANULE_STATE_RTT		6U
+#define GRANULE_STATE_LAST		GRANULE_STATE_RTT
 
 struct granule {
 	/*
 	 * @lock protects the struct granule itself. Take this lock whenever
 	 * inspecting or modifying any other fields in this struct.
 	 */
-	spinlock_t lock;
+	byte_spinlock_t lock;
 
 	/*
 	 * @state is the state of the granule.
 	 */
-	enum granule_state state;
+	unsigned char state;
 
 	/*
 	 * @refcount counts RMM and realm references to this granule with the
@@ -206,7 +209,7 @@
 	 *    types of granules may impose further restrictions on concurrent
 	 *    access.
 	 */
-	unsigned long refcount;
+	unsigned short refcount;
 };
 
 #endif /* GRANULE_TYPES_H */
diff --git a/lib/realm/src/granule.c b/lib/realm/src/granule.c
index e51df09..94e5d49 100644
--- a/lib/realm/src/granule.c
+++ b/lib/realm/src/granule.c
@@ -112,7 +112,7 @@
  *	@expected_state.
  */
 struct granule *find_lock_granule(unsigned long addr,
-				  enum granule_state expected_state)
+				  unsigned char expected_state)
 {
 	struct granule *g;
 
@@ -130,9 +130,9 @@
 
 struct granule_set {
 	unsigned long addr;
-	enum granule_state state;
 	struct granule *g;
 	struct granule **g_ret;
+	unsigned char state;
 };
 
 /*
@@ -222,15 +222,15 @@
  */
 bool find_lock_two_granules(
 			unsigned long addr1,
-			enum granule_state expected_state1,
+			unsigned char expected_state1,
 			struct granule **g1,
 			unsigned long addr2,
-			enum granule_state expected_state2,
+			unsigned char expected_state2,
 			struct granule **g2)
 {
 	struct granule_set gs[] = {
-		{addr1, expected_state1, NULL, g1},
-		{addr2, expected_state2, NULL, g2}
+		{addr1, NULL, g1, expected_state1},
+		{addr2, NULL, g2, expected_state2}
 	};
 
 	assert((g1 != NULL) && (g2 != NULL));
diff --git a/lib/realm/tests/granule.cpp b/lib/realm/tests/granule.cpp
index 56ec940..bd42283 100644
--- a/lib/realm/tests/granule.cpp
+++ b/lib/realm/tests/granule.cpp
@@ -23,12 +23,14 @@
 #include <utils_def.h>
 }
 
+#define SHORTS_EQUAL(expected, actual)	\
+	LONGS_EQUAL((expected) & 0xffff, (actual) & 0xffff)
+
 /* Function to get a random granule index in the range [1, NR_GRANULES - 2] */
 static inline unsigned int get_rand_granule_idx(void)
 {
 	return (unsigned int)test_helpers_get_rand_in_range(1UL,
 					test_helpers_get_nr_granules() - 2U);
-
 }
 
 /* Function to get the index of the last granule in the system */
@@ -98,8 +100,8 @@
  */
 static inline unsigned int set_rand_non_zero_lock_value(struct granule *granule)
 {
-	unsigned int lock =
-		(unsigned int)test_helpers_get_rand_in_range(1UL, INT_MAX);
+	unsigned char lock =
+		(unsigned char)test_helpers_get_rand_in_range(1UL, UCHAR_MAX);
 
 	granule->lock.val = lock;
 	return lock;
@@ -107,7 +109,6 @@
 
 TEST_GROUP(granule) {
 
-
 	TEST_SETUP()
 	{
 		test_helpers_init();
@@ -344,9 +345,9 @@
 {
 	struct granule *granule;
 	unsigned long addr = get_rand_granule_addr();
-	unsigned long val =
-		(unsigned long)test_helpers_get_rand_in_range(10UL, INT_MAX);
-	unsigned long read_val;
+	unsigned short val =
+		(unsigned short)test_helpers_get_rand_in_range(1UL, USHRT_MAX);
+	unsigned short read_val;
 
 	/******************************************************************
 	 * TEST CASE 1:
@@ -377,9 +378,9 @@
 {
 	struct granule *granule;
 	unsigned long addr = get_rand_granule_addr();
-	unsigned long val =
-		(unsigned long)test_helpers_get_rand_in_range(10UL, 10000UL);
-	unsigned long read_val;
+	unsigned short val =
+		(unsigned short)test_helpers_get_rand_in_range(10UL, USHRT_MAX);
+	unsigned short read_val;
 
 	/******************************************************************
 	 * TEST CASE 1:
@@ -698,10 +699,10 @@
 	g1 = NULL;
 	g2 = NULL;
 
-	for (unsigned int state1 = GRANULE_STATE_NS;
+	for (unsigned char state1 = GRANULE_STATE_NS;
 	     state1 <= GRANULE_STATE_LAST; state1++) {
 
-		for (unsigned int state2 = GRANULE_STATE_NS;
+		for (unsigned char state2 = GRANULE_STATE_NS;
 		     state2 <= GRANULE_STATE_LAST; state2++) {
 			if (state1 == GRANULE_STATE_NS &&
 			    state2 == GRANULE_STATE_NS) {
@@ -711,9 +712,9 @@
 				 */
 				continue;
 			}
-			retval = find_lock_two_granules(addr1,
-					(enum granule_state)state1, &g1,
-					addr2, (enum granule_state)state2, &g2);
+			retval = find_lock_two_granules(
+					addr1, state1, &g1,
+					addr2, state2, &g2);
 
 			CHECK_FALSE(retval);
 
@@ -823,10 +824,9 @@
 	 * granules in between.
 	 ***************************************************************/
 	for (unsigned int i = 0U; i < 3U; i++) {
-		for (unsigned int state = GRANULE_STATE_NS + 1U;
+		for (unsigned char state = GRANULE_STATE_NS + 1U;
 		     state <= GRANULE_STATE_LAST; state++) {
-			granule = find_lock_granule(addrs[i],
-						    (enum granule_state)state);
+			granule = find_lock_granule(addrs[i], state);
 			POINTERS_EQUAL(NULL, granule);
 		}
 	}
@@ -845,10 +845,9 @@
 	 ***************************************************************/
 	addr = get_rand_granule_addr();
 	addr += test_helpers_get_rand_in_range(1UL, GRANULE_SIZE - 1);
-	for (unsigned int state = GRANULE_STATE_NS;
+	for (unsigned char state = GRANULE_STATE_NS;
 	     state <= GRANULE_STATE_LAST; state++) {
-		granule = find_lock_granule(addr,
-					    (enum granule_state)state);
+		granule = find_lock_granule(addr, state);
 		POINTERS_EQUAL(NULL, granule);
 	}
 }
@@ -866,17 +865,14 @@
 	 ***************************************************************/
 	(void)get_out_of_range_granule(&addr, true);
 
-	for (unsigned int state = GRANULE_STATE_NS;
+	for (unsigned char state = GRANULE_STATE_NS;
 	     state <= GRANULE_STATE_LAST; state++) {
-		granule = find_lock_granule(addr,
-					    (enum granule_state)state);
+		granule = find_lock_granule(addr, state);
 		POINTERS_EQUAL(NULL, granule);
 
 		/* Try the lower boundary as well */
 		if (get_out_of_range_granule(&addr, false) == true) {
-
-			granule = find_lock_granule(addr,
-						    (enum granule_state)state);
+			granule = find_lock_granule(addr, state);
 			POINTERS_EQUAL(NULL, granule);
 		}
 	}
@@ -903,17 +899,17 @@
 	for (unsigned int i = 0U; i < 3U; i++) {
 		granule = addr_to_granule(addrs[i]);
 
-		for (unsigned int state = GRANULE_STATE_NS;
+		for (unsigned char state = GRANULE_STATE_NS;
 		     state <= GRANULE_STATE_LAST; state++) {
 
 			/* Ensure the granule is unlocked */
 			granule_unlock(granule);
 
 			/* Set the granule state */
-			granule_set_state(granule, (enum granule_state)state);
+			granule_set_state(granule, state);
 
 			/* Lock the granule */
-			granule_lock(granule, (enum granule_state)state);
+			granule_lock(granule, state);
 			CHECK_FALSE(granule->lock.val == 0);
 		}
 	}
@@ -925,7 +921,7 @@
 	 * with invalid granules.
 	 *
 	 * In addition to that, granule_lock() also expects that the expected
-	 * state belongs to enum granule_state so it doesn't perform any checks
+	 * state belongs to the defined values so it doesn't perform any checks
 	 * on that either.
 	 */
 }
@@ -933,7 +929,7 @@
 ASSERT_TEST(granule, granule_lock_TC2)
 {
 	struct granule *granule;
-	unsigned int state, expected;
+	unsigned char state, expected;
 	unsigned long addr = (get_rand_granule_idx() * GRANULE_SIZE) +
 					host_util_get_granule_base();
 
@@ -946,10 +942,10 @@
 
 	granule = addr_to_granule(addr);
 	do {
-		state = (unsigned int)test_helpers_get_rand_in_range(
+		state = (unsigned char)test_helpers_get_rand_in_range(
 					(unsigned long)GRANULE_STATE_NS,
 					(unsigned long)GRANULE_STATE_LAST);
-		expected = (unsigned int)test_helpers_get_rand_in_range(
+		expected = (unsigned char)test_helpers_get_rand_in_range(
 					(unsigned long)GRANULE_STATE_NS,
 					(unsigned long)GRANULE_STATE_LAST);
 	} while (state == expected);
@@ -958,11 +954,11 @@
 	granule_unlock(granule);
 
 	/* Set the granule state */
-	granule_set_state(granule, (enum granule_state)state);
+	granule_set_state(granule, state);
 
 	test_helpers_expect_assert_fail(true);
 	/* Lock the granule */
-	granule_lock(granule, (enum granule_state)expected);
+	granule_lock(granule, expected);
 	test_helpers_fail_if_no_assert_failed();
 }
 
@@ -987,7 +983,7 @@
 	for (unsigned int i = 0U; i < 3U; i++) {
 		granule = addr_to_granule(addrs[i]);
 
-		for (unsigned int state = GRANULE_STATE_NS;
+		for (unsigned char state = GRANULE_STATE_NS;
 		     state <= GRANULE_STATE_LAST; state++) {
 			bool retval;
 
@@ -995,11 +991,10 @@
 			granule_unlock(granule);
 
 			/* Set the granule state */
-			granule_set_state(granule, (enum granule_state)state);
+			granule_set_state(granule, state);
 
 			/* Lock the granule */
-			retval = granule_lock_on_state_match(granule,
-						(enum granule_state)state);
+			retval = granule_lock_on_state_match(granule, state);
 			CHECK(retval);
 			CHECK_FALSE(granule->lock.val == 0);
 		}
@@ -1028,12 +1023,12 @@
 	for (unsigned int i = 0U; i < 3U; i++) {
 		granule = addr_to_granule(addrs[i]);
 
-		for (unsigned int state = GRANULE_STATE_NS;
+		for (unsigned char state = GRANULE_STATE_NS;
 		     state <= GRANULE_STATE_LAST; state++) {
 			/* Set the granule state */
-			granule_set_state(granule, (enum granule_state)state);
+			granule_set_state(granule, state);
 
-			for (unsigned int lock_state = GRANULE_STATE_NS;
+			for (unsigned char lock_state = GRANULE_STATE_NS;
 			     lock_state <= GRANULE_STATE_LAST; lock_state++) {
 				bool retval;
 
@@ -1047,7 +1042,7 @@
 
 				/* Lock the granule */
 				retval = granule_lock_on_state_match(granule,
-					(enum granule_state)lock_state);
+								lock_state);
 				CHECK_FALSE(retval);
 				CHECK_EQUAL(0, granule->lock.val);
 			}
@@ -1061,7 +1056,7 @@
 	 * with invalid granules.
 	 *
 	 * Likewise, it also expects that the next state belongs to
-	 * enum granule_state, so it doesn't perform any checks on that either.
+	 * the defined values, so it doesn't perform any checks on that either.
 	 */
 }
 
@@ -1084,19 +1079,17 @@
 	 * granules in between.
 	 ***************************************************************/
 	for (unsigned int i = 0U; i < 3U; i++) {
-		for (unsigned int state = GRANULE_STATE_NS;
+		for (unsigned char state = GRANULE_STATE_NS;
 		     state <= GRANULE_STATE_LAST;
 		     state++) {
-			unsigned int next_state = (state + 1) %
+			unsigned char next_state = (state + 1) %
 						((int)GRANULE_STATE_LAST + 1);
 
 			/* Find and lock a granule */
-			granule = find_lock_granule(addrs[i],
-						    (enum granule_state)state);
+			granule = find_lock_granule(addrs[i], state);
 
 			/* Change the granule state */
-			granule_set_state(granule,
-					  (enum granule_state)next_state);
+			granule_set_state(granule, next_state);
 
 			/* Check that the state is correct */
 			CHECK_EQUAL(next_state, granule_get_state(granule));
@@ -1119,7 +1112,7 @@
 	 * with invalid granules.
 	 *
 	 * Likewise, it also expects that the next state belongs to
-	 * enum granule_state, so it doesn't perform any checks on that either.
+	 * the defined values, so it doesn't perform any checks on that either.
 	 */
 }
 
@@ -1143,7 +1136,7 @@
 	 * granules in between.
 	 ***************************************************************/
 	for (unsigned int i = 0U; i < 3U; i++) {
-		for (unsigned int state = GRANULE_STATE_NS;
+		for (unsigned char state = GRANULE_STATE_NS;
 		     state <= GRANULE_STATE_LAST;
 		     state++) {
 
@@ -1151,7 +1144,7 @@
 			granule = find_lock_granule(addrs[i], GRANULE_STATE_NS);
 
 			/* Change the state of the granule */
-			granule_set_state(granule, (enum granule_state)state);
+			granule_set_state(granule, state);
 
 			/* Unlock the granule */
 			granule_unlock(granule);
@@ -1200,19 +1193,17 @@
 	 * granules in between.
 	 ***************************************************************/
 	for (unsigned int i = 0U; i < 3U; i++) {
-		for (unsigned int state = GRANULE_STATE_NS;
+		for (unsigned char state = GRANULE_STATE_NS;
 		     state <= GRANULE_STATE_LAST;
 		     state++) {
-			unsigned int next_state = (state + 1) %
+			unsigned char next_state = (state + 1) %
 						((int)GRANULE_STATE_LAST + 1);
 
 			/* Find and lock a granule */
-			granule = find_lock_granule(addrs[i],
-						    (enum granule_state)state);
+			granule = find_lock_granule(addrs[i], state);
 
 			/* Unlock the granule changing its state */
-			granule_unlock_transition(granule,
-					(enum granule_state)next_state);
+			granule_unlock_transition(granule, next_state);
 
 			/* Check that the state is correct */
 			CHECK_EQUAL(next_state, granule_get_state(granule));
@@ -1227,7 +1218,7 @@
 	 * with invalid granules.
 	 *
 	 * Likewise, it also expects that the next state belongs to
-	 * enum granule_state, so it doesn't perform any checks on that either.
+	 * the defined values, so it doesn't perform any checks on that either.
 	 */
 }
 
@@ -1319,7 +1310,7 @@
 {
 	unsigned long address = get_rand_granule_addr();
 	struct granule *granule = find_granule(address);
-	unsigned long val = test_helpers_get_rand_in_range(1UL, INT_MAX);
+	unsigned short val = (unsigned short)test_helpers_get_rand_in_range(1UL, USHRT_MAX);
 
 	unsigned int lock = set_rand_non_zero_lock_value(granule);
 
@@ -1389,9 +1380,9 @@
 	 * The refcount before the test starts is expected to be 0.
 	 ******************************************************************/
 	__granule_refcount_inc(granule, val);
-	__granule_refcount_dec(granule, val - 1UL);
+	__granule_refcount_dec(granule, val - 1U);
 
-	LONGS_EQUAL(1, granule->refcount);
+	SHORTS_EQUAL(1, granule->refcount);
 
 	/* Verify that not other parameters of the granule are altered */
 	CHECK_EQUAL(0, granule->state);
@@ -1407,7 +1398,7 @@
 {
 	unsigned long address = get_rand_granule_addr();
 	struct granule *granule = find_granule(address);
-	unsigned long val = test_helpers_get_rand_in_range(10UL, INT_MAX - 1U);
+	unsigned short val = (unsigned short)test_helpers_get_rand_in_range(1U, USHRT_MAX - 1U);
 
 	set_rand_non_zero_lock_value(granule);
 
@@ -1438,7 +1429,7 @@
 	 ******************************************************************/
 	atomic_granule_get(granule);
 
-	LONGS_EQUAL(1, granule->refcount);
+	SHORTS_EQUAL(1, granule->refcount);
 
 	/* Verify that not other parameters of the granule are altered */
 	CHECK_EQUAL(0, granule->state);
@@ -1466,7 +1457,7 @@
 	atomic_granule_get(granule);
 	atomic_granule_put(granule);
 
-	LONGS_EQUAL(0, granule->refcount);
+	SHORTS_EQUAL(0, granule->refcount);
 
 	/* Verify that not other parameters of the granule are altered */
 	CHECK_EQUAL(0, granule->state);
@@ -1494,7 +1485,7 @@
 	}
 	atomic_granule_put(granule);
 
-	LONGS_EQUAL((get_count - 1UL), granule->refcount);
+	SHORTS_EQUAL((get_count - 1U), granule->refcount);
 
 	/* Verify that not other parameters of the granule are altered */
 	CHECK_EQUAL(0, granule->state);
@@ -1522,7 +1513,7 @@
 	atomic_granule_get(granule);
 	atomic_granule_put_release(granule);
 
-	LONGS_EQUAL(0, granule->refcount);
+	SHORTS_EQUAL(0, granule->refcount);
 
 	/* Verify that not other parameters of the granule are altered */
 	CHECK_EQUAL(0, granule->state);
@@ -1544,13 +1535,13 @@
 	 *
 	 * The refcount before the test starts is expected to be 0.
 	 ******************************************************************/
-	get_count = (unsigned int)test_helpers_get_rand_in_range(10UL, 1000UL);
+	get_count = (unsigned short)test_helpers_get_rand_in_range(10UL, 1000UL);
 	for (unsigned int i = 0; i < get_count; i++) {
 		atomic_granule_get(granule);
 	}
 	atomic_granule_put_release(granule);
 
-	LONGS_EQUAL((get_count - 1UL), granule->refcount);
+	SHORTS_EQUAL((get_count - 1L), granule->refcount);
 
 	/* Verify that not other parameters of the granule are altered */
 	CHECK_EQUAL(0, granule->state);
@@ -1663,7 +1654,7 @@
 		 */
 		granule_set_state(granule, GRANULE_STATE_RD);
 
-		for (unsigned int state = GRANULE_STATE_NS;
+		for (unsigned char state = GRANULE_STATE_NS;
 			state <= GRANULE_STATE_LAST; state++) {
 			if (state == GRANULE_STATE_RD) {
 				/* Skip as the state is the correct one */
@@ -1671,7 +1662,7 @@
 			}
 
 			ret = find_lock_unused_granule(addrs[i],
-						(enum granule_state)state,
+						state,
 						&granule);
 
 			CHECK_TRUE(ret == -EINVAL);
diff --git a/plat/host/common/src/host_harness_cmn.c b/plat/host/common/src/host_harness_cmn.c
index 121664e..e67e3aa 100644
--- a/plat/host/common/src/host_harness_cmn.c
+++ b/plat/host/common/src/host_harness_cmn.c
@@ -224,7 +224,6 @@
 	 * sequence.
 	 */
 	assert(l->val == 0);
-
 	l->val = 1;
 }
 
@@ -233,6 +232,17 @@
 	l->val = 0;
 }
 
+void host_byte_spinlock_acquire(byte_spinlock_t *l)
+{
+	assert(l->val == 0);
+	l->val = 1;
+}
+
+void host_byte_spinlock_release(byte_spinlock_t *l)
+{
+	l->val = 0;
+}
+
 u_register_t host_read_sysreg(char *reg_name)
 {
 	struct sysreg_cb *callbacks = host_util_get_sysreg_cb(reg_name);
diff --git a/plat/host/host_cbmc/include/tb_granules.h b/plat/host/host_cbmc/include/tb_granules.h
index a1971fa..4dabfce 100644
--- a/plat/host/host_cbmc/include/tb_granules.h
+++ b/plat/host/host_cbmc/include/tb_granules.h
@@ -36,7 +36,7 @@
 
 struct SPEC_granule {
 	enum granule_gpt gpt;
-	enum granule_state state;
+	unsigned char state;
 };
 
 /*
diff --git a/plat/host/host_cbmc/src/tb_granules.c b/plat/host/host_cbmc/src/tb_granules.c
index a7281cd..f68de36 100644
--- a/plat/host/host_cbmc/src/tb_granules.c
+++ b/plat/host/host_cbmc/src/tb_granules.c
@@ -15,7 +15,7 @@
 #include "tb_granules.h"
 
 /* Chooses an arbitrary granule state. */
-bool valid_granule_state(enum granule_state value)
+bool valid_granule_state(unsigned char value)
 {
 	return value == GRANULE_STATE_NS
 			|| value == GRANULE_STATE_DELEGATED
@@ -56,7 +56,8 @@
 {
 	struct granule rst = nondet_struct_granule();
 
-	__CPROVER_assume(__CPROVER_enum_is_in_range(rst.state));
+	__CPROVER_assume((rst.state >= GRANULE_STATE_NS) &&
+			 (rst.state <= GRANULE_STATE_LAST));
 	__CPROVER_assume(valid_granule(rst));
 	return rst;
 }
@@ -87,7 +88,8 @@
 {
 	if (!valid_pa(addr)) {
 		struct SPEC_granule nd_granule = nondet_struct_SPEC_granule();
-		__CPROVER_assume(__CPROVER_enum_is_in_range(nd_granule.state));
+		__CPROVER_assume((nd_granule.state >= GRANULE_STATE_NS) &&
+				 (nd_granule.state <= GRANULE_STATE_LAST));
 		__CPROVER_assume(__CPROVER_enum_is_in_range(nd_granule.gpt));
 		return nd_granule;
 	}
diff --git a/runtime/core/init.c b/runtime/core/init.c
index 69783e9..fe22b5b 100644
--- a/runtime/core/init.c
+++ b/runtime/core/init.c
@@ -13,7 +13,6 @@
 #include <smc-rmi.h>
 #include <smc-rsi.h>
 
-
 #ifdef NDEBUG
 #define RMM_BUILD_TYPE	"release"
 #else
diff --git a/runtime/rmi/granule.c b/runtime/rmi/granule.c
index 701c5cf..c115fd6 100644
--- a/runtime/rmi/granule.c
+++ b/runtime/rmi/granule.c
@@ -10,6 +10,11 @@
 #include <smc-rmi.h>
 #include <smc.h>
 
+COMPILER_ASSERT(sizeof(struct granule) == (
+		SIZE_OF(granule, lock) +
+		SIZE_OF(granule, state) +
+		SIZE_OF(granule, refcount)));
+
 unsigned long smc_granule_delegate(unsigned long addr)
 {
 	struct granule *g;
diff --git a/runtime/rmi/rec.c b/runtime/rmi/rec.c
index fe79afb..3775670 100644
--- a/runtime/rmi/rec.c
+++ b/runtime/rmi/rec.c
@@ -256,7 +256,7 @@
 	struct rd *rd;
 	struct rmi_rec_params rec_params;
 	unsigned long rec_idx;
-	enum granule_state new_rec_state = GRANULE_STATE_DELEGATED;
+	unsigned char new_rec_state = GRANULE_STATE_DELEGATED;
 	unsigned long ret;
 	bool ns_access_ok;
 	unsigned int num_rec_aux;
@@ -484,7 +484,7 @@
 	 * members of REC structure (such as rec->running) only if the counter
 	 * is zero.
 	 */
-	if (granule_refcount_read_acquire(g_calling_rec) != 0UL) {
+	if (granule_refcount_read_acquire(g_calling_rec) != 0U) {
 		/*
 		 * The `calling` REC is running on another PE and therefore it
 		 * may not have a pending PSCI request.
diff --git a/runtime/rmi/rtt.c b/runtime/rmi/rtt.c
index ab78920..9abfd7d 100644
--- a/runtime/rmi/rtt.c
+++ b/runtime/rmi/rtt.c
@@ -174,7 +174,7 @@
 		 * Increase the refcount to mark the granule as in-use. refcount
 		 * is incremented by S2TTES_PER_S2TT (ref RTT unfolding).
 		 */
-		__granule_refcount_inc(g_tbl, S2TTES_PER_S2TT);
+		__granule_refcount_inc(g_tbl, (unsigned short)S2TTES_PER_S2TT);
 
 	} else if (s2tte_is_assigned_empty(&s2_ctx, parent_s2tte, level - 1L)) {
 		unsigned long block_pa;
@@ -193,7 +193,7 @@
 		 * Increase the refcount to mark the granule as in-use. refcount
 		 * is incremented by S2TTES_PER_S2TT (ref RTT unfolding).
 		 */
-		__granule_refcount_inc(g_tbl, S2TTES_PER_S2TT);
+		__granule_refcount_inc(g_tbl, (unsigned short)S2TTES_PER_S2TT);
 
 	} else if (s2tte_is_assigned_ram(&s2_ctx, parent_s2tte, level - 1L)) {
 		unsigned long block_pa;
@@ -218,7 +218,7 @@
 		 * Increase the refcount to mark the granule as in-use. refcount
 		 * is incremented by S2TTES_PER_S2TT (ref RTT unfolding).
 		 */
-		__granule_refcount_inc(g_tbl, S2TTES_PER_S2TT);
+		__granule_refcount_inc(g_tbl, (unsigned short)S2TTES_PER_S2TT);
 
 	} else if (s2tte_is_assigned_ns(&s2_ctx, parent_s2tte, level - 1L)) {
 		unsigned long block_pa;
@@ -371,7 +371,7 @@
 			goto out_unmap_table;
 		}
 		__granule_put(wi.g_llt);
-	} else if (g_tbl->refcount == S2TTES_PER_S2TT) {
+	} else if (g_tbl->refcount == (unsigned short)S2TTES_PER_S2TT) {
 
 		unsigned long s2tte, block_pa;
 
@@ -416,7 +416,7 @@
 			goto out_unmap_table;
 		}
 
-		__granule_refcount_dec(g_tbl, S2TTES_PER_S2TT);
+		__granule_refcount_dec(g_tbl, (unsigned short)S2TTES_PER_S2TT);
 	} else {
 		/*
 		 * The table holds a mixture of different types of s2ttes.
@@ -871,7 +871,7 @@
 	struct s2tt_walk wi;
 	struct s2tt_context *s2_ctx;
 	unsigned long s2tte, *s2tt;
-	enum granule_state new_data_state = GRANULE_STATE_DELEGATED;
+	unsigned char new_data_state = GRANULE_STATE_DELEGATED;
 	unsigned long ret;
 
 	if (!find_lock_two_granules(data_addr,
@@ -1365,7 +1365,7 @@
 		return;
 	}
 
-	if (granule_refcount_read_acquire(g_rec) != 0UL) {
+	if (granule_refcount_read_acquire(g_rec) != 0U) {
 		res->x[0] = RMI_ERROR_REC;
 		goto out_unlock_rec_rd;
 	}
diff --git a/runtime/rsi/psci.c b/runtime/rsi/psci.c
index 1b42da1..fa2401a 100644
--- a/runtime/rsi/psci.c
+++ b/runtime/rsi/psci.c
@@ -335,7 +335,7 @@
 					  unsigned long caller_sctlr_el1,
 					  unsigned long status)
 {
-	if ((granule_refcount_read_acquire(target_rec->g_rec) != 0UL) ||
+	if ((granule_refcount_read_acquire(target_rec->g_rec) != 0U) ||
 		target_rec->runnable) {
 		return PSCI_RETURN_ALREADY_ON;
 	}
@@ -357,7 +357,7 @@
 
 static unsigned long complete_psci_affinity_info(struct rec *target_rec)
 {
-	if ((granule_refcount_read_acquire(target_rec->g_rec) != 0UL) ||
+	if ((granule_refcount_read_acquire(target_rec->g_rec) != 0U) ||
 		target_rec->runnable) {
 		return PSCI_AFFINITY_INFO_ON;
 	}