feat(lib/realm): modify struct granule

This patch makes the following changes:
- a new 8-bit 'byte_spinlock_t' type with an 'unsigned char' val
  is added alongside the 32-bit 'spinlock_t' and is used for the
  'struct granule' lock;
- 'enum granule_state' is removed and replaced with
  macro definitions;
- the type of the 'struct granule' state field is changed to
  unsigned char;
- the type of the 'struct granule' refcount field is changed from
  unsigned long to unsigned short, as its maximum value cannot
  exceed 512;
- the 'refcount' access functions are modified to use 16-bit values.
These modifications reduce the size of 'struct granule' from
16 to 4 bytes and the total size of the 'granules[]' array from
16MB to 4MB for the default RMM_MAX_GRANULES = 0x100000, as
sketched below.
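
A minimal sketch of the resulting layout, assuming the field names
and order implied by the list above (the actual definition lives
under lib/realm and is not part of the hunks below; the size check
uses the project's COMPILER_ASSERT):

  struct granule {
          byte_spinlock_t lock;     /* 1 byte */
          unsigned char state;      /* 1 byte, GRANULE_STATE_* values */
          unsigned short refcount;  /* 2 bytes, maximum value 512 */
  };
  COMPILER_ASSERT(sizeof(struct granule) == 4U);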

Change-Id: I611ed3f349806b033e03c52c5f60a0f903f93e11
Signed-off-by: AlexeiFedorov <Alexei.Fedorov@arm.com>
---
diff --git a/lib/arch/include/aarch64/atomics.h b/lib/arch/include/aarch64/atomics.h
index 8c80503..b68c761 100644
--- a/lib/arch/include/aarch64/atomics.h
+++ b/lib/arch/include/aarch64/atomics.h
@@ -12,7 +12,7 @@
 /*
  * Atomically adds @val to the 64-bit value stored at memory location @loc.
  */
-static inline void atomic_add_64(uint64_t *loc, long val)
+static inline void atomic_add_64(uint64_t *loc, uint64_t val)
 {
 	asm volatile(
 	"	stadd %[val], %[loc]\n"
@@ -26,9 +26,9 @@
  * Stores to memory with release semantics.
  * Returns the old value.
  */
-static inline unsigned long atomic_load_add_release_64(uint64_t *loc, long val)
+static inline uint64_t atomic_load_add_release_64(uint64_t *loc, uint64_t val)
 {
-	unsigned long old_val;
+	uint64_t old_val;
 
 	asm volatile(
 	"	ldaddl %[val], %[old_val], %[loc]\n"
@@ -41,6 +41,37 @@
 }
 
 /*
+ * Atomically adds @val to the 16-bit value stored at memory location @loc.
+ */
+static inline void atomic_add_16(uint16_t *loc, uint16_t val)
+{
+	asm volatile(
+	"	staddh %w[val], %[loc]\n"
+	: [loc] "+Q" (*loc)
+	: [val] "r" (val)
+	: "memory");
+}
+
+/*
+ * Atomically adds @val to the 16-bit value stored at memory location @loc.
+ * Stores to memory with release semantics.
+ * Returns the old value.
+ */
+static inline uint16_t atomic_load_add_release_16(uint16_t *loc, uint16_t val)
+{
+	uint16_t old_val;
+
+	asm volatile(
+	"	ldaddlh %w[val], %w[old_val], %[loc]\n"
+	: [loc] "+Q" (*loc),
+	  [old_val] "=r" (old_val)
+	: [val] "r" (val)
+	: "memory");
+
+	return old_val;
+}
+
+/*
  * Atomically set bit @bit in value pointed to by @loc with release semantics.
  */
 static inline void atomic_bit_set_release_64(uint64_t *loc, unsigned int bit)
diff --git a/lib/arch/include/aarch64/memory.h b/lib/arch/include/aarch64/memory.h
index 34fe271..2de3b03 100644
--- a/lib/arch/include/aarch64/memory.h
+++ b/lib/arch/include/aarch64/memory.h
@@ -71,4 +71,40 @@
 }
 #define SCA_READ64_ACQUIRE(_p) ((typeof(*(_p)))__sca_read64_acquire((void *)(_p)))
 
+/* Single-Copy Atomic 16-bit read */
+static inline uint16_t __sca_read16(uint16_t *ptr)
+{
+	uint16_t val;
+
+	/* To avoid misra-c2012-2.7 warnings */
+	(void)ptr;
+
+	asm volatile(
+	"	ldrh	%w[val], %[ptr]\n"
+	: [val] "=r" (val)
+	: [ptr] "m" (*ptr)
+	);
+
+	return val;
+}
+#define SCA_READ16(_p) ((typeof(*(_p)))__sca_read16((void *)(_p)))
+
+/* Single-Copy Atomic 16-bit read with ACQUIRE memory ordering semantics */
+static inline uint16_t __sca_read16_acquire(uint16_t *ptr)
+{
+	uint16_t val;
+
+	/* To avoid misra-c2012-2.7 warnings */
+	(void)ptr;
+
+	asm volatile(
+	"	ldarh	%w[val], %[ptr]\n"
+	: [val] "=r" (val)
+	: [ptr] "Q" (*ptr)
+	);
+
+	return val;
+}
+#define SCA_READ16_ACQUIRE(_p) ((typeof(*(_p)))__sca_read16_acquire((void *)(_p)))
+
 #endif /* MEMORY_H */
diff --git a/lib/arch/include/aarch64/spinlock.h b/lib/arch/include/aarch64/spinlock.h
index 37cab6e..4e2a0cd 100644
--- a/lib/arch/include/aarch64/spinlock.h
+++ b/lib/arch/include/aarch64/spinlock.h
@@ -7,9 +7,10 @@
 #define SPINLOCK_H
 
 /*
- * A trivial spinlock implementation, per ARM DDI 0487D.a, section K11.3.4.
+ * Trivial spinlock implementations, per ARM DDI 0487J.a, section K13.3.1.
  */
 
+/* 32-bit spinlock */
 typedef struct {
 	unsigned int val;
 } spinlock_t;
@@ -44,4 +45,45 @@
 	);
 }
 
+/* 8-bit spinlock */
+typedef struct {
+	unsigned char val;
+} byte_spinlock_t;
+
+static inline void byte_spinlock_acquire(byte_spinlock_t *l)
+{
+	unsigned int tmp;
+
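+	/*
+	 * Spin with WFE until the exclusive load (LDAXRB, which also
+	 * gives the lock ACQUIRE semantics) reads 0, then try to claim
+	 * the lock with an exclusive store of 1, looping on failure.
+	 */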
+	asm volatile(
+	"	sevl\n"
+	"	prfm	pstl1keep, %[lock]\n"
+	"1:\n"
+	"	wfe\n"
+	"	ldaxrb	%w[tmp], %[lock]\n"
+	"	cbnz	%w[tmp], 1b\n"
+	"	stxrb	%w[tmp], %w[one], %[lock]\n"
+	"	cbnz	%w[tmp], 1b\n"
+	: [lock] "+Q" (l->val),
+	  [tmp] "=&r" (tmp)
+	: [one] "r" (1)
+	: "memory"
+	);
+}
+
+static inline void byte_spinlock_release(byte_spinlock_t *l)
+{
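+	/* Unlock with a store-release of zero (STLRB). */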
+	asm volatile(
+	"	stlrb	wzr, %[lock]\n"
+	: [lock] "+Q" (l->val)
+	:
+	: "memory"
+	);
+}
+
 #endif /* SPINLOCK_H */
diff --git a/lib/arch/include/fake_host/atomics.h b/lib/arch/include/fake_host/atomics.h
index fd555e3..b923e38 100644
--- a/lib/arch/include/fake_host/atomics.h
+++ b/lib/arch/include/fake_host/atomics.h
@@ -12,7 +12,7 @@
 /*
  * Atomically adds @val to the 64-bit value stored at memory location @loc.
  */
-static inline void atomic_add_64(uint64_t *loc, long val)
+static inline void atomic_add_64(uint64_t *loc, uint64_t val)
 {
 	*loc = *loc + val;
 }
@@ -22,9 +22,30 @@
  * Stores to memory with release semantics.
  * Returns the old value.
  */
-static inline unsigned long atomic_load_add_release_64(uint64_t *loc, long val)
+static inline uint64_t atomic_load_add_release_64(uint64_t *loc, uint64_t val)
 {
-	unsigned long old_val = *loc;
+	uint64_t old_val = *loc;
+
+	*loc = *loc + val;
+	return old_val;
+}
+
+/*
+ * Atomically adds @val to the 16-bit value stored at memory location @loc.
+ */
+static inline void atomic_add_16(uint16_t *loc, uint16_t val)
+{
+	*loc = *loc + val;
+}
+
+/*
+ * Atomically adds @val to the 16-bit value stored at memory location @loc.
+ * Stores to memory with release semantics.
+ * Returns the old value.
+ */
+static inline uint16_t atomic_load_add_release_16(uint16_t *loc, uint16_t val)
+{
+	uint16_t old_val = *loc;
 
 	*loc = *loc + val;
 	return old_val;
@@ -69,7 +90,7 @@
 static inline bool atomic_bit_set_acquire_release_64(uint64_t *loc, unsigned int bit)
 {
 	uint64_t mask = (1UL << bit);
-	unsigned long old_val = *loc & mask;
+	uint64_t old_val = *loc & mask;
 
 	*loc |= mask;
 	return (old_val != 0UL);
diff --git a/lib/arch/include/fake_host/memory.h b/lib/arch/include/fake_host/memory.h
index 3f182a4..645f5d2 100644
--- a/lib/arch/include/fake_host/memory.h
+++ b/lib/arch/include/fake_host/memory.h
@@ -37,4 +37,18 @@
 }
 #define SCA_READ64_ACQUIRE(_p) ((typeof(*(_p)))__sca_read64_acquire((uint64_t *)(_p)))
 
+/* Single-Copy Atomic 16-bit read */
+static inline uint16_t __sca_read16(uint16_t *ptr)
+{
+	return *ptr;
+}
+#define SCA_READ16(_p) ((typeof(*(_p)))__sca_read16((uint16_t *)(_p)))
+
+/* Single-Copy Atomic 16-bit read with ACQUIRE memory ordering semantics */
+static inline uint16_t __sca_read16_acquire(uint16_t *ptr)
+{
+	return *ptr;
+}
+#define SCA_READ16_ACQUIRE(_p) ((typeof(*(_p)))__sca_read16_acquire((uint16_t *)(_p)))
+
 #endif /* MEMORY_H */
diff --git a/lib/arch/include/fake_host/spinlock.h b/lib/arch/include/fake_host/spinlock.h
index 7aa4b29..e279825 100644
--- a/lib/arch/include/fake_host/spinlock.h
+++ b/lib/arch/include/fake_host/spinlock.h
@@ -22,4 +22,18 @@
 	host_spinlock_release(l);
 }
 
+typedef struct byte_spinlock_s {
+	unsigned char val;
+} byte_spinlock_t;
+
+static inline void byte_spinlock_acquire(byte_spinlock_t *l)
+{
+	host_byte_spinlock_acquire(l);
+}
+
+static inline void byte_spinlock_release(byte_spinlock_t *l)
+{
+	host_byte_spinlock_release(l);
+}
+
 #endif /* SPINLOCK_H */
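
A hedged usage sketch follows (illustrative only, not part of this
patch: the helper names and the 'struct granule' fields are assumed
from the commit message), showing how the new 16-bit and 8-bit
primitives are intended to pair up:

  /* Take and drop granule references with the 16-bit atomics */
  static inline void granule_refcount_inc(struct granule *g, uint16_t val)
  {
          atomic_add_16(&g->refcount, val);
  }

  static inline void granule_refcount_dec(struct granule *g, uint16_t val)
  {
          /* Adding the two's complement decrements modulo 2^16 */
          atomic_add_16(&g->refcount, (uint16_t)(-(int)val));
  }

  /* Single-copy atomic read of the refcount with ACQUIRE semantics */
  static inline uint16_t granule_refcount_read(struct granule *g)
  {
          return SCA_READ16_ACQUIRE(&g->refcount);
  }

  /* Serialise granule state updates with the 8-bit lock */
  static inline void granule_lock(struct granule *g)
  {
          byte_spinlock_acquire(&g->lock);
  }

  static inline void granule_unlock(struct granule *g)
  {
          byte_spinlock_release(&g->lock);
  }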