Refactor aarch64 barriers and TLBI commands

Use macros instead of function calls so the barriers do not depend on LTO
for inlining. Make the macros take the op-kind argument so that a single
definition covers every shareability domain and access type.
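
For example, call sites can now select the shareability domain and access
type directly; an illustrative caller (not taken from the tree):

	dmb(ishst);	/* inner shareable, stores only: emits "dmb ishst" */
	dsb(sy);	/* full system, all accesses: emits "dsb sy" */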

Test: ./kokoro/ubuntu/build.sh
Change-Id: I8a5553d47cf3a0965fbf35d93c3c925f5f02ac4e
diff --git a/inc/hf/arch/barriers.h b/inc/hf/arch/barriers.h
index 1ff43e6..85d4e7d 100644
--- a/inc/hf/arch/barriers.h
+++ b/inc/hf/arch/barriers.h
@@ -16,20 +16,55 @@
 
 #pragma once
 
-/**
- * Ensures all explicit memory accesses before this point are completed before
- * any later memory accesses are performed.
- */
-void dmb(void);
+/** AArch64-specific API */
 
 /**
- * Ensures all explicit memory access and management instructions have completed
- * before continuing.
+ * Ensures explicit memory accesses before this point are completed before any
+ * later memory accesses are performed. The instruction argument specifies:
+ *   - the shareability domain over which the instruction must operate,
+ *   - the types of access to which it applies.
  */
-void dsb(void);
+#define dmb(arg)                               \
+	do {                                   \
+		__asm__ volatile("dmb " #arg); \
+	} while (0)
+
+/**
+ * Ensures explicit memory access and management instructions have completed
+ * before continuing. The instruction argument specifies:
+ *   - the shareability domain over which the instruction must operate,
+ *   - the types of access to which it applies.
+ */
+#define dsb(arg)                               \
+	do {                                   \
+		__asm__ volatile("dsb " #arg); \
+	} while (0)
 
 /**
  * Flushes the instruction pipeline so that instructions are fetched from
  * memory.
  */
-void isb(void);
+#define isb()                            \
+	do {                             \
+		__asm__ volatile("isb"); \
+	} while (0)
+
+/** Platform-agnostic API */
+
+/**
+ * Ensures all explicit memory accesses before this point are completed before
+ * any later memory accesses are performed.
+ */
+#define memory_ordering_barrier() dmb(sy)
+
+/**
+ * Ensures all explicit memory access and management instructions have completed
+ * before continuing.
+ */
+#define data_sync_barrier() dsb(sy)
+
+/**
+ * Flushes the instruction pipeline so that instructions are fetched from
+ * memory.
+ */
+#define insn_sync_barrier() isb()
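
The do { ... } while (0) wrapper keeps each macro a single statement, so
call sites compose the same way the old function calls did, including in
unbraced if/else; a minimal sketch with a hypothetical flag (not from the
tree):

	if (published)
		data_sync_barrier();	/* do { __asm__ volatile("dsb sy"); } while (0); */
	else
		insn_sync_barrier();

The #arg stringization pastes the op kind straight into the mnemonic, so
the generated instruction carries no call overhead and needs no LTO.
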
diff --git a/inc/hf/io.h b/inc/hf/io.h
index fbafaf8..f4a80a4 100644
--- a/inc/hf/io.h
+++ b/inc/hf/io.h
@@ -133,7 +133,7 @@
 {
 	uint8_t v = io_read8(io);
 
-	dsb();
+	data_sync_barrier();
 	return v;
 }
 
@@ -141,7 +141,7 @@
 {
 	uint16_t v = io_read16(io);
 
-	dsb();
+	data_sync_barrier();
 	return v;
 }
 
@@ -149,7 +149,7 @@
 {
 	uint32_t v = io_read32(io);
 
-	dsb();
+	data_sync_barrier();
 	return v;
 }
 
@@ -157,7 +157,7 @@
 {
 	uint64_t v = io_read64(io);
 
-	dsb();
+	data_sync_barrier();
 	return v;
 }
 
@@ -165,7 +165,7 @@
 {
 	uint8_t v = io_read8_array(io, n);
 
-	dsb();
+	data_sync_barrier();
 	return v;
 }
 
@@ -173,7 +173,7 @@
 {
 	uint16_t v = io_read16_array(io, n);
 
-	dsb();
+	data_sync_barrier();
 	return v;
 }
 
@@ -181,7 +181,7 @@
 {
 	uint32_t v = io_read32_array(io, n);
 
-	dsb();
+	data_sync_barrier();
 	return v;
 }
 
@@ -189,7 +189,7 @@
 {
 	uint64_t v = io_read64_array(io, n);
 
-	dsb();
+	data_sync_barrier();
 	return v;
 }
 
@@ -247,48 +247,48 @@
 
 static inline void io_write8_mb(io8_t io, uint8_t v)
 {
-	dsb();
+	data_sync_barrier();
 	io_write8(io, v);
 }
 
 static inline void io_write16_mb(io16_t io, uint16_t v)
 {
-	dsb();
+	data_sync_barrier();
 	io_write16(io, v);
 }
 
 static inline void io_write32_mb(io32_t io, uint32_t v)
 {
-	dsb();
+	data_sync_barrier();
 	io_write32(io, v);
 }
 
 static inline void io_write64_mb(io64_t io, uint64_t v)
 {
-	dsb();
+	data_sync_barrier();
 	io_write64(io, v);
 }
 
 static inline void io_write8_array_mb(io8_array_t io, size_t n, uint8_t v)
 {
-	dsb();
+	data_sync_barrier();
 	io_write8_array(io, n, v);
 }
 
 static inline void io_write16_array_mb(io16_array_t io, size_t n, uint16_t v)
 {
-	dsb();
+	data_sync_barrier();
 	io_write16_array(io, n, v);
 }
 
 static inline void io_write32_array_mb(io32_array_t io, size_t n, uint32_t v)
 {
-	dsb();
+	data_sync_barrier();
 	io_write32_array(io, n, v);
 }
 
 static inline void io_write64_array_mb(io64_array_t io, size_t n, uint64_t v)
 {
-	dsb();
+	data_sync_barrier();
 	io_write64_array(io, n, v);
 }