Refactor aarch64 barriers and TLBI instructions

Use macros instead of function calls so that the barriers are always
inlined rather than relying on LTO.
Make the macros take the op kind as an argument so that a single
definition covers all variants.
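
As a rough sketch, the generalized barrier macros in
inc/hf/arch/barriers.h would take the following shape (the header
change is not part of this diff, so the exact definitions below are
assumptions):

  #define dmb(op)                               \
  	do {                                  \
  		__asm__ volatile("dmb " #op); \
  	} while (0)

  #define dsb(op)                               \
  	do {                                  \
  		__asm__ volatile("dsb " #op); \
  	} while (0)

  #define isb()                            \
  	do {                             \
  		__asm__ volatile("isb"); \
  	} while (0)

  /* Assumed definition; pl011.c used a plain dmb() before this change. */
  #define memory_ordering_barrier() dmb(sy)

Because the op kind is pasted into the asm string at preprocessing
time, a call site such as dsb(ishst) expands to exactly the intended
instruction without relying on the optimizer.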

Test: ./kokoro/ubuntu/build.sh
Change-Id: I8a5553d47cf3a0965fbf35d93c3c925f5f02ac4e
diff --git a/src/arch/aarch64/BUILD.gn b/src/arch/aarch64/BUILD.gn
index 9f29b72..8abc3d0 100644
--- a/src/arch/aarch64/BUILD.gn
+++ b/src/arch/aarch64/BUILD.gn
@@ -19,7 +19,6 @@
 # Implementation of the arch interface for aarch64.
 source_set("arch") {
   sources = [
-    "barriers.c",
     "cpu.c",
     "mm.c",
     "timer.c",
diff --git a/src/arch/aarch64/barriers.c b/src/arch/aarch64/barriers.c
deleted file mode 100644
index 8642b05..0000000
--- a/src/arch/aarch64/barriers.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright 2018 The Hafnium Authors.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "hf/arch/barriers.h"
-
-void dmb(void)
-{
-	__asm__ volatile("dmb sy");
-}
-
-void dsb(void)
-{
-	__asm__ volatile("dsb sy");
-}
-
-void isb(void)
-{
-	__asm__ volatile("isb");
-}
diff --git a/src/arch/aarch64/hypervisor/handler.c b/src/arch/aarch64/hypervisor/handler.c
index 7ee74d1..621b847 100644
--- a/src/arch/aarch64/hypervisor/handler.c
+++ b/src/arch/aarch64/hypervisor/handler.c
@@ -18,6 +18,7 @@
 
 #include "hf/arch/barriers.h"
 #include "hf/arch/init.h"
+#include "hf/arch/mm.h"
 
 #include "hf/api.h"
 #include "hf/cpu.h"
@@ -102,15 +103,6 @@
 }
 
 /**
- * Ensures all explicit memory access and management instructions for
- * non-shareable normal memory have completed before continuing.
- */
-static void dsb_nsh(void)
-{
-	__asm__ volatile("dsb nsh");
-}
-
-/**
  * Invalidate all stage 1 TLB entries on the current (physical) CPU for the
  * current VMID.
  */
@@ -135,7 +127,7 @@
 	 * TLB invalidation has taken effect. Non-shareable is enough because the
 	 * TLB is local to the CPU.
 	 */
-	dsb_nsh();
+	dsb(nsh);
 }
 
 /**
diff --git a/src/arch/aarch64/mm.c b/src/arch/aarch64/mm.c
index c2a1840..623ba0c 100644
--- a/src/arch/aarch64/mm.c
+++ b/src/arch/aarch64/mm.c
@@ -16,6 +16,7 @@
 
 #include "hf/mm.h"
 
+#include "hf/arch/barriers.h"
 #include "hf/arch/cpu.h"
 
 #include "hf/dlog.h"
@@ -89,6 +90,18 @@
 
 /* clang-format on */
 
+/** Executes the given TLB invalidation instruction. */
+#define tlbi(op)                               \
+	do {                                   \
+		__asm__ volatile("tlbi " #op); \
+	} while (0)
+
+/** Executes the given TLB invalidation instruction on the given operand. */
+#define tlbi_reg(op, reg)                                           \
+	do {                                                        \
+		__asm__ volatile("tlbi " #op ", %0" : : "r"(reg)); \
+	} while (0)
+
 /** Mask for the address bits of the pte. */
 #define PTE_ADDR_MASK \
 	(((UINT64_C(1) << 48) - 1) & ~((UINT64_C(1) << PAGE_BITS) - 1))
@@ -243,13 +253,13 @@
 	begin >>= 12;
 	end >>= 12;
 
-	__asm__ volatile("dsb ishst");
+	dsb(ishst);
 
 	for (it = begin; it < end; it += (UINT64_C(1) << (PAGE_BITS - 12))) {
-		__asm__("tlbi vae2is, %0" : : "r"(it));
+		tlbi_reg(vae2is, it);
 	}
 
-	__asm__ volatile("dsb ish");
+	dsb(ish);
 }
 
 /**
@@ -267,16 +277,15 @@
 	begin >>= 12;
 	end >>= 12;
 
-	__asm__ volatile("dsb ishst");
+	dsb(ishst);
 
 	for (it = begin; it < end; it += (UINT64_C(1) << (PAGE_BITS - 12))) {
-		__asm__("tlbi ipas2e1, %0" : : "r"(it));
+		tlbi_reg(ipas2e1, it);
 	}
 
-	__asm__ volatile(
-		"dsb ish\n"
-		"tlbi vmalle1is\n"
-		"dsb ish\n");
+	dsb(ish);
+	tlbi(vmalle1is);
+	dsb(ish);
 }
 
 /**
@@ -294,8 +303,7 @@
 		__asm__ volatile("dc cvac, %0" : : "r"(line_begin));
 		line_begin += line_size;
 	}
-
-	__asm__ volatile("dsb sy");
+	dsb(sy);
 }
 
 uint64_t arch_mm_mode_to_stage1_attrs(int mode)
@@ -555,10 +563,10 @@
 	    (3 << 28) | /* RES1 bits. */
 	    0;
 
-	__asm__ volatile("dsb sy");
-	__asm__ volatile("isb");
+	dsb(sy);
+	isb();
 	write_msr(sctlr_el2, v);
-	__asm__ volatile("isb");
+	isb();
 
 	return true;
 }
diff --git a/src/arch/aarch64/pl011/pl011.c b/src/arch/aarch64/pl011/pl011.c
index 9afc3b9..f98f36e 100644
--- a/src/arch/aarch64/pl011/pl011.c
+++ b/src/arch/aarch64/pl011/pl011.c
@@ -57,12 +57,10 @@
 		/* do nothing */
 	}
 
-	dmb();
-
-	/* Write the character out. */
+	/* Write the character out, forcing memory access ordering. */
+	memory_ordering_barrier();
 	io_write32(UARTDR, c);
-
-	dmb();
+	memory_ordering_barrier();
 
 	/* Wait until the UART is no longer busy. */
 	while (io_read32_mb(UARTFR) & UARTFR_BUSY) {