fix(cpufeat): replace "bti" mnemonic with hint instructions

Older GNU binutils versions require at least "armv8.5-a" to be specified
as the ARM architecture revision before they accept "bti" instructions
in assembly code. Binutils v2.35 has relaxed this, since "bti" is in the
hint space, so it is ignored on older cores and does NOT require a
BTI-enabled core to execute.

To avoid excluding those older binutils versions (as shipped with Ubuntu
20.04), use the "hint" encoding for the "bti" instructions, which is
accepted regardless of the minimum architecture revision. Hide this
encoding in a macro, to keep the "bti" usage readable in the source
code.

Change-Id: I005586efd8974a3f2c7202896c881bb5fed07eea
Signed-off-by: Andre Przywara <andre.przywara@arm.com>
diff --git a/include/arch/aarch64/asm_macros.S b/include/arch/aarch64/asm_macros.S
index da51bf8..0afc9c2 100644
--- a/include/arch/aarch64/asm_macros.S
+++ b/include/arch/aarch64/asm_macros.S
@@ -230,7 +230,7 @@
 	 */
 	.macro	read reg:req
 #if ENABLE_BTI
-	bti	j
+	BTI	j
 #endif
 	mrs	x0, \reg
 	ret
@@ -241,7 +241,7 @@
 	 */
 	.macro	write reg:req
 #if ENABLE_BTI
-	bti	j
+	BTI	j
 #endif
 	msr	\reg, x1
 	ret
diff --git a/include/common/asm_macros_common.S b/include/common/asm_macros_common.S
index fd0ea81..9172b55 100644
--- a/include/common/asm_macros_common.S
+++ b/include/common/asm_macros_common.S
@@ -7,6 +7,20 @@
 #define ASM_MACROS_COMMON_S
 
 	/*
+	 * BTI <j|jc>: emit "bti j" / "bti jc" through the more compatible
+	 * "hint" encoding; older toolchains reject the "bti" mnemonic unless
+	 * built with -march=armv8.5-a. Other operands expand to nothing.
+	 */
+	.macro	BTI _targets
+	.ifc	\_targets, j
+	hint	#36			/* stands in for "bti j" */
+	.endif
+	.ifc	\_targets, jc
+	hint	#38			/* stands in for "bti jc" */
+	.endif
+	.endm
+
+	/*
 	 * This macro is used to create a function label and place the
 	 * code into a separate text section based on the function name
 	 * to enable elimination of unused code during linking. It also adds
@@ -42,7 +56,7 @@
 	/* When Branch Target Identification is enabled, insert "bti jc"
 	 * instruction to enable indirect calls and branches
 	 */
-	 bti	jc
+	BTI	jc
 #endif
 	.endm
 
diff --git a/include/lib/cpus/aarch64/cpu_macros.S b/include/lib/cpus/aarch64/cpu_macros.S
index c43beb6..d303a70 100644
--- a/include/lib/cpus/aarch64/cpu_macros.S
+++ b/include/lib/cpus/aarch64/cpu_macros.S
@@ -550,7 +550,7 @@
 	.align \_align
 	\_name:
 #if ENABLE_BTI
-	bti	jc
+	BTI	jc
 #endif
 .endm
 
diff --git a/lib/aarch64/cache_helpers.S b/lib/aarch64/cache_helpers.S
index ff9a4e6..cc46c53 100644
--- a/lib/aarch64/cache_helpers.S
+++ b/lib/aarch64/cache_helpers.S
@@ -215,7 +215,7 @@
 
 	.macro	dcsw_loop _op
 #if ENABLE_BTI
-	bti	j
+	BTI	j
 #endif
 loop2_\_op:
 	lsl	w7, w6, w2		// w7 = aligned max set number