Merge changes I005586ef,I0d4d74bc into integration

* changes:
  fix(cpufeat): replace "bti" mnemonic with hint instructions
  fix(cpufeat): improve xpaci wrapper
diff --git a/include/arch/aarch64/arch_helpers.h b/include/arch/aarch64/arch_helpers.h
index 9419583..01b7335 100644
--- a/include/arch/aarch64/arch_helpers.h
+++ b/include/arch/aarch64/arch_helpers.h
@@ -240,12 +240,11 @@
  ******************************************************************************/
 static inline u_register_t xpaci(u_register_t arg)
 {
-	register u_register_t x0 asm("x0") = arg;
+	__asm__ (".arch armv8.3-a\n"
+		 "xpaci %0\n"
+		 : "+r" (arg));
 
-	/* `xpaci x0` for compatibility with older compiler and/or older -march */
-	__asm__ (".arch armv8.3-a; xpaci %0\n" : "+r" (x0));
-
-	return x0;
+	return arg;
 }
 
 void flush_dcache_range(uintptr_t addr, size_t size);
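For reference, the reworked wrapper above is self-contained and no longer
pins its operand to x0, so the compiler may allocate any general-purpose
register. A minimal standalone sketch of how it might be exercised follows;
the backtrace scenario and the print_frame() helper are illustrative
assumptions, not part of this change:

	#include <stdint.h>
	#include <stdio.h>

	/*
	 * Same shape as the patched wrapper, using uint64_t instead of
	 * u_register_t so the sketch builds without the TF-A headers.
	 * The ".arch" directive only makes the assembler accept the
	 * Armv8.3-a mnemonic; it does not alter the compiler's -march.
	 */
	static inline uint64_t xpaci(uint64_t arg)
	{
		__asm__ (".arch armv8.3-a\n"
			 "xpaci %0\n"
			 : "+r" (arg));
		return arg;
	}

	/*
	 * Illustrative caller: strip the Pointer Authentication Code from
	 * a saved link-register value so the raw address can be printed,
	 * e.g. while walking stack frames for a backtrace.
	 */
	void print_frame(uint64_t saved_lr)
	{
		printf("PC: 0x%llx\n", (unsigned long long)xpaci(saved_lr));
	}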
diff --git a/include/arch/aarch64/asm_macros.S b/include/arch/aarch64/asm_macros.S
index da51bf8..0afc9c2 100644
--- a/include/arch/aarch64/asm_macros.S
+++ b/include/arch/aarch64/asm_macros.S
@@ -230,7 +230,7 @@
 	 */
 	.macro	read reg:req
 #if ENABLE_BTI
-	bti	j
+	BTI	j
 #endif
 	mrs	x0, \reg
 	ret
@@ -241,7 +241,7 @@
 	 */
 	.macro	write reg:req
 #if ENABLE_BTI
-	bti	j
+	BTI	j
 #endif
 	msr	\reg, x1
 	ret
diff --git a/include/common/asm_macros_common.S b/include/common/asm_macros_common.S
index fd0ea81..9172b55 100644
--- a/include/common/asm_macros_common.S
+++ b/include/common/asm_macros_common.S
@@ -7,6 +7,20 @@
 #define ASM_MACROS_COMMON_S
 
 	/*
+	 * Provide a wrapper for the "bti" instructions using the more
+	 * compatible "hint" encoding; otherwise, older toolchains would reject
+	 * them when not compiling for a BTI-capable machine (-march=armv8.5-a).
+	 */
+	.macro	BTI _targets
+	.ifc	\_targets, j
+	hint	#36
+	.endif
+	.ifc	\_targets, jc
+	hint	#38
+	.endif
+	.endm
+
+	/*
 	 * This macro is used to create a function label and place the
 	 * code into a separate text section based on the function name
 	 * to enable elimination of unused code during linking. It also adds
@@ -42,7 +56,7 @@
 	/* When Branch Target Identification is enabled, insert "bti jc"
 	 * instruction to enable indirect calls and branches
 	 */
-	 bti	jc
+	BTI	jc
 #endif
 	.endm
 
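The BTI macro introduced above relies on the fact that the BTI landing-pad
instructions live in the hint space: "bti j" and "bti jc" are the preferred
assembler aliases for "hint #36" and "hint #38" respectively, which any
Armv8.0 assembler accepts and which cores without FEAT_BTI execute as NOPs.
A standalone sketch of the correspondence (illustrative only, not part of
this patch; the function name is made up):

	/*
	 * The hint-space forms assemble with any baseline -march. The
	 * emitted words are the BTI encodings (0xd503249f for "bti j",
	 * 0xd50324df for "bti jc") and behave as NOPs on cores without
	 * FEAT_BTI.
	 */
	void bti_hint_alias_demo(void)
	{
		__asm__ volatile ("hint #36");	/* same encoding as "bti j"  */
		__asm__ volatile ("hint #38");	/* same encoding as "bti jc" */
	}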
diff --git a/include/lib/cpus/aarch64/cpu_macros.S b/include/lib/cpus/aarch64/cpu_macros.S
index 5d2bb7b..402e07f 100644
--- a/include/lib/cpus/aarch64/cpu_macros.S
+++ b/include/lib/cpus/aarch64/cpu_macros.S
@@ -514,7 +514,7 @@
 	.align \_align
 	\_name:
 #if ENABLE_BTI
-	bti	jc
+	BTI	jc
 #endif
 .endm
 
diff --git a/lib/aarch64/cache_helpers.S b/lib/aarch64/cache_helpers.S
index ff9a4e6..cc46c53 100644
--- a/lib/aarch64/cache_helpers.S
+++ b/lib/aarch64/cache_helpers.S
@@ -215,7 +215,7 @@
 
 	.macro	dcsw_loop _op
 #if ENABLE_BTI
-	bti	j
+	BTI	j
 #endif
 loop2_\_op:
 	lsl	w7, w6, w2		// w7 = aligned max set number