Merge pull request #8326 from daverodgman/aesce-thumb2
Support hw-accelerated AES on Thumb and Arm
diff --git a/ChangeLog.d/armv8-aesce.txt b/ChangeLog.d/armv8-aesce.txt
new file mode 100644
index 0000000..ec5889c
--- /dev/null
+++ b/ChangeLog.d/armv8-aesce.txt
@@ -0,0 +1,3 @@
+Features
+ * Support use of Armv8-A Cryptographic Extensions for hardware acclerated
+ AES when compiling for Thumb (T32) or 32-bit Arm (A32).
diff --git a/include/mbedtls/mbedtls_config.h b/include/mbedtls/mbedtls_config.h
index 552d944..930eb4b 100644
--- a/include/mbedtls/mbedtls_config.h
+++ b/include/mbedtls/mbedtls_config.h
@@ -2238,7 +2238,7 @@
/**
* \def MBEDTLS_AESCE_C
*
- * Enable AES cryptographic extension support on 64-bit Arm.
+ * Enable AES cryptographic extension support on Armv8.
*
* Module: library/aesce.c
* Caller: library/aes.c
@@ -2249,13 +2249,15 @@
* system, Armv8-A Cryptographic Extensions must be supported by
* the CPU when this option is enabled.
*
- * \note Minimum compiler versions for this feature are Clang 4.0,
- * armclang 6.6, GCC 6.0 or MSVC 2019 version 16.11.2.
+ * \note Minimum compiler versions for this feature when targeting aarch64
+ * are Clang 4.0; armclang 6.6; GCC 6.0; or MSVC 2019 version 16.11.2.
+ * Minimum compiler versions for this feature when targeting 32-bit
+ * Arm or Thumb are Clang 11.0; armclang 6.20; or GCC 6.0.
*
* \note \c CFLAGS must be set to a minimum of \c -march=armv8-a+crypto for
* armclang <= 6.9
*
- * This module adds support for the AES Armv8-A Cryptographic Extensions on Aarch64 systems.
+ * This module adds support for the AES Armv8-A Cryptographic Extensions on Armv8 systems.
*/
#define MBEDTLS_AESCE_C
diff --git a/library/aes.c b/library/aes.c
index 5e20d51..f4b9739 100644
--- a/library/aes.c
+++ b/library/aes.c
@@ -23,9 +23,9 @@
#include "mbedtls/error.h"
#if defined(MBEDTLS_AES_USE_HARDWARE_ONLY)
-#if !((defined(MBEDTLS_ARCH_IS_ARM64) && defined(MBEDTLS_AESCE_C)) || \
- (defined(MBEDTLS_ARCH_IS_X64) && defined(MBEDTLS_AESNI_C)) || \
- (defined(MBEDTLS_ARCH_IS_X86) && defined(MBEDTLS_AESNI_C)))
+#if !((defined(MBEDTLS_ARCH_IS_ARMV8_A) && defined(MBEDTLS_AESCE_C)) || \
+ (defined(MBEDTLS_ARCH_IS_X64) && defined(MBEDTLS_AESNI_C)) || \
+ (defined(MBEDTLS_ARCH_IS_X86) && defined(MBEDTLS_AESNI_C)))
#error "MBEDTLS_AES_USE_HARDWARE_ONLY defined, but not all prerequisites"
#endif
#endif
diff --git a/library/aesce.c b/library/aesce.c
index 2835398..e1e0a15 100644
--- a/library/aesce.c
+++ b/library/aesce.c
@@ -5,8 +5,17 @@
* SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
*/
-#if defined(__aarch64__) && !defined(__ARM_FEATURE_CRYPTO) && \
- defined(__clang__) && __clang_major__ >= 4
+#if defined(__clang__) && (__clang_major__ >= 4)
+
+/* Ideally, we would simply use MBEDTLS_ARCH_IS_ARMV8_A in the following #if,
+ * but that is defined by build_info.h, and we need this block to happen first. */
+#if defined(__ARM_ARCH)
+#if __ARM_ARCH >= 8
+#define MBEDTLS_AESCE_ARCH_IS_ARMV8_A
+#endif
+#endif
+
+#if defined(MBEDTLS_AESCE_ARCH_IS_ARMV8_A) && !defined(__ARM_FEATURE_CRYPTO)
/* TODO: Re-consider above after https://reviews.llvm.org/D131064 merged.
*
* The intrinsic declaration are guarded by predefined ACLE macros in clang:
@@ -27,6 +36,8 @@
#define MBEDTLS_ENABLE_ARM_CRYPTO_EXTENSIONS_COMPILER_FLAG
#endif
+#endif /* defined(__clang__) && (__clang_major__ >= 4) */
+
#include <string.h>
#include "common.h"
@@ -34,12 +45,14 @@
#include "aesce.h"
-#if defined(MBEDTLS_ARCH_IS_ARM64)
+#if defined(MBEDTLS_ARCH_IS_ARMV8_A) && defined(__ARM_NEON)
/* Compiler version checks. */
#if defined(__clang__)
-# if __clang_major__ < 4
-# error "Minimum version of Clang for MBEDTLS_AESCE_C is 4.0."
+# if defined(MBEDTLS_ARCH_IS_ARM32) && (__clang_major__ < 11)
+# error "Minimum version of Clang for MBEDTLS_AESCE_C on 32-bit Arm or Thumb is 11.0."
+# elif defined(MBEDTLS_ARCH_IS_ARM64) && (__clang_major__ < 4)
+# error "Minimum version of Clang for MBEDTLS_AESCE_C on aarch64 is 4.0."
# endif
#elif defined(__GNUC__)
# if __GNUC__ < 6
@@ -52,6 +65,15 @@
# if _MSC_VER < 1929
# error "Minimum version of MSVC for MBEDTLS_AESCE_C is 2019 version 16.11.2."
# endif
+#elif defined(__ARMCC_VERSION)
+# if defined(MBEDTLS_ARCH_IS_ARM32) && (__ARMCC_VERSION < 6200002)
+/* TODO: We haven't verified armclang for 32-bit Arm/Thumb prior to 6.20.
+ * If someone verified that, please update this and document of
+ * `MBEDTLS_AESCE_C` in `mbedtls_config.h`. */
+# error "Minimum version of armclang for MBEDTLS_AESCE_C on 32-bit Arm is 6.20."
+# elif defined(MBEDTLS_ARCH_IS_ARM64) && (__ARMCC_VERSION < 6060000)
+# error "Minimum version of armclang for MBEDTLS_AESCE_C on aarch64 is 6.6."
+# endif
#endif
#ifdef __ARM_NEON
@@ -84,8 +106,19 @@
#if defined(__linux__) && !defined(MBEDTLS_AES_USE_HARDWARE_ONLY)
-#include <asm/hwcap.h>
#include <sys/auxv.h>
+#if !defined(HWCAP_NEON)
+#define HWCAP_NEON (1 << 12)
+#endif
+#if !defined(HWCAP2_AES)
+#define HWCAP2_AES (1 << 0)
+#endif
+#if !defined(HWCAP_AES)
+#define HWCAP_AES (1 << 3)
+#endif
+#if !defined(HWCAP_ASIMD)
+#define HWCAP_ASIMD (1 << 1)
+#endif
signed char mbedtls_aesce_has_support_result = -1;
@@ -102,6 +135,16 @@
* once, but that is harmless.
*/
if (mbedtls_aesce_has_support_result == -1) {
+#if defined(MBEDTLS_ARCH_IS_ARM32)
+ unsigned long auxval = getauxval(AT_HWCAP);
+ unsigned long auxval2 = getauxval(AT_HWCAP2);
+ if (((auxval & HWCAP_NEON) == HWCAP_NEON) &&
+ ((auxval2 & HWCAP2_AES) == HWCAP2_AES)) {
+ mbedtls_aesce_has_support_result = 1;
+ } else {
+ mbedtls_aesce_has_support_result = 0;
+ }
+#else
unsigned long auxval = getauxval(AT_HWCAP);
if ((auxval & (HWCAP_ASIMD | HWCAP_AES)) ==
(HWCAP_ASIMD | HWCAP_AES)) {
@@ -109,6 +152,7 @@
} else {
mbedtls_aesce_has_support_result = 0;
}
+#endif
}
return mbedtls_aesce_has_support_result;
}
@@ -362,24 +406,91 @@
#if defined(MBEDTLS_GCM_C)
-#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 5
-/* Some intrinsics are not available for GCC 5.X. */
-#define vreinterpretq_p64_u8(a) ((poly64x2_t) a)
-#define vreinterpretq_u8_p128(a) ((uint8x16_t) a)
-static inline poly64_t vget_low_p64(poly64x2_t __a)
+#if defined(MBEDTLS_ARCH_IS_ARM32)
+
+#if defined(__clang__)
+/* On clang for A32/T32, work around some missing intrinsics and types which are listed in
+ * [ACLE](https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#polynomial-1)
+ * These are only required for GCM.
+ */
+#define vreinterpretq_u64_p64(a) ((uint64x2_t) a)
+
+typedef uint8x16_t poly128_t;
+
+static inline poly128_t vmull_p64(poly64_t a, poly64_t b)
{
- uint64x2_t tmp = (uint64x2_t) (__a);
- uint64x1_t lo = vcreate_u64(vgetq_lane_u64(tmp, 0));
- return (poly64_t) (lo);
+ poly128_t r;
+ asm ("vmull.p64 %[r], %[a], %[b]" : [r] "=w" (r) : [a] "w" (a), [b] "w" (b) :);
+ return r;
}
-#endif /* !__clang__ && __GNUC__ && __GNUC__ == 5*/
+
+/* This is set to cause some more missing intrinsics to be defined below */
+#define COMMON_MISSING_INTRINSICS
+
+static inline poly128_t vmull_high_p64(poly64x2_t a, poly64x2_t b)
+{
+ return vmull_p64((poly64_t) (vget_high_u64((uint64x2_t) a)),
+ (poly64_t) (vget_high_u64((uint64x2_t) b)));
+}
+
+#endif /* defined(__clang__) */
+
+static inline uint8x16_t vrbitq_u8(uint8x16_t x)
+{
+ /* There is no vrbitq_u8 instruction in A32/T32, so provide
+ * an equivalent non-Neon implementation. Reverse bit order in each
+ * byte with 4x rbit, rev. */
+ asm ("ldm %[p], { r2-r5 } \n\t"
+ "rbit r2, r2 \n\t"
+ "rev r2, r2 \n\t"
+ "rbit r3, r3 \n\t"
+ "rev r3, r3 \n\t"
+ "rbit r4, r4 \n\t"
+ "rev r4, r4 \n\t"
+ "rbit r5, r5 \n\t"
+ "rev r5, r5 \n\t"
+ "stm %[p], { r2-r5 } \n\t"
+ :
+ /* Output: 16 bytes of memory pointed to by &x */
+ "+m" (*(uint8_t(*)[16]) &x)
+ :
+ [p] "r" (&x)
+ :
+ "r2", "r3", "r4", "r5"
+ );
+ return x;
+}
+
+#endif /* defined(MBEDTLS_ARCH_IS_ARM32) */
+
+#if defined(MBEDTLS_COMPILER_IS_GCC) && __GNUC__ == 5
+/* Some intrinsics are not available for GCC 5.X. */
+#define COMMON_MISSING_INTRINSICS
+#endif /* MBEDTLS_COMPILER_IS_GCC && __GNUC__ == 5 */
+
+
+#if defined(COMMON_MISSING_INTRINSICS)
+
+/* Missing intrinsics common to both GCC 5, and Clang on 32-bit */
+
+#define vreinterpretq_p64_u8(a) ((poly64x2_t) a)
+#define vreinterpretq_u8_p128(a) ((uint8x16_t) a)
+
+static inline poly64x1_t vget_low_p64(poly64x2_t a)
+{
+ uint64x1_t r = vget_low_u64(vreinterpretq_u64_p64(a));
+ return (poly64x1_t) r;
+
+}
+
+#endif /* COMMON_MISSING_INTRINSICS */
/* vmull_p64/vmull_high_p64 wrappers.
*
* Older compilers miss some intrinsic functions for `poly*_t`. We use
* uint8x16_t and uint8x16x3_t as input/output parameters.
*/
-#if defined(__GNUC__) && !defined(__clang__)
+#if defined(MBEDTLS_COMPILER_IS_GCC)
/* GCC reports incompatible type error without cast. GCC think poly64_t and
* poly64x1_t are different, that is different with MSVC and Clang. */
#define MBEDTLS_VMULL_P64(a, b) vmull_p64((poly64_t) a, (poly64_t) b)
@@ -388,7 +499,8 @@
* error with/without cast. And I think poly64_t and poly64x1_t are same, no
* cast for clang also. */
#define MBEDTLS_VMULL_P64(a, b) vmull_p64(a, b)
-#endif
+#endif /* MBEDTLS_COMPILER_IS_GCC */
+
static inline uint8x16_t pmull_low(uint8x16_t a, uint8x16_t b)
{
@@ -464,7 +576,7 @@
/* use 'asm' as an optimisation barrier to prevent loading MODULO from
* memory. It is for GNUC compatible compilers.
*/
- asm ("" : "+w" (r));
+ asm volatile ("" : "+w" (r));
#endif
uint8x16_t const MODULO = vreinterpretq_u8_u64(vshrq_n_u64(r, 64 - 8));
uint8x16_t h, m, l; /* input high/middle/low 128b */
@@ -507,6 +619,6 @@
#undef MBEDTLS_POP_TARGET_PRAGMA
#endif
-#endif /* MBEDTLS_ARCH_IS_ARM64 */
+#endif /* MBEDTLS_ARCH_IS_ARMV8_A */
#endif /* MBEDTLS_AESCE_C */
diff --git a/library/aesce.h b/library/aesce.h
index e3b9fa0..cf12d7f 100644
--- a/library/aesce.h
+++ b/library/aesce.h
@@ -2,7 +2,7 @@
* \file aesce.h
*
* \brief Support hardware AES acceleration on Armv8-A processors with
- * the Armv8-A Cryptographic Extension in AArch64 execution state.
+ * the Armv8-A Cryptographic Extension.
*
* \warning These functions are only for internal use by other library
* functions; you must not call them directly.
@@ -19,7 +19,7 @@
#include "mbedtls/aes.h"
-#if defined(MBEDTLS_AESCE_C) && defined(MBEDTLS_ARCH_IS_ARM64)
+#if defined(MBEDTLS_AESCE_C) && defined(MBEDTLS_ARCH_IS_ARMV8_A) && defined(__ARM_NEON)
#define MBEDTLS_AESCE_HAVE_CODE
@@ -118,6 +118,12 @@
}
#endif
-#endif /* MBEDTLS_AESCE_C && MBEDTLS_ARCH_IS_ARM64 */
+#else
+
+#if defined(MBEDTLS_AES_USE_HARDWARE_ONLY) && defined(MBEDTLS_ARCH_IS_ARMV8_A)
+#error "AES hardware acceleration not supported on this platform"
+#endif
+
+#endif /* MBEDTLS_AESCE_C && MBEDTLS_ARCH_IS_ARMV8_A && __ARM_NEON */
#endif /* MBEDTLS_AESCE_H */
diff --git a/tests/scripts/all.sh b/tests/scripts/all.sh
index 5c2f1fd..6613426 100755
--- a/tests/scripts/all.sh
+++ b/tests/scripts/all.sh
@@ -4665,6 +4665,63 @@
armc6_build_test "-O1 --target=aarch64-arm-none-eabi -march=armv8-a+crypto"
}
+support_build_aes_armce() {
+ # clang >= 4 is required to build with AES extensions
+ ver="$(clang --version|grep version|sed -E 's#.*version ([0-9]+).*#\1#')"
+ [ "${ver}" -ge 11 ]
+}
+
+component_build_aes_armce () {
+ # Test variations of AES with Armv8 crypto extensions
+ scripts/config.py set MBEDTLS_AESCE_C
+ scripts/config.py set MBEDTLS_AES_USE_HARDWARE_ONLY
+
+ msg "MBEDTLS_AES_USE_HARDWARE_ONLY, clang, aarch64"
+ make -B library/aesce.o CC=clang CFLAGS="--target=aarch64-linux-gnu -march=armv8-a+crypto"
+
+ msg "MBEDTLS_AES_USE_HARDWARE_ONLY, clang, arm"
+ make -B library/aesce.o CC=clang CFLAGS="--target=arm-linux-gnueabihf -mcpu=cortex-a72+crypto -marm"
+
+ msg "MBEDTLS_AES_USE_HARDWARE_ONLY, clang, thumb"
+ make -B library/aesce.o CC=clang CFLAGS="--target=arm-linux-gnueabihf -mcpu=cortex-a32+crypto -mthumb"
+
+ scripts/config.py unset MBEDTLS_AES_USE_HARDWARE_ONLY
+
+ msg "no MBEDTLS_AES_USE_HARDWARE_ONLY, clang, aarch64"
+ make -B library/aesce.o CC=clang CFLAGS="--target=aarch64-linux-gnu -march=armv8-a+crypto"
+
+ msg "no MBEDTLS_AES_USE_HARDWARE_ONLY, clang, arm"
+ make -B library/aesce.o CC=clang CFLAGS="--target=arm-linux-gnueabihf -mcpu=cortex-a72+crypto -marm"
+
+ msg "no MBEDTLS_AES_USE_HARDWARE_ONLY, clang, thumb"
+ make -B library/aesce.o CC=clang CFLAGS="--target=arm-linux-gnueabihf -mcpu=cortex-a32+crypto -mthumb"
+
+ # test for presence of AES instructions
+ scripts/config.py set MBEDTLS_AES_USE_HARDWARE_ONLY
+ msg "clang, test A32 crypto instructions built"
+ make -B library/aesce.o CC=clang CFLAGS="--target=arm-linux-gnueabihf -mcpu=cortex-a72+crypto -marm -S"
+ grep -E 'aes[0-9a-z]+.[0-9]\s*[qv]' library/aesce.o
+ msg "clang, test T32 crypto instructions built"
+ make -B library/aesce.o CC=clang CFLAGS="--target=arm-linux-gnueabihf -mcpu=cortex-a32+crypto -mthumb -S"
+ grep -E 'aes[0-9a-z]+.[0-9]\s*[qv]' library/aesce.o
+ msg "clang, test aarch64 crypto instructions built"
+ make -B library/aesce.o CC=clang CFLAGS="--target=aarch64-linux-gnu -march=armv8-a -S"
+ grep -E 'aes[a-z]+\s*[qv]' library/aesce.o
+
+ # test for absence of AES instructions
+ scripts/config.py unset MBEDTLS_AES_USE_HARDWARE_ONLY
+ scripts/config.py unset MBEDTLS_AESCE_C
+ msg "clang, test A32 crypto instructions not built"
+ make -B library/aesce.o CC=clang CFLAGS="--target=arm-linux-gnueabihf -mcpu=cortex-a72+crypto -marm -S"
+ not grep -E 'aes[0-9a-z]+.[0-9]\s*[qv]' library/aesce.o
+ msg "clang, test T32 crypto instructions not built"
+ make -B library/aesce.o CC=clang CFLAGS="--target=arm-linux-gnueabihf -mcpu=cortex-a32+crypto -mthumb -S"
+ not grep -E 'aes[0-9a-z]+.[0-9]\s*[qv]' library/aesce.o
+ msg "clang, test aarch64 crypto instructions not built"
+ make -B library/aesce.o CC=clang CFLAGS="--target=aarch64-linux-gnu -march=armv8-a -S"
+ not grep -E 'aes[a-z]+\s*[qv]' library/aesce.o
+}
+
support_build_sha_armce() {
if command -v clang > /dev/null ; then
# clang >= 4 is required to build with SHA extensions
@@ -5438,6 +5495,9 @@
# armc[56] don't support SHA-512 intrinsics
scripts/config.py unset MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT
+ # older versions of armcc/armclang don't support AESCE_C on 32-bit Arm
+ scripts/config.py unset MBEDTLS_AESCE_C
+
# Stop armclang warning about feature detection for A64_CRYPTO.
# With this enabled, the library does build correctly under armclang,
# but in baremetal builds (as tested here), feature detection is
@@ -5470,14 +5530,18 @@
# ARM Compiler 6 - Target ARMv8-M
armc6_build_test "-O1 --target=arm-arm-none-eabi -march=armv8-m.main"
- # ARM Compiler 6 - Target ARMv8.2-A - AArch64
- armc6_build_test "-O1 --target=aarch64-arm-none-eabi -march=armv8.2-a+crypto"
-
# ARM Compiler 6 - Target Cortex-M0 - no optimisation
armc6_build_test "-O0 --target=arm-arm-none-eabi -mcpu=cortex-m0"
# ARM Compiler 6 - Target Cortex-M0
armc6_build_test "-Os --target=arm-arm-none-eabi -mcpu=cortex-m0"
+
+ # ARM Compiler 6 - Target ARMv8.2-A - AArch64
+ #
+ # Re-enable MBEDTLS_AESCE_C as this should be supported by the version of armclang
+ # that we have in our CI
+ scripts/config.py set MBEDTLS_AESCE_C
+ armc6_build_test "-O1 --target=aarch64-arm-none-eabi -march=armv8.2-a+crypto"
}
support_build_armcc () {