AESNI: add implementation with intrinsics
As of this commit, to use the intrinsics for MBEDTLS_AESNI_C:
* With MSVC, this should be the default.
* With Clang, build with `clang -maes -mpclmul` or equivalent.
* With GCC, build with `gcc -mpclmul -msse2` or equivalent.
In particular, for now, when a GCC-like compiler builds specifically for a
target that supports both the AES and CLMUL (carry-less multiplication, used
by GCM) instructions, the old assembly implementation is still selected.
This method for platform selection will likely be improved in the future.
Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
diff --git a/library/aes.c b/library/aes.c
index a81332d..36aa7f2 100644
--- a/library/aes.c
+++ b/library/aes.c
@@ -552,6 +552,14 @@
#if defined(MBEDTLS_AESNI_HAVE_CODE)
if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) {
+ /* The intrinsics-based implementation needs 16-byte alignment
+ * for the round key array. */
+ unsigned delta = (uintptr_t) ctx->buf & 0x0000000f;
+ size_t rk_offset = 0;
+ if (delta != 0) {
+ rk_offset = 4 - delta / 4; // 16 bytes = 4 uint32_t
+ }
+ ctx->rk = RK = ctx->buf + rk_offset;
return mbedtls_aesni_setkey_enc((unsigned char *) ctx->rk, key, keybits);
}
#endif
@@ -665,6 +673,17 @@
goto exit;
}
#endif
+#if defined(MBEDTLS_AESNI_HAVE_CODE)
+ if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) {
+ /* The intrinsics-based implementation needs 16-byte alignment
+ * for the round key array. */
+ unsigned delta = (uintptr_t) ctx->buf & 0x0000000f;
+ if (delta != 0) {
+ size_t rk_offset = 4 - delta / 4; // 16 bytes = 4 uint32_t
+ ctx->rk = RK = ctx->buf + rk_offset;
+ }
+ }
+#endif
SK = cty.rk + cty.nr * 4;