Do not use NEON for AES-CBC on aarch64

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>

commit: d05e7f1ab3e76de673a409424ba29f7cc187ef8f [log] [tgz]
author: Dave Rodgman <dave.rodgman@arm.com> Wed Jun 14 18:58:48 2023 +0100
committer: Dave Rodgman <dave.rodgman@arm.com> Wed Jun 14 18:58:48 2023 +0100
tree: 3d011b2bf4d7f264175c0aae7bc62f32ac06b6ac
parent: 906c63cf3571b64db8eed423f1240195e4cb14e3 [diff] [blame]
diff --git a/library/aes.c b/library/aes.c
index 6d8cf2e..6a7e610 100644
--- a/library/aes.c
+++ b/library/aes.c

@@ -1035,6 +1035,24 @@
 }
 
 #if defined(MBEDTLS_CIPHER_MODE_CBC)
+
+#if defined(__ARM_NEON) && defined(__aarch64__)
+/* Avoid using the NEON implementation of mbedtls_xor. Because of the dependency on
+ * the result for the next block in CBC, and the cost of transferring that data from
+ * NEON registers, it is faster to XOR the block as two 64-bit words on aarch64.
+ * For 32-bit arm, NEON should be faster. Note: each argument is evaluated twice. */
+#define CBC_XOR_16(r, a, b) do {                                         \
+    mbedtls_put_unaligned_uint64(r,                                      \
+                                 mbedtls_get_unaligned_uint64(a) ^       \
+                                 mbedtls_get_unaligned_uint64(b));       \
+    mbedtls_put_unaligned_uint64((r) + 8,                                \
+                                 mbedtls_get_unaligned_uint64((a) + 8) ^ \
+                                 mbedtls_get_unaligned_uint64((b) + 8)); \
+} while (0)
+#else
+#define CBC_XOR_16(r, a, b) mbedtls_xor(r, a, b, 16)
+#endif
+
 /*
  * AES-CBC buffer encryption/decryption
  */
@@ -1077,8 +1095,7 @@
             if (ret != 0) {
                 goto exit;
             }
-
-            mbedtls_xor(output, output, iv, 16);
+            CBC_XOR_16(output, output, iv);
 
             memcpy(iv, temp, 16);
 
@@ -1088,7 +1105,7 @@
         }
     } else {
         while (length > 0) {
-            mbedtls_xor(output, input, ivp, 16);
+            CBC_XOR_16(output, input, ivp);
 
             ret = mbedtls_aes_crypt_ecb(ctx, mode, output, output);
             if (ret != 0) {
commit	d05e7f1ab3e76de673a409424ba29f7cc187ef8f	[log] [tgz]
author	Dave Rodgman <dave.rodgman@arm.com>	Wed Jun 14 18:58:48 2023 +0100
committer	Dave Rodgman <dave.rodgman@arm.com>	Wed Jun 14 18:58:48 2023 +0100
tree	3d011b2bf4d7f264175c0aae7bc62f32ac06b6ac
parent	906c63cf3571b64db8eed423f1240195e4cb14e3 [diff] [blame]