Improve XOR performance on 64-bit architectures
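
Widen the fast path in mbedtls_xor(): when unaligned accesses are cheap
(MBEDTLS_EFFICIENT_UNALIGNED_ACCESS) and the target is known to have
64-bit registers (x86-64 or AArch64), XOR 8 bytes per iteration before
falling through to the existing 4-byte loop. This benefits the AES modes
that XOR whole blocks: XTS, GCM, CCM and CMAC.

As a standalone sketch of the technique (illustrative only; xor_words
and the memcpy-based unaligned accesses are stand-ins for mbedtls_xor
and the mbedtls_get/put_unaligned_* helpers):

    #include <stdint.h>
    #include <stddef.h>
    #include <string.h>

    /* Illustrative sketch, not the mbedtls implementation: XOR n bytes
     * of a and b into r, 8 bytes at a time where possible, then
     * byte-wise for the tail. memcpy expresses the unaligned
     * loads/stores; on targets with efficient unaligned access it
     * compiles down to single load/store instructions. */
    static void xor_words(unsigned char *r, const unsigned char *a,
                          const unsigned char *b, size_t n)
    {
        size_t i = 0;
        for (; i + 8 <= n; i += 8) {
            uint64_t x, y;
            memcpy(&x, a + i, 8);
            memcpy(&y, b + i, 8);
            x ^= y;
            memcpy(r + i, &x, 8);
        }
        for (; i < n; i++) {
            r[i] = a[i] ^ b[i];
        }
    }

On AArch64 at -O2 the inner loop typically compiles to a pair of 64-bit
loads, one eor and one store per 8 bytes, versus twice as many
operations for the 4-byte loop.
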
Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
diff --git a/ChangeLog.d/aes-perf.txt b/ChangeLog.d/aes-perf.txt
new file mode 100644
index 0000000..26819b2
--- /dev/null
+++ b/ChangeLog.d/aes-perf.txt
@@ -0,0 +1,3 @@
+Features
+ * AES performance improvements of around 5-10% on 64-bit
+ architectures, benefiting the XTS, GCM, CCM and CMAC modes.
diff --git a/library/common.h b/library/common.h
index eb159a7..82001a9 100644
--- a/library/common.h
+++ b/library/common.h
@@ -125,6 +125,13 @@
{
size_t i = 0;
#if defined(MBEDTLS_EFFICIENT_UNALIGNED_ACCESS)
+#if defined(__amd64__) || defined(__x86_64__) || defined(__aarch64__)
+ /* This codepath only pays off on architectures with native 64-bit registers */
+ for (; (i + 8) <= n; i += 8) {
+ uint64_t x = mbedtls_get_unaligned_uint64(a + i) ^ mbedtls_get_unaligned_uint64(b + i);
+ mbedtls_put_unaligned_uint64(r + i, x);
+ }
+#endif
for (; (i + 4) <= n; i += 4) {
uint32_t x = mbedtls_get_unaligned_uint32(a + i) ^ mbedtls_get_unaligned_uint32(b + i);
mbedtls_put_unaligned_uint32(r + i, x);
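
The gating above is deliberate: on 32-bit targets a 64-bit loop would
typically be lowered to pairs of 32-bit loads, XORs and stores with
extra register pressure, so the 4-byte loop remains the first tier
there. Any trailing bytes (n not a multiple of 4) are handled by the
byte-wise loop that follows the context shown here.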