Add optimized bignum multiplication for AArch64. x0-x3 are skipped so that function parameters do not have to be moved. MULADDC_INIT and MULADDC_STOP are mostly empty because it is more efficient to keep everything in registers (and that should easily be possible). I considered a MULADDC_HUIT implementation, but could not think of anything that would be more efficient than basically 8 consecutive MULADDC_CORE invocations. You could combine the loads and stores, but it's probably more efficient to interleave them with arithmetic, depending on the specific microarchitecture. NEON can do a 64x64->128 bit multiplication (and optional accumulation) in one instruction, but is not great at handling carries.

commit: cc1871e674c2508f88dd77106c9f0ba0dbee2120 [log] [tgz]
author: Ko- <k.stoffelen@cs.ru.nl> Thu Aug 16 02:01:57 2018 -0700
committer: Ko- <k.stoffelen@cs.ru.nl> Mon Sep 02 13:44:57 2019 +0200
tree: 6fa59d325ec995871af1c09856cb07a53bc297bb
parent: 03d2daf55c97b833d675e9a1e1c0f6c9dfb240b4 [diff] [blame]
diff --git a/include/mbedtls/bn_mul.h b/include/mbedtls/bn_mul.h
index f7cb072..4200ad4 100644
--- a/include/mbedtls/bn_mul.h
+++ b/include/mbedtls/bn_mul.h

@@ -198,6 +198,30 @@
 
 #endif /* AMD64 */
 
+#if defined(__aarch64__)
+/* Each MULADDC_CORE step computes *d++ += *s++ * b + c and leaves the carry-out in c; x4-x7 are scratch. */
+#define MULADDC_INIT                \
+    asm(
+/* Asm operands: %0 = c (carry in/out), %1 = d (pointer), %2 = s (pointer), %3 = b (multiplier). */
+#define MULADDC_CORE                                                    \
+        "ldr x4, [%2], #8   \n\t"   /* x4 = *s; s += 8 bytes         */ \
+        "ldr x5, [%1]       \n\t"   /* x5 = *d                       */ \
+        "mul x6, x4, %3     \n\t"   /* x6 = low  half of x4 * b      */ \
+        "umulh x7, x4, %3   \n\t"   /* x7 = high half of x4 * b      */ \
+        "adds x5, x5, x6    \n\t"   /* x5 += x6, sets carry flag     */ \
+        "adc x7, x7, xzr    \n\t"   /* x7 += carry flag              */ \
+        "adds x5, x5, %0    \n\t"   /* x5 += c, sets carry flag      */ \
+        "adc %0, x7, xzr    \n\t"   /* c = x7 + carry flag           */ \
+        "str x5, [%1], #8   \n\t"   /* *d = x5; d += 8 bytes         */
+/* c, d and s are both read and modified by the asm, hence "+r"; "memory" covers the *s load and *d store. */
+#define MULADDC_STOP                                    \
+         : "+r" (c),  "+r" (d), "+r" (s)                \
+         : "r" (b)                                      \
+         : "x4", "x5", "x6", "x7", "cc", "memory"       \
+    );
+
+#endif /* Aarch64 */
+
 #if defined(__mc68020__) || defined(__mcpu32__)
 
 #define MULADDC_INIT                    \
commit	cc1871e674c2508f88dd77106c9f0ba0dbee2120	[log] [tgz]
author	Ko- <k.stoffelen@cs.ru.nl>	Thu Aug 16 02:01:57 2018 -0700
committer	Ko- <k.stoffelen@cs.ru.nl>	Mon Sep 02 13:44:57 2019 +0200
tree	6fa59d325ec995871af1c09856cb07a53bc297bb
parent	03d2daf55c97b833d675e9a1e1c0f6c9dfb240b4 [diff] [blame]