Fix segfault on x32 by using better register constraints in bn_mul.h
On x32, pointers are only 4 bytes wide and need to be loaded using the "movl"
instruction instead of "movq" to avoid loading garbage into the upper half of
the register.
The MULADDC macros for x86-64 are adjusted to work on x32 as well: instead of
moving the operands into and out of registers by hand, the asm block now uses
register constraints so that gcc loads all the registers in advance and stores
them back afterwards. The b, c, D and S constraints correspond to the rbx, rcx,
rdi and rsi registers respectively.
diff --git a/include/mbedtls/bn_mul.h b/include/mbedtls/bn_mul.h
index 1fc7aa6..cac3f14 100644
--- a/include/mbedtls/bn_mul.h
+++ b/include/mbedtls/bn_mul.h
@@ -162,10 +162,6 @@
#define MULADDC_INIT \
asm( \
- "movq %3, %%rsi \n\t" \
- "movq %4, %%rdi \n\t" \
- "movq %5, %%rcx \n\t" \
- "movq %6, %%rbx \n\t" \
"xorq %%r8, %%r8 \n\t"
#define MULADDC_CORE \
@@ -181,12 +177,9 @@
"addq $8, %%rdi \n\t"
#define MULADDC_STOP \
- "movq %%rcx, %0 \n\t" \
- "movq %%rdi, %1 \n\t" \
- "movq %%rsi, %2 \n\t" \
- : "=m" (c), "=m" (d), "=m" (s) \
- : "m" (s), "m" (d), "m" (c), "m" (b) \
- : "rax", "rcx", "rdx", "rbx", "rsi", "rdi", "r8" \
+ : "+c" (c), "+D" (d), "+S" (s) \
+ : "b" (b) \
+ : "rax", "rdx", "r8" \
);
#endif /* AMD64 */