Squashed commit upgrading to mbedtls-3.4.0

Squash merging branch import/mbedtls-3.4.0

8225713449d3 ("libmbedtls: fix unrecognized compiler option")
f03730842d7b ("core: ltc: configure internal MD5")
2b0d0c50127c ("core: ltc: configure internal SHA-1 and SHA-224")
0e48a6e17630 ("libmedtls: core: update to mbedTLS 3.4.0 API")
049882b143af ("libutee: update to mbedTLS 3.4.0 API")
982307bf6169 ("core: LTC mpi_desc.c: update to mbedTLS 3.4.0 API")
33218e9eff7b ("ta: pkcs11: update to mbedTLS 3.4.0 API")
6956420cc064 ("libmbedtls: fix cipher_wrap.c for NIST AES Key Wrap mode")
ad67ef0b43fd ("libmbedtls: fix cipher_wrap.c for chacha20 and chachapoly")
7300f4d97bbf ("libmbedtls: add fault mitigation in mbedtls_rsa_rsassa_pkcs1_v15_verify()")
cec89b62a86d ("libmbedtls: add fault mitigation in mbedtls_rsa_rsassa_pss_verify_ext()")
e7e048796c44 ("libmbedtls: add SM2 curve")
096beff2cd31 ("libmbedtls: mbedtls_mpi_exp_mod(): optimize mempool usage")
7108668efd3f ("libmbedtls: mbedtls_mpi_exp_mod(): reduce stack usage")
0ba4eb8d0572 ("libmbedtls: mbedtls_mpi_exp_mod() initialize W")
3fd6ecf00382 ("libmbedtls: fix no CRT issue")
d5ea7e9e9aa7 ("libmbedtls: add interfaces in mbedtls for context memory operation")
2b0fb3f1fa3d ("libmedtls: mpi_miller_rabin: increase count limit")
2c3301ab99bb ("libmbedtls: add mbedtls_mpi_init_mempool()")
9a111f0da04b ("libmbedtls: make mbedtls_mpi_mont*() available")
804fe3a374f5 ("mbedtls: configure mbedtls to reach for config")
b28a41531427 ("mbedtls: remove default include/mbedtls/config.h")
dfafe507bbef ("Import mbedtls-3.4.0")

Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
Acked-by: Jerome Forissier <jerome.forissier@linaro.org>
Tested-by: Jerome Forissier <jerome.forissier@linaro.org> (vexpress-qemu_armv8a)
diff --git a/lib/libmbedtls/mbedtls/library/bignum.c b/lib/libmbedtls/mbedtls/library/bignum.c
index 598a78c..b6733b7 100644
--- a/lib/libmbedtls/mbedtls/library/bignum.c
+++ b/lib/libmbedtls/mbedtls/library/bignum.c
@@ -38,7 +38,8 @@
 #if defined(MBEDTLS_BIGNUM_C)
 
 #include "mbedtls/bignum.h"
-#include "mbedtls/bn_mul.h"
+#include "bignum_core.h"
+#include "bn_mul.h"
 #include "mbedtls/platform_util.h"
 #include "mbedtls/error.h"
 #include "constant_time_internal.h"
@@ -46,51 +47,32 @@
 #include <limits.h>
 #include <string.h>
 
-#if defined(MBEDTLS_PLATFORM_C)
 #include "mbedtls/platform.h"
-#else
-#include <stdio.h>
-#include <stdlib.h>
-#define mbedtls_printf     printf
-#define mbedtls_calloc    calloc
-#define mbedtls_free       free
-#endif
 
 #include <mempool.h>
 #include <util.h>
 
-#define MPI_VALIDATE_RET( cond )                                       \
-    MBEDTLS_INTERNAL_VALIDATE_RET( cond, MBEDTLS_ERR_MPI_BAD_INPUT_DATA )
-#define MPI_VALIDATE( cond )                                           \
-    MBEDTLS_INTERNAL_VALIDATE( cond )
+#define MPI_VALIDATE_RET(cond)                                       \
+    MBEDTLS_INTERNAL_VALIDATE_RET(cond, MBEDTLS_ERR_MPI_BAD_INPUT_DATA)
+#define MPI_VALIDATE(cond)                                           \
+    MBEDTLS_INTERNAL_VALIDATE(cond)
 
-#define ciL    (sizeof(mbedtls_mpi_uint))         /* chars in limb  */
-#define biL    (ciL << 3)               /* bits  in limb  */
-#define biH    (ciL << 2)               /* half limb size */
-
-#define MPI_SIZE_T_MAX  ( (size_t) -1 ) /* SIZE_T_MAX is not standard */
-
-/*
- * Convert between bits/chars and number of limbs
- * Divide first in order to avoid potential overflows
- */
-#define BITS_TO_LIMBS(i)  ( (i) / biL + ( (i) % biL != 0 ) )
-#define CHARS_TO_LIMBS(i) ( (i) / ciL + ( (i) % ciL != 0 ) )
+#define MPI_SIZE_T_MAX  ((size_t) -1)   /* SIZE_T_MAX is not standard */
 
 void *mbedtls_mpi_mempool;
 
 /* Implementation that should never be optimized out by the compiler */
-static void mbedtls_mpi_zeroize( mbedtls_mpi_uint *v, size_t n )
+static void mbedtls_mpi_zeroize(mbedtls_mpi_uint *v, size_t n)
 {
-    mbedtls_platform_zeroize( v, ciL * n );
+    mbedtls_platform_zeroize(v, ciL * n);
 }
 
 /*
  * Initialize one MPI
  */
-static void mpi_init( mbedtls_mpi *X, short use_mempool )
+static void mpi_init(mbedtls_mpi *X, short use_mempool)
 {
-    MPI_VALIDATE( X != NULL );
+    MPI_VALIDATE(X != NULL);
 
     X->s = 1;
     X->use_mempool = use_mempool;
@@ -98,31 +80,31 @@
     X->p = NULL;
 }
 
-void mbedtls_mpi_init( mbedtls_mpi *X )
+void mbedtls_mpi_init(mbedtls_mpi *X)
 {
-    mpi_init( X, 0 /*use_mempool*/ );
+    mpi_init(X, 0 /*use_mempool*/);
 }
 
-void mbedtls_mpi_init_mempool( mbedtls_mpi *X )
+void mbedtls_mpi_init_mempool(mbedtls_mpi *X)
 {
-    mpi_init( X, !!mbedtls_mpi_mempool /*use_mempool*/ );
+    mpi_init(X, !!mbedtls_mpi_mempool /*use_mempool*/);
 }
 
 /*
  * Unallocate one MPI
  */
-void mbedtls_mpi_free( mbedtls_mpi *X )
+void mbedtls_mpi_free(mbedtls_mpi *X)
 {
-    if( X == NULL )
+    if (X == NULL) {
         return;
+    }
 
-    if( X->p != NULL )
-    {
-        mbedtls_mpi_zeroize( X->p, X->n );
-        if( X->use_mempool )
-            mempool_free( mbedtls_mpi_mempool, X->p );
+    if (X->p != NULL) {
+        mbedtls_mpi_zeroize(X->p, X->n);
+        if(X->use_mempool)
+            mempool_free(mbedtls_mpi_mempool, X->p);
         else
-            mbedtls_free( X->p );
+        mbedtls_free(X->p);
     }
 
     X->s = 1;
@@ -133,121 +115,112 @@
 /*
  * Enlarge to the specified number of limbs
  */
-int mbedtls_mpi_grow( mbedtls_mpi *X, size_t nblimbs )
+int mbedtls_mpi_grow(mbedtls_mpi *X, size_t nblimbs)
 {
     mbedtls_mpi_uint *p;
-    MPI_VALIDATE_RET( X != NULL );
+    MPI_VALIDATE_RET(X != NULL);
 
-    if( nblimbs > MBEDTLS_MPI_MAX_LIMBS )
-        return( MBEDTLS_ERR_MPI_ALLOC_FAILED );
+    if (nblimbs > MBEDTLS_MPI_MAX_LIMBS) {
+        return MBEDTLS_ERR_MPI_ALLOC_FAILED;
+    }
 
-    if( X->n < nblimbs )
-    {
-        if( X->use_mempool )
-        {
-            p = mempool_alloc( mbedtls_mpi_mempool, nblimbs * ciL );
-            if( p == NULL )
-                return( MBEDTLS_ERR_MPI_ALLOC_FAILED );
-            memset( p, 0, nblimbs * ciL );
-        }
-        else
-        {
-            p = (mbedtls_mpi_uint*)mbedtls_calloc( nblimbs, ciL );
-            if( p == NULL )
-                return( MBEDTLS_ERR_MPI_ALLOC_FAILED );
+    if (X->n < nblimbs) {
+        if(X->use_mempool) {
+            p = mempool_alloc(mbedtls_mpi_mempool, nblimbs * ciL);
+            if(p == NULL)
+                return MBEDTLS_ERR_MPI_ALLOC_FAILED;
+            memset(p, 0, nblimbs * ciL);
+        } else {
+                p = (mbedtls_mpi_uint *) mbedtls_calloc(nblimbs, ciL);
+                if (p == NULL)
+                    return MBEDTLS_ERR_MPI_ALLOC_FAILED;
         }
 
-        if( X->p != NULL )
-        {
-            memcpy( p, X->p, X->n * ciL );
-            mbedtls_mpi_zeroize( X->p, X->n );
-            if( X->use_mempool )
-                mempool_free( mbedtls_mpi_mempool, X->p);
+        if (X->p != NULL) {
+            memcpy(p, X->p, X->n * ciL);
+            mbedtls_mpi_zeroize(X->p, X->n);
+            if (X->use_mempool)
+                mempool_free(mbedtls_mpi_mempool, X->p);
             else
-                mbedtls_free( X->p );
+                mbedtls_free(X->p);
         }
 
         X->n = nblimbs;
         X->p = p;
     }
 
-    return( 0 );
+    return 0;
 }
 
 /*
  * Resize down as much as possible,
  * while keeping at least the specified number of limbs
  */
-int mbedtls_mpi_shrink( mbedtls_mpi *X, size_t nblimbs )
+int mbedtls_mpi_shrink(mbedtls_mpi *X, size_t nblimbs)
 {
     mbedtls_mpi_uint *p;
     size_t i;
-    MPI_VALIDATE_RET( X != NULL );
+    MPI_VALIDATE_RET(X != NULL);
 
-    if( nblimbs > MBEDTLS_MPI_MAX_LIMBS )
-        return( MBEDTLS_ERR_MPI_ALLOC_FAILED );
+    if (nblimbs > MBEDTLS_MPI_MAX_LIMBS) {
+        return MBEDTLS_ERR_MPI_ALLOC_FAILED;
+    }
 
     /* Actually resize up if there are currently fewer than nblimbs limbs. */
-    if( X->n <= nblimbs )
-        return( mbedtls_mpi_grow( X, nblimbs ) );
+    if (X->n <= nblimbs) {
+        return mbedtls_mpi_grow(X, nblimbs);
+    }
     /* After this point, then X->n > nblimbs and in particular X->n > 0. */
 
-    for( i = X->n - 1; i > 0; i-- )
-        if( X->p[i] != 0 )
+    for (i = X->n - 1; i > 0; i--) {
+        if (X->p[i] != 0) {
             break;
+        }
+    }
     i++;
 
-    if( i < nblimbs )
+    if (i < nblimbs) {
         i = nblimbs;
-
-    if( X->use_mempool )
-    {
-        p = mempool_alloc( mbedtls_mpi_mempool, i * ciL );
-        if( p == NULL )
-            return( MBEDTLS_ERR_MPI_ALLOC_FAILED );
-        memset( p, 0, i * ciL );
-    }
-    else
-    {
-        p = (mbedtls_mpi_uint*)mbedtls_calloc( i, ciL );
-        if( p == NULL )
-            return( MBEDTLS_ERR_MPI_ALLOC_FAILED );
     }
 
-    if( X->p != NULL )
-    {
-        memcpy( p, X->p, i * ciL );
-        mbedtls_mpi_zeroize( X->p, X->n );
-        if( X->use_mempool )
-            mempool_free( mbedtls_mpi_mempool, X->p );
+    if (X->use_mempool) {
+        p = mempool_alloc(mbedtls_mpi_mempool, i * ciL);
+        if (p == NULL)
+            return MBEDTLS_ERR_MPI_ALLOC_FAILED;
+        memset(p, 0, i * ciL);
+    } else {
+        if ((p = (mbedtls_mpi_uint *) mbedtls_calloc(i, ciL)) == NULL)
+            return MBEDTLS_ERR_MPI_ALLOC_FAILED;
+    }
+
+    if (X->p != NULL) {
+        memcpy(p, X->p, i * ciL);
+        mbedtls_mpi_zeroize(X->p, X->n);
+        if (X->use_mempool)
+            mempool_free(mbedtls_mpi_mempool, X->p);
         else
-            mbedtls_free( X->p );
+            mbedtls_free(X->p);
     }
 
     X->n = i;
     X->p = p;
 
-    return( 0 );
+    return 0;
 }
 
 /* Resize X to have exactly n limbs and set it to 0. */
-static int mbedtls_mpi_resize_clear( mbedtls_mpi *X, size_t limbs )
+static int mbedtls_mpi_resize_clear(mbedtls_mpi *X, size_t limbs)
 {
-    if( limbs == 0 )
-    {
-        mbedtls_mpi_free( X );
-        return( 0 );
-    }
-    else if( X->n == limbs )
-    {
-        memset( X->p, 0, limbs * ciL );
+    if (limbs == 0) {
+        mbedtls_mpi_free(X);
+        return 0;
+    } else if (X->n == limbs) {
+        memset(X->p, 0, limbs * ciL);
         X->s = 1;
-        return( 0 );
-    }
-    else
-    {
-        mbedtls_mpi_free( X );
-        return( mbedtls_mpi_grow( X, limbs ) );
+        return 0;
+    } else {
+        mbedtls_mpi_free(X);
+        return mbedtls_mpi_grow(X, limbs);
     }
 }
 
@@ -260,390 +233,374 @@
  * but some code in the bignum module relies on this property, for example
  * in mbedtls_mpi_exp_mod().
  */
-int mbedtls_mpi_copy( mbedtls_mpi *X, const mbedtls_mpi *Y )
+int mbedtls_mpi_copy(mbedtls_mpi *X, const mbedtls_mpi *Y)
 {
     int ret = 0;
     size_t i;
-    MPI_VALIDATE_RET( X != NULL );
-    MPI_VALIDATE_RET( Y != NULL );
+    MPI_VALIDATE_RET(X != NULL);
+    MPI_VALIDATE_RET(Y != NULL);
 
-    if( X == Y )
-        return( 0 );
-
-    if( Y->n == 0 )
-    {
-        if( X->n != 0 )
-        {
-            X->s = 1;
-            memset( X->p, 0, X->n * ciL );
-        }
-        return( 0 );
+    if (X == Y) {
+        return 0;
     }
 
-    for( i = Y->n - 1; i > 0; i-- )
-        if( Y->p[i] != 0 )
+    if (Y->n == 0) {
+        if (X->n != 0) {
+            X->s = 1;
+            memset(X->p, 0, X->n * ciL);
+        }
+        return 0;
+    }
+
+    for (i = Y->n - 1; i > 0; i--) {
+        if (Y->p[i] != 0) {
             break;
+        }
+    }
     i++;
 
     X->s = Y->s;
 
-    if( X->n < i )
-    {
-        MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, i ) );
-    }
-    else
-    {
-        memset( X->p + i, 0, ( X->n - i ) * ciL );
+    if (X->n < i) {
+        MBEDTLS_MPI_CHK(mbedtls_mpi_grow(X, i));
+    } else {
+        memset(X->p + i, 0, (X->n - i) * ciL);
     }
 
-    memcpy( X->p, Y->p, i * ciL );
+    memcpy(X->p, Y->p, i * ciL);
 
 cleanup:
 
-    return( ret );
+    return ret;
 }
 
 /*
  * Swap the contents of X and Y
  */
-void mbedtls_mpi_swap( mbedtls_mpi *X, mbedtls_mpi *Y )
+void mbedtls_mpi_swap(mbedtls_mpi *X, mbedtls_mpi *Y)
 {
     mbedtls_mpi T;
-    MPI_VALIDATE( X != NULL );
-    MPI_VALIDATE( Y != NULL );
+    MPI_VALIDATE(X != NULL);
+    MPI_VALIDATE(Y != NULL);
 
-    memcpy( &T,  X, sizeof( mbedtls_mpi ) );
-    memcpy(  X,  Y, sizeof( mbedtls_mpi ) );
-    memcpy(  Y, &T, sizeof( mbedtls_mpi ) );
+    memcpy(&T,  X, sizeof(mbedtls_mpi));
+    memcpy(X,  Y, sizeof(mbedtls_mpi));
+    memcpy(Y, &T, sizeof(mbedtls_mpi));
+}
+
+static inline mbedtls_mpi_uint mpi_sint_abs(mbedtls_mpi_sint z)
+{
+    if (z >= 0) {
+        return z;
+    }
+    /* Take care to handle the most negative value (-2^(biL-1)) correctly.
+     * A naive -z would have undefined behavior.
+     * Write this in a way that makes popular compilers happy (GCC, Clang,
+     * MSVC). */
+    return (mbedtls_mpi_uint) 0 - (mbedtls_mpi_uint) z;
 }
 
 /*
  * Set value from integer
  */
-int mbedtls_mpi_lset( mbedtls_mpi *X, mbedtls_mpi_sint z )
+int mbedtls_mpi_lset(mbedtls_mpi *X, mbedtls_mpi_sint z)
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
-    MPI_VALIDATE_RET( X != NULL );
+    MPI_VALIDATE_RET(X != NULL);
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, 1 ) );
-    memset( X->p, 0, X->n * ciL );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_grow(X, 1));
+    memset(X->p, 0, X->n * ciL);
 
-    X->p[0] = ( z < 0 ) ? -z : z;
-    X->s    = ( z < 0 ) ? -1 : 1;
+    X->p[0] = mpi_sint_abs(z);
+    X->s    = (z < 0) ? -1 : 1;
 
 cleanup:
 
-    return( ret );
+    return ret;
 }
 
 /*
  * Get a specific bit
  */
-int mbedtls_mpi_get_bit( const mbedtls_mpi *X, size_t pos )
+int mbedtls_mpi_get_bit(const mbedtls_mpi *X, size_t pos)
 {
-    MPI_VALIDATE_RET( X != NULL );
+    MPI_VALIDATE_RET(X != NULL);
 
-    if( X->n * biL <= pos )
-        return( 0 );
+    if (X->n * biL <= pos) {
+        return 0;
+    }
 
-    return( ( X->p[pos / biL] >> ( pos % biL ) ) & 0x01 );
+    return (X->p[pos / biL] >> (pos % biL)) & 0x01;
 }
 
-/* Get a specific byte, without range checks. */
-#define GET_BYTE( X, i )                                \
-    ( ( ( X )->p[( i ) / ciL] >> ( ( ( i ) % ciL ) * 8 ) ) & 0xff )
-
 /*
  * Set a bit to a specific value of 0 or 1
  */
-int mbedtls_mpi_set_bit( mbedtls_mpi *X, size_t pos, unsigned char val )
+int mbedtls_mpi_set_bit(mbedtls_mpi *X, size_t pos, unsigned char val)
 {
     int ret = 0;
     size_t off = pos / biL;
     size_t idx = pos % biL;
-    MPI_VALIDATE_RET( X != NULL );
+    MPI_VALIDATE_RET(X != NULL);
 
-    if( val != 0 && val != 1 )
-        return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
-
-    if( X->n * biL <= pos )
-    {
-        if( val == 0 )
-            return( 0 );
-
-        MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, off + 1 ) );
+    if (val != 0 && val != 1) {
+        return MBEDTLS_ERR_MPI_BAD_INPUT_DATA;
     }
 
-    X->p[off] &= ~( (mbedtls_mpi_uint) 0x01 << idx );
+    if (X->n * biL <= pos) {
+        if (val == 0) {
+            return 0;
+        }
+
+        MBEDTLS_MPI_CHK(mbedtls_mpi_grow(X, off + 1));
+    }
+
+    X->p[off] &= ~((mbedtls_mpi_uint) 0x01 << idx);
     X->p[off] |= (mbedtls_mpi_uint) val << idx;
 
 cleanup:
 
-    return( ret );
+    return ret;
 }
 
 /*
  * Return the number of less significant zero-bits
  */
-size_t mbedtls_mpi_lsb( const mbedtls_mpi *X )
+size_t mbedtls_mpi_lsb(const mbedtls_mpi *X)
 {
     size_t i, j, count = 0;
-    MBEDTLS_INTERNAL_VALIDATE_RET( X != NULL, 0 );
+    MBEDTLS_INTERNAL_VALIDATE_RET(X != NULL, 0);
 
-    for( i = 0; i < X->n; i++ )
-        for( j = 0; j < biL; j++, count++ )
-            if( ( ( X->p[i] >> j ) & 1 ) != 0 )
-                return( count );
-
-    return( 0 );
-}
-
-/*
- * Count leading zero bits in a given integer
- */
-static size_t mbedtls_clz( const mbedtls_mpi_uint x )
-{
-    size_t j;
-    mbedtls_mpi_uint mask = (mbedtls_mpi_uint) 1 << (biL - 1);
-
-    for( j = 0; j < biL; j++ )
-    {
-        if( x & mask ) break;
-
-        mask >>= 1;
+    for (i = 0; i < X->n; i++) {
+        for (j = 0; j < biL; j++, count++) {
+            if (((X->p[i] >> j) & 1) != 0) {
+                return count;
+            }
+        }
     }
 
-    return j;
+    return 0;
 }
 
 /*
  * Return the number of bits
  */
-size_t mbedtls_mpi_bitlen( const mbedtls_mpi *X )
+size_t mbedtls_mpi_bitlen(const mbedtls_mpi *X)
 {
-    size_t i, j;
-
-    if( X->n == 0 )
-        return( 0 );
-
-    for( i = X->n - 1; i > 0; i-- )
-        if( X->p[i] != 0 )
-            break;
-
-    j = biL - mbedtls_clz( X->p[i] );
-
-    return( ( i * biL ) + j );
+    return mbedtls_mpi_core_bitlen(X->p, X->n);
 }
 
 /*
  * Return the total size in bytes
  */
-size_t mbedtls_mpi_size( const mbedtls_mpi *X )
+size_t mbedtls_mpi_size(const mbedtls_mpi *X)
 {
-    return( ( mbedtls_mpi_bitlen( X ) + 7 ) >> 3 );
+    return (mbedtls_mpi_bitlen(X) + 7) >> 3;
 }
 
 /*
  * Convert an ASCII character to digit value
  */
-static int mpi_get_digit( mbedtls_mpi_uint *d, int radix, char c )
+static int mpi_get_digit(mbedtls_mpi_uint *d, int radix, char c)
 {
     *d = 255;
 
-    if( c >= 0x30 && c <= 0x39 ) *d = c - 0x30;
-    if( c >= 0x41 && c <= 0x46 ) *d = c - 0x37;
-    if( c >= 0x61 && c <= 0x66 ) *d = c - 0x57;
+    if (c >= 0x30 && c <= 0x39) {
+        *d = c - 0x30;
+    }
+    if (c >= 0x41 && c <= 0x46) {
+        *d = c - 0x37;
+    }
+    if (c >= 0x61 && c <= 0x66) {
+        *d = c - 0x57;
+    }
 
-    if( *d >= (mbedtls_mpi_uint) radix )
-        return( MBEDTLS_ERR_MPI_INVALID_CHARACTER );
+    if (*d >= (mbedtls_mpi_uint) radix) {
+        return MBEDTLS_ERR_MPI_INVALID_CHARACTER;
+    }
 
-    return( 0 );
+    return 0;
 }
 
 /*
  * Import from an ASCII string
  */
-int mbedtls_mpi_read_string( mbedtls_mpi *X, int radix, const char *s )
+int mbedtls_mpi_read_string(mbedtls_mpi *X, int radix, const char *s)
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     size_t i, j, slen, n;
     int sign = 1;
     mbedtls_mpi_uint d;
     mbedtls_mpi T;
-    MPI_VALIDATE_RET( X != NULL );
-    MPI_VALIDATE_RET( s != NULL );
+    MPI_VALIDATE_RET(X != NULL);
+    MPI_VALIDATE_RET(s != NULL);
 
-    if( radix < 2 || radix > 16 )
-        return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
-
-    mbedtls_mpi_init_mempool( &T );
-
-    if( s[0] == 0 )
-    {
-        mbedtls_mpi_free( X );
-        return( 0 );
+    if (radix < 2 || radix > 16) {
+        return MBEDTLS_ERR_MPI_BAD_INPUT_DATA;
     }
 
-    if( s[0] == '-' )
-    {
+    mbedtls_mpi_init_mempool(&T);
+
+    if (s[0] == 0) {
+        mbedtls_mpi_free(X);
+        return 0;
+    }
+
+    if (s[0] == '-') {
         ++s;
         sign = -1;
     }
 
-    slen = strlen( s );
+    slen = strlen(s);
 
-    if( radix == 16 )
-    {
-        if( slen > MPI_SIZE_T_MAX >> 2 )
-            return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
-
-        n = BITS_TO_LIMBS( slen << 2 );
-
-        MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, n ) );
-        MBEDTLS_MPI_CHK( mbedtls_mpi_lset( X, 0 ) );
-
-        for( i = slen, j = 0; i > 0; i--, j++ )
-        {
-            MBEDTLS_MPI_CHK( mpi_get_digit( &d, radix, s[i - 1] ) );
-            X->p[j / ( 2 * ciL )] |= d << ( ( j % ( 2 * ciL ) ) << 2 );
+    if (radix == 16) {
+        if (slen > MPI_SIZE_T_MAX >> 2) {
+            return MBEDTLS_ERR_MPI_BAD_INPUT_DATA;
         }
-    }
-    else
-    {
-        MBEDTLS_MPI_CHK( mbedtls_mpi_lset( X, 0 ) );
 
-        for( i = 0; i < slen; i++ )
-        {
-            MBEDTLS_MPI_CHK( mpi_get_digit( &d, radix, s[i] ) );
-            MBEDTLS_MPI_CHK( mbedtls_mpi_mul_int( &T, X, radix ) );
-            MBEDTLS_MPI_CHK( mbedtls_mpi_add_int( X, &T, d ) );
+        n = BITS_TO_LIMBS(slen << 2);
+
+        MBEDTLS_MPI_CHK(mbedtls_mpi_grow(X, n));
+        MBEDTLS_MPI_CHK(mbedtls_mpi_lset(X, 0));
+
+        for (i = slen, j = 0; i > 0; i--, j++) {
+            MBEDTLS_MPI_CHK(mpi_get_digit(&d, radix, s[i - 1]));
+            X->p[j / (2 * ciL)] |= d << ((j % (2 * ciL)) << 2);
+        }
+    } else {
+        MBEDTLS_MPI_CHK(mbedtls_mpi_lset(X, 0));
+
+        for (i = 0; i < slen; i++) {
+            MBEDTLS_MPI_CHK(mpi_get_digit(&d, radix, s[i]));
+            MBEDTLS_MPI_CHK(mbedtls_mpi_mul_int(&T, X, radix));
+            MBEDTLS_MPI_CHK(mbedtls_mpi_add_int(X, &T, d));
         }
     }
 
-    if( sign < 0 && mbedtls_mpi_bitlen( X ) != 0 )
+    if (sign < 0 && mbedtls_mpi_bitlen(X) != 0) {
         X->s = -1;
+    }
 
 cleanup:
 
-    mbedtls_mpi_free( &T );
+    mbedtls_mpi_free(&T);
 
-    return( ret );
+    return ret;
 }
 
 /*
  * Helper to write the digits high-order first.
  */
-static int mpi_write_hlp( mbedtls_mpi *X, int radix,
-                          char **p, const size_t buflen )
+static int mpi_write_hlp(mbedtls_mpi *X, int radix,
+                         char **p, const size_t buflen)
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     mbedtls_mpi_uint r;
     size_t length = 0;
     char *p_end = *p + buflen;
 
-    do
-    {
-        if( length >= buflen )
-        {
-            return( MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL );
+    do {
+        if (length >= buflen) {
+            return MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL;
         }
 
-        MBEDTLS_MPI_CHK( mbedtls_mpi_mod_int( &r, X, radix ) );
-        MBEDTLS_MPI_CHK( mbedtls_mpi_div_int( X, NULL, X, radix ) );
+        MBEDTLS_MPI_CHK(mbedtls_mpi_mod_int(&r, X, radix));
+        MBEDTLS_MPI_CHK(mbedtls_mpi_div_int(X, NULL, X, radix));
         /*
          * Write the residue in the current position, as an ASCII character.
          */
-        if( r < 0xA )
-            *(--p_end) = (char)( '0' + r );
-        else
-            *(--p_end) = (char)( 'A' + ( r - 0xA ) );
+        if (r < 0xA) {
+            *(--p_end) = (char) ('0' + r);
+        } else {
+            *(--p_end) = (char) ('A' + (r - 0xA));
+        }
 
         length++;
-    } while( mbedtls_mpi_cmp_int( X, 0 ) != 0 );
+    } while (mbedtls_mpi_cmp_int(X, 0) != 0);
 
-    memmove( *p, p_end, length );
+    memmove(*p, p_end, length);
     *p += length;
 
 cleanup:
 
-    return( ret );
+    return ret;
 }
 
 /*
  * Export into an ASCII string
  */
-int mbedtls_mpi_write_string( const mbedtls_mpi *X, int radix,
-                              char *buf, size_t buflen, size_t *olen )
+int mbedtls_mpi_write_string(const mbedtls_mpi *X, int radix,
+                             char *buf, size_t buflen, size_t *olen)
 {
     int ret = 0;
     size_t n;
     char *p;
     mbedtls_mpi T;
-    MPI_VALIDATE_RET( X    != NULL );
-    MPI_VALIDATE_RET( olen != NULL );
-    MPI_VALIDATE_RET( buflen == 0 || buf != NULL );
+    MPI_VALIDATE_RET(X    != NULL);
+    MPI_VALIDATE_RET(olen != NULL);
+    MPI_VALIDATE_RET(buflen == 0 || buf != NULL);
 
-    if( radix < 2 || radix > 16 )
-        return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
+    if (radix < 2 || radix > 16) {
+        return MBEDTLS_ERR_MPI_BAD_INPUT_DATA;
+    }
 
-    n = mbedtls_mpi_bitlen( X ); /* Number of bits necessary to present `n`. */
-    if( radix >=  4 ) n >>= 1;   /* Number of 4-adic digits necessary to present
+    n = mbedtls_mpi_bitlen(X);   /* Number of bits necessary to present `n`. */
+    if (radix >=  4) {
+        n >>= 1;                 /* Number of 4-adic digits necessary to present
                                   * `n`. If radix > 4, this might be a strict
                                   * overapproximation of the number of
                                   * radix-adic digits needed to present `n`. */
-    if( radix >= 16 ) n >>= 1;   /* Number of hexadecimal digits necessary to
+    }
+    if (radix >= 16) {
+        n >>= 1;                 /* Number of hexadecimal digits necessary to
                                   * present `n`. */
 
+    }
     n += 1; /* Terminating null byte */
     n += 1; /* Compensate for the divisions above, which round down `n`
              * in case it's not even. */
     n += 1; /* Potential '-'-sign. */
-    n += ( n & 1 ); /* Make n even to have enough space for hexadecimal writing,
+    n += (n & 1);   /* Make n even to have enough space for hexadecimal writing,
                      * which always uses an even number of hex-digits. */
 
-    if( buflen < n )
-    {
+    if (buflen < n) {
         *olen = n;
-        return( MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL );
+        return MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL;
     }
 
     p = buf;
-    mbedtls_mpi_init_mempool( &T );
+    mbedtls_mpi_init_mempool(&T);
 
-    if( X->s == -1 )
-    {
+    if (X->s == -1) {
         *p++ = '-';
         buflen--;
     }
 
-    if( radix == 16 )
-    {
+    if (radix == 16) {
         int c;
         size_t i, j, k;
 
-        for( i = X->n, k = 0; i > 0; i-- )
-        {
-            for( j = ciL; j > 0; j-- )
-            {
-                c = ( X->p[i - 1] >> ( ( j - 1 ) << 3) ) & 0xFF;
+        for (i = X->n, k = 0; i > 0; i--) {
+            for (j = ciL; j > 0; j--) {
+                c = (X->p[i - 1] >> ((j - 1) << 3)) & 0xFF;
 
-                if( c == 0 && k == 0 && ( i + j ) != 2 )
+                if (c == 0 && k == 0 && (i + j) != 2) {
                     continue;
+                }
 
                 *(p++) = "0123456789ABCDEF" [c / 16];
                 *(p++) = "0123456789ABCDEF" [c % 16];
                 k = 1;
             }
         }
-    }
-    else
-    {
-        MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &T, X ) );
+    } else {
+        MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&T, X));
 
-        if( T.s == -1 )
+        if (T.s == -1) {
             T.s = 1;
+        }
 
-        MBEDTLS_MPI_CHK( mpi_write_hlp( &T, radix, &p, buflen ) );
+        MBEDTLS_MPI_CHK(mpi_write_hlp(&T, radix, &p, buflen));
     }
 
     *p++ = '\0';
@@ -651,16 +608,16 @@
 
 cleanup:
 
-    mbedtls_mpi_free( &T );
+    mbedtls_mpi_free(&T);
 
-    return( ret );
+    return ret;
 }
 
 #if defined(MBEDTLS_FS_IO)
 /*
  * Read X from an opened file
  */
-int mbedtls_mpi_read_file( mbedtls_mpi *X, int radix, FILE *fin )
+int mbedtls_mpi_read_file(mbedtls_mpi *X, int radix, FILE *fin)
 {
     mbedtls_mpi_uint d;
     size_t slen;
@@ -669,37 +626,46 @@
      * Buffer should have space for (short) label and decimal formatted MPI,
      * newline characters and '\0'
      */
-    char s[ MBEDTLS_MPI_RW_BUFFER_SIZE ];
+    char s[MBEDTLS_MPI_RW_BUFFER_SIZE];
 
-    MPI_VALIDATE_RET( X   != NULL );
-    MPI_VALIDATE_RET( fin != NULL );
+    MPI_VALIDATE_RET(X   != NULL);
+    MPI_VALIDATE_RET(fin != NULL);
 
-    if( radix < 2 || radix > 16 )
-        return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
+    if (radix < 2 || radix > 16) {
+        return MBEDTLS_ERR_MPI_BAD_INPUT_DATA;
+    }
 
-    memset( s, 0, sizeof( s ) );
-    if( fgets( s, sizeof( s ) - 1, fin ) == NULL )
-        return( MBEDTLS_ERR_MPI_FILE_IO_ERROR );
+    memset(s, 0, sizeof(s));
+    if (fgets(s, sizeof(s) - 1, fin) == NULL) {
+        return MBEDTLS_ERR_MPI_FILE_IO_ERROR;
+    }
 
-    slen = strlen( s );
-    if( slen == sizeof( s ) - 2 )
-        return( MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL );
+    slen = strlen(s);
+    if (slen == sizeof(s) - 2) {
+        return MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL;
+    }
 
-    if( slen > 0 && s[slen - 1] == '\n' ) { slen--; s[slen] = '\0'; }
-    if( slen > 0 && s[slen - 1] == '\r' ) { slen--; s[slen] = '\0'; }
+    if (slen > 0 && s[slen - 1] == '\n') {
+        slen--; s[slen] = '\0';
+    }
+    if (slen > 0 && s[slen - 1] == '\r') {
+        slen--; s[slen] = '\0';
+    }
 
     p = s + slen;
-    while( p-- > s )
-        if( mpi_get_digit( &d, radix, *p ) != 0 )
+    while (p-- > s) {
+        if (mpi_get_digit(&d, radix, *p) != 0) {
             break;
+        }
+    }
 
-    return( mbedtls_mpi_read_string( X, radix, p + 1 ) );
+    return mbedtls_mpi_read_string(X, radix, p + 1);
 }
 
 /*
  * Write X into an opened file (or stdout if fout == NULL)
  */
-int mbedtls_mpi_write_file( const char *p, const mbedtls_mpi *X, int radix, FILE *fout )
+int mbedtls_mpi_write_file(const char *p, const mbedtls_mpi *X, int radix, FILE *fout)
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     size_t n, slen, plen;
@@ -707,144 +673,57 @@
      * Buffer should have space for (short) label and decimal formatted MPI,
      * newline characters and '\0'
      */
-    char s[ MBEDTLS_MPI_RW_BUFFER_SIZE ];
-    MPI_VALIDATE_RET( X != NULL );
+    char s[MBEDTLS_MPI_RW_BUFFER_SIZE];
+    MPI_VALIDATE_RET(X != NULL);
 
-    if( radix < 2 || radix > 16 )
-        return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
+    if (radix < 2 || radix > 16) {
+        return MBEDTLS_ERR_MPI_BAD_INPUT_DATA;
+    }
 
-    memset( s, 0, sizeof( s ) );
+    memset(s, 0, sizeof(s));
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_write_string( X, radix, s, sizeof( s ) - 2, &n ) );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_write_string(X, radix, s, sizeof(s) - 2, &n));
 
-    if( p == NULL ) p = "";
+    if (p == NULL) {
+        p = "";
+    }
 
-    plen = strlen( p );
-    slen = strlen( s );
+    plen = strlen(p);
+    slen = strlen(s);
     s[slen++] = '\r';
     s[slen++] = '\n';
 
-    if( fout != NULL )
-    {
-        if( fwrite( p, 1, plen, fout ) != plen ||
-            fwrite( s, 1, slen, fout ) != slen )
-            return( MBEDTLS_ERR_MPI_FILE_IO_ERROR );
+    if (fout != NULL) {
+        if (fwrite(p, 1, plen, fout) != plen ||
+            fwrite(s, 1, slen, fout) != slen) {
+            return MBEDTLS_ERR_MPI_FILE_IO_ERROR;
+        }
+    } else {
+        mbedtls_printf("%s%s", p, s);
     }
-    else
-        mbedtls_printf( "%s%s", p, s );
 
 cleanup:
 
-    return( ret );
+    return ret;
 }
 #endif /* MBEDTLS_FS_IO */
 
-
-/* Convert a big-endian byte array aligned to the size of mbedtls_mpi_uint
- * into the storage form used by mbedtls_mpi. */
-
-static mbedtls_mpi_uint mpi_uint_bigendian_to_host_c( mbedtls_mpi_uint x )
-{
-    uint8_t i;
-    unsigned char *x_ptr;
-    mbedtls_mpi_uint tmp = 0;
-
-    for( i = 0, x_ptr = (unsigned char*) &x; i < ciL; i++, x_ptr++ )
-    {
-        tmp <<= CHAR_BIT;
-        tmp |= (mbedtls_mpi_uint) *x_ptr;
-    }
-
-    return( tmp );
-}
-
-static mbedtls_mpi_uint mpi_uint_bigendian_to_host( mbedtls_mpi_uint x )
-{
-#if defined(__BYTE_ORDER__)
-
-/* Nothing to do on bigendian systems. */
-#if ( __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ )
-    return( x );
-#endif /* __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ */
-
-#if ( __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ )
-
-/* For GCC and Clang, have builtins for byte swapping. */
-#if defined(__GNUC__) && defined(__GNUC_PREREQ)
-#if __GNUC_PREREQ(4,3)
-#define have_bswap
-#endif
-#endif
-
-#if defined(__clang__) && defined(__has_builtin)
-#if __has_builtin(__builtin_bswap32)  &&                 \
-    __has_builtin(__builtin_bswap64)
-#define have_bswap
-#endif
-#endif
-
-#if defined(have_bswap)
-    /* The compiler is hopefully able to statically evaluate this! */
-    switch( sizeof(mbedtls_mpi_uint) )
-    {
-        case 4:
-            return( __builtin_bswap32(x) );
-        case 8:
-            return( __builtin_bswap64(x) );
-    }
-#endif
-#endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
-#endif /* __BYTE_ORDER__ */
-
-    /* Fall back to C-based reordering if we don't know the byte order
-     * or we couldn't use a compiler-specific builtin. */
-    return( mpi_uint_bigendian_to_host_c( x ) );
-}
-
-static void mpi_bigendian_to_host( mbedtls_mpi_uint * const p, size_t limbs )
-{
-    mbedtls_mpi_uint *cur_limb_left;
-    mbedtls_mpi_uint *cur_limb_right;
-    if( limbs == 0 )
-        return;
-
-    /*
-     * Traverse limbs and
-     * - adapt byte-order in each limb
-     * - swap the limbs themselves.
-     * For that, simultaneously traverse the limbs from left to right
-     * and from right to left, as long as the left index is not bigger
-     * than the right index (it's not a problem if limbs is odd and the
-     * indices coincide in the last iteration).
-     */
-    for( cur_limb_left = p, cur_limb_right = p + ( limbs - 1 );
-         cur_limb_left <= cur_limb_right;
-         cur_limb_left++, cur_limb_right-- )
-    {
-        mbedtls_mpi_uint tmp;
-        /* Note that if cur_limb_left == cur_limb_right,
-         * this code effectively swaps the bytes only once. */
-        tmp             = mpi_uint_bigendian_to_host( *cur_limb_left  );
-        *cur_limb_left  = mpi_uint_bigendian_to_host( *cur_limb_right );
-        *cur_limb_right = tmp;
-    }
-}
-
 /*
  * Import X from unsigned binary data, little endian
+ *
+ * This function is guaranteed to return an MPI with exactly the necessary
+ * number of limbs (in particular, it does not skip 0s in the input).
  */
-int mbedtls_mpi_read_binary_le( mbedtls_mpi *X,
-                                const unsigned char *buf, size_t buflen )
+int mbedtls_mpi_read_binary_le(mbedtls_mpi *X,
+                               const unsigned char *buf, size_t buflen)
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
-    size_t i;
-    size_t const limbs = CHARS_TO_LIMBS( buflen );
+    const size_t limbs = CHARS_TO_LIMBS(buflen);
 
     /* Ensure that target MPI has exactly the necessary number of limbs */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_resize_clear( X, limbs ) );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_resize_clear(X, limbs));
 
-    for( i = 0; i < buflen; i++ )
-        X->p[i / ciL] |= ((mbedtls_mpi_uint) buf[i]) << ((i % ciL) << 3);
+    MBEDTLS_MPI_CHK(mbedtls_mpi_core_read_le(X->p, X->n, buf, buflen));
 
 cleanup:
 
@@ -853,34 +732,27 @@
      * upon failure is not necessary because failure only can happen before any
      * input is copied.
      */
-    return( ret );
+    return ret;
 }
 
 /*
  * Import X from unsigned binary data, big endian
+ *
+ * This function is guaranteed to return an MPI with exactly the necessary
+ * number of limbs (in particular, it does not skip 0s in the input).
  */
-int mbedtls_mpi_read_binary( mbedtls_mpi *X, const unsigned char *buf, size_t buflen )
+int mbedtls_mpi_read_binary(mbedtls_mpi *X, const unsigned char *buf, size_t buflen)
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
-    size_t const limbs    = CHARS_TO_LIMBS( buflen );
-    size_t const overhead = ( limbs * ciL ) - buflen;
-    unsigned char *Xp;
+    const size_t limbs = CHARS_TO_LIMBS(buflen);
 
-    MPI_VALIDATE_RET( X != NULL );
-    MPI_VALIDATE_RET( buflen == 0 || buf != NULL );
+    MPI_VALIDATE_RET(X != NULL);
+    MPI_VALIDATE_RET(buflen == 0 || buf != NULL);
 
     /* Ensure that target MPI has exactly the necessary number of limbs */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_resize_clear( X, limbs ) );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_resize_clear(X, limbs));
 
-    /* Avoid calling `memcpy` with NULL source or destination argument,
-     * even if buflen is 0. */
-    if( buflen != 0 )
-    {
-        Xp = (unsigned char*) X->p;
-        memcpy( Xp + overhead, buf, buflen );
-
-        mpi_bigendian_to_host( X->p, limbs );
-    }
+    MBEDTLS_MPI_CHK(mbedtls_mpi_core_read_be(X->p, X->n, buf, buflen));
 
 cleanup:
 
@@ -889,133 +761,66 @@
      * upon failure is not necessary because failure only can happen before any
      * input is copied.
      */
-    return( ret );
+    return ret;
 }
 
 /*
  * Export X into unsigned binary data, little endian
  */
-int mbedtls_mpi_write_binary_le( const mbedtls_mpi *X,
-                                 unsigned char *buf, size_t buflen )
+int mbedtls_mpi_write_binary_le(const mbedtls_mpi *X,
+                                unsigned char *buf, size_t buflen)
 {
-    size_t stored_bytes = X->n * ciL;
-    size_t bytes_to_copy;
-    size_t i;
-
-    if( stored_bytes < buflen )
-    {
-        bytes_to_copy = stored_bytes;
-    }
-    else
-    {
-        bytes_to_copy = buflen;
-
-        /* The output buffer is smaller than the allocated size of X.
-         * However X may fit if its leading bytes are zero. */
-        for( i = bytes_to_copy; i < stored_bytes; i++ )
-        {
-            if( GET_BYTE( X, i ) != 0 )
-                return( MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL );
-        }
-    }
-
-    for( i = 0; i < bytes_to_copy; i++ )
-        buf[i] = GET_BYTE( X, i );
-
-    if( stored_bytes < buflen )
-    {
-        /* Write trailing 0 bytes */
-        memset( buf + stored_bytes, 0, buflen - stored_bytes );
-    }
-
-    return( 0 );
+    return mbedtls_mpi_core_write_le(X->p, X->n, buf, buflen);
 }
 
 /*
  * Export X into unsigned binary data, big endian
  */
-int mbedtls_mpi_write_binary( const mbedtls_mpi *X,
-                              unsigned char *buf, size_t buflen )
+int mbedtls_mpi_write_binary(const mbedtls_mpi *X,
+                             unsigned char *buf, size_t buflen)
 {
-    size_t stored_bytes;
-    size_t bytes_to_copy;
-    unsigned char *p;
-    size_t i;
-
-    MPI_VALIDATE_RET( X != NULL );
-    MPI_VALIDATE_RET( buflen == 0 || buf != NULL );
-
-    stored_bytes = X->n * ciL;
-
-    if( stored_bytes < buflen )
-    {
-        /* There is enough space in the output buffer. Write initial
-         * null bytes and record the position at which to start
-         * writing the significant bytes. In this case, the execution
-         * trace of this function does not depend on the value of the
-         * number. */
-        bytes_to_copy = stored_bytes;
-        p = buf + buflen - stored_bytes;
-        memset( buf, 0, buflen - stored_bytes );
-    }
-    else
-    {
-        /* The output buffer is smaller than the allocated size of X.
-         * However X may fit if its leading bytes are zero. */
-        bytes_to_copy = buflen;
-        p = buf;
-        for( i = bytes_to_copy; i < stored_bytes; i++ )
-        {
-            if( GET_BYTE( X, i ) != 0 )
-                return( MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL );
-        }
-    }
-
-    for( i = 0; i < bytes_to_copy; i++ )
-        p[bytes_to_copy - i - 1] = GET_BYTE( X, i );
-
-    return( 0 );
+    return mbedtls_mpi_core_write_be(X->p, X->n, buf, buflen);
 }
 
 /*
  * Left-shift: X <<= count
  */
-int mbedtls_mpi_shift_l( mbedtls_mpi *X, size_t count )
+int mbedtls_mpi_shift_l(mbedtls_mpi *X, size_t count)
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     size_t i, v0, t1;
     mbedtls_mpi_uint r0 = 0, r1;
-    MPI_VALIDATE_RET( X != NULL );
+    MPI_VALIDATE_RET(X != NULL);
 
-    v0 = count / (biL    );
+    v0 = count / (biL);
     t1 = count & (biL - 1);
 
-    i = mbedtls_mpi_bitlen( X ) + count;
+    i = mbedtls_mpi_bitlen(X) + count;
 
-    if( X->n * biL < i )
-        MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, BITS_TO_LIMBS( i ) ) );
+    if (X->n * biL < i) {
+        MBEDTLS_MPI_CHK(mbedtls_mpi_grow(X, BITS_TO_LIMBS(i)));
+    }
 
     ret = 0;
 
     /*
      * shift by count / limb_size
      */
-    if( v0 > 0 )
-    {
-        for( i = X->n; i > v0; i-- )
+    if (v0 > 0) {
+        for (i = X->n; i > v0; i--) {
             X->p[i - 1] = X->p[i - v0 - 1];
+        }
 
-        for( ; i > 0; i-- )
+        for (; i > 0; i--) {
             X->p[i - 1] = 0;
+        }
     }
 
     /*
      * shift by count % limb_size
      */
-    if( t1 > 0 )
-    {
-        for( i = v0; i < X->n; i++ )
-        {
+    if (t1 > 0) {
+        for (i = v0; i < X->n; i++) {
             r1 = X->p[i] >> (biL - t1);
             X->p[i] <<= t1;
             X->p[i] |= r0;
@@ -1025,552 +830,424 @@
 
 cleanup:
 
-    return( ret );
+    return ret;
 }
 
 /*
  * Right-shift: X >>= count
  */
-int mbedtls_mpi_shift_r( mbedtls_mpi *X, size_t count )
+int mbedtls_mpi_shift_r(mbedtls_mpi *X, size_t count)
 {
-    size_t i, v0, v1;
-    mbedtls_mpi_uint r0 = 0, r1;
-    MPI_VALIDATE_RET( X != NULL );
-
-    v0 = count /  biL;
-    v1 = count & (biL - 1);
-
-    if( v0 > X->n || ( v0 == X->n && v1 > 0 ) )
-        return mbedtls_mpi_lset( X, 0 );
-
-    /*
-     * shift by count / limb_size
-     */
-    if( v0 > 0 )
-    {
-        for( i = 0; i < X->n - v0; i++ )
-            X->p[i] = X->p[i + v0];
-
-        for( ; i < X->n; i++ )
-            X->p[i] = 0;
+    MPI_VALIDATE_RET(X != NULL);
+    if (X->n != 0) {
+        mbedtls_mpi_core_shift_r(X->p, X->n, count);
     }
-
-    /*
-     * shift by count % limb_size
-     */
-    if( v1 > 0 )
-    {
-        for( i = X->n; i > 0; i-- )
-        {
-            r1 = X->p[i - 1] << (biL - v1);
-            X->p[i - 1] >>= v1;
-            X->p[i - 1] |= r0;
-            r0 = r1;
-        }
-    }
-
-    return( 0 );
+    return 0;
 }
 
 /*
  * Compare unsigned values
  */
-int mbedtls_mpi_cmp_abs( const mbedtls_mpi *X, const mbedtls_mpi *Y )
+int mbedtls_mpi_cmp_abs(const mbedtls_mpi *X, const mbedtls_mpi *Y)
 {
     size_t i, j;
-    MPI_VALIDATE_RET( X != NULL );
-    MPI_VALIDATE_RET( Y != NULL );
+    MPI_VALIDATE_RET(X != NULL);
+    MPI_VALIDATE_RET(Y != NULL);
 
-    for( i = X->n; i > 0; i-- )
-        if( X->p[i - 1] != 0 )
+    for (i = X->n; i > 0; i--) {
+        if (X->p[i - 1] != 0) {
             break;
-
-    for( j = Y->n; j > 0; j-- )
-        if( Y->p[j - 1] != 0 )
-            break;
-
-    if( i == 0 && j == 0 )
-        return( 0 );
-
-    if( i > j ) return(  1 );
-    if( j > i ) return( -1 );
-
-    for( ; i > 0; i-- )
-    {
-        if( X->p[i - 1] > Y->p[i - 1] ) return(  1 );
-        if( X->p[i - 1] < Y->p[i - 1] ) return( -1 );
+        }
     }
 
-    return( 0 );
+    for (j = Y->n; j > 0; j--) {
+        if (Y->p[j - 1] != 0) {
+            break;
+        }
+    }
+
+    if (i == 0 && j == 0) {
+        return 0;
+    }
+
+    if (i > j) {
+        return 1;
+    }
+    if (j > i) {
+        return -1;
+    }
+
+    for (; i > 0; i--) {
+        if (X->p[i - 1] > Y->p[i - 1]) {
+            return 1;
+        }
+        if (X->p[i - 1] < Y->p[i - 1]) {
+            return -1;
+        }
+    }
+
+    return 0;
 }
 
 /*
  * Compare signed values
  */
-int mbedtls_mpi_cmp_mpi( const mbedtls_mpi *X, const mbedtls_mpi *Y )
+int mbedtls_mpi_cmp_mpi(const mbedtls_mpi *X, const mbedtls_mpi *Y)
 {
     size_t i, j;
-    MPI_VALIDATE_RET( X != NULL );
-    MPI_VALIDATE_RET( Y != NULL );
+    MPI_VALIDATE_RET(X != NULL);
+    MPI_VALIDATE_RET(Y != NULL);
 
-    for( i = X->n; i > 0; i-- )
-        if( X->p[i - 1] != 0 )
+    for (i = X->n; i > 0; i--) {
+        if (X->p[i - 1] != 0) {
             break;
-
-    for( j = Y->n; j > 0; j-- )
-        if( Y->p[j - 1] != 0 )
-            break;
-
-    if( i == 0 && j == 0 )
-        return( 0 );
-
-    if( i > j ) return(  X->s );
-    if( j > i ) return( -Y->s );
-
-    if( X->s > 0 && Y->s < 0 ) return(  1 );
-    if( Y->s > 0 && X->s < 0 ) return( -1 );
-
-    for( ; i > 0; i-- )
-    {
-        if( X->p[i - 1] > Y->p[i - 1] ) return(  X->s );
-        if( X->p[i - 1] < Y->p[i - 1] ) return( -X->s );
+        }
     }
 
-    return( 0 );
+    for (j = Y->n; j > 0; j--) {
+        if (Y->p[j - 1] != 0) {
+            break;
+        }
+    }
+
+    if (i == 0 && j == 0) {
+        return 0;
+    }
+
+    if (i > j) {
+        return X->s;
+    }
+    if (j > i) {
+        return -Y->s;
+    }
+
+    if (X->s > 0 && Y->s < 0) {
+        return 1;
+    }
+    if (Y->s > 0 && X->s < 0) {
+        return -1;
+    }
+
+    for (; i > 0; i--) {
+        if (X->p[i - 1] > Y->p[i - 1]) {
+            return X->s;
+        }
+        if (X->p[i - 1] < Y->p[i - 1]) {
+            return -X->s;
+        }
+    }
+
+    return 0;
 }
 
 /*
  * Compare signed values
  */
-int mbedtls_mpi_cmp_int( const mbedtls_mpi *X, mbedtls_mpi_sint z )
+int mbedtls_mpi_cmp_int(const mbedtls_mpi *X, mbedtls_mpi_sint z)
 {
     mbedtls_mpi Y;
     mbedtls_mpi_uint p[1];
-    MPI_VALIDATE_RET( X != NULL );
+    MPI_VALIDATE_RET(X != NULL);
 
-    *p  = ( z < 0 ) ? -z : z;
-    Y.s = ( z < 0 ) ? -1 : 1;
+    *p  = mpi_sint_abs(z);
+    Y.s = (z < 0) ? -1 : 1;
     Y.n = 1;
     Y.p = p;
 
-    return( mbedtls_mpi_cmp_mpi( X, &Y ) );
+    return mbedtls_mpi_cmp_mpi(X, &Y);
 }
 
 /*
  * Unsigned addition: X = |A| + |B|  (HAC 14.7)
  */
-int mbedtls_mpi_add_abs( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B )
+int mbedtls_mpi_add_abs(mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B)
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
-    size_t i, j;
-    mbedtls_mpi_uint *o, *p, c, tmp;
-    MPI_VALIDATE_RET( X != NULL );
-    MPI_VALIDATE_RET( A != NULL );
-    MPI_VALIDATE_RET( B != NULL );
+    size_t j;
+    MPI_VALIDATE_RET(X != NULL);
+    MPI_VALIDATE_RET(A != NULL);
+    MPI_VALIDATE_RET(B != NULL);
 
-    if( X == B )
-    {
+    if (X == B) {
         const mbedtls_mpi *T = A; A = X; B = T;
     }
 
-    if( X != A )
-        MBEDTLS_MPI_CHK( mbedtls_mpi_copy( X, A ) );
+    if (X != A) {
+        MBEDTLS_MPI_CHK(mbedtls_mpi_copy(X, A));
+    }
 
     /*
-     * X should always be positive as a result of unsigned additions.
+     * X must always be positive as a result of unsigned additions.
      */
     X->s = 1;
 
-    for( j = B->n; j > 0; j-- )
-        if( B->p[j - 1] != 0 )
+    for (j = B->n; j > 0; j--) {
+        if (B->p[j - 1] != 0) {
             break;
-
-    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, j ) );
-
-    o = B->p; p = X->p; c = 0;
-
-    /*
-     * tmp is used because it might happen that p == o
-     */
-    for( i = 0; i < j; i++, o++, p++ )
-    {
-        tmp= *o;
-        *p +=  c; c  = ( *p <  c );
-        *p += tmp; c += ( *p < tmp );
+        }
     }
 
-    while( c != 0 )
-    {
-        if( i >= X->n )
-        {
-            MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, i + 1 ) );
-            p = X->p + i;
+    /* Exit early to avoid undefined behavior on NULL+0 when X->n == 0
+     * and B is 0 (of any size). */
+    if (j == 0) {
+        return 0;
+    }
+
+    MBEDTLS_MPI_CHK(mbedtls_mpi_grow(X, j));
+
+    /* j is the number of non-zero limbs of B. Add those to X. */
+
+    mbedtls_mpi_uint *p = X->p;
+
+    mbedtls_mpi_uint c = mbedtls_mpi_core_add(p, p, B->p, j);
+
+    p += j;
+
+    /* Now propagate any carry */
+
+    while (c != 0) {
+        if (j >= X->n) {
+            MBEDTLS_MPI_CHK(mbedtls_mpi_grow(X, j + 1));
+            p = X->p + j;
         }
 
-        *p += c; c = ( *p < c ); i++; p++;
+        *p += c; c = (*p < c); j++; p++;
     }
 
 cleanup:
 
-    return( ret );
-}
-
-/**
- * Helper for mbedtls_mpi subtraction.
- *
- * Calculate l - r where l and r have the same size.
- * This function operates modulo (2^ciL)^n and returns the carry
- * (1 if there was a wraparound, i.e. if `l < r`, and 0 otherwise).
- *
- * d may be aliased to l or r.
- *
- * \param n             Number of limbs of \p d, \p l and \p r.
- * \param[out] d        The result of the subtraction.
- * \param[in] l         The left operand.
- * \param[in] r         The right operand.
- *
- * \return              1 if `l < r`.
- *                      0 if `l >= r`.
- */
-static mbedtls_mpi_uint mpi_sub_hlp( size_t n,
-                                     mbedtls_mpi_uint *d,
-                                     const mbedtls_mpi_uint *l,
-                                     const mbedtls_mpi_uint *r )
-{
-    size_t i;
-    mbedtls_mpi_uint c = 0, t, z;
-
-    for( i = 0; i < n; i++ )
-    {
-        z = ( l[i] <  c );    t = l[i] - c;
-        c = ( t < r[i] ) + z; d[i] = t - r[i];
-    }
-
-    return( c );
+    return ret;
 }
 
 /*
  * Unsigned subtraction: X = |A| - |B|  (HAC 14.9, 14.10)
  */
-int mbedtls_mpi_sub_abs( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B )
+int mbedtls_mpi_sub_abs(mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B)
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     size_t n;
     mbedtls_mpi_uint carry;
-    MPI_VALIDATE_RET( X != NULL );
-    MPI_VALIDATE_RET( A != NULL );
-    MPI_VALIDATE_RET( B != NULL );
+    MPI_VALIDATE_RET(X != NULL);
+    MPI_VALIDATE_RET(A != NULL);
+    MPI_VALIDATE_RET(B != NULL);
 
-    for( n = B->n; n > 0; n-- )
-        if( B->p[n - 1] != 0 )
+    for (n = B->n; n > 0; n--) {
+        if (B->p[n - 1] != 0) {
             break;
-    if( n > A->n )
-    {
+        }
+    }
+    if (n > A->n) {
         /* B >= (2^ciL)^n > A */
         ret = MBEDTLS_ERR_MPI_NEGATIVE_VALUE;
         goto cleanup;
     }
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, A->n ) );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_grow(X, A->n));
 
     /* Set the high limbs of X to match A. Don't touch the lower limbs
      * because X might be aliased to B, and we must not overwrite the
      * significant digits of B. */
-    if( A->n > n )
-        memcpy( X->p + n, A->p + n, ( A->n - n ) * ciL );
-    if( X->n > A->n )
-        memset( X->p + A->n, 0, ( X->n - A->n ) * ciL );
+    if (A->n > n && A != X) {
+        memcpy(X->p + n, A->p + n, (A->n - n) * ciL);
+    }
+    if (X->n > A->n) {
+        memset(X->p + A->n, 0, (X->n - A->n) * ciL);
+    }
 
-    carry = mpi_sub_hlp( n, X->p, A->p, B->p );
-    if( carry != 0 )
-    {
-        /* Propagate the carry to the first nonzero limb of X. */
-        for( ; n < X->n && X->p[n] == 0; n++ )
-            --X->p[n];
-        /* If we ran out of space for the carry, it means that the result
-         * is negative. */
-        if( n == X->n )
-        {
+    carry = mbedtls_mpi_core_sub(X->p, A->p, B->p, n);
+    if (carry != 0) {
+        /* Propagate the carry through the rest of X. */
+        carry = mbedtls_mpi_core_sub_int(X->p + n, X->p + n, carry, X->n - n);
+
+        /* If we have further carry/borrow, the result is negative. */
+        if (carry != 0) {
             ret = MBEDTLS_ERR_MPI_NEGATIVE_VALUE;
             goto cleanup;
         }
-        --X->p[n];
     }
 
     /* X should always be positive as a result of unsigned subtractions. */
     X->s = 1;
 
 cleanup:
-    return( ret );
+    return ret;
+}
+
+/* Common function for signed addition and subtraction.
+ * Calculate A + B * flip_B where flip_B is 1 or -1.
+ */
+static int add_sub_mpi(mbedtls_mpi *X,
+                       const mbedtls_mpi *A, const mbedtls_mpi *B,
+                       int flip_B)
+{
+    int ret, s;
+    MPI_VALIDATE_RET(X != NULL);
+    MPI_VALIDATE_RET(A != NULL);
+    MPI_VALIDATE_RET(B != NULL);
+
+    s = A->s;
+    if (A->s * B->s * flip_B < 0) {
+        int cmp = mbedtls_mpi_cmp_abs(A, B);
+        if (cmp >= 0) {
+            MBEDTLS_MPI_CHK(mbedtls_mpi_sub_abs(X, A, B));
+            /* If |A| = |B|, the result is 0 and we must set the sign bit
+             * to +1 regardless of which of A or B was negative. Otherwise,
+             * since |A| > |B|, the sign is the sign of A. */
+            X->s = cmp == 0 ? 1 : s;
+        } else {
+            MBEDTLS_MPI_CHK(mbedtls_mpi_sub_abs(X, B, A));
+            /* Since |A| < |B|, the sign is the opposite of A. */
+            X->s = -s;
+        }
+    } else {
+        MBEDTLS_MPI_CHK(mbedtls_mpi_add_abs(X, A, B));
+        X->s = s;
+    }
+
+cleanup:
+
+    return ret;
 }
 
 /*
  * Signed addition: X = A + B
  */
-int mbedtls_mpi_add_mpi( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B )
+int mbedtls_mpi_add_mpi(mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B)
 {
-    int ret, s;
-    MPI_VALIDATE_RET( X != NULL );
-    MPI_VALIDATE_RET( A != NULL );
-    MPI_VALIDATE_RET( B != NULL );
-
-    s = A->s;
-    if( A->s * B->s < 0 )
-    {
-        if( mbedtls_mpi_cmp_abs( A, B ) >= 0 )
-        {
-            MBEDTLS_MPI_CHK( mbedtls_mpi_sub_abs( X, A, B ) );
-            X->s =  s;
-        }
-        else
-        {
-            MBEDTLS_MPI_CHK( mbedtls_mpi_sub_abs( X, B, A ) );
-            X->s = -s;
-        }
-    }
-    else
-    {
-        MBEDTLS_MPI_CHK( mbedtls_mpi_add_abs( X, A, B ) );
-        X->s = s;
-    }
-
-cleanup:
-
-    return( ret );
+    return add_sub_mpi(X, A, B, 1);
 }
 
 /*
  * Signed subtraction: X = A - B
  */
-int mbedtls_mpi_sub_mpi( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B )
+int mbedtls_mpi_sub_mpi(mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B)
 {
-    int ret, s;
-    MPI_VALIDATE_RET( X != NULL );
-    MPI_VALIDATE_RET( A != NULL );
-    MPI_VALIDATE_RET( B != NULL );
-
-    s = A->s;
-    if( A->s * B->s > 0 )
-    {
-        if( mbedtls_mpi_cmp_abs( A, B ) >= 0 )
-        {
-            MBEDTLS_MPI_CHK( mbedtls_mpi_sub_abs( X, A, B ) );
-            X->s =  s;
-        }
-        else
-        {
-            MBEDTLS_MPI_CHK( mbedtls_mpi_sub_abs( X, B, A ) );
-            X->s = -s;
-        }
-    }
-    else
-    {
-        MBEDTLS_MPI_CHK( mbedtls_mpi_add_abs( X, A, B ) );
-        X->s = s;
-    }
-
-cleanup:
-
-    return( ret );
+    return add_sub_mpi(X, A, B, -1);
 }
 
 /*
  * Signed addition: X = A + b
  */
-int mbedtls_mpi_add_int( mbedtls_mpi *X, const mbedtls_mpi *A, mbedtls_mpi_sint b )
+int mbedtls_mpi_add_int(mbedtls_mpi *X, const mbedtls_mpi *A, mbedtls_mpi_sint b)
 {
     mbedtls_mpi B;
     mbedtls_mpi_uint p[1];
-    MPI_VALIDATE_RET( X != NULL );
-    MPI_VALIDATE_RET( A != NULL );
+    MPI_VALIDATE_RET(X != NULL);
+    MPI_VALIDATE_RET(A != NULL);
 
-    p[0] = ( b < 0 ) ? -b : b;
-    B.s = ( b < 0 ) ? -1 : 1;
+    p[0] = mpi_sint_abs(b);
+    B.s = (b < 0) ? -1 : 1;
     B.n = 1;
     B.p = p;
 
-    return( mbedtls_mpi_add_mpi( X, A, &B ) );
+    return mbedtls_mpi_add_mpi(X, A, &B);
 }
 
 /*
  * Signed subtraction: X = A - b
  */
-int mbedtls_mpi_sub_int( mbedtls_mpi *X, const mbedtls_mpi *A, mbedtls_mpi_sint b )
+int mbedtls_mpi_sub_int(mbedtls_mpi *X, const mbedtls_mpi *A, mbedtls_mpi_sint b)
 {
     mbedtls_mpi B;
     mbedtls_mpi_uint p[1];
-    MPI_VALIDATE_RET( X != NULL );
-    MPI_VALIDATE_RET( A != NULL );
+    MPI_VALIDATE_RET(X != NULL);
+    MPI_VALIDATE_RET(A != NULL);
 
-    p[0] = ( b < 0 ) ? -b : b;
-    B.s = ( b < 0 ) ? -1 : 1;
+    p[0] = mpi_sint_abs(b);
+    B.s = (b < 0) ? -1 : 1;
     B.n = 1;
     B.p = p;
 
-    return( mbedtls_mpi_sub_mpi( X, A, &B ) );
-}
-
-/** Helper for mbedtls_mpi multiplication.
- *
- * Add \p b * \p s to \p d.
- *
- * \param i             The number of limbs of \p s.
- * \param[in] s         A bignum to multiply, of size \p i.
- *                      It may overlap with \p d, but only if
- *                      \p d <= \p s.
- *                      Its leading limb must not be \c 0.
- * \param[in,out] d     The bignum to add to.
- *                      It must be sufficiently large to store the
- *                      result of the multiplication. This means
- *                      \p i + 1 limbs if \p d[\p i - 1] started as 0 and \p b
- *                      is not known a priori.
- * \param b             A scalar to multiply.
- */
-static
-#if defined(__APPLE__) && defined(__arm__)
-/*
- * Apple LLVM version 4.2 (clang-425.0.24) (based on LLVM 3.2svn)
- * appears to need this to prevent bad ARM code generation at -O3.
- */
-__attribute__ ((noinline))
-#endif
-void mpi_mul_hlp( size_t i,
-                  const mbedtls_mpi_uint *s,
-                  mbedtls_mpi_uint *d,
-                  mbedtls_mpi_uint b )
-{
-    mbedtls_mpi_uint c = 0, t = 0;
-
-#if defined(MULADDC_HUIT)
-    for( ; i >= 8; i -= 8 )
-    {
-        MULADDC_INIT
-        MULADDC_HUIT
-        MULADDC_STOP
-    }
-
-    for( ; i > 0; i-- )
-    {
-        MULADDC_INIT
-        MULADDC_CORE
-        MULADDC_STOP
-    }
-#else /* MULADDC_HUIT */
-    for( ; i >= 16; i -= 16 )
-    {
-        MULADDC_INIT
-        MULADDC_CORE   MULADDC_CORE
-        MULADDC_CORE   MULADDC_CORE
-        MULADDC_CORE   MULADDC_CORE
-        MULADDC_CORE   MULADDC_CORE
-
-        MULADDC_CORE   MULADDC_CORE
-        MULADDC_CORE   MULADDC_CORE
-        MULADDC_CORE   MULADDC_CORE
-        MULADDC_CORE   MULADDC_CORE
-        MULADDC_STOP
-    }
-
-    for( ; i >= 8; i -= 8 )
-    {
-        MULADDC_INIT
-        MULADDC_CORE   MULADDC_CORE
-        MULADDC_CORE   MULADDC_CORE
-
-        MULADDC_CORE   MULADDC_CORE
-        MULADDC_CORE   MULADDC_CORE
-        MULADDC_STOP
-    }
-
-    for( ; i > 0; i-- )
-    {
-        MULADDC_INIT
-        MULADDC_CORE
-        MULADDC_STOP
-    }
-#endif /* MULADDC_HUIT */
-
-    t++;
-
-    while( c != 0 )
-    {
-        *d += c; c = ( *d < c ); d++;
-    }
+    return mbedtls_mpi_sub_mpi(X, A, &B);
 }
 
 /*
  * Baseline multiplication: X = A * B  (HAC 14.12)
  */
-int mbedtls_mpi_mul_mpi( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B )
+int mbedtls_mpi_mul_mpi(mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *B)
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     size_t i, j;
     mbedtls_mpi TA, TB;
     int result_is_zero = 0;
-    MPI_VALIDATE_RET( X != NULL );
-    MPI_VALIDATE_RET( A != NULL );
-    MPI_VALIDATE_RET( B != NULL );
+    MPI_VALIDATE_RET(X != NULL);
+    MPI_VALIDATE_RET(A != NULL);
+    MPI_VALIDATE_RET(B != NULL);
 
-    mbedtls_mpi_init_mempool( &TA ); mbedtls_mpi_init_mempool( &TB );
+    mbedtls_mpi_init_mempool(&TA); mbedtls_mpi_init_mempool(&TB);
 
-    if( X == A ) { MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TA, A ) ); A = &TA; }
-    if( X == B ) { MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TB, B ) ); B = &TB; }
+    if (X == A) {
+        MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&TA, A)); A = &TA;
+    }
+    if (X == B) {
+        MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&TB, B)); B = &TB;
+    }
 
-    for( i = A->n; i > 0; i-- )
-        if( A->p[i - 1] != 0 )
+    for (i = A->n; i > 0; i--) {
+        if (A->p[i - 1] != 0) {
             break;
-    if( i == 0 )
+        }
+    }
+    if (i == 0) {
         result_is_zero = 1;
+    }
 
-    for( j = B->n; j > 0; j-- )
-        if( B->p[j - 1] != 0 )
+    for (j = B->n; j > 0; j--) {
+        if (B->p[j - 1] != 0) {
             break;
-    if( j == 0 )
+        }
+    }
+    if (j == 0) {
         result_is_zero = 1;
+    }
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, i + j ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_lset( X, 0 ) );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_grow(X, i + j));
+    MBEDTLS_MPI_CHK(mbedtls_mpi_lset(X, 0));
 
-    for( ; j > 0; j-- )
-        mpi_mul_hlp( i, A->p, X->p + j - 1, B->p[j - 1] );
+    for (size_t k = 0; k < j; k++) {
+        /* We know that there cannot be any carry-out since we're
+         * iterating from bottom to top. */
+        (void) mbedtls_mpi_core_mla(X->p + k, i + 1,
+                                    A->p, i,
+                                    B->p[k]);
+    }
 
     /* If the result is 0, we don't shortcut the operation, which reduces
      * but does not eliminate side channels leaking the zero-ness. We do
      * need to take care to set the sign bit properly since the library does
      * not fully support an MPI object with a value of 0 and s == -1. */
-    if( result_is_zero )
+    if (result_is_zero) {
         X->s = 1;
-    else
+    } else {
         X->s = A->s * B->s;
+    }
 
 cleanup:
 
-    mbedtls_mpi_free( &TB ); mbedtls_mpi_free( &TA );
+    mbedtls_mpi_free(&TB); mbedtls_mpi_free(&TA);
 
-    return( ret );
+    return ret;
 }
 
 /*
  * Baseline multiplication: X = A * b
  */
-int mbedtls_mpi_mul_int( mbedtls_mpi *X, const mbedtls_mpi *A, mbedtls_mpi_uint b )
+int mbedtls_mpi_mul_int(mbedtls_mpi *X, const mbedtls_mpi *A, mbedtls_mpi_uint b)
 {
-    MPI_VALIDATE_RET( X != NULL );
-    MPI_VALIDATE_RET( A != NULL );
+    MPI_VALIDATE_RET(X != NULL);
+    MPI_VALIDATE_RET(A != NULL);
 
-    /* mpi_mul_hlp can't deal with a leading 0. */
     size_t n = A->n;
-    while( n > 0 && A->p[n - 1] == 0 )
+    while (n > 0 && A->p[n - 1] == 0) {
         --n;
-
-    /* The general method below doesn't work if n==0 or b==0. By chance
-     * calculating the result is trivial in those cases. */
-    if( b == 0 || n == 0 )
-    {
-        return( mbedtls_mpi_lset( X, 0 ) );
     }
 
-    /* Calculate A*b as A + A*(b-1) to take advantage of mpi_mul_hlp */
+    /* The general method below doesn't work if b==0. */
+    if (b == 0 || n == 0) {
+        return mbedtls_mpi_lset(X, 0);
+    }
+
+    /* Calculate A*b as A + A*(b-1) to take advantage of mbedtls_mpi_core_mla */
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     /* In general, A * b requires 1 limb more than b. If
      * A->p[n - 1] * b / b == A->p[n - 1], then A * b fits in the same
@@ -1579,27 +1256,32 @@
      * making the call to grow() unconditional causes slightly fewer
      * calls to calloc() in ECP code, presumably because it reuses the
      * same mpi for a while and this way the mpi is more likely to directly
-     * grow to its final size. */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, n + 1 ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( X, A ) );
-    mpi_mul_hlp( n, A->p, X->p, b - 1 );
+     * grow to its final size.
+     *
+     * Note that calculating A*b as 0 + A*b doesn't work as-is because
+     * A,X can be the same. */
+    MBEDTLS_MPI_CHK(mbedtls_mpi_grow(X, n + 1));
+    MBEDTLS_MPI_CHK(mbedtls_mpi_copy(X, A));
+    mbedtls_mpi_core_mla(X->p, X->n, A->p, n, b - 1);
 
 cleanup:
-    return( ret );
+    return ret;
 }
 
 /*
  * Unsigned integer divide - double mbedtls_mpi_uint dividend, u1/u0, and
  * mbedtls_mpi_uint divisor, d
  */
-static mbedtls_mpi_uint mbedtls_int_div_int( mbedtls_mpi_uint u1,
-            mbedtls_mpi_uint u0, mbedtls_mpi_uint d, mbedtls_mpi_uint *r )
+static mbedtls_mpi_uint mbedtls_int_div_int(mbedtls_mpi_uint u1,
+                                            mbedtls_mpi_uint u0,
+                                            mbedtls_mpi_uint d,
+                                            mbedtls_mpi_uint *r)
 {
 #if defined(MBEDTLS_HAVE_UDBL)
     mbedtls_t_udbl dividend, quotient;
 #else
     const mbedtls_mpi_uint radix = (mbedtls_mpi_uint) 1 << biH;
-    const mbedtls_mpi_uint uint_halfword_mask = ( (mbedtls_mpi_uint) 1 << biH ) - 1;
+    const mbedtls_mpi_uint uint_halfword_mask = ((mbedtls_mpi_uint) 1 << biH) - 1;
     mbedtls_mpi_uint d0, d1, q0, q1, rAX, r0, quotient;
     mbedtls_mpi_uint u0_msw, u0_lsw;
     size_t s;
@@ -1608,22 +1290,25 @@
     /*
      * Check for overflow
      */
-    if( 0 == d || u1 >= d )
-    {
-        if (r != NULL) *r = ~0;
+    if (0 == d || u1 >= d) {
+        if (r != NULL) {
+            *r = ~(mbedtls_mpi_uint) 0u;
+        }
 
-        return ( ~0 );
+        return ~(mbedtls_mpi_uint) 0u;
     }
 
 #if defined(MBEDTLS_HAVE_UDBL)
     dividend  = (mbedtls_t_udbl) u1 << biL;
     dividend |= (mbedtls_t_udbl) u0;
     quotient = dividend / d;
-    if( quotient > ( (mbedtls_t_udbl) 1 << biL ) - 1 )
-        quotient = ( (mbedtls_t_udbl) 1 << biL ) - 1;
+    if (quotient > ((mbedtls_t_udbl) 1 << biL) - 1) {
+        quotient = ((mbedtls_t_udbl) 1 << biL) - 1;
+    }
 
-    if( r != NULL )
-        *r = (mbedtls_mpi_uint)( dividend - (quotient * d ) );
+    if (r != NULL) {
+        *r = (mbedtls_mpi_uint) (dividend - (quotient * d));
+    }
 
     return (mbedtls_mpi_uint) quotient;
 #else
@@ -1636,11 +1321,11 @@
     /*
      * Normalize the divisor, d, and dividend, u0, u1
      */
-    s = mbedtls_clz( d );
+    s = mbedtls_mpi_core_clz(d);
     d = d << s;
 
     u1 = u1 << s;
-    u1 |= ( u0 >> ( biL - s ) ) & ( -(mbedtls_mpi_sint)s >> ( biL - 1 ) );
+    u1 |= (u0 >> (biL - s)) & (-(mbedtls_mpi_sint) s >> (biL - 1));
     u0 =  u0 << s;
 
     d1 = d >> biH;
@@ -1655,28 +1340,31 @@
     q1 = u1 / d1;
     r0 = u1 - d1 * q1;
 
-    while( q1 >= radix || ( q1 * d0 > radix * r0 + u0_msw ) )
-    {
+    while (q1 >= radix || (q1 * d0 > radix * r0 + u0_msw)) {
         q1 -= 1;
         r0 += d1;
 
-        if ( r0 >= radix ) break;
+        if (r0 >= radix) {
+            break;
+        }
     }
 
-    rAX = ( u1 * radix ) + ( u0_msw - q1 * d );
+    rAX = (u1 * radix) + (u0_msw - q1 * d);
     q0 = rAX / d1;
     r0 = rAX - q0 * d1;
 
-    while( q0 >= radix || ( q0 * d0 > radix * r0 + u0_lsw ) )
-    {
+    while (q0 >= radix || (q0 * d0 > radix * r0 + u0_lsw)) {
         q0 -= 1;
         r0 += d1;
 
-        if ( r0 >= radix ) break;
+        if (r0 >= radix) {
+            break;
+        }
     }
 
-    if (r != NULL)
-        *r = ( rAX * radix + u0_lsw - q0 * d ) >> s;
+    if (r != NULL) {
+        *r = (rAX * radix + u0_lsw - q0 * d) >> s;
+    }
 
     quotient = q1 * radix + q0;
 
@@ -1687,21 +1375,22 @@
 /*
  * Division by mbedtls_mpi: A = Q * B + R  (HAC 14.20)
  */
-int mbedtls_mpi_div_mpi( mbedtls_mpi *Q, mbedtls_mpi *R, const mbedtls_mpi *A,
-                         const mbedtls_mpi *B )
+int mbedtls_mpi_div_mpi(mbedtls_mpi *Q, mbedtls_mpi *R, const mbedtls_mpi *A,
+                        const mbedtls_mpi *B)
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     size_t i, n, t, k;
     mbedtls_mpi X, Y, Z, T1, T2;
     mbedtls_mpi_uint TP2[3];
-    MPI_VALIDATE_RET( A != NULL );
-    MPI_VALIDATE_RET( B != NULL );
+    MPI_VALIDATE_RET(A != NULL);
+    MPI_VALIDATE_RET(B != NULL);
 
-    if( mbedtls_mpi_cmp_int( B, 0 ) == 0 )
-        return( MBEDTLS_ERR_MPI_DIVISION_BY_ZERO );
+    if (mbedtls_mpi_cmp_int(B, 0) == 0) {
+        return MBEDTLS_ERR_MPI_DIVISION_BY_ZERO;
+    }
 
-    mbedtls_mpi_init_mempool( &X ); mbedtls_mpi_init_mempool( &Y );
-    mbedtls_mpi_init_mempool( &Z ); mbedtls_mpi_init_mempool( &T1 );
+    mbedtls_mpi_init_mempool(&X); mbedtls_mpi_init_mempool(&Y);
+    mbedtls_mpi_init_mempool(&Z); mbedtls_mpi_init_mempool(&T1);
     /*
      * Avoid dynamic memory allocations for constant-size T2.
      *
@@ -1710,196 +1399,194 @@
      * buffer.
      */
     T2.s = 1;
-    T2.n = sizeof( TP2 ) / sizeof( *TP2 );
+    T2.n = sizeof(TP2) / sizeof(*TP2);
     T2.p = TP2;
 
-    if( mbedtls_mpi_cmp_abs( A, B ) < 0 )
-    {
-        if( Q != NULL ) MBEDTLS_MPI_CHK( mbedtls_mpi_lset( Q, 0 ) );
-        if( R != NULL ) MBEDTLS_MPI_CHK( mbedtls_mpi_copy( R, A ) );
-        return( 0 );
+    if (mbedtls_mpi_cmp_abs(A, B) < 0) {
+        if (Q != NULL) {
+            MBEDTLS_MPI_CHK(mbedtls_mpi_lset(Q, 0));
+        }
+        if (R != NULL) {
+            MBEDTLS_MPI_CHK(mbedtls_mpi_copy(R, A));
+        }
+        return 0;
     }
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &X, A ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &Y, B ) );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&X, A));
+    MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&Y, B));
     X.s = Y.s = 1;
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &Z, A->n + 2 ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &Z,  0 ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &T1, A->n + 2 ) );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_grow(&Z, A->n + 2));
+    MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&Z,  0));
+    MBEDTLS_MPI_CHK(mbedtls_mpi_grow(&T1, A->n + 2));
 
-    k = mbedtls_mpi_bitlen( &Y ) % biL;
-    if( k < biL - 1 )
-    {
+    k = mbedtls_mpi_bitlen(&Y) % biL;
+    if (k < biL - 1) {
         k = biL - 1 - k;
-        MBEDTLS_MPI_CHK( mbedtls_mpi_shift_l( &X, k ) );
-        MBEDTLS_MPI_CHK( mbedtls_mpi_shift_l( &Y, k ) );
+        MBEDTLS_MPI_CHK(mbedtls_mpi_shift_l(&X, k));
+        MBEDTLS_MPI_CHK(mbedtls_mpi_shift_l(&Y, k));
+    } else {
+        k = 0;
     }
-    else k = 0;
 
     n = X.n - 1;
     t = Y.n - 1;
-    MBEDTLS_MPI_CHK( mbedtls_mpi_shift_l( &Y, biL * ( n - t ) ) );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_shift_l(&Y, biL * (n - t)));
 
-    while( mbedtls_mpi_cmp_mpi( &X, &Y ) >= 0 )
-    {
+    while (mbedtls_mpi_cmp_mpi(&X, &Y) >= 0) {
         Z.p[n - t]++;
-        MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &X, &X, &Y ) );
+        MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(&X, &X, &Y));
     }
-    MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &Y, biL * ( n - t ) ) );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_shift_r(&Y, biL * (n - t)));
 
-    for( i = n; i > t ; i-- )
-    {
-        if( X.p[i] >= Y.p[t] )
-            Z.p[i - t - 1] = ~0;
-        else
-        {
-            Z.p[i - t - 1] = mbedtls_int_div_int( X.p[i], X.p[i - 1],
-                                                            Y.p[t], NULL);
+    for (i = n; i > t; i--) {
+        if (X.p[i] >= Y.p[t]) {
+            Z.p[i - t - 1] = ~(mbedtls_mpi_uint) 0u;
+        } else {
+            Z.p[i - t - 1] = mbedtls_int_div_int(X.p[i], X.p[i - 1],
+                                                 Y.p[t], NULL);
         }
 
-        T2.p[0] = ( i < 2 ) ? 0 : X.p[i - 2];
-        T2.p[1] = ( i < 1 ) ? 0 : X.p[i - 1];
+        T2.p[0] = (i < 2) ? 0 : X.p[i - 2];
+        T2.p[1] = (i < 1) ? 0 : X.p[i - 1];
         T2.p[2] = X.p[i];
 
         Z.p[i - t - 1]++;
-        do
-        {
+        do {
             Z.p[i - t - 1]--;
 
-            MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &T1, 0 ) );
-            T1.p[0] = ( t < 1 ) ? 0 : Y.p[t - 1];
+            MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&T1, 0));
+            T1.p[0] = (t < 1) ? 0 : Y.p[t - 1];
             T1.p[1] = Y.p[t];
-            MBEDTLS_MPI_CHK( mbedtls_mpi_mul_int( &T1, &T1, Z.p[i - t - 1] ) );
-        }
-        while( mbedtls_mpi_cmp_mpi( &T1, &T2 ) > 0 );
+            MBEDTLS_MPI_CHK(mbedtls_mpi_mul_int(&T1, &T1, Z.p[i - t - 1]));
+        } while (mbedtls_mpi_cmp_mpi(&T1, &T2) > 0);
 
-        MBEDTLS_MPI_CHK( mbedtls_mpi_mul_int( &T1, &Y, Z.p[i - t - 1] ) );
-        MBEDTLS_MPI_CHK( mbedtls_mpi_shift_l( &T1,  biL * ( i - t - 1 ) ) );
-        MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &X, &X, &T1 ) );
+        MBEDTLS_MPI_CHK(mbedtls_mpi_mul_int(&T1, &Y, Z.p[i - t - 1]));
+        MBEDTLS_MPI_CHK(mbedtls_mpi_shift_l(&T1,  biL * (i - t - 1)));
+        MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(&X, &X, &T1));
 
-        if( mbedtls_mpi_cmp_int( &X, 0 ) < 0 )
-        {
-            MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &T1, &Y ) );
-            MBEDTLS_MPI_CHK( mbedtls_mpi_shift_l( &T1, biL * ( i - t - 1 ) ) );
-            MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( &X, &X, &T1 ) );
+        if (mbedtls_mpi_cmp_int(&X, 0) < 0) {
+            MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&T1, &Y));
+            MBEDTLS_MPI_CHK(mbedtls_mpi_shift_l(&T1, biL * (i - t - 1)));
+            MBEDTLS_MPI_CHK(mbedtls_mpi_add_mpi(&X, &X, &T1));
             Z.p[i - t - 1]--;
         }
     }
 
-    if( Q != NULL )
-    {
-        MBEDTLS_MPI_CHK( mbedtls_mpi_copy( Q, &Z ) );
+    if (Q != NULL) {
+        MBEDTLS_MPI_CHK(mbedtls_mpi_copy(Q, &Z));
         Q->s = A->s * B->s;
     }
 
-    if( R != NULL )
-    {
-        MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &X, k ) );
+    if (R != NULL) {
+        MBEDTLS_MPI_CHK(mbedtls_mpi_shift_r(&X, k));
         X.s = A->s;
-        MBEDTLS_MPI_CHK( mbedtls_mpi_copy( R, &X ) );
+        MBEDTLS_MPI_CHK(mbedtls_mpi_copy(R, &X));
 
-        if( mbedtls_mpi_cmp_int( R, 0 ) == 0 )
+        if (mbedtls_mpi_cmp_int(R, 0) == 0) {
             R->s = 1;
+        }
     }
 
 cleanup:
 
-    mbedtls_mpi_free( &X ); mbedtls_mpi_free( &Y ); mbedtls_mpi_free( &Z );
-    mbedtls_mpi_free( &T1 );
-    mbedtls_platform_zeroize( TP2, sizeof( TP2 ) );
+    mbedtls_mpi_free(&X); mbedtls_mpi_free(&Y); mbedtls_mpi_free(&Z);
+    mbedtls_mpi_free(&T1);
+    mbedtls_platform_zeroize(TP2, sizeof(TP2));
 
-    return( ret );
+    return ret;
 }
 
 /*
  * Division by int: A = Q * b + R
  */
-int mbedtls_mpi_div_int( mbedtls_mpi *Q, mbedtls_mpi *R,
-                         const mbedtls_mpi *A,
-                         mbedtls_mpi_sint b )
+int mbedtls_mpi_div_int(mbedtls_mpi *Q, mbedtls_mpi *R,
+                        const mbedtls_mpi *A,
+                        mbedtls_mpi_sint b)
 {
     mbedtls_mpi B;
     mbedtls_mpi_uint p[1];
-    MPI_VALIDATE_RET( A != NULL );
+    MPI_VALIDATE_RET(A != NULL);
 
-    p[0] = ( b < 0 ) ? -b : b;
-    B.s = ( b < 0 ) ? -1 : 1;
+    p[0] = mpi_sint_abs(b);
+    B.s = (b < 0) ? -1 : 1;
     B.n = 1;
     B.p = p;
 
-    return( mbedtls_mpi_div_mpi( Q, R, A, &B ) );
+    return mbedtls_mpi_div_mpi(Q, R, A, &B);
 }
 
 /*
  * Modulo: R = A mod B
  */
-int mbedtls_mpi_mod_mpi( mbedtls_mpi *R, const mbedtls_mpi *A, const mbedtls_mpi *B )
+int mbedtls_mpi_mod_mpi(mbedtls_mpi *R, const mbedtls_mpi *A, const mbedtls_mpi *B)
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
-    MPI_VALIDATE_RET( R != NULL );
-    MPI_VALIDATE_RET( A != NULL );
-    MPI_VALIDATE_RET( B != NULL );
+    MPI_VALIDATE_RET(R != NULL);
+    MPI_VALIDATE_RET(A != NULL);
+    MPI_VALIDATE_RET(B != NULL);
 
-    if( mbedtls_mpi_cmp_int( B, 0 ) < 0 )
-        return( MBEDTLS_ERR_MPI_NEGATIVE_VALUE );
+    if (mbedtls_mpi_cmp_int(B, 0) < 0) {
+        return MBEDTLS_ERR_MPI_NEGATIVE_VALUE;
+    }
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_div_mpi( NULL, R, A, B ) );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_div_mpi(NULL, R, A, B));
 
-    while( mbedtls_mpi_cmp_int( R, 0 ) < 0 )
-      MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( R, R, B ) );
+    while (mbedtls_mpi_cmp_int(R, 0) < 0) {
+        MBEDTLS_MPI_CHK(mbedtls_mpi_add_mpi(R, R, B));
+    }
 
-    while( mbedtls_mpi_cmp_mpi( R, B ) >= 0 )
-      MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( R, R, B ) );
+    while (mbedtls_mpi_cmp_mpi(R, B) >= 0) {
+        MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(R, R, B));
+    }
 
 cleanup:
 
-    return( ret );
+    return ret;
 }
 
 /*
  * Modulo: r = A mod b
  */
-int mbedtls_mpi_mod_int( mbedtls_mpi_uint *r, const mbedtls_mpi *A, mbedtls_mpi_sint b )
+int mbedtls_mpi_mod_int(mbedtls_mpi_uint *r, const mbedtls_mpi *A, mbedtls_mpi_sint b)
 {
     size_t i;
     mbedtls_mpi_uint x, y, z;
-    MPI_VALIDATE_RET( r != NULL );
-    MPI_VALIDATE_RET( A != NULL );
+    MPI_VALIDATE_RET(r != NULL);
+    MPI_VALIDATE_RET(A != NULL);
 
-    if( b == 0 )
-        return( MBEDTLS_ERR_MPI_DIVISION_BY_ZERO );
+    if (b == 0) {
+        return MBEDTLS_ERR_MPI_DIVISION_BY_ZERO;
+    }
 
-    if( b < 0 )
-        return( MBEDTLS_ERR_MPI_NEGATIVE_VALUE );
+    if (b < 0) {
+        return MBEDTLS_ERR_MPI_NEGATIVE_VALUE;
+    }
 
     /*
      * handle trivial cases
      */
-    if( b == 1 || A->n == 0 )
-    {
+    if (b == 1 || A->n == 0) {
         *r = 0;
-        return( 0 );
+        return 0;
     }
 
-    if( b == 2 )
-    {
+    if (b == 2) {
         *r = A->p[0] & 1;
-        return( 0 );
+        return 0;
     }
 
     /*
      * general case
      */
-    for( i = A->n, y = 0; i > 0; i-- )
-    {
+    for (i = A->n, y = 0; i > 0; i--) {
         x  = A->p[i - 1];
-        y  = ( y << biH ) | ( x >> biH );
+        y  = (y << biH) | (x >> biH);
         z  = y / b;
         y -= z * b;
 
         x <<= biH;
-        y  = ( y << biH ) | ( x >> biH );
+        y  = (y << biH) | (x >> biH);
         z  = y / b;
         y -= z * b;
     }
@@ -1908,29 +1595,18 @@
      * If A is negative, then the current y represents a negative value.
      * Flipping it to the positive side.
      */
-    if( A->s < 0 && y != 0 )
+    if (A->s < 0 && y != 0) {
         y = b - y;
+    }
 
     *r = y;
 
-    return( 0 );
+    return 0;
 }
 
-/*
- * Fast Montgomery initialization (thanks to Tom St Denis)
- */
-static void mpi_montg_init( mbedtls_mpi_uint *mm, const mbedtls_mpi *N )
+static void mpi_montg_init(mbedtls_mpi_uint *mm, const mbedtls_mpi *N)
 {
-    mbedtls_mpi_uint x, m0 = N->p[0];
-    unsigned int i;
-
-    x  = m0;
-    x += ( ( m0 + 2 ) & 4 ) << 1;
-
-    for( i = biL; i >= 8; i /= 2 )
-        x *= ( 2 - ( m0 * x ) );
-
-    *mm = ~x + 1;
+    *mm = mbedtls_mpi_core_montmul_init(N->p);
 }
 
 void mbedtls_mpi_montg_init( mbedtls_mpi_uint *mm, const mbedtls_mpi *N )
@@ -1949,66 +1625,26 @@
  * \param[in]       B   One of the numbers to multiply.
  *                      It must be nonzero and must not have more limbs than N
  *                      (B->n <= N->n).
- * \param[in]       N   The modulo. N must be odd.
+ * \param[in]       N   The modulus. \p N must be odd.
  * \param           mm  The value calculated by `mpi_montg_init(&mm, N)`.
  *                      This is -N^-1 mod 2^ciL.
  * \param[in,out]   T   A bignum for temporary storage.
- *                      It must be at least twice the limb size of N plus 2
- *                      (T->n >= 2 * (N->n + 1)).
+ *                      It must be at least twice the limb size of N plus 1
+ *                      (T->n >= 2 * N->n + 1).
  *                      Its initial content is unused and
  *                      its final content is indeterminate.
- *                      Note that unlike the usual convention in the library
- *                      for `const mbedtls_mpi*`, the content of T can change.
+ *                      It does not get reallocated.
  */
-static void mpi_montmul( mbedtls_mpi *A, const mbedtls_mpi *B, const mbedtls_mpi *N, mbedtls_mpi_uint mm,
-                         const mbedtls_mpi *T )
+static void mpi_montmul(mbedtls_mpi *A, const mbedtls_mpi *B,
+                        const mbedtls_mpi *N, mbedtls_mpi_uint mm,
+                        mbedtls_mpi *T)
 {
-    size_t i, n, m;
-    mbedtls_mpi_uint u0, u1, *d;
-
-    memset( T->p, 0, T->n * ciL );
-
-    d = T->p;
-    n = N->n;
-    m = ( B->n < n ) ? B->n : n;
-
-    for( i = 0; i < n; i++ )
-    {
-        /*
-         * T = (T + u0*B + u1*N) / 2^biL
-         */
-        u0 = A->p[i];
-        u1 = ( d[0] + u0 * B->p[0] ) * mm;
-
-        mpi_mul_hlp( m, B->p, d, u0 );
-        mpi_mul_hlp( n, N->p, d, u1 );
-
-        *d++ = u0; d[n + 1] = 0;
-    }
-
-    /* At this point, d is either the desired result or the desired result
-     * plus N. We now potentially subtract N, avoiding leaking whether the
-     * subtraction is performed through side channels. */
-
-    /* Copy the n least significant limbs of d to A, so that
-     * A = d if d < N (recall that N has n limbs). */
-    memcpy( A->p, d, n * ciL );
-    /* If d >= N then we want to set A to d - N. To prevent timing attacks,
-     * do the calculation without using conditional tests. */
-    /* Set d to d0 + (2^biL)^n - N where d0 is the current value of d. */
-    d[n] += 1;
-    d[n] -= mpi_sub_hlp( n, d, d, N->p );
-    /* If d0 < N then d < (2^biL)^n
-     * so d[n] == 0 and we want to keep A as it is.
-     * If d0 >= N then d >= (2^biL)^n, and d <= (2^biL)^n + N < 2 * (2^biL)^n
-     * so d[n] == 1 and we want to set A to the result of the subtraction
-     * which is d - (2^biL)^n, i.e. the n least significant limbs of d.
-     * This exactly corresponds to a conditional assignment. */
-    mbedtls_ct_mpi_uint_cond_assign( n, A->p, d, (unsigned char) d[n] );
+    mbedtls_mpi_core_montmul(A->p, A->p, B->p, B->n, N->p, N->n, mm, T->p);
 }
 
-void mbedtls_mpi_montmul( mbedtls_mpi *A, const mbedtls_mpi *B, const mbedtls_mpi *N, mbedtls_mpi_uint mm,
-                          const mbedtls_mpi *T )
+void mbedtls_mpi_montmul(mbedtls_mpi *A, const mbedtls_mpi *B,
+                         const mbedtls_mpi *N, mbedtls_mpi_uint mm,
+                         mbedtls_mpi *T )
 {
     mpi_montmul( A, B, N, mm, T);
 }
@@ -2018,8 +1654,8 @@
  *
  * See mpi_montmul() regarding constraints and guarantees on the parameters.
  */
-static void mpi_montred( mbedtls_mpi *A, const mbedtls_mpi *N,
-                         mbedtls_mpi_uint mm, const mbedtls_mpi *T )
+static void mpi_montred(mbedtls_mpi *A, const mbedtls_mpi *N,
+                        mbedtls_mpi_uint mm, mbedtls_mpi *T)
 {
     mbedtls_mpi_uint z = 1;
     mbedtls_mpi U;
@@ -2027,13 +1663,13 @@
     U.n = U.s = (int) z;
     U.p = &z;
 
-    mpi_montmul( A, &U, N, mm, T );
+    mpi_montmul(A, &U, N, mm, T);
 }
 
-void mbedtls_mpi_montred( mbedtls_mpi *A, const mbedtls_mpi *N,
-                          mbedtls_mpi_uint mm, const mbedtls_mpi *T )
+void mbedtls_mpi_montred(mbedtls_mpi *A, const mbedtls_mpi *N,
+                         mbedtls_mpi_uint mm, mbedtls_mpi *T)
 {
-    mpi_montred( A, N, mm, T );
+    mpi_montred(A, N, mm, T);
 }
 
 /**
@@ -2051,87 +1687,136 @@
  *
  * \return \c 0 on success, or a negative error code.
  */
-static int mpi_select( mbedtls_mpi *R, const mbedtls_mpi *T, size_t T_size, size_t idx )
+static int mpi_select(mbedtls_mpi *R, const mbedtls_mpi *T, size_t T_size, size_t idx)
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
 
-    for( size_t i = 0; i < T_size; i++ )
-    {
-        MBEDTLS_MPI_CHK( mbedtls_mpi_safe_cond_assign( R, &T[i],
-                        (unsigned char) mbedtls_ct_size_bool_eq( i, idx ) ) );
+    for (size_t i = 0; i < T_size; i++) {
+        MBEDTLS_MPI_CHK(mbedtls_mpi_safe_cond_assign(R, &T[i],
+                                                     (unsigned char) mbedtls_ct_size_bool_eq(i,
+                                                                                             idx)));
     }
 
 cleanup:
-    return( ret );
+    return ret;
 }
 
 /*
  * Sliding-window exponentiation: X = A^E mod N  (HAC 14.85)
  */
-int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
-                         const mbedtls_mpi *E, const mbedtls_mpi *N,
-                         mbedtls_mpi *prec_RR )
+int mbedtls_mpi_exp_mod(mbedtls_mpi *X, const mbedtls_mpi *A,
+                        const mbedtls_mpi *E, const mbedtls_mpi *N,
+                        mbedtls_mpi *prec_RR)
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
-    size_t wbits, wsize, one = 1;
+    size_t window_bitsize;
     size_t i, j, nblimbs;
     size_t bufsize, nbits;
+    size_t exponent_bits_in_window = 0;
     mbedtls_mpi_uint ei, mm, state;
     mbedtls_mpi RR, T, WW, Apos;
-    mbedtls_mpi *W = NULL;
+    mbedtls_mpi *W;
     const size_t array_size_W = 2 << MBEDTLS_MPI_WINDOW_SIZE;
     int neg;
 
-    MPI_VALIDATE_RET( X != NULL );
-    MPI_VALIDATE_RET( A != NULL );
-    MPI_VALIDATE_RET( E != NULL );
-    MPI_VALIDATE_RET( N != NULL );
+    MPI_VALIDATE_RET(X != NULL);
+    MPI_VALIDATE_RET(A != NULL);
+    MPI_VALIDATE_RET(E != NULL);
+    MPI_VALIDATE_RET(N != NULL);
 
-    if( mbedtls_mpi_cmp_int( N, 0 ) <= 0 || ( N->p[0] & 1 ) == 0 )
-        return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
+    if (mbedtls_mpi_cmp_int(N, 0) <= 0 || (N->p[0] & 1) == 0) {
+        return MBEDTLS_ERR_MPI_BAD_INPUT_DATA;
+    }
 
-    if( mbedtls_mpi_cmp_int( E, 0 ) < 0 )
-        return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
+    if (mbedtls_mpi_cmp_int(E, 0) < 0) {
+        return MBEDTLS_ERR_MPI_BAD_INPUT_DATA;
+    }
 
-    if( mbedtls_mpi_bitlen( E ) > MBEDTLS_MPI_MAX_BITS ||
-        mbedtls_mpi_bitlen( N ) > MBEDTLS_MPI_MAX_BITS )
-        return ( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
+    if (mbedtls_mpi_bitlen(E) > MBEDTLS_MPI_MAX_BITS ||
+        mbedtls_mpi_bitlen(N) > MBEDTLS_MPI_MAX_BITS) {
+        return MBEDTLS_ERR_MPI_BAD_INPUT_DATA;
+    }
 
     /*
      * Init temps and window size
      */
-    mpi_montg_init( &mm, N );
-    mbedtls_mpi_init_mempool( &RR ); mbedtls_mpi_init( &T );
-    mbedtls_mpi_init_mempool( &Apos );
-    mbedtls_mpi_init_mempool( &WW );
+    mpi_montg_init(&mm, N);
+    mbedtls_mpi_init_mempool(&RR); mbedtls_mpi_init(&T);
+    mbedtls_mpi_init_mempool(&Apos);
+    mbedtls_mpi_init_mempool(&WW);
 
-    i = mbedtls_mpi_bitlen( E );
+    i = mbedtls_mpi_bitlen(E);
 
-    wsize = ( i > 671 ) ? 6 : ( i > 239 ) ? 5 :
-            ( i >  79 ) ? 4 : ( i >  23 ) ? 3 : 1;
+    window_bitsize = (i > 671) ? 6 : (i > 239) ? 5 :
+                     (i >  79) ? 4 : (i >  23) ? 3 : 1;
 
-#if( MBEDTLS_MPI_WINDOW_SIZE < 6 )
-    if( wsize > MBEDTLS_MPI_WINDOW_SIZE )
-        wsize = MBEDTLS_MPI_WINDOW_SIZE;
+#if (MBEDTLS_MPI_WINDOW_SIZE < 6)
+    if (window_bitsize > MBEDTLS_MPI_WINDOW_SIZE) {
+        window_bitsize = MBEDTLS_MPI_WINDOW_SIZE;
+    }
 #endif
 
+    const size_t w_table_used_size = (size_t) 1 << window_bitsize;
+
+    W = mempool_alloc(mbedtls_mpi_mempool,
+                      sizeof( mbedtls_mpi ) * array_size_W);
+    if (W == NULL) {
+        ret = MBEDTLS_ERR_MPI_ALLOC_FAILED;
+        goto cleanup;
+    }
+    for (i = 0; i < array_size_W; i++)
+        mbedtls_mpi_init_mempool(W + i);
+    /*
+     * This function is not constant-trace: its memory accesses depend on the
+     * exponent value. To defend against timing attacks, callers (such as RSA
+     * and DHM) should use exponent blinding. However this is not enough if the
+     * adversary can find the exponent in a single trace, so this function
+     * takes extra precautions against adversaries who can observe memory
+     * access patterns.
+     *
+     * This function performs a series of multiplications by table elements and
+     * squarings, and we want the prevent the adversary from finding out which
+     * table element was used, and from distinguishing between multiplications
+     * and squarings. Firstly, when multiplying by an element of the window
+     * W[i], we do a constant-trace table lookup to obfuscate i. This leaves
+     * squarings as having a different memory access patterns from other
+     * multiplications. So secondly, we put the accumulator X in the table as
+     * well, and also do a constant-trace table lookup to multiply by X.
+     *
+     * This way, all multiplications take the form of a lookup-and-multiply.
+     * The number of lookup-and-multiply operations inside each iteration of
+     * the main loop still depends on the bits of the exponent, but since the
+     * other operations in the loop don't have an easily recognizable memory
+     * trace, an adversary is unlikely to be able to observe the exact
+     * patterns.
+     *
+     * An adversary may still be able to recover the exponent if they can
+     * observe both memory accesses and branches. However, branch prediction
+     * exploitation typically requires many traces of execution over the same
+     * data, which is defeated by randomized blinding.
+     *
+     * To achieve this, we make a copy of X and we use the table entry in each
+     * calculation from this point on.
+     */
+    const size_t x_index = 0;
+    mbedtls_mpi_copy(&W[x_index], X);
+
     j = N->n + 1;
     /* All W[i] and X must have at least N->n limbs for the mpi_montmul()
      * and mpi_montred() calls later. Here we ensure that W[1] and X are
      * large enough, and later we'll grow other W[i] to the same length.
      * They must not be shrunk midway through this function!
      */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, j ) );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_grow(&W[x_index], j));
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &T, j * 2 ) );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_grow(&T, j * 2));
 
     /*
      * Compensate for negative A (and correct at the end)
      */
-    neg = ( A->s == -1 );
-    if( neg )
-    {
-        MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &Apos, A ) );
+    neg = (A->s == -1);
+    if (neg) {
+        MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&Apos, A));
         Apos.s = 1;
         A = &Apos;
     }
@@ -2139,93 +1824,89 @@
     /*
      * If 1st call, pre-compute R^2 mod N
      */
-    if( prec_RR == NULL || prec_RR->p == NULL )
-    {
-        MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &RR, 1 ) );
-        MBEDTLS_MPI_CHK( mbedtls_mpi_shift_l( &RR, N->n * 2 * biL ) );
-        MBEDTLS_MPI_CHK( mbedtls_mpi_mod_mpi( &RR, &RR, N ) );
+    if (prec_RR == NULL || prec_RR->p == NULL) {
+        MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&RR, 1));
+        MBEDTLS_MPI_CHK(mbedtls_mpi_shift_l(&RR, N->n * 2 * biL));
+        MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(&RR, &RR, N));
 
-        if( prec_RR != NULL )
-            memcpy( prec_RR, &RR, sizeof( mbedtls_mpi ) );
+        if (prec_RR != NULL) {
+            memcpy(prec_RR, &RR, sizeof(mbedtls_mpi));
+        }
+    } else {
+        memcpy(&RR, prec_RR, sizeof(mbedtls_mpi));
     }
-    else
-        memcpy( &RR, prec_RR, sizeof( mbedtls_mpi ) );
 
-    W = mempool_alloc( mbedtls_mpi_mempool,
-                       sizeof( mbedtls_mpi ) * array_size_W );
-    if( W == NULL ) {
-        ret = MBEDTLS_ERR_MPI_ALLOC_FAILED;
-        goto cleanup;
-    }
-    for( i = 0; i < array_size_W; i++ )
-        mbedtls_mpi_init_mempool( W + i );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[1],  j ) );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_grow(&W[1],  j));
 
     /*
      * W[1] = A * R^2 * R^-1 mod N = A * R mod N
      */
-    if( mbedtls_mpi_cmp_mpi( A, N ) >= 0 )
-    {
-        MBEDTLS_MPI_CHK( mbedtls_mpi_mod_mpi( &W[1], A, N ) );
+    if (mbedtls_mpi_cmp_mpi(A, N) >= 0) {
+        MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(&W[1], A, N));
         /* This should be a no-op because W[1] is already that large before
          * mbedtls_mpi_mod_mpi(), but it's necessary to avoid an overflow
          * in mpi_montmul() below, so let's make sure. */
-        MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[1], N->n + 1 ) );
+        MBEDTLS_MPI_CHK(mbedtls_mpi_grow(&W[1], N->n + 1));
+    } else {
+        MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&W[1], A));
     }
-    else
-        MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &W[1], A ) );
 
     /* Note that this is safe because W[1] always has at least N->n limbs
      * (it grew above and was preserved by mbedtls_mpi_copy()). */
-    mpi_montmul( &W[1], &RR, N, mm, &T );
+    mpi_montmul(&W[1], &RR, N, mm, &T);
 
     /*
-     * X = R^2 * R^-1 mod N = R mod N
+     * W[x_index] = R^2 * R^-1 mod N = R mod N
      */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( X, &RR ) );
-    mpi_montred( X, N, mm, &T );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&W[x_index], &RR));
+    mpi_montred(&W[x_index], N, mm, &T);
 
-    if( wsize > 1 )
-    {
+
+    if (window_bitsize > 1) {
         /*
-         * W[1 << (wsize - 1)] = W[1] ^ (wsize - 1)
+         * W[i] = W[1] ^ i
+         *
+         * The first bit of the sliding window is always 1 and therefore we
+         * only need to store the second half of the table.
+         *
+         * (There are two special elements in the table: W[0] for the
+         * accumulator/result and W[1] for A in Montgomery form. Both of these
+         * are already set at this point.)
          */
-        j =  one << ( wsize - 1 );
+        j = w_table_used_size / 2;
 
-        MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[j], N->n + 1 ) );
-        MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &W[j], &W[1]    ) );
+        MBEDTLS_MPI_CHK(mbedtls_mpi_grow(&W[j], N->n + 1));
+        MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&W[j], &W[1]));
 
-        for( i = 0; i < wsize - 1; i++ )
-            mpi_montmul( &W[j], &W[j], N, mm, &T );
+        for (i = 0; i < window_bitsize - 1; i++) {
+            mpi_montmul(&W[j], &W[j], N, mm, &T);
+        }
 
         /*
          * W[i] = W[i - 1] * W[1]
          */
-        for( i = j + 1; i < ( one << wsize ); i++ )
-        {
-            MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[i], N->n + 1 ) );
-            MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &W[i], &W[i - 1] ) );
+        for (i = j + 1; i < w_table_used_size; i++) {
+            MBEDTLS_MPI_CHK(mbedtls_mpi_grow(&W[i], N->n + 1));
+            MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&W[i], &W[i - 1]));
 
-            mpi_montmul( &W[i], &W[1], N, mm, &T );
+            mpi_montmul(&W[i], &W[1], N, mm, &T);
         }
     }
 
     nblimbs = E->n;
     bufsize = 0;
     nbits   = 0;
-    wbits   = 0;
     state   = 0;
 
-    while( 1 )
-    {
-        if( bufsize == 0 )
-        {
-            if( nblimbs == 0 )
+    while (1) {
+        if (bufsize == 0) {
+            if (nblimbs == 0) {
                 break;
+            }
 
             nblimbs--;
 
-            bufsize = sizeof( mbedtls_mpi_uint ) << 3;
+            bufsize = sizeof(mbedtls_mpi_uint) << 3;
         }
 
         bufsize--;
@@ -2235,15 +1916,16 @@
         /*
          * skip leading 0s
          */
-        if( ei == 0 && state == 0 )
+        if (ei == 0 && state == 0) {
             continue;
+        }
 
-        if( ei == 0 && state == 1 )
-        {
+        if (ei == 0 && state == 1) {
             /*
-             * out of window, square X
+             * out of window, square W[x_index]
              */
-            mpi_montmul( X, X, N, mm, &T );
+            MBEDTLS_MPI_CHK(mpi_select(&WW, W, w_table_used_size, x_index));
+            mpi_montmul(&W[x_index], &WW, N, mm, &T);
             continue;
         }
 
@@ -2253,101 +1935,112 @@
         state = 2;
 
         nbits++;
-        wbits |= ( ei << ( wsize - nbits ) );
+        exponent_bits_in_window |= (ei << (window_bitsize - nbits));
 
-        if( nbits == wsize )
-        {
+        if (nbits == window_bitsize) {
             /*
-             * X = X^wsize R^-1 mod N
+             * W[x_index] = W[x_index]^window_bitsize R^-1 mod N
              */
-            for( i = 0; i < wsize; i++ )
-                mpi_montmul( X, X, N, mm, &T );
+            for (i = 0; i < window_bitsize; i++) {
+                MBEDTLS_MPI_CHK(mpi_select(&WW, W, w_table_used_size,
+                                           x_index));
+                mpi_montmul(&W[x_index], &WW, N, mm, &T);
+            }
 
             /*
-             * X = X * W[wbits] R^-1 mod N
+             * W[x_index] = W[x_index] * W[exponent_bits_in_window] R^-1 mod N
              */
-            MBEDTLS_MPI_CHK( mpi_select( &WW, W, (size_t) 1 << wsize, wbits ) );
-            mpi_montmul( X, &WW, N, mm, &T );
+            MBEDTLS_MPI_CHK(mpi_select(&WW, W, w_table_used_size,
+                                       exponent_bits_in_window));
+            mpi_montmul(&W[x_index], &WW, N, mm, &T);
 
             state--;
             nbits = 0;
-            wbits = 0;
+            exponent_bits_in_window = 0;
         }
     }
 
     /*
      * process the remaining bits
      */
-    for( i = 0; i < nbits; i++ )
-    {
-        mpi_montmul( X, X, N, mm, &T );
+    for (i = 0; i < nbits; i++) {
+        MBEDTLS_MPI_CHK(mpi_select(&WW, W, w_table_used_size, x_index));
+        mpi_montmul(&W[x_index], &WW, N, mm, &T);
 
-        wbits <<= 1;
+        exponent_bits_in_window <<= 1;
 
-        if( ( wbits & ( one << wsize ) ) != 0 )
-            mpi_montmul( X, &W[1], N, mm, &T );
+        if ((exponent_bits_in_window & ((size_t) 1 << window_bitsize)) != 0) {
+            MBEDTLS_MPI_CHK(mpi_select(&WW, W, w_table_used_size, 1));
+            mpi_montmul(&W[x_index], &WW, N, mm, &T);
+        }
     }
 
     /*
-     * X = A^E * R * R^-1 mod N = A^E mod N
+     * W[x_index] = A^E * R * R^-1 mod N = A^E mod N
      */
-    mpi_montred( X, N, mm, &T );
+    mpi_montred(&W[x_index], N, mm, &T);
 
-    if( neg && E->n != 0 && ( E->p[0] & 1 ) != 0 )
-    {
-        X->s = -1;
-        MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( X, N, X ) );
+    if (neg && E->n != 0 && (E->p[0] & 1) != 0) {
+        W[x_index].s = -1;
+        MBEDTLS_MPI_CHK(mbedtls_mpi_add_mpi(&W[x_index], N, &W[x_index]));
     }
 
+    /*
+     * Load the result in the output variable.
+     */
+    mbedtls_mpi_copy(X, &W[x_index]);
+
 cleanup:
 
-    if( W )
-        for( i = 0; i < array_size_W; i++ )
-            mbedtls_mpi_free( W + i );
-    mempool_free( mbedtls_mpi_mempool , W );
+    if (W)
+        for (i = 0; i < array_size_W; i++)
+            mbedtls_mpi_free(W + i);
+    mempool_free(mbedtls_mpi_mempool , W);
 
-    mbedtls_mpi_free( &T ); mbedtls_mpi_free( &Apos );
-    mbedtls_mpi_free( &WW );
+    mbedtls_mpi_free(&T);
+    mbedtls_mpi_free(&Apos);
+    mbedtls_mpi_free(&WW);
 
-    if( prec_RR == NULL || prec_RR->p == NULL )
-        mbedtls_mpi_free( &RR );
+    if (prec_RR == NULL || prec_RR->p == NULL) {
+        mbedtls_mpi_free(&RR);
+    }
 
-    return( ret );
+    return ret;
 }
 
 /*
  * Greatest common divisor: G = gcd(A, B)  (HAC 14.54)
  */
-int mbedtls_mpi_gcd( mbedtls_mpi *G, const mbedtls_mpi *A, const mbedtls_mpi *B )
+int mbedtls_mpi_gcd(mbedtls_mpi *G, const mbedtls_mpi *A, const mbedtls_mpi *B)
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     size_t lz, lzt;
     mbedtls_mpi TA, TB;
 
-    MPI_VALIDATE_RET( G != NULL );
-    MPI_VALIDATE_RET( A != NULL );
-    MPI_VALIDATE_RET( B != NULL );
+    MPI_VALIDATE_RET(G != NULL);
+    MPI_VALIDATE_RET(A != NULL);
+    MPI_VALIDATE_RET(B != NULL);
 
-    mbedtls_mpi_init_mempool( &TA ); mbedtls_mpi_init_mempool( &TB );
+    mbedtls_mpi_init_mempool(&TA); mbedtls_mpi_init_mempool(&TB);
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TA, A ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TB, B ) );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&TA, A));
+    MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&TB, B));
 
-    lz = mbedtls_mpi_lsb( &TA );
-    lzt = mbedtls_mpi_lsb( &TB );
+    lz = mbedtls_mpi_lsb(&TA);
+    lzt = mbedtls_mpi_lsb(&TB);
 
     /* The loop below gives the correct result when A==0 but not when B==0.
      * So have a special case for B==0. Leverage the fact that we just
      * calculated the lsb and lsb(B)==0 iff B is odd or 0 to make the test
      * slightly more efficient than cmp_int(). */
-    if( lzt == 0 && mbedtls_mpi_get_bit( &TB, 0 ) == 0 )
-    {
-        ret = mbedtls_mpi_copy( G, A );
+    if (lzt == 0 && mbedtls_mpi_get_bit(&TB, 0) == 0) {
+        ret = mbedtls_mpi_copy(G, A);
         goto cleanup;
     }
 
-    if( lzt < lz )
+    if (lzt < lz) {
         lz = lzt;
+    }
 
     TA.s = TB.s = 1;
 
@@ -2384,11 +2077,10 @@
      * TA becomes 0 which ends the loop (TB cannot be 0 if it is right-shifted
      * since in that case TB is calculated from TB-TA with the condition TB>TA).
      */
-    while( mbedtls_mpi_cmp_int( &TA, 0 ) != 0 )
-    {
+    while (mbedtls_mpi_cmp_int(&TA, 0) != 0) {
         /* Divisions by 2 preserve the invariant (I). */
-        MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &TA, mbedtls_mpi_lsb( &TA ) ) );
-        MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &TB, mbedtls_mpi_lsb( &TB ) ) );
+        MBEDTLS_MPI_CHK(mbedtls_mpi_shift_r(&TA, mbedtls_mpi_lsb(&TA)));
+        MBEDTLS_MPI_CHK(mbedtls_mpi_shift_r(&TB, mbedtls_mpi_lsb(&TB)));
 
         /* Set either TA or TB to |TA-TB|/2. Since TA and TB are both odd,
          * TA-TB is even so the division by 2 has an integer result.
@@ -2397,15 +2089,12 @@
          * also divides TB, and any odd divisor of both TB and |TA-TB|/2 also
          * divides TA.
          */
-        if( mbedtls_mpi_cmp_mpi( &TA, &TB ) >= 0 )
-        {
-            MBEDTLS_MPI_CHK( mbedtls_mpi_sub_abs( &TA, &TA, &TB ) );
-            MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &TA, 1 ) );
-        }
-        else
-        {
-            MBEDTLS_MPI_CHK( mbedtls_mpi_sub_abs( &TB, &TB, &TA ) );
-            MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &TB, 1 ) );
+        if (mbedtls_mpi_cmp_mpi(&TA, &TB) >= 0) {
+            MBEDTLS_MPI_CHK(mbedtls_mpi_sub_abs(&TA, &TA, &TB));
+            MBEDTLS_MPI_CHK(mbedtls_mpi_shift_r(&TA, 1));
+        } else {
+            MBEDTLS_MPI_CHK(mbedtls_mpi_sub_abs(&TB, &TB, &TA));
+            MBEDTLS_MPI_CHK(mbedtls_mpi_shift_r(&TB, 1));
         }
         /* Note that one of TA or TB is still odd. */
     }
@@ -2419,271 +2108,186 @@
      *   In this case, lz = 0 and B = TB so gcd(A,B) = B = 2^lz * TB as well.
      */
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_shift_l( &TB, lz ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( G, &TB ) );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_shift_l(&TB, lz));
+    MBEDTLS_MPI_CHK(mbedtls_mpi_copy(G, &TB));
 
 cleanup:
 
-    mbedtls_mpi_free( &TA ); mbedtls_mpi_free( &TB );
+    mbedtls_mpi_free(&TA); mbedtls_mpi_free(&TB);
 
-    return( ret );
-}
-
-/* Fill X with n_bytes random bytes.
- * X must already have room for those bytes.
- * The ordering of the bytes returned from the RNG is suitable for
- * deterministic ECDSA (see RFC 6979 §3.3 and mbedtls_mpi_random()).
- * The size and sign of X are unchanged.
- * n_bytes must not be 0.
- */
-static int mpi_fill_random_internal(
-    mbedtls_mpi *X, size_t n_bytes,
-    int (*f_rng)(void *, unsigned char *, size_t), void *p_rng )
-{
-    int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
-    const size_t limbs = CHARS_TO_LIMBS( n_bytes );
-    const size_t overhead = ( limbs * ciL ) - n_bytes;
-
-    if( X->n < limbs )
-        return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
-
-    memset( X->p, 0, overhead );
-    memset( (unsigned char *) X->p + limbs * ciL, 0, ( X->n - limbs ) * ciL );
-    MBEDTLS_MPI_CHK( f_rng( p_rng, (unsigned char *) X->p + overhead, n_bytes ) );
-    mpi_bigendian_to_host( X->p, limbs );
-
-cleanup:
-    return( ret );
+    return ret;
 }
 
 /*
  * Fill X with size bytes of random.
- *
- * Use a temporary bytes representation to make sure the result is the same
- * regardless of the platform endianness (useful when f_rng is actually
- * deterministic, eg for tests).
+ * The bytes returned from the RNG are used in a specific order which
+ * is suitable for deterministic ECDSA (see the specification of
+ * mbedtls_mpi_random() and the implementation in mbedtls_mpi_fill_random()).
  */
-int mbedtls_mpi_fill_random( mbedtls_mpi *X, size_t size,
-                     int (*f_rng)(void *, unsigned char *, size_t),
-                     void *p_rng )
+int mbedtls_mpi_fill_random(mbedtls_mpi *X, size_t size,
+                            int (*f_rng)(void *, unsigned char *, size_t),
+                            void *p_rng)
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
-    size_t const limbs = CHARS_TO_LIMBS( size );
+    const size_t limbs = CHARS_TO_LIMBS(size);
 
-    MPI_VALIDATE_RET( X     != NULL );
-    MPI_VALIDATE_RET( f_rng != NULL );
+    MPI_VALIDATE_RET(X     != NULL);
+    MPI_VALIDATE_RET(f_rng != NULL);
 
     /* Ensure that target MPI has exactly the necessary number of limbs */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_resize_clear( X, limbs ) );
-    if( size == 0 )
-        return( 0 );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_resize_clear(X, limbs));
+    if (size == 0) {
+        return 0;
+    }
 
-    ret = mpi_fill_random_internal( X, size, f_rng, p_rng );
+    ret = mbedtls_mpi_core_fill_random(X->p, X->n, size, f_rng, p_rng);
 
 cleanup:
-    return( ret );
+    return ret;
 }
 
-int mbedtls_mpi_random( mbedtls_mpi *X,
-                        mbedtls_mpi_sint min,
-                        const mbedtls_mpi *N,
-                        int (*f_rng)(void *, unsigned char *, size_t),
-                        void *p_rng )
+int mbedtls_mpi_random(mbedtls_mpi *X,
+                       mbedtls_mpi_sint min,
+                       const mbedtls_mpi *N,
+                       int (*f_rng)(void *, unsigned char *, size_t),
+                       void *p_rng)
 {
-    int ret = MBEDTLS_ERR_MPI_BAD_INPUT_DATA;
-    int count;
-    unsigned lt_lower = 1, lt_upper = 0;
-    size_t n_bits = mbedtls_mpi_bitlen( N );
-    size_t n_bytes = ( n_bits + 7 ) / 8;
-    mbedtls_mpi lower_bound;
-
-    if( min < 0 )
-        return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
-    if( mbedtls_mpi_cmp_int( N, min ) <= 0 )
-        return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
-
-    /*
-     * When min == 0, each try has at worst a probability 1/2 of failing
-     * (the msb has a probability 1/2 of being 0, and then the result will
-     * be < N), so after 30 tries failure probability is a most 2**(-30).
-     *
-     * When N is just below a power of 2, as is the case when generating
-     * a random scalar on most elliptic curves, 1 try is enough with
-     * overwhelming probability. When N is just above a power of 2,
-     * as when generating a random scalar on secp224k1, each try has
-     * a probability of failing that is almost 1/2.
-     *
-     * The probabilities are almost the same if min is nonzero but negligible
-     * compared to N. This is always the case when N is crypto-sized, but
-     * it's convenient to support small N for testing purposes. When N
-     * is small, use a higher repeat count, otherwise the probability of
-     * failure is macroscopic.
-     */
-    count = ( n_bytes > 4 ? 30 : 250 );
-
-    mbedtls_mpi_init( &lower_bound );
+    if (min < 0) {
+        return MBEDTLS_ERR_MPI_BAD_INPUT_DATA;
+    }
+    if (mbedtls_mpi_cmp_int(N, min) <= 0) {
+        return MBEDTLS_ERR_MPI_BAD_INPUT_DATA;
+    }
 
     /* Ensure that target MPI has exactly the same number of limbs
      * as the upper bound, even if the upper bound has leading zeros.
-     * This is necessary for the mbedtls_mpi_lt_mpi_ct() check. */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_resize_clear( X, N->n ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &lower_bound, N->n ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &lower_bound, min ) );
-
-    /*
-     * Match the procedure given in RFC 6979 §3.3 (deterministic ECDSA)
-     * when f_rng is a suitably parametrized instance of HMAC_DRBG:
-     * - use the same byte ordering;
-     * - keep the leftmost n_bits bits of the generated octet string;
-     * - try until result is in the desired range.
-     * This also avoids any bias, which is especially important for ECDSA.
-     */
-    do
-    {
-        MBEDTLS_MPI_CHK( mpi_fill_random_internal( X, n_bytes, f_rng, p_rng ) );
-        MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( X, 8 * n_bytes - n_bits ) );
-
-        if( --count == 0 )
-        {
-            ret = MBEDTLS_ERR_MPI_NOT_ACCEPTABLE;
-            goto cleanup;
-        }
-
-        MBEDTLS_MPI_CHK( mbedtls_mpi_lt_mpi_ct( X, &lower_bound, &lt_lower ) );
-        MBEDTLS_MPI_CHK( mbedtls_mpi_lt_mpi_ct( X, N, &lt_upper ) );
+     * This is necessary for mbedtls_mpi_core_random. */
+    int ret = mbedtls_mpi_resize_clear(X, N->n);
+    if (ret != 0) {
+        return ret;
     }
-    while( lt_lower != 0 || lt_upper == 0 );
 
-cleanup:
-    mbedtls_mpi_free( &lower_bound );
-    return( ret );
+    return mbedtls_mpi_core_random(X->p, min, N->p, X->n, f_rng, p_rng);
 }
 
 /*
  * Modular inverse: X = A^-1 mod N  (HAC 14.61 / 14.64)
  */
-int mbedtls_mpi_inv_mod( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *N )
+int mbedtls_mpi_inv_mod(mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi *N)
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     mbedtls_mpi G, TA, TU, U1, U2, TB, TV, V1, V2;
-    MPI_VALIDATE_RET( X != NULL );
-    MPI_VALIDATE_RET( A != NULL );
-    MPI_VALIDATE_RET( N != NULL );
+    MPI_VALIDATE_RET(X != NULL);
+    MPI_VALIDATE_RET(A != NULL);
+    MPI_VALIDATE_RET(N != NULL);
 
-    if( mbedtls_mpi_cmp_int( N, 1 ) <= 0 )
-        return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
+    if (mbedtls_mpi_cmp_int(N, 1) <= 0) {
+        return MBEDTLS_ERR_MPI_BAD_INPUT_DATA;
+    }
 
-    mbedtls_mpi_init_mempool( &TA ); mbedtls_mpi_init_mempool( &TU );
-    mbedtls_mpi_init_mempool( &U1 ); mbedtls_mpi_init_mempool( &U2 );
-    mbedtls_mpi_init_mempool( &G ); mbedtls_mpi_init_mempool( &TB );
-    mbedtls_mpi_init_mempool( &TV ); mbedtls_mpi_init_mempool( &V1 );
-    mbedtls_mpi_init_mempool( &V2 );
+    mbedtls_mpi_init_mempool(&TA); mbedtls_mpi_init_mempool(&TU);
+    mbedtls_mpi_init_mempool(&U1); mbedtls_mpi_init_mempool(&U2);
+    mbedtls_mpi_init_mempool(&G); mbedtls_mpi_init_mempool(&TB);
+    mbedtls_mpi_init_mempool(&TV); mbedtls_mpi_init_mempool(&V1);
+    mbedtls_mpi_init_mempool(&V2);
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_gcd( &G, A, N ) );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_gcd(&G, A, N));
 
-    if( mbedtls_mpi_cmp_int( &G, 1 ) != 0 )
-    {
+    if (mbedtls_mpi_cmp_int(&G, 1) != 0) {
         ret = MBEDTLS_ERR_MPI_NOT_ACCEPTABLE;
         goto cleanup;
     }
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mod_mpi( &TA, A, N ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TU, &TA ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TB, N ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &TV, N ) );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(&TA, A, N));
+    MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&TU, &TA));
+    MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&TB, N));
+    MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&TV, N));
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &U1, 1 ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &U2, 0 ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &V1, 0 ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &V2, 1 ) );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&U1, 1));
+    MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&U2, 0));
+    MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&V1, 0));
+    MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&V2, 1));
 
-    do
-    {
-        while( ( TU.p[0] & 1 ) == 0 )
-        {
-            MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &TU, 1 ) );
+    do {
+        while ((TU.p[0] & 1) == 0) {
+            MBEDTLS_MPI_CHK(mbedtls_mpi_shift_r(&TU, 1));
 
-            if( ( U1.p[0] & 1 ) != 0 || ( U2.p[0] & 1 ) != 0 )
-            {
-                MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( &U1, &U1, &TB ) );
-                MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &U2, &U2, &TA ) );
+            if ((U1.p[0] & 1) != 0 || (U2.p[0] & 1) != 0) {
+                MBEDTLS_MPI_CHK(mbedtls_mpi_add_mpi(&U1, &U1, &TB));
+                MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(&U2, &U2, &TA));
             }
 
-            MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &U1, 1 ) );
-            MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &U2, 1 ) );
+            MBEDTLS_MPI_CHK(mbedtls_mpi_shift_r(&U1, 1));
+            MBEDTLS_MPI_CHK(mbedtls_mpi_shift_r(&U2, 1));
         }
 
-        while( ( TV.p[0] & 1 ) == 0 )
-        {
-            MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &TV, 1 ) );
+        while ((TV.p[0] & 1) == 0) {
+            MBEDTLS_MPI_CHK(mbedtls_mpi_shift_r(&TV, 1));
 
-            if( ( V1.p[0] & 1 ) != 0 || ( V2.p[0] & 1 ) != 0 )
-            {
-                MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( &V1, &V1, &TB ) );
-                MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &V2, &V2, &TA ) );
+            if ((V1.p[0] & 1) != 0 || (V2.p[0] & 1) != 0) {
+                MBEDTLS_MPI_CHK(mbedtls_mpi_add_mpi(&V1, &V1, &TB));
+                MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(&V2, &V2, &TA));
             }
 
-            MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &V1, 1 ) );
-            MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &V2, 1 ) );
+            MBEDTLS_MPI_CHK(mbedtls_mpi_shift_r(&V1, 1));
+            MBEDTLS_MPI_CHK(mbedtls_mpi_shift_r(&V2, 1));
         }
 
-        if( mbedtls_mpi_cmp_mpi( &TU, &TV ) >= 0 )
-        {
-            MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &TU, &TU, &TV ) );
-            MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &U1, &U1, &V1 ) );
-            MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &U2, &U2, &V2 ) );
+        if (mbedtls_mpi_cmp_mpi(&TU, &TV) >= 0) {
+            MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(&TU, &TU, &TV));
+            MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(&U1, &U1, &V1));
+            MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(&U2, &U2, &V2));
+        } else {
+            MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(&TV, &TV, &TU));
+            MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(&V1, &V1, &U1));
+            MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(&V2, &V2, &U2));
         }
-        else
-        {
-            MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &TV, &TV, &TU ) );
-            MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &V1, &V1, &U1 ) );
-            MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &V2, &V2, &U2 ) );
-        }
+    } while (mbedtls_mpi_cmp_int(&TU, 0) != 0);
+
+    while (mbedtls_mpi_cmp_int(&V1, 0) < 0) {
+        MBEDTLS_MPI_CHK(mbedtls_mpi_add_mpi(&V1, &V1, N));
     }
-    while( mbedtls_mpi_cmp_int( &TU, 0 ) != 0 );
 
-    while( mbedtls_mpi_cmp_int( &V1, 0 ) < 0 )
-        MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( &V1, &V1, N ) );
+    while (mbedtls_mpi_cmp_mpi(&V1, N) >= 0) {
+        MBEDTLS_MPI_CHK(mbedtls_mpi_sub_mpi(&V1, &V1, N));
+    }
 
-    while( mbedtls_mpi_cmp_mpi( &V1, N ) >= 0 )
-        MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &V1, &V1, N ) );
-
-    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( X, &V1 ) );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_copy(X, &V1));
 
 cleanup:
 
-    mbedtls_mpi_free( &TA ); mbedtls_mpi_free( &TU ); mbedtls_mpi_free( &U1 ); mbedtls_mpi_free( &U2 );
-    mbedtls_mpi_free( &G ); mbedtls_mpi_free( &TB ); mbedtls_mpi_free( &TV );
-    mbedtls_mpi_free( &V1 ); mbedtls_mpi_free( &V2 );
+    mbedtls_mpi_free(&TA); mbedtls_mpi_free(&TU); mbedtls_mpi_free(&U1); mbedtls_mpi_free(&U2);
+    mbedtls_mpi_free(&G); mbedtls_mpi_free(&TB); mbedtls_mpi_free(&TV);
+    mbedtls_mpi_free(&V1); mbedtls_mpi_free(&V2);
 
-    return( ret );
+    return ret;
 }
 
 #if defined(MBEDTLS_GENPRIME)
 
 static const int small_prime[] =
 {
-        3,    5,    7,   11,   13,   17,   19,   23,
-       29,   31,   37,   41,   43,   47,   53,   59,
-       61,   67,   71,   73,   79,   83,   89,   97,
-      101,  103,  107,  109,  113,  127,  131,  137,
-      139,  149,  151,  157,  163,  167,  173,  179,
-      181,  191,  193,  197,  199,  211,  223,  227,
-      229,  233,  239,  241,  251,  257,  263,  269,
-      271,  277,  281,  283,  293,  307,  311,  313,
-      317,  331,  337,  347,  349,  353,  359,  367,
-      373,  379,  383,  389,  397,  401,  409,  419,
-      421,  431,  433,  439,  443,  449,  457,  461,
-      463,  467,  479,  487,  491,  499,  503,  509,
-      521,  523,  541,  547,  557,  563,  569,  571,
-      577,  587,  593,  599,  601,  607,  613,  617,
-      619,  631,  641,  643,  647,  653,  659,  661,
-      673,  677,  683,  691,  701,  709,  719,  727,
-      733,  739,  743,  751,  757,  761,  769,  773,
-      787,  797,  809,  811,  821,  823,  827,  829,
-      839,  853,  857,  859,  863,  877,  881,  883,
-      887,  907,  911,  919,  929,  937,  941,  947,
-      953,  967,  971,  977,  983,  991,  997, -103
+    3,    5,    7,   11,   13,   17,   19,   23,
+    29,   31,   37,   41,   43,   47,   53,   59,
+    61,   67,   71,   73,   79,   83,   89,   97,
+    101,  103,  107,  109,  113,  127,  131,  137,
+    139,  149,  151,  157,  163,  167,  173,  179,
+    181,  191,  193,  197,  199,  211,  223,  227,
+    229,  233,  239,  241,  251,  257,  263,  269,
+    271,  277,  281,  283,  293,  307,  311,  313,
+    317,  331,  337,  347,  349,  353,  359,  367,
+    373,  379,  383,  389,  397,  401,  409,  419,
+    421,  431,  433,  439,  443,  449,  457,  461,
+    463,  467,  479,  487,  491,  499,  503,  509,
+    521,  523,  541,  547,  557,  563,  569,  571,
+    577,  587,  593,  599,  601,  607,  613,  617,
+    619,  631,  641,  643,  647,  653,  659,  661,
+    673,  677,  683,  691,  701,  709,  719,  727,
+    733,  739,  743,  751,  757,  761,  769,  773,
+    787,  797,  809,  811,  821,  823,  827,  829,
+    839,  853,  857,  859,  863,  877,  881,  883,
+    887,  907,  911,  919,  929,  937,  941,  947,
+    953,  967,  971,  977,  983,  991,  997, -103
 };
 
 /*
@@ -2695,70 +2299,71 @@
  * MBEDTLS_ERR_MPI_NOT_ACCEPTABLE: certain non-prime
  * other negative: error
  */
-static int mpi_check_small_factors( const mbedtls_mpi *X )
+static int mpi_check_small_factors(const mbedtls_mpi *X)
 {
     int ret = 0;
     size_t i;
     mbedtls_mpi_uint r;
 
-    if( ( X->p[0] & 1 ) == 0 )
-        return( MBEDTLS_ERR_MPI_NOT_ACCEPTABLE );
+    if ((X->p[0] & 1) == 0) {
+        return MBEDTLS_ERR_MPI_NOT_ACCEPTABLE;
+    }
 
-    for( i = 0; small_prime[i] > 0; i++ )
-    {
-        if( mbedtls_mpi_cmp_int( X, small_prime[i] ) <= 0 )
-            return( 1 );
+    for (i = 0; small_prime[i] > 0; i++) {
+        if (mbedtls_mpi_cmp_int(X, small_prime[i]) <= 0) {
+            return 1;
+        }
 
-        MBEDTLS_MPI_CHK( mbedtls_mpi_mod_int( &r, X, small_prime[i] ) );
+        MBEDTLS_MPI_CHK(mbedtls_mpi_mod_int(&r, X, small_prime[i]));
 
-        if( r == 0 )
-            return( MBEDTLS_ERR_MPI_NOT_ACCEPTABLE );
+        if (r == 0) {
+            return MBEDTLS_ERR_MPI_NOT_ACCEPTABLE;
+        }
     }
 
 cleanup:
-    return( ret );
+    return ret;
 }
 
 /*
  * Miller-Rabin pseudo-primality test  (HAC 4.24)
  */
-static int mpi_miller_rabin( const mbedtls_mpi *X, size_t rounds,
-                             int (*f_rng)(void *, unsigned char *, size_t),
-                             void *p_rng )
+static int mpi_miller_rabin(const mbedtls_mpi *X, size_t rounds,
+                            int (*f_rng)(void *, unsigned char *, size_t),
+                            void *p_rng)
 {
     int ret, count;
     size_t i, j, k, s;
     mbedtls_mpi W, R, T, A, RR;
 
-    MPI_VALIDATE_RET( X     != NULL );
-    MPI_VALIDATE_RET( f_rng != NULL );
+    MPI_VALIDATE_RET(X     != NULL);
+    MPI_VALIDATE_RET(f_rng != NULL);
 
-    mbedtls_mpi_init_mempool( &W ); mbedtls_mpi_init_mempool( &R );
-    mbedtls_mpi_init_mempool( &T ); mbedtls_mpi_init_mempool( &A );
-    mbedtls_mpi_init_mempool( &RR );
+    mbedtls_mpi_init_mempool(&W); mbedtls_mpi_init_mempool(&R);
+    mbedtls_mpi_init_mempool(&T); mbedtls_mpi_init_mempool(&A);
+    mbedtls_mpi_init_mempool(&RR);
 
     /*
      * W = |X| - 1
      * R = W >> lsb( W )
      */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_sub_int( &W, X, 1 ) );
-    s = mbedtls_mpi_lsb( &W );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &R, &W ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &R, s ) );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_sub_int(&W, X, 1));
+    s = mbedtls_mpi_lsb(&W);
+    MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&R, &W));
+    MBEDTLS_MPI_CHK(mbedtls_mpi_shift_r(&R, s));
 
-    for( i = 0; i < rounds; i++ )
-    {
+    for (i = 0; i < rounds; i++) {
         /*
          * pick a random A, 1 < A < |X| - 1
          */
         count = 0;
         do {
-            MBEDTLS_MPI_CHK( mbedtls_mpi_fill_random( &A, X->n * ciL, f_rng, p_rng ) );
+            MBEDTLS_MPI_CHK(mbedtls_mpi_fill_random(&A, X->n * ciL, f_rng, p_rng));
 
-            j = mbedtls_mpi_bitlen( &A );
-            k = mbedtls_mpi_bitlen( &W );
+            j = mbedtls_mpi_bitlen(&A);
+            k = mbedtls_mpi_bitlen(&W);
             if (j > k) {
-                A.p[A.n - 1] &= ( (mbedtls_mpi_uint) 1 << ( k - ( A.n - 1 ) * biL - 1 ) ) - 1;
+                A.p[A.n - 1] &= ((mbedtls_mpi_uint) 1 << (k - (A.n - 1) * biL - 1)) - 1;
             }
 
             if (count++ > 300) {
@@ -2766,29 +2371,30 @@
                 goto cleanup;
             }
 
-        } while ( mbedtls_mpi_cmp_mpi( &A, &W ) >= 0 ||
-                  mbedtls_mpi_cmp_int( &A, 1 )  <= 0    );
+        } while (mbedtls_mpi_cmp_mpi(&A, &W) >= 0 ||
+                 mbedtls_mpi_cmp_int(&A, 1)  <= 0);
 
         /*
          * A = A^R mod |X|
          */
-        MBEDTLS_MPI_CHK( mbedtls_mpi_exp_mod( &A, &A, &R, X, &RR ) );
+        MBEDTLS_MPI_CHK(mbedtls_mpi_exp_mod(&A, &A, &R, X, &RR));
 
-        if( mbedtls_mpi_cmp_mpi( &A, &W ) == 0 ||
-            mbedtls_mpi_cmp_int( &A,  1 ) == 0 )
+        if (mbedtls_mpi_cmp_mpi(&A, &W) == 0 ||
+            mbedtls_mpi_cmp_int(&A,  1) == 0) {
             continue;
+        }
 
         j = 1;
-        while( j < s && mbedtls_mpi_cmp_mpi( &A, &W ) != 0 )
-        {
+        while (j < s && mbedtls_mpi_cmp_mpi(&A, &W) != 0) {
             /*
              * A = A * A mod |X|
              */
-            MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mpi( &T, &A, &A ) );
-            MBEDTLS_MPI_CHK( mbedtls_mpi_mod_mpi( &A, &T, X  ) );
+            MBEDTLS_MPI_CHK(mbedtls_mpi_mul_mpi(&T, &A, &A));
+            MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(&A, &T, X));
 
-            if( mbedtls_mpi_cmp_int( &A, 1 ) == 0 )
+            if (mbedtls_mpi_cmp_int(&A, 1) == 0) {
                 break;
+            }
 
             j++;
         }
@@ -2796,75 +2402,56 @@
         /*
          * not prime if A != |X| - 1 or A == 1
          */
-        if( mbedtls_mpi_cmp_mpi( &A, &W ) != 0 ||
-            mbedtls_mpi_cmp_int( &A,  1 ) == 0 )
-        {
+        if (mbedtls_mpi_cmp_mpi(&A, &W) != 0 ||
+            mbedtls_mpi_cmp_int(&A,  1) == 0) {
             ret = MBEDTLS_ERR_MPI_NOT_ACCEPTABLE;
             break;
         }
     }
 
 cleanup:
-    mbedtls_mpi_free( &W ); mbedtls_mpi_free( &R );
-    mbedtls_mpi_free( &T ); mbedtls_mpi_free( &A );
-    mbedtls_mpi_free( &RR );
+    mbedtls_mpi_free(&W); mbedtls_mpi_free(&R);
+    mbedtls_mpi_free(&T); mbedtls_mpi_free(&A);
+    mbedtls_mpi_free(&RR);
 
-    return( ret );
+    return ret;
 }
 
 /*
  * Pseudo-primality test: small factors, then Miller-Rabin
  */
-int mbedtls_mpi_is_prime_ext( const mbedtls_mpi *X, int rounds,
-                              int (*f_rng)(void *, unsigned char *, size_t),
-                              void *p_rng )
+int mbedtls_mpi_is_prime_ext(const mbedtls_mpi *X, int rounds,
+                             int (*f_rng)(void *, unsigned char *, size_t),
+                             void *p_rng)
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     mbedtls_mpi XX;
-    MPI_VALIDATE_RET( X     != NULL );
-    MPI_VALIDATE_RET( f_rng != NULL );
+    MPI_VALIDATE_RET(X     != NULL);
+    MPI_VALIDATE_RET(f_rng != NULL);
 
     XX.s = 1;
     XX.n = X->n;
     XX.p = X->p;
 
-    if( mbedtls_mpi_cmp_int( &XX, 0 ) == 0 ||
-        mbedtls_mpi_cmp_int( &XX, 1 ) == 0 )
-        return( MBEDTLS_ERR_MPI_NOT_ACCEPTABLE );
-
-    if( mbedtls_mpi_cmp_int( &XX, 2 ) == 0 )
-        return( 0 );
-
-    if( ( ret = mpi_check_small_factors( &XX ) ) != 0 )
-    {
-        if( ret == 1 )
-            return( 0 );
-
-        return( ret );
+    if (mbedtls_mpi_cmp_int(&XX, 0) == 0 ||
+        mbedtls_mpi_cmp_int(&XX, 1) == 0) {
+        return MBEDTLS_ERR_MPI_NOT_ACCEPTABLE;
     }
 
-    return( mpi_miller_rabin( &XX, rounds, f_rng, p_rng ) );
-}
+    if (mbedtls_mpi_cmp_int(&XX, 2) == 0) {
+        return 0;
+    }
 
-#if !defined(MBEDTLS_DEPRECATED_REMOVED)
-/*
- * Pseudo-primality test, error probability 2^-80
- */
-int mbedtls_mpi_is_prime( const mbedtls_mpi *X,
-                  int (*f_rng)(void *, unsigned char *, size_t),
-                  void *p_rng )
-{
-    MPI_VALIDATE_RET( X     != NULL );
-    MPI_VALIDATE_RET( f_rng != NULL );
+    if ((ret = mpi_check_small_factors(&XX)) != 0) {
+        if (ret == 1) {
+            return 0;
+        }
 
-    /*
-     * In the past our key generation aimed for an error rate of at most
-     * 2^-80. Since this function is deprecated, aim for the same certainty
-     * here as well.
-     */
-    return( mbedtls_mpi_is_prime_ext( X, 40, f_rng, p_rng ) );
+        return ret;
+    }
+
+    return mpi_miller_rabin(&XX, rounds, f_rng, p_rng);
 }
-#endif
 
 /*
  * Prime number generation
@@ -2873,9 +2460,9 @@
  * be either 1024 bits or 1536 bits long, and flags must contain
  * MBEDTLS_MPI_GEN_PRIME_FLAG_LOW_ERR.
  */
-int mbedtls_mpi_gen_prime( mbedtls_mpi *X, size_t nbits, int flags,
-                   int (*f_rng)(void *, unsigned char *, size_t),
-                   void *p_rng )
+int mbedtls_mpi_gen_prime(mbedtls_mpi *X, size_t nbits, int flags,
+                          int (*f_rng)(void *, unsigned char *, size_t),
+                          void *p_rng)
 {
 #ifdef MBEDTLS_HAVE_INT64
 // ceil(2^63.5)
@@ -2890,107 +2477,108 @@
     mbedtls_mpi_uint r;
     mbedtls_mpi Y;
 
-    MPI_VALIDATE_RET( X     != NULL );
-    MPI_VALIDATE_RET( f_rng != NULL );
+    MPI_VALIDATE_RET(X     != NULL);
+    MPI_VALIDATE_RET(f_rng != NULL);
 
-    if( nbits < 3 || nbits > MBEDTLS_MPI_MAX_BITS )
-        return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA );
+    if (nbits < 3 || nbits > MBEDTLS_MPI_MAX_BITS) {
+        return MBEDTLS_ERR_MPI_BAD_INPUT_DATA;
+    }
 
-    mbedtls_mpi_init_mempool( &Y );
+    mbedtls_mpi_init_mempool(&Y);
 
-    n = BITS_TO_LIMBS( nbits );
+    n = BITS_TO_LIMBS(nbits);
 
-    if( ( flags & MBEDTLS_MPI_GEN_PRIME_FLAG_LOW_ERR ) == 0 )
-    {
+    if ((flags & MBEDTLS_MPI_GEN_PRIME_FLAG_LOW_ERR) == 0) {
         /*
          * 2^-80 error probability, number of rounds chosen per HAC, table 4.4
          */
-        rounds = ( ( nbits >= 1300 ) ?  2 : ( nbits >=  850 ) ?  3 :
-                   ( nbits >=  650 ) ?  4 : ( nbits >=  350 ) ?  8 :
-                   ( nbits >=  250 ) ? 12 : ( nbits >=  150 ) ? 18 : 27 );
-    }
-    else
-    {
+        rounds = ((nbits >= 1300) ?  2 : (nbits >=  850) ?  3 :
+                  (nbits >=  650) ?  4 : (nbits >=  350) ?  8 :
+                  (nbits >=  250) ? 12 : (nbits >=  150) ? 18 : 27);
+    } else {
         /*
          * 2^-100 error probability, number of rounds computed based on HAC,
          * fact 4.48
          */
-        rounds = ( ( nbits >= 1450 ) ?  4 : ( nbits >=  1150 ) ?  5 :
-                   ( nbits >= 1000 ) ?  6 : ( nbits >=   850 ) ?  7 :
-                   ( nbits >=  750 ) ?  8 : ( nbits >=   500 ) ? 13 :
-                   ( nbits >=  250 ) ? 28 : ( nbits >=   150 ) ? 40 : 51 );
+        rounds = ((nbits >= 1450) ?  4 : (nbits >=  1150) ?  5 :
+                  (nbits >= 1000) ?  6 : (nbits >=   850) ?  7 :
+                  (nbits >=  750) ?  8 : (nbits >=   500) ? 13 :
+                  (nbits >=  250) ? 28 : (nbits >=   150) ? 40 : 51);
     }
 
-    while( 1 )
-    {
-        MBEDTLS_MPI_CHK( mbedtls_mpi_fill_random( X, n * ciL, f_rng, p_rng ) );
+    while (1) {
+        MBEDTLS_MPI_CHK(mbedtls_mpi_fill_random(X, n * ciL, f_rng, p_rng));
         /* make sure generated number is at least (nbits-1)+0.5 bits (FIPS 186-4 §B.3.3 steps 4.4, 5.5) */
-        if( X->p[n-1] < CEIL_MAXUINT_DIV_SQRT2 ) continue;
+        if (X->p[n-1] < CEIL_MAXUINT_DIV_SQRT2) {
+            continue;
+        }
 
         k = n * biL;
-        if( k > nbits ) MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( X, k - nbits ) );
+        if (k > nbits) {
+            MBEDTLS_MPI_CHK(mbedtls_mpi_shift_r(X, k - nbits));
+        }
         X->p[0] |= 1;
 
-        if( ( flags & MBEDTLS_MPI_GEN_PRIME_FLAG_DH ) == 0 )
-        {
-            ret = mbedtls_mpi_is_prime_ext( X, rounds, f_rng, p_rng );
+        if ((flags & MBEDTLS_MPI_GEN_PRIME_FLAG_DH) == 0) {
+            ret = mbedtls_mpi_is_prime_ext(X, rounds, f_rng, p_rng);
 
-            if( ret != MBEDTLS_ERR_MPI_NOT_ACCEPTABLE )
+            if (ret != MBEDTLS_ERR_MPI_NOT_ACCEPTABLE) {
                 goto cleanup;
-        }
-        else
-        {
+            }
+        } else {
             /*
-             * An necessary condition for Y and X = 2Y + 1 to be prime
+             * A necessary condition for Y and X = 2Y + 1 to be prime
              * is X = 2 mod 3 (which is equivalent to Y = 2 mod 3).
              * Make sure it is satisfied, while keeping X = 3 mod 4
              */
 
             X->p[0] |= 2;
 
-            MBEDTLS_MPI_CHK( mbedtls_mpi_mod_int( &r, X, 3 ) );
-            if( r == 0 )
-                MBEDTLS_MPI_CHK( mbedtls_mpi_add_int( X, X, 8 ) );
-            else if( r == 1 )
-                MBEDTLS_MPI_CHK( mbedtls_mpi_add_int( X, X, 4 ) );
+            MBEDTLS_MPI_CHK(mbedtls_mpi_mod_int(&r, X, 3));
+            if (r == 0) {
+                MBEDTLS_MPI_CHK(mbedtls_mpi_add_int(X, X, 8));
+            } else if (r == 1) {
+                MBEDTLS_MPI_CHK(mbedtls_mpi_add_int(X, X, 4));
+            }
 
             /* Set Y = (X-1) / 2, which is X / 2 because X is odd */
-            MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &Y, X ) );
-            MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &Y, 1 ) );
+            MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&Y, X));
+            MBEDTLS_MPI_CHK(mbedtls_mpi_shift_r(&Y, 1));
 
-            while( 1 )
-            {
+            while (1) {
                 /*
                  * First, check small factors for X and Y
                  * before doing Miller-Rabin on any of them
                  */
-                if( ( ret = mpi_check_small_factors(  X         ) ) == 0 &&
-                    ( ret = mpi_check_small_factors( &Y         ) ) == 0 &&
-                    ( ret = mpi_miller_rabin(  X, rounds, f_rng, p_rng  ) )
-                                                                    == 0 &&
-                    ( ret = mpi_miller_rabin( &Y, rounds, f_rng, p_rng  ) )
-                                                                    == 0 )
+                if ((ret = mpi_check_small_factors(X)) == 0 &&
+                    (ret = mpi_check_small_factors(&Y)) == 0 &&
+                    (ret = mpi_miller_rabin(X, rounds, f_rng, p_rng))
+                    == 0 &&
+                    (ret = mpi_miller_rabin(&Y, rounds, f_rng, p_rng))
+                    == 0) {
                     goto cleanup;
+                }
 
-                if( ret != MBEDTLS_ERR_MPI_NOT_ACCEPTABLE )
+                if (ret != MBEDTLS_ERR_MPI_NOT_ACCEPTABLE) {
                     goto cleanup;
+                }
 
                 /*
                  * Next candidates. We want to preserve Y = (X-1) / 2 and
                  * Y = 1 mod 2 and Y = 2 mod 3 (eq X = 3 mod 4 and X = 2 mod 3)
                  * so up Y by 6 and X by 12.
                  */
-                MBEDTLS_MPI_CHK( mbedtls_mpi_add_int(  X,  X, 12 ) );
-                MBEDTLS_MPI_CHK( mbedtls_mpi_add_int( &Y, &Y, 6  ) );
+                MBEDTLS_MPI_CHK(mbedtls_mpi_add_int(X,  X, 12));
+                MBEDTLS_MPI_CHK(mbedtls_mpi_add_int(&Y, &Y, 6));
             }
         }
     }
 
 cleanup:
 
-    mbedtls_mpi_free( &Y );
+    mbedtls_mpi_free(&Y);
 
-    return( ret );
+    return ret;
 }
 
 #endif /* MBEDTLS_GENPRIME */
@@ -3009,164 +2597,175 @@
 /*
  * Checkup routine
  */
-int mbedtls_mpi_self_test( int verbose )
+int mbedtls_mpi_self_test(int verbose)
 {
     int ret, i;
     mbedtls_mpi A, E, N, X, Y, U, V;
 
-    mbedtls_mpi_init_mempool( &A ); mbedtls_mpi_init_mempool( &E );
-    mbedtls_mpi_init_mempool( &N ); mbedtls_mpi_init_mempool( &X );
-    mbedtls_mpi_init_mempool( &Y ); mbedtls_mpi_init_mempool( &U );
-    mbedtls_mpi_init_mempool( &V );
+    mbedtls_mpi_init_mempool(&A); mbedtls_mpi_init_mempool(&E);
+    mbedtls_mpi_init_mempool(&N); mbedtls_mpi_init_mempool(&X);
+    mbedtls_mpi_init_mempool(&Y); mbedtls_mpi_init_mempool(&U);
+    mbedtls_mpi_init_mempool(&V);
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_read_string( &A, 16,
-        "EFE021C2645FD1DC586E69184AF4A31E" \
-        "D5F53E93B5F123FA41680867BA110131" \
-        "944FE7952E2517337780CB0DB80E61AA" \
-        "E7C8DDC6C5C6AADEB34EB38A2F40D5E6" ) );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_read_string(&A, 16,
+                                            "EFE021C2645FD1DC586E69184AF4A31E" \
+                                            "D5F53E93B5F123FA41680867BA110131" \
+                                            "944FE7952E2517337780CB0DB80E61AA" \
+                                            "E7C8DDC6C5C6AADEB34EB38A2F40D5E6"));
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_read_string( &E, 16,
-        "B2E7EFD37075B9F03FF989C7C5051C20" \
-        "34D2A323810251127E7BF8625A4F49A5" \
-        "F3E27F4DA8BD59C47D6DAABA4C8127BD" \
-        "5B5C25763222FEFCCFC38B832366C29E" ) );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_read_string(&E, 16,
+                                            "B2E7EFD37075B9F03FF989C7C5051C20" \
+                                            "34D2A323810251127E7BF8625A4F49A5" \
+                                            "F3E27F4DA8BD59C47D6DAABA4C8127BD" \
+                                            "5B5C25763222FEFCCFC38B832366C29E"));
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_read_string( &N, 16,
-        "0066A198186C18C10B2F5ED9B522752A" \
-        "9830B69916E535C8F047518A889A43A5" \
-        "94B6BED27A168D31D4A52F88925AA8F5" ) );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_read_string(&N, 16,
+                                            "0066A198186C18C10B2F5ED9B522752A" \
+                                            "9830B69916E535C8F047518A889A43A5" \
+                                            "94B6BED27A168D31D4A52F88925AA8F5"));
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mpi( &X, &A, &N ) );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_mul_mpi(&X, &A, &N));
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_read_string( &U, 16,
-        "602AB7ECA597A3D6B56FF9829A5E8B85" \
-        "9E857EA95A03512E2BAE7391688D264A" \
-        "A5663B0341DB9CCFD2C4C5F421FEC814" \
-        "8001B72E848A38CAE1C65F78E56ABDEF" \
-        "E12D3C039B8A02D6BE593F0BBBDA56F1" \
-        "ECF677152EF804370C1A305CAF3B5BF1" \
-        "30879B56C61DE584A0F53A2447A51E" ) );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_read_string(&U, 16,
+                                            "602AB7ECA597A3D6B56FF9829A5E8B85" \
+                                            "9E857EA95A03512E2BAE7391688D264A" \
+                                            "A5663B0341DB9CCFD2C4C5F421FEC814" \
+                                            "8001B72E848A38CAE1C65F78E56ABDEF" \
+                                            "E12D3C039B8A02D6BE593F0BBBDA56F1" \
+                                            "ECF677152EF804370C1A305CAF3B5BF1" \
+                                            "30879B56C61DE584A0F53A2447A51E"));
 
-    if( verbose != 0 )
-        mbedtls_printf( "  MPI test #1 (mul_mpi): " );
+    if (verbose != 0) {
+        mbedtls_printf("  MPI test #1 (mul_mpi): ");
+    }
 
-    if( mbedtls_mpi_cmp_mpi( &X, &U ) != 0 )
-    {
-        if( verbose != 0 )
-            mbedtls_printf( "failed\n" );
+    if (mbedtls_mpi_cmp_mpi(&X, &U) != 0) {
+        if (verbose != 0) {
+            mbedtls_printf("failed\n");
+        }
 
         ret = 1;
         goto cleanup;
     }
 
-    if( verbose != 0 )
-        mbedtls_printf( "passed\n" );
+    if (verbose != 0) {
+        mbedtls_printf("passed\n");
+    }
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_div_mpi( &X, &Y, &A, &N ) );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_div_mpi(&X, &Y, &A, &N));
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_read_string( &U, 16,
-        "256567336059E52CAE22925474705F39A94" ) );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_read_string(&U, 16,
+                                            "256567336059E52CAE22925474705F39A94"));
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_read_string( &V, 16,
-        "6613F26162223DF488E9CD48CC132C7A" \
-        "0AC93C701B001B092E4E5B9F73BCD27B" \
-        "9EE50D0657C77F374E903CDFA4C642" ) );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_read_string(&V, 16,
+                                            "6613F26162223DF488E9CD48CC132C7A" \
+                                            "0AC93C701B001B092E4E5B9F73BCD27B" \
+                                            "9EE50D0657C77F374E903CDFA4C642"));
 
-    if( verbose != 0 )
-        mbedtls_printf( "  MPI test #2 (div_mpi): " );
+    if (verbose != 0) {
+        mbedtls_printf("  MPI test #2 (div_mpi): ");
+    }
 
-    if( mbedtls_mpi_cmp_mpi( &X, &U ) != 0 ||
-        mbedtls_mpi_cmp_mpi( &Y, &V ) != 0 )
-    {
-        if( verbose != 0 )
-            mbedtls_printf( "failed\n" );
+    if (mbedtls_mpi_cmp_mpi(&X, &U) != 0 ||
+        mbedtls_mpi_cmp_mpi(&Y, &V) != 0) {
+        if (verbose != 0) {
+            mbedtls_printf("failed\n");
+        }
 
         ret = 1;
         goto cleanup;
     }
 
-    if( verbose != 0 )
-        mbedtls_printf( "passed\n" );
+    if (verbose != 0) {
+        mbedtls_printf("passed\n");
+    }
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_exp_mod( &X, &A, &E, &N, NULL ) );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_exp_mod(&X, &A, &E, &N, NULL));
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_read_string( &U, 16,
-        "36E139AEA55215609D2816998ED020BB" \
-        "BD96C37890F65171D948E9BC7CBAA4D9" \
-        "325D24D6A3C12710F10A09FA08AB87" ) );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_read_string(&U, 16,
+                                            "36E139AEA55215609D2816998ED020BB" \
+                                            "BD96C37890F65171D948E9BC7CBAA4D9" \
+                                            "325D24D6A3C12710F10A09FA08AB87"));
 
-    if( verbose != 0 )
-        mbedtls_printf( "  MPI test #3 (exp_mod): " );
+    if (verbose != 0) {
+        mbedtls_printf("  MPI test #3 (exp_mod): ");
+    }
 
-    if( mbedtls_mpi_cmp_mpi( &X, &U ) != 0 )
-    {
-        if( verbose != 0 )
-            mbedtls_printf( "failed\n" );
+    if (mbedtls_mpi_cmp_mpi(&X, &U) != 0) {
+        if (verbose != 0) {
+            mbedtls_printf("failed\n");
+        }
 
         ret = 1;
         goto cleanup;
     }
 
-    if( verbose != 0 )
-        mbedtls_printf( "passed\n" );
+    if (verbose != 0) {
+        mbedtls_printf("passed\n");
+    }
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_inv_mod( &X, &A, &N ) );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_inv_mod(&X, &A, &N));
 
-    MBEDTLS_MPI_CHK( mbedtls_mpi_read_string( &U, 16,
-        "003A0AAEDD7E784FC07D8F9EC6E3BFD5" \
-        "C3DBA76456363A10869622EAC2DD84EC" \
-        "C5B8A74DAC4D09E03B5E0BE779F2DF61" ) );
+    MBEDTLS_MPI_CHK(mbedtls_mpi_read_string(&U, 16,
+                                            "003A0AAEDD7E784FC07D8F9EC6E3BFD5" \
+                                            "C3DBA76456363A10869622EAC2DD84EC" \
+                                            "C5B8A74DAC4D09E03B5E0BE779F2DF61"));
 
-    if( verbose != 0 )
-        mbedtls_printf( "  MPI test #4 (inv_mod): " );
+    if (verbose != 0) {
+        mbedtls_printf("  MPI test #4 (inv_mod): ");
+    }
 
-    if( mbedtls_mpi_cmp_mpi( &X, &U ) != 0 )
-    {
-        if( verbose != 0 )
-            mbedtls_printf( "failed\n" );
+    if (mbedtls_mpi_cmp_mpi(&X, &U) != 0) {
+        if (verbose != 0) {
+            mbedtls_printf("failed\n");
+        }
 
         ret = 1;
         goto cleanup;
     }
 
-    if( verbose != 0 )
-        mbedtls_printf( "passed\n" );
+    if (verbose != 0) {
+        mbedtls_printf("passed\n");
+    }
 
-    if( verbose != 0 )
-        mbedtls_printf( "  MPI test #5 (simple gcd): " );
+    if (verbose != 0) {
+        mbedtls_printf("  MPI test #5 (simple gcd): ");
+    }
 
-    for( i = 0; i < GCD_PAIR_COUNT; i++ )
-    {
-        MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &X, gcd_pairs[i][0] ) );
-        MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &Y, gcd_pairs[i][1] ) );
+    for (i = 0; i < GCD_PAIR_COUNT; i++) {
+        MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&X, gcd_pairs[i][0]));
+        MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&Y, gcd_pairs[i][1]));
 
-        MBEDTLS_MPI_CHK( mbedtls_mpi_gcd( &A, &X, &Y ) );
+        MBEDTLS_MPI_CHK(mbedtls_mpi_gcd(&A, &X, &Y));
 
-        if( mbedtls_mpi_cmp_int( &A, gcd_pairs[i][2] ) != 0 )
-        {
-            if( verbose != 0 )
-                mbedtls_printf( "failed at %d\n", i );
+        if (mbedtls_mpi_cmp_int(&A, gcd_pairs[i][2]) != 0) {
+            if (verbose != 0) {
+                mbedtls_printf("failed at %d\n", i);
+            }
 
             ret = 1;
             goto cleanup;
         }
     }
 
-    if( verbose != 0 )
-        mbedtls_printf( "passed\n" );
+    if (verbose != 0) {
+        mbedtls_printf("passed\n");
+    }
 
 cleanup:
 
-    if( ret != 0 && verbose != 0 )
-        mbedtls_printf( "Unexpected error, return code = %08X\n", (unsigned int) ret );
+    if (ret != 0 && verbose != 0) {
+        mbedtls_printf("Unexpected error, return code = %08X\n", (unsigned int) ret);
+    }
 
-    mbedtls_mpi_free( &A ); mbedtls_mpi_free( &E ); mbedtls_mpi_free( &N ); mbedtls_mpi_free( &X );
-    mbedtls_mpi_free( &Y ); mbedtls_mpi_free( &U ); mbedtls_mpi_free( &V );
+    mbedtls_mpi_free(&A); mbedtls_mpi_free(&E); mbedtls_mpi_free(&N); mbedtls_mpi_free(&X);
+    mbedtls_mpi_free(&Y); mbedtls_mpi_free(&U); mbedtls_mpi_free(&V);
 
-    if( verbose != 0 )
-        mbedtls_printf( "\n" );
+    if (verbose != 0) {
+        mbedtls_printf("\n");
+    }
 
-    return( ret );
+    return ret;
 }
 
 #endif /* MBEDTLS_SELF_TEST */