Add modular exponentiation to bignum core

Signed-off-by: Janos Follath <janos.follath@arm.com>
diff --git a/library/bignum_core.c b/library/bignum_core.c
index 34aecda..227351b 100644
--- a/library/bignum_core.c
+++ b/library/bignum_core.c
@@ -582,6 +582,141 @@
 
 /* BEGIN MERGE SLOT 1 */
 
+static size_t mpi_exp_mod_get_window_size( size_t Ebits )
+{
+    size_t wsize = ( Ebits > 671 ) ? 6 : ( Ebits > 239 ) ? 5 :
+                   ( Ebits >  79 ) ? 4 : ( Ebits >  23 ) ? 3 : 1;
+
+#if( MBEDTLS_MPI_WINDOW_SIZE < 6 )
+    if( wsize > MBEDTLS_MPI_WINDOW_SIZE )
+        wsize = MBEDTLS_MPI_WINDOW_SIZE;
+#endif
+
+    return( wsize );
+}
+
+int mbedtls_mpi_core_exp_mod( mbedtls_mpi_uint *X,
+                              mbedtls_mpi_uint const *A,
+                              const mbedtls_mpi_uint *N,
+                              size_t n,
+                              const mbedtls_mpi_uint *E,
+                              size_t E_len,
+                              const mbedtls_mpi_uint *RR )
+{
+    int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
+    /* heap allocated memory pool */
+    mbedtls_mpi_uint *mempool = NULL;
+    /* pointers to temporaries within memory pool */
+    mbedtls_mpi_uint *Wtbl, *Wselect, *temp;
+    /* pointers to table entries */
+    mbedtls_mpi_uint *Wcur, *Wlast, *W1;
+
+    size_t wsize, welem;
+    mbedtls_mpi_uint one = 1, mm;
+
+    mm = mbedtls_mpi_core_montmul_init( N ); /* Compute Montgomery constant */
+    E += E_len;               /* Skip to end of exponent buffer */
+
+    wsize = mpi_exp_mod_get_window_size( E_len * biL );
+    welem = 1 << wsize;
+
+    /* Allocate memory pool and set pointers to parts of it */
+    const size_t table_limbs   = welem * n;
+    const size_t temp_limbs    = 2 * n + 1;
+    const size_t wselect_limbs = n;
+    const size_t total_limbs   = table_limbs + temp_limbs + wselect_limbs;
+
+    mempool = mbedtls_calloc( total_limbs, sizeof(mbedtls_mpi_uint) );
+    if( mempool == NULL )
+    {
+        ret = MBEDTLS_ERR_MPI_ALLOC_FAILED;
+        goto cleanup;
+    }
+
+    Wtbl    = mempool;
+    Wselect = Wtbl    + table_limbs;
+    temp    = Wselect + wselect_limbs;
+
+    /*
+     * Window precomputation
+     */
+
+    /* W[0] = 1 (in Montgomery presentation) */
+    memset( Wtbl, 0, n * ciL ); Wtbl[0] = 1;
+    mbedtls_mpi_core_montmul( Wtbl, Wtbl, RR, n, N, n, mm, temp );
+    Wcur = Wtbl + n;
+    /* W[1] = A * R^2 * R^-1 mod N = A * R mod N */
+    memcpy( Wcur, A, n * ciL );
+    mbedtls_mpi_core_montmul( Wcur, Wcur, RR, n, N, n, mm, temp );
+    W1 = Wcur;
+    Wcur += n;
+    /* W[i+1] = W[i] * W[1], i >= 2 */
+    Wlast = W1;
+    for( size_t i=2; i < welem; i++, Wlast += n, Wcur += n )
+        mbedtls_mpi_core_montmul( Wcur, Wlast, W1, n, N, n, mm, temp );
+
+    /*
+     * Sliding window exponentiation
+     */
+
+    /* X = 1 (in Montgomery presentation) initially */
+    memcpy( X, Wtbl, n * ciL );
+
+    size_t limb_bits_remaining = 0;
+    mbedtls_mpi_uint window = 0;
+    size_t window_bits = 0, cur_limb;
+    while( 1 )
+    {
+        size_t window_bits_missing = wsize - window_bits;
+
+        const int no_more_bits =
+            ( limb_bits_remaining == 0 ) && ( E_len == 0 );
+        const int window_full =
+            ( window_bits_missing == 0 );
+
+        /* Clear window if it's full or if we don't have further bits. */
+        if( window_full || no_more_bits )
+        {
+            if( window_bits == 0 )
+                break;
+            /* Select table entry, square and multiply */
+            mbedtls_mpi_core_ct_uint_table_lookup( Wselect, Wtbl,
+                                                   n, welem, window );
+            mbedtls_mpi_core_montmul( X, X, Wselect, n, N, n, mm, temp );
+            window = window_bits = 0;
+            continue;
+        }
+
+        /* Load next exponent limb if necessary */
+        if( limb_bits_remaining == 0 )
+        {
+            cur_limb = *--E;
+            E_len--;
+            limb_bits_remaining = biL;
+        }
+
+        /* Square */
+        mbedtls_mpi_core_montmul( X, X, X, n, N, n, mm, temp );
+
+        /* Insert next exponent bit into window */
+        window   <<= 1;
+        window    |= ( cur_limb >> ( biL - 1 ) );
+        cur_limb <<= 1;
+        window_bits++;
+        limb_bits_remaining--;
+    }
+
+    /* Convert X back to normal presentation */
+    mbedtls_mpi_core_montmul( X, X, &one, 1, N, n, mm, temp );
+
+    ret = 0;
+
+cleanup:
+
+    mbedtls_free( mempool );
+    return( ret );
+}
+
 /* END MERGE SLOT 1 */
 
 /* BEGIN MERGE SLOT 2 */
diff --git a/library/bignum_core.h b/library/bignum_core.h
index ad04e08..82da142 100644
--- a/library/bignum_core.h
+++ b/library/bignum_core.h
@@ -496,6 +496,29 @@
 
 /* BEGIN MERGE SLOT 1 */
 
+/**
+ * \brief          Perform a modular exponentiation with secret exponent:
+ *                 X = A^E mod N
+ *
+ * \param[out] X   The destination MPI, as a little endian array of length
+ *                 \p limbs.
+ * \param[in] A    The base MPI, as a little endian array of length \p limbs.
+ * \param[in] N    The modulus, as a little endian array of length \p limbs.
+ * \param limbs    The number of limbs in \p X, \p A, \p N, \p RR.
+ * \param[in] E    The exponent, as a little endian array of length \p E_limbs.
+ * \param E_limbs  The number of limbs in \p E.
+ * \param[in] RR   The precomputed residue of 2^{2*biL} modulo N, as a little
+ *                 endian array of length \p limbs.
+ *
+ * \return         \c 0 if successful.
+ * \return         #MBEDTLS_ERR_MPI_ALLOC_FAILED if a memory allocation failed.
+ */
+int mbedtls_mpi_core_exp_mod( mbedtls_mpi_uint *X,
+                              const mbedtls_mpi_uint *A,
+                              const mbedtls_mpi_uint *N, size_t limbs,
+                              const mbedtls_mpi_uint *E, size_t E_limbs,
+                              const mbedtls_mpi_uint *RR );
+
 /* END MERGE SLOT 1 */
 
 /* BEGIN MERGE SLOT 2 */