Merge pull request #8831 from yanesca/switch_to_new_exp

Use mpi_core_exp_mod in bignum
diff --git a/ChangeLog.d/use_exp_mod_core.txt b/ChangeLog.d/use_exp_mod_core.txt
new file mode 100644
index 0000000..8f7193a
--- /dev/null
+++ b/ChangeLog.d/use_exp_mod_core.txt
@@ -0,0 +1,6 @@
+Changes
+   * mbedtls_mpi_exp_mod and code that uses it, notably RSA and DHM operations,
+     have changed their speed/memory compromise as part of a proactive security
+     improvement. The new default value of MBEDTLS_MPI_WINDOW_SIZE roughly
+     preserves the current speed, at the expense of increasing memory
+     consumption.
diff --git a/include/mbedtls/bignum.h b/include/mbedtls/bignum.h
index 931e06d..71d7b97 100644
--- a/include/mbedtls/bignum.h
+++ b/include/mbedtls/bignum.h
@@ -51,15 +51,15 @@
 
 #if !defined(MBEDTLS_MPI_WINDOW_SIZE)
 /*
- * Maximum window size used for modular exponentiation. Default: 2
+ * Maximum window size used for modular exponentiation. Default: 3
  * Minimum value: 1. Maximum value: 6.
  *
  * Result is an array of ( 2 ** MBEDTLS_MPI_WINDOW_SIZE ) MPIs used
- * for the sliding window calculation. (So 64 by default)
+ * for the sliding window calculation. (So 8 by default)
  *
  * Reduction in size, reduces speed.
  */
-#define MBEDTLS_MPI_WINDOW_SIZE                           2        /**< Maximum window size used. */
+#define MBEDTLS_MPI_WINDOW_SIZE                           3        /**< Maximum window size used. */
 #endif /* !MBEDTLS_MPI_WINDOW_SIZE */
 
 #if !defined(MBEDTLS_MPI_MAX_SIZE)
diff --git a/library/bignum.c b/library/bignum.c
index d3d72ab..eecdf1e 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -37,6 +37,19 @@
 
 #include "mbedtls/platform.h"
 
+
+
+/*
+ * Conditionally select an MPI sign in constant time.
+ * (MPI sign is the field s in mbedtls_mpi. It is unsigned short and only 1 and -1 are valid
+ * values.)
+ */
+static inline signed short mbedtls_ct_mpi_sign_if(mbedtls_ct_condition_t cond,
+                                                  signed short sign1, signed short sign2)
+{
+    return (signed short) mbedtls_ct_uint_if(cond, sign1 + 1, sign2 + 1) - 1;
+}
+
 /*
  * Compare signed values in constant time
  */
@@ -112,7 +125,7 @@
     {
         mbedtls_ct_condition_t do_assign = mbedtls_ct_bool(assign);
 
-        X->s = (int) mbedtls_ct_uint_if(do_assign, Y->s, X->s);
+        X->s = mbedtls_ct_mpi_sign_if(do_assign, Y->s, X->s);
 
         mbedtls_mpi_core_cond_assign(X->p, Y->p, Y->n, do_assign);
 
@@ -149,8 +162,8 @@
     MBEDTLS_MPI_CHK(mbedtls_mpi_grow(Y, X->n));
 
     s = X->s;
-    X->s = (int) mbedtls_ct_uint_if(do_swap, Y->s, X->s);
-    Y->s = (int) mbedtls_ct_uint_if(do_swap, s, Y->s);
+    X->s = mbedtls_ct_mpi_sign_if(do_swap, Y->s, X->s);
+    Y->s = mbedtls_ct_mpi_sign_if(do_swap, s, Y->s);
 
     mbedtls_mpi_core_cond_swap(X->p, Y->p, X->n, do_swap);
 
@@ -288,8 +301,7 @@
  * This function is not constant-time. Leading zeros in Y may be removed.
  *
  * Ensure that X does not shrink. This is not guaranteed by the public API,
- * but some code in the bignum module relies on this property, for example
- * in mbedtls_mpi_exp_mod().
+ * but some code in the bignum module might still rely on this property.
  */
 int mbedtls_mpi_copy(mbedtls_mpi *X, const mbedtls_mpi *Y)
 {
@@ -1598,98 +1610,11 @@
     return 0;
 }
 
-static void mpi_montg_init(mbedtls_mpi_uint *mm, const mbedtls_mpi *N)
-{
-    *mm = mbedtls_mpi_core_montmul_init(N->p);
-}
-
-/** Montgomery multiplication: A = A * B * R^-1 mod N  (HAC 14.36)
- *
- * \param[in,out]   A   One of the numbers to multiply.
- *                      It must have at least as many limbs as N
- *                      (A->n >= N->n), and any limbs beyond n are ignored.
- *                      On successful completion, A contains the result of
- *                      the multiplication A * B * R^-1 mod N where
- *                      R = (2^ciL)^n.
- * \param[in]       B   One of the numbers to multiply.
- *                      It must be nonzero and must not have more limbs than N
- *                      (B->n <= N->n).
- * \param[in]       N   The modulus. \p N must be odd.
- * \param           mm  The value calculated by `mpi_montg_init(&mm, N)`.
- *                      This is -N^-1 mod 2^ciL.
- * \param[in,out]   T   A bignum for temporary storage.
- *                      It must be at least twice the limb size of N plus 1
- *                      (T->n >= 2 * N->n + 1).
- *                      Its initial content is unused and
- *                      its final content is indeterminate.
- *                      It does not get reallocated.
- */
-static void mpi_montmul(mbedtls_mpi *A, const mbedtls_mpi *B,
-                        const mbedtls_mpi *N, mbedtls_mpi_uint mm,
-                        mbedtls_mpi *T)
-{
-    mbedtls_mpi_core_montmul(A->p, A->p, B->p, B->n, N->p, N->n, mm, T->p);
-}
-
-/*
- * Montgomery reduction: A = A * R^-1 mod N
- *
- * See mpi_montmul() regarding constraints and guarantees on the parameters.
- */
-static void mpi_montred(mbedtls_mpi *A, const mbedtls_mpi *N,
-                        mbedtls_mpi_uint mm, mbedtls_mpi *T)
-{
-    mbedtls_mpi_uint z = 1;
-    mbedtls_mpi U;
-    U.n = 1;
-    U.s = 1;
-    U.p = &z;
-
-    mpi_montmul(A, &U, N, mm, T);
-}
-
-/**
- * Select an MPI from a table without leaking the index.
- *
- * This is functionally equivalent to mbedtls_mpi_copy(R, T[idx]) except it
- * reads the entire table in order to avoid leaking the value of idx to an
- * attacker able to observe memory access patterns.
- *
- * \param[out] R        Where to write the selected MPI.
- * \param[in] T         The table to read from.
- * \param[in] T_size    The number of elements in the table.
- * \param[in] idx       The index of the element to select;
- *                      this must satisfy 0 <= idx < T_size.
- *
- * \return \c 0 on success, or a negative error code.
- */
-static int mpi_select(mbedtls_mpi *R, const mbedtls_mpi *T, size_t T_size, size_t idx)
-{
-    int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
-
-    for (size_t i = 0; i < T_size; i++) {
-        MBEDTLS_MPI_CHK(mbedtls_mpi_safe_cond_assign(R, &T[i],
-                                                     (unsigned char) mbedtls_ct_uint_eq(i, idx)));
-    }
-cleanup:
-    return ret;
-}
-
-/*
- * Sliding-window exponentiation: X = A^E mod N  (HAC 14.85)
- */
 int mbedtls_mpi_exp_mod(mbedtls_mpi *X, const mbedtls_mpi *A,
                         const mbedtls_mpi *E, const mbedtls_mpi *N,
                         mbedtls_mpi *prec_RR)
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
-    size_t window_bitsize;
-    size_t i, j, nblimbs;
-    size_t bufsize, nbits;
-    size_t exponent_bits_in_window = 0;
-    mbedtls_mpi_uint ei, mm, state;
-    mbedtls_mpi RR, T, W[(size_t) 1 << MBEDTLS_MPI_WINDOW_SIZE], WW, Apos;
-    int neg;
 
     if (mbedtls_mpi_cmp_int(N, 0) <= 0 || (N->p[0] & 1) == 0) {
         return MBEDTLS_ERR_MPI_BAD_INPUT_DATA;
@@ -1705,261 +1630,86 @@
     }
 
     /*
-     * Init temps and window size
+     * Ensure that the exponent that we are passing to the core is not NULL.
      */
-    mpi_montg_init(&mm, N);
-    mbedtls_mpi_init(&RR); mbedtls_mpi_init(&T);
-    mbedtls_mpi_init(&Apos);
-    mbedtls_mpi_init(&WW);
-    memset(W, 0, sizeof(W));
-
-    i = mbedtls_mpi_bitlen(E);
-
-    window_bitsize = (i > 671) ? 6 : (i > 239) ? 5 :
-                     (i >  79) ? 4 : (i >  23) ? 3 : 1;
-
-#if (MBEDTLS_MPI_WINDOW_SIZE < 6)
-    if (window_bitsize > MBEDTLS_MPI_WINDOW_SIZE) {
-        window_bitsize = MBEDTLS_MPI_WINDOW_SIZE;
+    if (E->n == 0) {
+        ret = mbedtls_mpi_lset(X, 1);
+        return ret;
     }
-#endif
-
-    const size_t w_table_used_size = (size_t) 1 << window_bitsize;
 
     /*
-     * This function is not constant-trace: its memory accesses depend on the
-     * exponent value. To defend against timing attacks, callers (such as RSA
-     * and DHM) should use exponent blinding. However this is not enough if the
-     * adversary can find the exponent in a single trace, so this function
-     * takes extra precautions against adversaries who can observe memory
-     * access patterns.
-     *
-     * This function performs a series of multiplications by table elements and
-     * squarings, and we want the prevent the adversary from finding out which
-     * table element was used, and from distinguishing between multiplications
-     * and squarings. Firstly, when multiplying by an element of the window
-     * W[i], we do a constant-trace table lookup to obfuscate i. This leaves
-     * squarings as having a different memory access patterns from other
-     * multiplications. So secondly, we put the accumulator in the table as
-     * well, and also do a constant-trace table lookup to multiply by the
-     * accumulator which is W[x_index].
-     *
-     * This way, all multiplications take the form of a lookup-and-multiply.
-     * The number of lookup-and-multiply operations inside each iteration of
-     * the main loop still depends on the bits of the exponent, but since the
-     * other operations in the loop don't have an easily recognizable memory
-     * trace, an adversary is unlikely to be able to observe the exact
-     * patterns.
-     *
-     * An adversary may still be able to recover the exponent if they can
-     * observe both memory accesses and branches. However, branch prediction
-     * exploitation typically requires many traces of execution over the same
-     * data, which is defeated by randomized blinding.
+     * Allocate working memory for mbedtls_mpi_core_exp_mod()
      */
-    const size_t x_index = 0;
-    mbedtls_mpi_init(&W[x_index]);
-
-    j = N->n + 1;
-    /* All W[i] including the accumulator must have at least N->n limbs for
-     * the mpi_montmul() and mpi_montred() calls later. Here we ensure that
-     * W[1] and the accumulator W[x_index] are large enough. later we'll grow
-     * other W[i] to the same length. They must not be shrunk midway through
-     * this function!
-     */
-    MBEDTLS_MPI_CHK(mbedtls_mpi_grow(&W[x_index], j));
-    MBEDTLS_MPI_CHK(mbedtls_mpi_grow(&W[1],  j));
-    MBEDTLS_MPI_CHK(mbedtls_mpi_grow(&T, j * 2));
-
-    /*
-     * Compensate for negative A (and correct at the end)
-     */
-    neg = (A->s == -1);
-    if (neg) {
-        MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&Apos, A));
-        Apos.s = 1;
-        A = &Apos;
+    size_t T_limbs = mbedtls_mpi_core_exp_mod_working_limbs(N->n, E->n);
+    mbedtls_mpi_uint *T = (mbedtls_mpi_uint *) mbedtls_calloc(T_limbs, sizeof(mbedtls_mpi_uint));
+    if (T == NULL) {
+        return MBEDTLS_ERR_MPI_ALLOC_FAILED;
     }
 
+    mbedtls_mpi RR;
+    mbedtls_mpi_init(&RR);
+
     /*
      * If 1st call, pre-compute R^2 mod N
      */
     if (prec_RR == NULL || prec_RR->p == NULL) {
-        MBEDTLS_MPI_CHK(mbedtls_mpi_lset(&RR, 1));
-        MBEDTLS_MPI_CHK(mbedtls_mpi_shift_l(&RR, N->n * 2 * biL));
-        MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(&RR, &RR, N));
+        MBEDTLS_MPI_CHK(mbedtls_mpi_core_get_mont_r2_unsafe(&RR, N));
 
         if (prec_RR != NULL) {
-            memcpy(prec_RR, &RR, sizeof(mbedtls_mpi));
+            *prec_RR = RR;
         }
     } else {
-        memcpy(&RR, prec_RR, sizeof(mbedtls_mpi));
+        MBEDTLS_MPI_CHK(mbedtls_mpi_grow(prec_RR, N->n));
+        RR = *prec_RR;
     }
 
     /*
-     * W[1] = A * R^2 * R^-1 mod N = A * R mod N
+     * To preserve constness we need to make a copy of A. Using X for this to
+     * save memory.
      */
-    if (mbedtls_mpi_cmp_mpi(A, N) >= 0) {
-        MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(&W[1], A, N));
-        /* This should be a no-op because W[1] is already that large before
-         * mbedtls_mpi_mod_mpi(), but it's necessary to avoid an overflow
-         * in mpi_montmul() below, so let's make sure. */
-        MBEDTLS_MPI_CHK(mbedtls_mpi_grow(&W[1], N->n + 1));
-    } else {
-        MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&W[1], A));
-    }
-
-    /* Note that this is safe because W[1] always has at least N->n limbs
-     * (it grew above and was preserved by mbedtls_mpi_copy()). */
-    mpi_montmul(&W[1], &RR, N, mm, &T);
+    MBEDTLS_MPI_CHK(mbedtls_mpi_copy(X, A));
 
     /*
-     * W[x_index] = R^2 * R^-1 mod N = R mod N
+     * Compensate for negative A (and correct at the end).
      */
-    MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&W[x_index], &RR));
-    mpi_montred(&W[x_index], N, mm, &T);
-
-
-    if (window_bitsize > 1) {
-        /*
-         * W[i] = W[1] ^ i
-         *
-         * The first bit of the sliding window is always 1 and therefore we
-         * only need to store the second half of the table.
-         *
-         * (There are two special elements in the table: W[0] for the
-         * accumulator/result and W[1] for A in Montgomery form. Both of these
-         * are already set at this point.)
-         */
-        j = w_table_used_size / 2;
-
-        MBEDTLS_MPI_CHK(mbedtls_mpi_grow(&W[j], N->n + 1));
-        MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&W[j], &W[1]));
-
-        for (i = 0; i < window_bitsize - 1; i++) {
-            mpi_montmul(&W[j], &W[j], N, mm, &T);
-        }
-
-        /*
-         * W[i] = W[i - 1] * W[1]
-         */
-        for (i = j + 1; i < w_table_used_size; i++) {
-            MBEDTLS_MPI_CHK(mbedtls_mpi_grow(&W[i], N->n + 1));
-            MBEDTLS_MPI_CHK(mbedtls_mpi_copy(&W[i], &W[i - 1]));
-
-            mpi_montmul(&W[i], &W[1], N, mm, &T);
-        }
-    }
-
-    nblimbs = E->n;
-    bufsize = 0;
-    nbits   = 0;
-    state   = 0;
-
-    while (1) {
-        if (bufsize == 0) {
-            if (nblimbs == 0) {
-                break;
-            }
-
-            nblimbs--;
-
-            bufsize = sizeof(mbedtls_mpi_uint) << 3;
-        }
-
-        bufsize--;
-
-        ei = (E->p[nblimbs] >> bufsize) & 1;
-
-        /*
-         * skip leading 0s
-         */
-        if (ei == 0 && state == 0) {
-            continue;
-        }
-
-        if (ei == 0 && state == 1) {
-            /*
-             * out of window, square W[x_index]
-             */
-            MBEDTLS_MPI_CHK(mpi_select(&WW, W, w_table_used_size, x_index));
-            mpi_montmul(&W[x_index], &WW, N, mm, &T);
-            continue;
-        }
-
-        /*
-         * add ei to current window
-         */
-        state = 2;
-
-        nbits++;
-        exponent_bits_in_window |= (ei << (window_bitsize - nbits));
-
-        if (nbits == window_bitsize) {
-            /*
-             * W[x_index] = W[x_index]^window_bitsize R^-1 mod N
-             */
-            for (i = 0; i < window_bitsize; i++) {
-                MBEDTLS_MPI_CHK(mpi_select(&WW, W, w_table_used_size,
-                                           x_index));
-                mpi_montmul(&W[x_index], &WW, N, mm, &T);
-            }
-
-            /*
-             * W[x_index] = W[x_index] * W[exponent_bits_in_window] R^-1 mod N
-             */
-            MBEDTLS_MPI_CHK(mpi_select(&WW, W, w_table_used_size,
-                                       exponent_bits_in_window));
-            mpi_montmul(&W[x_index], &WW, N, mm, &T);
-
-            state--;
-            nbits = 0;
-            exponent_bits_in_window = 0;
-        }
-    }
+    X->s = 1;
 
     /*
-     * process the remaining bits
+     * Make sure that X is in a form that is safe for consumption by
+     * the core functions.
+     *
+     * - The core functions will not touch the limbs of X above N->n. The
+     *   result will be correct if those limbs are 0, which the mod call
+     *   ensures.
+     * - Also, X must have at least as many limbs as N for the calls to the
+     *   core functions.
      */
-    for (i = 0; i < nbits; i++) {
-        MBEDTLS_MPI_CHK(mpi_select(&WW, W, w_table_used_size, x_index));
-        mpi_montmul(&W[x_index], &WW, N, mm, &T);
-
-        exponent_bits_in_window <<= 1;
-
-        if ((exponent_bits_in_window & ((size_t) 1 << window_bitsize)) != 0) {
-            MBEDTLS_MPI_CHK(mpi_select(&WW, W, w_table_used_size, 1));
-            mpi_montmul(&W[x_index], &WW, N, mm, &T);
-        }
+    if (mbedtls_mpi_cmp_mpi(X, N) >= 0) {
+        MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(X, X, N));
     }
+    MBEDTLS_MPI_CHK(mbedtls_mpi_grow(X, N->n));
 
     /*
-     * W[x_index] = A^E * R * R^-1 mod N = A^E mod N
+     * Convert to and from Montgomery around mbedtls_mpi_core_exp_mod().
      */
-    mpi_montred(&W[x_index], N, mm, &T);
-
-    if (neg && E->n != 0 && (E->p[0] & 1) != 0) {
-        W[x_index].s = -1;
-        MBEDTLS_MPI_CHK(mbedtls_mpi_add_mpi(&W[x_index], N, &W[x_index]));
-    }
+    mbedtls_mpi_uint mm = mbedtls_mpi_core_montmul_init(N->p);
+    mbedtls_mpi_core_to_mont_rep(X->p, X->p, N->p, N->n, mm, RR.p, T);
+    mbedtls_mpi_core_exp_mod(X->p, X->p, N->p, N->n, E->p, E->n, RR.p, T);
+    mbedtls_mpi_core_from_mont_rep(X->p, X->p, N->p, N->n, mm, T);
 
     /*
-     * Load the result in the output variable.
+     * Correct for negative A.
      */
-    MBEDTLS_MPI_CHK(mbedtls_mpi_copy(X, &W[x_index]));
+    if (A->s == -1 && (E->p[0] & 1) != 0) {
+        mbedtls_ct_condition_t is_x_non_zero = mbedtls_mpi_core_check_zero_ct(X->p, X->n);
+        X->s = mbedtls_ct_mpi_sign_if(is_x_non_zero, -1, 1);
+
+        MBEDTLS_MPI_CHK(mbedtls_mpi_add_mpi(X, N, X));
+    }
 
 cleanup:
 
-    /* The first bit of the sliding window is always 1 and therefore the first
-     * half of the table was unused. */
-    for (i = w_table_used_size/2; i < w_table_used_size; i++) {
-        mbedtls_mpi_free(&W[i]);
-    }
-
-    mbedtls_mpi_free(&W[x_index]);
-    mbedtls_mpi_free(&W[1]);
-    mbedtls_mpi_free(&T);
-    mbedtls_mpi_free(&Apos);
-    mbedtls_mpi_free(&WW);
+    mbedtls_mpi_zeroize_and_free(T, T_limbs);
 
     if (prec_RR == NULL || prec_RR->p == NULL) {
         mbedtls_mpi_free(&RR);
diff --git a/library/bignum_core.c b/library/bignum_core.c
index dfed60d..1a3e0b9 100644
--- a/library/bignum_core.c
+++ b/library/bignum_core.c
@@ -856,16 +856,17 @@
     return c;
 }
 
-mbedtls_mpi_uint mbedtls_mpi_core_check_zero_ct(const mbedtls_mpi_uint *A,
-                                                size_t limbs)
+mbedtls_ct_condition_t mbedtls_mpi_core_check_zero_ct(const mbedtls_mpi_uint *A,
+                                                      size_t limbs)
 {
+    volatile const mbedtls_mpi_uint *force_read_A = A;
     mbedtls_mpi_uint bits = 0;
 
     for (size_t i = 0; i < limbs; i++) {
-        bits |= A[i];
+        bits |= force_read_A[i];
     }
 
-    return bits;
+    return mbedtls_ct_bool(bits);
 }
 
 void mbedtls_mpi_core_to_mont_rep(mbedtls_mpi_uint *X,
diff --git a/library/bignum_core.h b/library/bignum_core.h
index b56be0a..92c8d47 100644
--- a/library/bignum_core.h
+++ b/library/bignum_core.h
@@ -662,11 +662,11 @@
  * \param[in] A   The MPI to test.
  * \param limbs   Number of limbs in \p A.
  *
- * \return        0 if `A == 0`
- *                non-0 (may be any value) if `A != 0`.
+ * \return        MBEDTLS_CT_FALSE if `A == 0`
+ *                MBEDTLS_CT_TRUE  if `A != 0`.
  */
-mbedtls_mpi_uint mbedtls_mpi_core_check_zero_ct(const mbedtls_mpi_uint *A,
-                                                size_t limbs);
+mbedtls_ct_condition_t mbedtls_mpi_core_check_zero_ct(const mbedtls_mpi_uint *A,
+                                                      size_t limbs);
 
 /**
  * \brief          Returns the number of limbs of working memory required for
diff --git a/tests/suites/test_suite_bignum.function b/tests/suites/test_suite_bignum.function
index 50be2d2..f3a64e1 100644
--- a/tests/suites/test_suite_bignum.function
+++ b/tests/suites/test_suite_bignum.function
@@ -966,6 +966,45 @@
 /* END_CASE */
 
 /* BEGIN_CASE */
+void mpi_exp_mod_min_RR(char *input_A, char *input_E,
+                        char *input_N, char *input_X,
+                        int exp_result)
+{
+    mbedtls_mpi A, E, N, RR, Z, X;
+    int res;
+    mbedtls_mpi_init(&A); mbedtls_mpi_init(&E); mbedtls_mpi_init(&N);
+    mbedtls_mpi_init(&RR); mbedtls_mpi_init(&Z); mbedtls_mpi_init(&X);
+
+    TEST_EQUAL(mbedtls_test_read_mpi(&A, input_A), 0);
+    TEST_EQUAL(mbedtls_test_read_mpi(&E, input_E), 0);
+    TEST_EQUAL(mbedtls_test_read_mpi(&N, input_N), 0);
+    TEST_EQUAL(mbedtls_test_read_mpi(&X, input_X), 0);
+
+    TEST_EQUAL(mbedtls_mpi_core_get_mont_r2_unsafe(&RR, &N), 0);
+    TEST_EQUAL(mbedtls_mpi_shrink(&RR, 0), 0);
+    /* The objective of this test is to check that exp_mod defends
+     * against a smaller RR. */
+    TEST_LE_U(RR.n, N.n - 1);
+
+    res = mbedtls_mpi_exp_mod(&Z, &A, &E, &N, &RR);
+    /* We know that exp_mod internally needs RR to be as large as N.
+     * Validate that it is the case now, otherwise there was probably
+     * a buffer overread. */
+    TEST_EQUAL(RR.n, N.n);
+
+    TEST_EQUAL(res, exp_result);
+    if (res == 0) {
+        TEST_EQUAL(sign_is_valid(&Z), 1);
+        TEST_EQUAL(mbedtls_mpi_cmp_mpi(&Z, &X), 0);
+    }
+
+exit:
+    mbedtls_mpi_free(&A); mbedtls_mpi_free(&E); mbedtls_mpi_free(&N);
+    mbedtls_mpi_free(&RR); mbedtls_mpi_free(&Z); mbedtls_mpi_free(&X);
+}
+/* END_CASE */
+
+/* BEGIN_CASE */
 void mpi_exp_mod(char *input_A, char *input_E,
                  char *input_N, char *input_X,
                  int exp_result)
diff --git a/tests/suites/test_suite_bignum.misc.data b/tests/suites/test_suite_bignum.misc.data
index c53e42a..eb55dbe 100644
--- a/tests/suites/test_suite_bignum.misc.data
+++ b/tests/suites/test_suite_bignum.misc.data
@@ -1362,6 +1362,9 @@
 Test mbedtls_mpi_exp_mod: 10 ^ 0 (1 limb) mod 9
 mpi_exp_mod:"0a":"00":"09":"1":0
 
+Test mbedtls_mpi_exp_mod: -3 ^ 3 mod 27
+mpi_exp_mod:"-3":"3":"1b":"1b":0
+
 Test mbedtls_mpi_exp_mod: MAX_SIZE exponent
 mpi_exp_mod_size:2:MBEDTLS_MPI_MAX_SIZE:10:"":0
 
@@ -1391,6 +1394,14 @@
 depends_on:MPI_MAX_BITS_LARGER_THAN_792
 mpi_exp_mod:"-9f13012cd92aa72fb86ac8879d2fde4f7fd661aaae43a00971f081cc60ca277059d5c37e89652e2af2585d281d66ef6a9d38a117e9608e9e7574cd142dc55278838a2161dd56db9470d4c1da2d5df15a908ee2eb886aaa890f23be16de59386663a12f1afbb325431a3e835e3fd89b98b96a6f77382f458ef9a37e1f84a03045c8676ab55291a94c2228ea15448ee96b626b998":"40a54d1b9e86789f06d9607fb158672d64867665c73ee9abb545fc7a785634b354c7bae5b962ce8040cf45f2c1f3d3659b2ee5ede17534c8fc2ec85c815e8df1fe7048d12c90ee31b88a68a081f17f0d8ce5f4030521e9400083bcea73a429031d4ca7949c2000d597088e0c39a6014d8bf962b73bb2e8083bd0390a4e00b9b3":"eeaf0ab9adb38dd69c33f80afa8fc5e86072618775ff3c0b9ea2314c9c256576d674df7496ea81d3383b4813d692c6e0e0d5d8e250b98be48e495c1d6089dad15dc7d7b46154d6b6ce8ef4ad69b15d4982559b297bcf1885c529f566660e57ec68edbc3c05726cc02fd4cbf4976eaa9afd5138fe8376435b9fc61d2fc0eb06e3":"21acc7199e1b90f9b4844ffe12c19f00ec548c5d32b21c647d48b6015d8eb9ec9db05b4f3d44db4227a2b5659c1a7cceb9d5fa8fa60376047953ce7397d90aaeb7465e14e820734f84aa52ad0fc66701bcbb991d57715806a11531268e1e83dd48288c72b424a6287e9ce4e5cc4db0dd67614aecc23b0124a5776d36e5c89483":0
 
+Test mbedtls_mpi_exp_mod (N.n=3, RR.n=1 on 32 bit)
+depends_on:MBEDTLS_HAVE_INT32
+mpi_exp_mod_min_RR:"10":"2":"10000000100000001":"100":0
+
+Test mbedtls_mpi_exp_mod (N.n=3, RR.n=1 on 64 bit)
+depends_on:MBEDTLS_HAVE_INT64
+mpi_exp_mod_min_RR:"10":"2":"100000000000000010000000000000001":"100":0
+
 Base test GCD #1
 mpi_gcd:"2b5":"261":"15"