Merge pull request #7230 from gabor-mezei-arm/6850_Secp256r1_fast_reduction

Extract Secp256r1 fast reduction from the prototype
diff --git a/library/ecp_curves.c b/library/ecp_curves.c
index f60f8b1..db21d7d 100644
--- a/library/ecp_curves.c
+++ b/library/ecp_curves.c
@@ -4580,6 +4580,8 @@
 #endif
 #if defined(MBEDTLS_ECP_DP_SECP256R1_ENABLED)
 static int ecp_mod_p256(mbedtls_mpi *);
+MBEDTLS_STATIC_TESTABLE
+int mbedtls_ecp_mod_p256_raw(mbedtls_mpi_uint *X, size_t X_limbs);
 #endif
 #if defined(MBEDTLS_ECP_DP_SECP384R1_ENABLED)
 static int ecp_mod_p384(mbedtls_mpi *);
@@ -5098,6 +5100,87 @@
 
 #endif /* MBEDTLS_ECP_DP_SECP224R1_ENABLED */
 
+#if defined(MBEDTLS_ECP_DP_SECP256R1_ENABLED)
+
+/*
+ * Fast quasi-reduction modulo p256 (FIPS 186-3 D.2.3), mbedtls_mpi wrapper.
+ */
+static int ecp_mod_p256(mbedtls_mpi *N)
+{
+    int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
+    size_t expected_width = 2 * 256 / biL; /* only width the raw routine accepts */
+    MBEDTLS_MPI_CHK(mbedtls_mpi_grow(N, expected_width)); /* N->p must hold that many limbs */
+    ret = mbedtls_ecp_mod_p256_raw(N->p, expected_width); /* reduces N->p in place */
+cleanup:
+    return ret;
+}
+
+MBEDTLS_STATIC_TESTABLE
+int mbedtls_ecp_mod_p256_raw(mbedtls_mpi_uint *X, size_t X_limbs)
+{
+    if (X_limbs != 2 * 256 / biL) { /* input must be exactly double-width */
+        return MBEDTLS_ERR_ECP_BAD_INPUT_DATA;
+    }
+
+    INIT(256);
+
+    ADD(8);  ADD(9);
+    SUB(11); SUB(12); SUB(13); SUB(14);                   NEXT; // A0
+
+    ADD(9);  ADD(10);
+    SUB(12); SUB(13); SUB(14); SUB(15);                   NEXT; // A1
+
+    ADD(10); ADD(11);
+    SUB(13); SUB(14); SUB(15);                            NEXT; // A2
+
+    ADD(11); ADD(11); ADD(12); ADD(12); ADD(13);
+    SUB(15); SUB(8);  SUB(9);                             NEXT; // A3
+
+    ADD(12); ADD(12); ADD(13); ADD(13); ADD(14);
+    SUB(9);  SUB(10);                                     NEXT; // A4
+
+    ADD(13); ADD(13); ADD(14); ADD(14); ADD(15);
+    SUB(10); SUB(11);                                     NEXT; // A5
+
+    ADD(14); ADD(14); ADD(15); ADD(15); ADD(14); ADD(13);
+    SUB(8);  SUB(9);                                      NEXT; // A6
+
+    ADD(15); ADD(15); ADD(15); ADD(8);
+    SUB(10); SUB(11); SUB(12); SUB(13);                         // A7
+
+    RESET;
+
+    /* P256 = 2^224 * (2^32 - 1) + 2^192 + 2^96 - 1, hence
+     * 2^256 = 2^224 - 2^192 - 2^96 + 1 (mod P256): fold the final carry in. */
+    ADD_LAST; NEXT;                                             // A0
+    ;         NEXT;                                             // A1
+    ;         NEXT;                                             // A2
+    SUB_LAST; NEXT;                                             // A3
+    ;         NEXT;                                             // A4
+    ;         NEXT;                                             // A5
+    SUB_LAST; NEXT;                                             // A6
+    ADD_LAST;                                                   // A7
+
+    RESET;
+
+    /* Same identity, applied once more to the carry that the previous
+     * folding step may itself have generated. */
+    ADD_LAST; NEXT;                                             // A0
+    ;         NEXT;                                             // A1
+    ;         NEXT;                                             // A2
+    SUB_LAST; NEXT;                                             // A3
+    ;         NEXT;                                             // A4
+    ;         NEXT;                                             // A5
+    SUB_LAST; NEXT;                                             // A6
+    ADD_LAST;                                                   // A7
+
+    LAST;
+
+    return 0;
+}
+
+#endif /* MBEDTLS_ECP_DP_SECP256R1_ENABLED */
+
 #undef LOAD32
 #undef MAX32
 #undef A
@@ -5118,8 +5201,7 @@
           MBEDTLS_ECP_DP_SECP256R1_ENABLED ||
           MBEDTLS_ECP_DP_SECP384R1_ENABLED */
 
-#if defined(MBEDTLS_ECP_DP_SECP256R1_ENABLED) ||   \
-    defined(MBEDTLS_ECP_DP_SECP384R1_ENABLED)
+#if defined(MBEDTLS_ECP_DP_SECP384R1_ENABLED)
 /*
  * The reader is advised to first understand ecp_mod_p192() since the same
  * general structure is used here, but with additional complications:
@@ -5240,43 +5322,6 @@
     N->p[bits / 8 / sizeof(mbedtls_mpi_uint)] += msw;
 }
 
-#if defined(MBEDTLS_ECP_DP_SECP256R1_ENABLED)
-/*
- * Fast quasi-reduction modulo p256 (FIPS 186-3 D.2.3)
- */
-static int ecp_mod_p256(mbedtls_mpi *N)
-{
-    INIT(256);
-
-    ADD(8); ADD(9);
-    SUB(11); SUB(12); SUB(13); SUB(14);             NEXT;         // A0
-
-    ADD(9); ADD(10);
-    SUB(12); SUB(13); SUB(14); SUB(15);             NEXT;         // A1
-
-    ADD(10); ADD(11);
-    SUB(13); SUB(14); SUB(15);                        NEXT;       // A2
-
-    ADD(11); ADD(11); ADD(12); ADD(12); ADD(13);
-    SUB(15); SUB(8); SUB(9);                        NEXT;         // A3
-
-    ADD(12); ADD(12); ADD(13); ADD(13); ADD(14);
-    SUB(9); SUB(10);                                   NEXT;      // A4
-
-    ADD(13); ADD(13); ADD(14); ADD(14); ADD(15);
-    SUB(10); SUB(11);                                   NEXT;     // A5
-
-    ADD(14); ADD(14); ADD(15); ADD(15); ADD(14); ADD(13);
-    SUB(8); SUB(9);                                   NEXT;       // A6
-
-    ADD(15); ADD(15); ADD(15); ADD(8);
-    SUB(10); SUB(11); SUB(12); SUB(13);             LAST;         // A7
-
-cleanup:
-    return ret;
-}
-#endif /* MBEDTLS_ECP_DP_SECP256R1_ENABLED */
-
 #if defined(MBEDTLS_ECP_DP_SECP384R1_ENABLED)
 /*
  * Fast quasi-reduction modulo p384 (FIPS 186-3 D.2.4)
diff --git a/library/ecp_invasive.h b/library/ecp_invasive.h
index 1972f8c..cb16d23 100644
--- a/library/ecp_invasive.h
+++ b/library/ecp_invasive.h
@@ -33,8 +33,7 @@
 
 #if defined(MBEDTLS_TEST_HOOKS) && defined(MBEDTLS_ECP_C)
 
-#if defined(MBEDTLS_ECP_DP_SECP256R1_ENABLED) ||   \
-    defined(MBEDTLS_ECP_DP_SECP384R1_ENABLED)
+#if defined(MBEDTLS_ECP_DP_SECP384R1_ENABLED)
 /* Preconditions:
  *   - bits is a multiple of 64 or is 224
  *   - c is -1 or -2
@@ -117,6 +116,28 @@
 
 #endif /* MBEDTLS_ECP_DP_SECP224R1_ENABLED */
 
+#if defined(MBEDTLS_ECP_DP_SECP256R1_ENABLED)
+
+/** Fast quasi-reduction modulo p256 (FIPS 186-3 D.2.3)
+ *
+ * \param[in,out]   X       The address of the MPI to be reduced.
+ *                          Must have exactly the limb size that stores a
+ *                          512-bit MPI (double the bitlength of the modulus).
+ *                          Upon return holds the reduced value which is
+ *                          in range `0 <= X < 2 * N` (where N is the modulus).
+ *                          The bitlength of the reduced value is at most
+ *                          that of the modulus (256 bits).
+ * \param[in]       X_limbs The length of \p X in limbs.
+ *
+ * \return          \c 0 on success.
+ * \return          #MBEDTLS_ERR_ECP_BAD_INPUT_DATA if \p X_limbs is not the
+ *                  limb size that stores a 512-bit MPI.
+ */
+MBEDTLS_STATIC_TESTABLE
+int mbedtls_ecp_mod_p256_raw(mbedtls_mpi_uint *X, size_t X_limbs);
+
+#endif /* MBEDTLS_ECP_DP_SECP256R1_ENABLED */
+
 #if defined(MBEDTLS_ECP_DP_SECP521R1_ENABLED)
 
 /** Fast quasi-reduction modulo p521 = 2^521 - 1 (FIPS 186-3 D.2.5)
diff --git a/scripts/mbedtls_dev/ecp.py b/scripts/mbedtls_dev/ecp.py
index 354b234..ffe48fc 100644
--- a/scripts/mbedtls_dev/ecp.py
+++ b/scripts/mbedtls_dev/ecp.py
@@ -145,6 +145,79 @@
         return True
 
 
+class EcpP256R1Raw(bignum_common.ModOperationCommon,
+                   EcpTarget):
+    """Test cases for ECP P256 fast reduction."""
+    symbol = "-"
+    test_function = "ecp_mod_p256_raw"
+    test_name = "ecp_mod_p256_raw"
+    input_style = "fixed"
+    arity = 1
+
+    moduli = ["ffffffff00000001000000000000000000000000ffffffffffffffffffffffff"] # type: List[str]
+
+    input_values = [
+        "0", "1",
+
+        # Modulus - 1
+        "ffffffff00000001000000000000000000000000fffffffffffffffffffffffe",
+
+        # Maximum canonical P256 multiplication result
+        ("fffffffe00000002fffffffe0000000100000001fffffffe00000001fffffffc"
+         "00000003fffffffcfffffffffffffffffffffffc000000000000000000000004"),
+
+        # Generate an overflow during reduction
+        ("0000000000000000000000010000000000000000000000000000000000000000"
+         "00000000000000000000000000000000000000000000000000000000ffffffff"),
+
+        # Generate an underflow during reduction
+        ("0000000000000000000000000000000000000000000000000000000000000010"
+         "ffffffff00000000000000000000000000000000000000000000000000000000"),
+
+        # Generate an overflow during carry reduction
+        ("aaaaaaaa00000000000000000000000000000000000000000000000000000000"
+         "00000000000000000000000000000000aaaaaaacaaaaaaaaaaaaaaaa00000000"),
+
+        # Generate an underflow during carry reduction
+        ("000000000000000000000001ffffffff00000000000000000000000000000000"
+         "0000000000000000000000000000000000000002000000020000000100000002"),
+
+        # First 8 numbers generated by random.getrandbits(512) - seed(2,2)
+        ("4067c3584ee207f8da94e3e8ab73738fcf1822ffbc6887782b491044d5e34124"
+         "5c6e433715ba2bdd177219d30e7a269fd95bafc8f2a4d27bdcf4bb99f4bea973"),
+        ("82523e86feac7eb7dc38f519b91751dacdbd47d364be8049a372db8f6e405d93"
+         "ffed9235288bc781ae66267594c9c9500925e4749b575bd13653f8dd9b1f282e"),
+        ("e8624fab5186ee32ee8d7ee9770348a05d300cb90706a045defc044a09325626"
+         "e6b58de744ab6cce80877b6f71e1f6d2ef8acd128b4f2fc15f3f57ebf30b94fa"),
+        ("829a48d422fe99a22c70501e533c91352d3d854e061b90303b08c6e33c729578"
+         "2d6c797f8f7d9b782a1be9cd8697bbd0e2520e33e44c50556c71c4a66148a86f"),
+        ("e89204e2e8168561867e5e15bc01bfce6a27e0dfcbf8754472154e76e4c11ab2"
+         "fec3f6b32e8d4b8a8f54f8ceacaab39e83844b40ffa9b9f15c14bc4a829e07b0"),
+        ("bd143fa9b714210c665d7435c1066932f4767f26294365b2721dea3bf63f23d0"
+         "dbe53fcafb2147df5ca495fa5a91c89b97eeab64ca2ce6bc5d3fd983c34c769f"),
+        ("74667bffe202849da9643a295a9ac6decbd4d3e2d4dec9ef83f0be4e80371eb9"
+         "7f81375eecc1cb6347733e847d718d733ff98ff387c56473a7a83ee0761ebfd2"),
+        ("d08f1bb2531d6460f0caeef038c89b38a8acb5137c9260dc74e088a9b9492f25"
+         "8ebdbfe3eb9ac688b9d39cca91551e8259cc60b17604e4b4e73695c3e652c71a"),
+
+        # Next 2 numbers generated by random.getrandbits(256)
+        "c5e2486c44a4a8f69dc8db48e86ec9c6e06f291b2a838af8d5c44a4eb3172062",
+        "d4c0dca8b4c9e755cc9c3adcf515a8234da4daeb4f3f87777ad1f45ae9500ec9"
+    ]
+
+    @property
+    def arg_a(self) -> str:
+        return super().format_arg('{:x}'.format(self.int_a)).zfill(2 * self.hex_digits)
+
+    def result(self) -> List[str]:
+        result = self.int_a % self.int_n
+        return [self.format_result(result)]
+
+    @property
+    def is_valid(self) -> bool:
+        return True
+
+
 class EcpP521R1Raw(bignum_common.ModOperationCommon,
                    EcpTarget):
     """Test cases for ecp quasi_reduction()."""
diff --git a/tests/suites/test_suite_ecp.function b/tests/suites/test_suite_ecp.function
index ee9f157..ecb3546 100644
--- a/tests/suites/test_suite_ecp.function
+++ b/tests/suites/test_suite_ecp.function
@@ -9,8 +9,7 @@
 #include "bignum_mod_raw_invasive.h"
 
 #if defined(MBEDTLS_TEST_HOOKS) &&                  \
-    (defined(MBEDTLS_ECP_DP_SECP256R1_ENABLED) ||  \
-    defined(MBEDTLS_ECP_DP_SECP384R1_ENABLED))
+    defined(MBEDTLS_ECP_DP_SECP384R1_ENABLED)
 #define HAVE_FIX_NEGATIVE
 #endif
 
@@ -1389,6 +1388,49 @@
 /* END_CASE */
 
 /* BEGIN_CASE depends_on:MBEDTLS_TEST_HOOKS */
+void ecp_mod_p256_raw(char *input_N,
+                      char *input_X,
+                      char *result)
+{
+    mbedtls_mpi_uint *X = NULL;
+    mbedtls_mpi_uint *N = NULL;
+    mbedtls_mpi_uint *res = NULL;
+    size_t limbs_X;
+    size_t limbs_N;
+    size_t limbs_res;
+
+    mbedtls_mpi_mod_modulus m;
+    mbedtls_mpi_mod_modulus_init(&m);
+
+    TEST_EQUAL(mbedtls_test_read_mpi_core(&X,   &limbs_X,   input_X), 0);
+    TEST_EQUAL(mbedtls_test_read_mpi_core(&N,   &limbs_N,   input_N), 0);
+    TEST_EQUAL(mbedtls_test_read_mpi_core(&res, &limbs_res, result),  0);
+
+    size_t limbs = limbs_N;
+    size_t bytes = limbs * sizeof(mbedtls_mpi_uint);
+
+    TEST_EQUAL(limbs_X, 2 * limbs);  /* input is double the modulus width */
+    TEST_EQUAL(limbs_res, limbs);    /* expected value is modulus-width */
+
+    TEST_EQUAL(mbedtls_mpi_mod_modulus_setup(
+                   &m, N, limbs,
+                   MBEDTLS_MPI_MOD_REP_MONTGOMERY), 0);
+    /* NOTE(review): needs MBEDTLS_ECP_DP_SECP256R1_ENABLED; not in depends_on. */
+    TEST_EQUAL(mbedtls_ecp_mod_p256_raw(X, limbs_X), 0);
+    TEST_LE_U(mbedtls_mpi_core_bitlen(X, limbs_X), 256); /* fits in 256 bits */
+    mbedtls_mpi_mod_raw_fix_quasi_reduction(X, &m); /* presumably [0, 2N) -> [0, N) */
+    ASSERT_COMPARE(X, bytes, res, bytes); /* only the low, modulus-width part */
+
+exit:
+    mbedtls_free(X);
+    mbedtls_free(res);
+
+    mbedtls_mpi_mod_modulus_free(&m);
+    mbedtls_free(N);
+}
+/* END_CASE */
+
+/* BEGIN_CASE depends_on:MBEDTLS_TEST_HOOKS */
 void ecp_mod_p521_raw(char *input_N,
                       char *input_X,
                       char *result)