Merge pull request #7184 from gabor-mezei-arm/6349_Secp224r1_fast_reduction

Extract Secp224r1 fast reduction from the prototype
diff --git a/library/ecp_curves.c b/library/ecp_curves.c
index b352e76..81ad73a 100644
--- a/library/ecp_curves.c
+++ b/library/ecp_curves.c
@@ -4575,6 +4575,8 @@
 #endif
 #if defined(MBEDTLS_ECP_DP_SECP224R1_ENABLED)
 static int ecp_mod_p224(mbedtls_mpi *);
+MBEDTLS_STATIC_TESTABLE
+int mbedtls_ecp_mod_p224_raw(mbedtls_mpi_uint *X, size_t X_limbs);
 #endif
 #if defined(MBEDTLS_ECP_DP_SECP256R1_ENABLED)
 static int ecp_mod_p256(mbedtls_mpi *);
@@ -4951,6 +4953,173 @@
 #if defined(MBEDTLS_ECP_DP_SECP224R1_ENABLED) ||   \
     defined(MBEDTLS_ECP_DP_SECP256R1_ENABLED) ||   \
     defined(MBEDTLS_ECP_DP_SECP384R1_ENABLED)
+
+/*
+ * The reader is advised to first understand ecp_mod_p192() since the same
+ * general structure is used here, but with additional complications:
+ * (1) chunks of 32 bits, and (2) subtractions.
+ */
+
+/*
+ * For these primes, we need to handle data in chunks of 32 bits.
+ * This makes it more complicated if we use 64 bits limbs in MPI,
+ * which prevents us from using a uniform access method as for p192.
+ *
+ * So, we define a mini abstraction layer to access 32 bit chunks,
+ * load them in 'cur' for work, and store them back from 'cur' when done.
+ *
+ * While at it, also define the size of N in terms of 32-bit chunks.
+ */
+#define LOAD32      cur = A(i);
+
+#if defined(MBEDTLS_HAVE_INT32)  /* 32 bit */
+
+#define MAX32       X_limbs
+#define A(j)        X[j]
+#define STORE32     X[i] = (mbedtls_mpi_uint) cur;
+#define STORE0      X[i] = 0;
+
+#else /* 64 bit */
+
+#define MAX32   X_limbs * 2
+#define A(j)                                                \
+    (j) % 2 ?                                               \
+    (uint32_t) (X[(j) / 2] >> 32) :                         \
+    (uint32_t) (X[(j) / 2])
+#define STORE32                                             \
+    if (i % 2) {                                            \
+        X[i/2] &= 0x00000000FFFFFFFF;                       \
+        X[i/2] |= (uint64_t) (cur) << 32;                   \
+    } else {                                                \
+        X[i/2] &= 0xFFFFFFFF00000000;                       \
+        X[i/2] |= (uint32_t) cur;                           \
+    }
+
+#define STORE0                                              \
+    if (i % 2) {                                            \
+        X[i/2] &= 0x00000000FFFFFFFF;                       \
+    } else {                                                \
+        X[i/2] &= 0xFFFFFFFF00000000;                       \
+    }
+
+#endif
+
+static inline int8_t extract_carry(int64_t cur)
+{
+    return (int8_t) (cur >> 32);
+}
+
+#define ADD(j)    cur += A(j)
+#define SUB(j)    cur -= A(j)
+
+#define ADD_CARRY(cc) cur += (cc)
+#define SUB_CARRY(cc) cur -= (cc)
+
+#define ADD_LAST ADD_CARRY(last_c)
+#define SUB_LAST SUB_CARRY(last_c)
+
+/*
+ * Helpers for the main 'loop'
+ */
+#define INIT(b)                                         \
+    int8_t c = 0, last_c;                               \
+    int64_t cur;                                        \
+    size_t i = 0;                                       \
+    LOAD32;
+
+#define NEXT                                            \
+    c = extract_carry(cur);                             \
+    STORE32; i++; LOAD32;                               \
+    ADD_CARRY(c);
+
+#define RESET                                           \
+    c = extract_carry(cur);                             \
+    last_c = c;                                         \
+    STORE32; i = 0; LOAD32;                             \
+    c = 0;                                              \
+
+#define LAST                                            \
+    c = extract_carry(cur);                             \
+    STORE32; i++;                                       \
+    if (c != 0)                                         \
+    return MBEDTLS_ERR_ECP_BAD_INPUT_DATA;              \
+    while (i < MAX32) { STORE0; i++; }
+
+#if defined(MBEDTLS_ECP_DP_SECP224R1_ENABLED)
+
+/*
+ * Fast quasi-reduction modulo p224 (FIPS 186-3 D.2.2)
+ */
+static int ecp_mod_p224(mbedtls_mpi *N)
+{
+    int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
+    size_t expected_width =  2 * 224 / biL;
+    MBEDTLS_MPI_CHK(mbedtls_mpi_grow(N, expected_width));
+    ret = mbedtls_ecp_mod_p224_raw(N->p, expected_width);
+cleanup:
+    return ret;
+}
+
+MBEDTLS_STATIC_TESTABLE
+int mbedtls_ecp_mod_p224_raw(mbedtls_mpi_uint *X, size_t X_limbs)
+{
+    if (X_limbs != 2 * 224 / biL) {
+        return MBEDTLS_ERR_ECP_BAD_INPUT_DATA;
+    }
+
+    INIT(224);
+
+    SUB(7);  SUB(11);           NEXT;   // A0 += -A7  - A11
+    SUB(8);  SUB(12);           NEXT;   // A1 += -A8  - A12
+    SUB(9);  SUB(13);           NEXT;   // A2 += -A9  - A13
+    SUB(10); ADD(7);  ADD(11);  NEXT;   // A3 += -A10 + A7 + A11
+    SUB(11); ADD(8);  ADD(12);  NEXT;   // A4 += -A11 + A8 + A12
+    SUB(12); ADD(9);  ADD(13);  NEXT;   // A5 += -A12 + A9 + A13
+    SUB(13); ADD(10);                   // A6 += -A13 + A10
+
+    RESET;
+
+    /* Use 2^224 = P + 2^96 - 1 to modulo reduce the final carry */
+    SUB_LAST; NEXT;                     // A0 -= last_c
+    ;         NEXT;                     // A1
+    ;         NEXT;                     // A2
+    ADD_LAST; NEXT;                     // A3 += last_c
+    ;         NEXT;                     // A4
+    ;         NEXT;                     // A5
+                                        // A6
+
+    /* The carry reduction cannot generate a carry
+     * (see commit 73e8553 for details)*/
+
+    LAST;
+
+    return 0;
+}
+
+#endif /* MBEDTLS_ECP_DP_SECP224R1_ENABLED */
+
+#undef LOAD32
+#undef MAX32
+#undef A
+#undef STORE32
+#undef STORE0
+#undef ADD
+#undef SUB
+#undef ADD_CARRY
+#undef SUB_CARRY
+#undef ADD_LAST
+#undef SUB_LAST
+#undef INIT
+#undef NEXT
+#undef RESET
+#undef LAST
+
+#endif /* MBEDTLS_ECP_DP_SECP224R1_ENABLED ||
+          MBEDTLS_ECP_DP_SECP256R1_ENABLED ||
+          MBEDTLS_ECP_DP_SECP384R1_ENABLED */
+
+#if defined(MBEDTLS_ECP_DP_SECP256R1_ENABLED) ||   \
+    defined(MBEDTLS_ECP_DP_SECP384R1_ENABLED)
 /*
  * The reader is advised to first understand ecp_mod_p192() since the same
  * general structure is used here, but with additional complications:
@@ -5071,27 +5240,6 @@
     N->p[bits / 8 / sizeof(mbedtls_mpi_uint)] += msw;
 }
 
-#if defined(MBEDTLS_ECP_DP_SECP224R1_ENABLED)
-/*
- * Fast quasi-reduction modulo p224 (FIPS 186-3 D.2.2)
- */
-static int ecp_mod_p224(mbedtls_mpi *N)
-{
-    INIT(224);
-
-    SUB(7); SUB(11);               NEXT;      // A0 += -A7 - A11
-    SUB(8); SUB(12);               NEXT;      // A1 += -A8 - A12
-    SUB(9); SUB(13);               NEXT;      // A2 += -A9 - A13
-    SUB(10); ADD(7); ADD(11);    NEXT;        // A3 += -A10 + A7 + A11
-    SUB(11); ADD(8); ADD(12);    NEXT;        // A4 += -A11 + A8 + A12
-    SUB(12); ADD(9); ADD(13);    NEXT;        // A5 += -A12 + A9 + A13
-    SUB(13); ADD(10);               LAST;     // A6 += -A13 + A10
-
-cleanup:
-    return ret;
-}
-#endif /* MBEDTLS_ECP_DP_SECP224R1_ENABLED */
-
 #if defined(MBEDTLS_ECP_DP_SECP256R1_ENABLED)
 /*
  * Fast quasi-reduction modulo p256 (FIPS 186-3 D.2.3)
@@ -5186,8 +5334,7 @@
 #undef NEXT
 #undef LAST
 
-#endif /* MBEDTLS_ECP_DP_SECP224R1_ENABLED ||
-          MBEDTLS_ECP_DP_SECP256R1_ENABLED ||
+#endif /* MBEDTLS_ECP_DP_SECP256R1_ENABLED ||
           MBEDTLS_ECP_DP_SECP384R1_ENABLED */
 
 #if defined(MBEDTLS_ECP_DP_SECP521R1_ENABLED)
diff --git a/library/ecp_invasive.h b/library/ecp_invasive.h
index aba7cca..1972f8c 100644
--- a/library/ecp_invasive.h
+++ b/library/ecp_invasive.h
@@ -33,8 +33,7 @@
 
 #if defined(MBEDTLS_TEST_HOOKS) && defined(MBEDTLS_ECP_C)
 
-#if defined(MBEDTLS_ECP_DP_SECP224R1_ENABLED) ||   \
-    defined(MBEDTLS_ECP_DP_SECP256R1_ENABLED) ||   \
+#if defined(MBEDTLS_ECP_DP_SECP256R1_ENABLED) ||   \
     defined(MBEDTLS_ECP_DP_SECP384R1_ENABLED)
 /* Preconditions:
  *   - bits is a multiple of 64 or is 224
@@ -96,6 +95,28 @@
 
 #endif /* MBEDTLS_ECP_DP_SECP192R1_ENABLED */
 
+#if defined(MBEDTLS_ECP_DP_SECP224R1_ENABLED)
+
+/** Fast quasi-reduction modulo p224 (FIPS 186-3 D.2.2)
+ *
+ * \param[in,out]   X       The address of the MPI to be converted.
+ *                          Must have exact limb size that stores a 448-bit MPI
+ *                          (double the bitlength of the modulus).
+ *                          Upon return holds the reduced value which is
+ *                          in range `0 <= X < 2 * N` (where N is the modulus).
+ *                          The bitlength of the reduced value is the same as
+ *                          that of the modulus (224 bits).
+ * \param[in]       X_limbs The length of \p X in limbs.
+ *
+ * \return          \c 0 on success.
+ * \return          #MBEDTLS_ERR_ECP_BAD_INPUT_DATA if \p X_limbs is not the
+ *                  limb size that sores a 448-bit MPI.
+ */
+MBEDTLS_STATIC_TESTABLE
+int mbedtls_ecp_mod_p224_raw(mbedtls_mpi_uint *X, size_t X_limbs);
+
+#endif /* MBEDTLS_ECP_DP_SECP224R1_ENABLED */
+
 #if defined(MBEDTLS_ECP_DP_SECP521R1_ENABLED)
 
 /** Fast quasi-reduction modulo p521 = 2^521 - 1 (FIPS 186-3 D.2.5)
diff --git a/scripts/mbedtls_dev/bignum_common.py b/scripts/mbedtls_dev/bignum_common.py
index 2422175..5319ec6 100644
--- a/scripts/mbedtls_dev/bignum_common.py
+++ b/scripts/mbedtls_dev/bignum_common.py
@@ -74,6 +74,10 @@
     """Return all pair combinations from input values."""
     return [(x, y) for x in values for y in values]
 
+def hex_digits_for_limb(limbs: int, bits_in_limb: int) -> int:
+    """ Retrun the hex digits need for a number of limbs. """
+    return 2 * (limbs * bits_in_limb // 8)
+
 class OperationCommon(test_data_generation.BaseTest):
     """Common features for bignum binary operations.
 
@@ -138,7 +142,7 @@
 
     @property
     def hex_digits(self) -> int:
-        return 2 * (self.limbs * self.bits_in_limb // 8)
+        return hex_digits_for_limb(self.limbs, self.bits_in_limb)
 
     def format_arg(self, val: str) -> str:
         if self.input_style not in self.input_styles:
diff --git a/scripts/mbedtls_dev/ecp.py b/scripts/mbedtls_dev/ecp.py
index 6370d25..354b234 100644
--- a/scripts/mbedtls_dev/ecp.py
+++ b/scripts/mbedtls_dev/ecp.py
@@ -19,11 +19,13 @@
 from . import test_data_generation
 from . import bignum_common
 
+
 class EcpTarget(test_data_generation.BaseTarget):
     #pylint: disable=abstract-method, too-few-public-methods
     """Target for ecp test case generation."""
     target_basename = 'test_suite_ecp.generated'
 
+
 class EcpP192R1Raw(bignum_common.ModOperationCommon,
                    EcpTarget):
     """Test cases for ecp quasi_reduction()."""
@@ -76,6 +78,73 @@
     def is_valid(self) -> bool:
         return True
 
+
+class EcpP224R1Raw(bignum_common.ModOperationCommon,
+                   EcpTarget):
+    """Test cases for ecp quasi_reduction()."""
+    symbol = "-"
+    test_function = "ecp_mod_p224_raw"
+    test_name = "ecp_mod_p224_raw"
+    input_style = "arch_split"
+    arity = 1
+
+    moduli = ["ffffffffffffffffffffffffffffffff000000000000000000000001"] # type: List[str]
+
+    input_values = [
+        "0", "1",
+
+        # Modulus - 1
+        "ffffffffffffffffffffffffffffffff000000000000000000000000",
+
+        # Maximum canonical P224 multiplication result
+        ("fffffffffffffffffffffffffffffffe000000000000000000000000"
+         "00000001000000000000000000000000000000000000000000000000"),
+
+        # Generate an overflow during reduction
+        ("00000000000000000000000000010000000070000000002000001000"
+         "ffffffffffff9fffffffffe00000efff000070000000002000001003"),
+
+        # Generate an underflow during reduction
+        ("00000001000000000000000000000000000000000000000000000000"
+         "00000000000dc0000000000000000001000000010000000100000003"),
+
+        # First 8 number generated by random.getrandbits(448) - seed(2,2)
+        ("da94e3e8ab73738fcf1822ffbc6887782b491044d5e341245c6e4337"
+         "15ba2bdd177219d30e7a269fd95bafc8f2a4d27bdcf4bb99f4bea973"),
+        ("cdbd47d364be8049a372db8f6e405d93ffed9235288bc781ae662675"
+         "94c9c9500925e4749b575bd13653f8dd9b1f282e4067c3584ee207f8"),
+        ("defc044a09325626e6b58de744ab6cce80877b6f71e1f6d2ef8acd12"
+         "8b4f2fc15f3f57ebf30b94fa82523e86feac7eb7dc38f519b91751da"),
+        ("2d6c797f8f7d9b782a1be9cd8697bbd0e2520e33e44c50556c71c4a6"
+         "6148a86fe8624fab5186ee32ee8d7ee9770348a05d300cb90706a045"),
+        ("8f54f8ceacaab39e83844b40ffa9b9f15c14bc4a829e07b0829a48d4"
+         "22fe99a22c70501e533c91352d3d854e061b90303b08c6e33c729578"),
+        ("97eeab64ca2ce6bc5d3fd983c34c769fe89204e2e8168561867e5e15"
+         "bc01bfce6a27e0dfcbf8754472154e76e4c11ab2fec3f6b32e8d4b8a"),
+        ("a7a83ee0761ebfd2bd143fa9b714210c665d7435c1066932f4767f26"
+         "294365b2721dea3bf63f23d0dbe53fcafb2147df5ca495fa5a91c89b"),
+        ("74667bffe202849da9643a295a9ac6decbd4d3e2d4dec9ef83f0be4e"
+         "80371eb97f81375eecc1cb6347733e847d718d733ff98ff387c56473"),
+
+        # Next 2 number generated by random.getrandbits(224)
+        "eb9ac688b9d39cca91551e8259cc60b17604e4b4e73695c3e652c71a",
+        "f0caeef038c89b38a8acb5137c9260dc74e088a9b9492f258ebdbfe3"
+    ]
+
+    @property
+    def arg_a(self) -> str:
+        hex_digits = bignum_common.hex_digits_for_limb(448 // self.bits_in_limb, self.bits_in_limb)
+        return super().format_arg('{:x}'.format(self.int_a)).zfill(hex_digits)
+
+    def result(self) -> List[str]:
+        result = self.int_a % self.int_n
+        return [self.format_result(result)]
+
+    @property
+    def is_valid(self) -> bool:
+        return True
+
+
 class EcpP521R1Raw(bignum_common.ModOperationCommon,
                    EcpTarget):
     """Test cases for ecp quasi_reduction()."""
diff --git a/tests/suites/test_suite_ecp.function b/tests/suites/test_suite_ecp.function
index 96537c2..ee9f157 100644
--- a/tests/suites/test_suite_ecp.function
+++ b/tests/suites/test_suite_ecp.function
@@ -9,8 +9,7 @@
 #include "bignum_mod_raw_invasive.h"
 
 #if defined(MBEDTLS_TEST_HOOKS) &&                  \
-    (defined(MBEDTLS_ECP_DP_SECP224R1_ENABLED) ||  \
-    defined(MBEDTLS_ECP_DP_SECP256R1_ENABLED) ||  \
+    (defined(MBEDTLS_ECP_DP_SECP256R1_ENABLED) ||  \
     defined(MBEDTLS_ECP_DP_SECP384R1_ENABLED))
 #define HAVE_FIX_NEGATIVE
 #endif
@@ -1347,6 +1346,49 @@
 /* END_CASE */
 
 /* BEGIN_CASE depends_on:MBEDTLS_TEST_HOOKS */
+void ecp_mod_p224_raw(char *input_N,
+                      char *input_X,
+                      char *result)
+{
+    mbedtls_mpi_uint *X = NULL;
+    mbedtls_mpi_uint *N = NULL;
+    mbedtls_mpi_uint *res = NULL;
+    size_t limbs_X;
+    size_t limbs_N;
+    size_t limbs_res;
+
+    mbedtls_mpi_mod_modulus m;
+    mbedtls_mpi_mod_modulus_init(&m);
+
+    TEST_EQUAL(mbedtls_test_read_mpi_core(&X,   &limbs_X,   input_X), 0);
+    TEST_EQUAL(mbedtls_test_read_mpi_core(&N,   &limbs_N,   input_N), 0);
+    TEST_EQUAL(mbedtls_test_read_mpi_core(&res, &limbs_res, result),  0);
+
+    size_t limbs = limbs_N;
+    size_t bytes = limbs * sizeof(mbedtls_mpi_uint);
+
+    TEST_EQUAL(limbs_X, 448 / biL);
+    TEST_EQUAL(limbs_res, limbs);
+
+    TEST_EQUAL(mbedtls_mpi_mod_modulus_setup(
+                   &m, N, limbs,
+                   MBEDTLS_MPI_MOD_REP_MONTGOMERY), 0);
+
+    TEST_EQUAL(mbedtls_ecp_mod_p224_raw(X, limbs_X), 0);
+    TEST_LE_U(mbedtls_mpi_core_bitlen(X, limbs_X), 224);
+    mbedtls_mpi_mod_raw_fix_quasi_reduction(X, &m);
+    ASSERT_COMPARE(X, bytes, res, bytes);
+
+exit:
+    mbedtls_free(X);
+    mbedtls_free(res);
+
+    mbedtls_mpi_mod_modulus_free(&m);
+    mbedtls_free(N);
+}
+/* END_CASE */
+
+/* BEGIN_CASE depends_on:MBEDTLS_TEST_HOOKS */
 void ecp_mod_p521_raw(char *input_N,
                       char *input_X,
                       char *result)