ecp_curves: Minor rework for p384

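This patch tidies up the p384 fast quasi-reduction and its tests:

- ecp_curves.c: rename the mbedtls_ecp_mod_p384_raw() prototype
  parameters to X / X_limbs, align the ADD/SUB/NEXT macro layout and
  add a comment stating the identity used to reduce the final carry.
- ecp_invasive.h: document the required input size as `768 / biL`
  limbs.
- ecp.py: rename the generator class EcpPp384R1Raw to EcpP384R1Raw,
  switch it to the "fixed" input style, drop a stray "0x" prefix from
  an input value and add inputs that trigger overflow and underflow
  in the A11 limb.
- test_suite_ecp.function: express the expected input size as
  `2 * limbs` instead of `(766 / biL) + 1`.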

Signed-off-by: Minos Galanakis <minos.galanakis@arm.com>
diff --git a/library/ecp_curves.c b/library/ecp_curves.c
index 179016b..c23ff2c 100644
--- a/library/ecp_curves.c
+++ b/library/ecp_curves.c
@@ -4586,7 +4586,7 @@
 #if defined(MBEDTLS_ECP_DP_SECP384R1_ENABLED)
 static int ecp_mod_p384(mbedtls_mpi *);
 MBEDTLS_STATIC_TESTABLE
-int mbedtls_ecp_mod_p384_raw(mbedtls_mpi_uint *N_p, size_t N_n);
+int mbedtls_ecp_mod_p384_raw(mbedtls_mpi_uint *X, size_t X_limbs);
 #endif
 #if defined(MBEDTLS_ECP_DP_SECP521R1_ENABLED)
 static int ecp_mod_p521(mbedtls_mpi *);
@@ -5207,69 +5207,70 @@
     INIT(384);
 
     ADD(12); ADD(21); ADD(20);
-    SUB(23);                                              NEXT;   // A0
+    SUB(23);                                                NEXT; // A0
 
     ADD(13); ADD(22); ADD(23);
-    SUB(12); SUB(20);                                   NEXT;     // A1
+    SUB(12); SUB(20);                                       NEXT; // A1
 
     ADD(14); ADD(23);
-    SUB(13); SUB(21);                                   NEXT;     // A2
+    SUB(13); SUB(21);                                       NEXT; // A2
 
     ADD(15); ADD(12); ADD(20); ADD(21);
-    SUB(14); SUB(22); SUB(23);                        NEXT;       // A3
+    SUB(14); SUB(22); SUB(23);                              NEXT; // A3
 
     ADD(21); ADD(21); ADD(16); ADD(13); ADD(12); ADD(20); ADD(22);
-    SUB(15); SUB(23); SUB(23);                        NEXT;       // A4
+    SUB(15); SUB(23); SUB(23);                              NEXT; // A4
 
     ADD(22); ADD(22); ADD(17); ADD(14); ADD(13); ADD(21); ADD(23);
-    SUB(16);                                              NEXT;   // A5
+    SUB(16);                                                NEXT; // A5
 
     ADD(23); ADD(23); ADD(18); ADD(15); ADD(14); ADD(22);
-    SUB(17);                                              NEXT;   // A6
+    SUB(17);                                                NEXT; // A6
 
     ADD(19); ADD(16); ADD(15); ADD(23);
-    SUB(18);                                              NEXT;   // A7
+    SUB(18);                                                NEXT; // A7
 
     ADD(20); ADD(17); ADD(16);
-    SUB(19);                                              NEXT;   // A8
+    SUB(19);                                                NEXT; // A8
 
     ADD(21); ADD(18); ADD(17);
-    SUB(20);                                              NEXT;   // A9
+    SUB(20);                                                NEXT; // A9
 
     ADD(22); ADD(19); ADD(18);
-    SUB(21);                                              NEXT;   // A10
+    SUB(21);                                                NEXT; // A10
 
     ADD(23); ADD(20); ADD(19);
     SUB(22);                                                      // A11
 
     RESET;
 
+    /* Use 2^384 = P + 2^128 + 2^96 - 2^32 + 1 to modulo reduce the final carry */
     ADD_LAST; NEXT;                                               // A0
     SUB_LAST; NEXT;                                               // A1
-    NEXT;                                                         // A2
+    ;         NEXT;                                               // A2
     ADD_LAST; NEXT;                                               // A3
     ADD_LAST; NEXT;                                               // A4
-    NEXT;                                                         // A5
-    NEXT;                                                         // A6
-    NEXT;                                                         // A7
-    NEXT;                                                         // A8
-    NEXT;                                                         // A9
-    NEXT;                                                         // A10
+    ;         NEXT;                                               // A5
+    ;         NEXT;                                               // A6
+    ;         NEXT;                                               // A7
+    ;         NEXT;                                               // A8
+    ;         NEXT;                                               // A9
+    ;         NEXT;                                               // A10
                                                                   // A11
 
     RESET;
 
     ADD_LAST; NEXT;                                               // A0
     SUB_LAST; NEXT;                                               // A1
-    NEXT;                                                         // A2
+    ;         NEXT;                                               // A2
     ADD_LAST; NEXT;                                               // A3
     ADD_LAST; NEXT;                                               // A4
-    NEXT;                                                         // A5
-    NEXT;                                                         // A6
-    NEXT;                                                         // A7
-    NEXT;                                                         // A8
-    NEXT;                                                         // A9
-    NEXT;                                                         // A10
+    ;         NEXT;                                               // A5
+    ;         NEXT;                                               // A6
+    ;         NEXT;                                               // A7
+    ;         NEXT;                                               // A8
+    ;         NEXT;                                               // A9
+    ;         NEXT;                                               // A10
                                                                   // A11
 
     LAST;
diff --git a/library/ecp_invasive.h b/library/ecp_invasive.h
index 501152c..d2ac20a 100644
--- a/library/ecp_invasive.h
+++ b/library/ecp_invasive.h
@@ -165,7 +165,7 @@
 /** Fast quasi-reduction modulo p384 (FIPS 186-3 D.2.4)
  *
  * \param[in,out]   X       The address of the MPI to be converted.
- *                          Must have exact limb size of `(766 / biL) + 1`.
+ *                          Must have exact limb size of `768 / biL`.
  *                          Upon return holds the reduced value which is
  *                          in range `0 <= X < 2 * N` (where N is the modulus).
  *                          The bitlength of the reduced value is the same as
diff --git a/scripts/mbedtls_dev/ecp.py b/scripts/mbedtls_dev/ecp.py
index 6d3bb05..10fcc5e 100644
--- a/scripts/mbedtls_dev/ecp.py
+++ b/scripts/mbedtls_dev/ecp.py
@@ -144,12 +144,13 @@
     def is_valid(self) -> bool:
         return True
 
-class EcpPp384R1Raw(bignum_common.ModOperationCommon,
-                    EcpTarget):
+
+class EcpP384R1Raw(bignum_common.ModOperationCommon,
+                   EcpTarget):
     """Test cases for ecp quasi_reduction modulo p384."""
     test_function = "ecp_mod_p384_raw"
     test_name = "ecp_mod_p384_raw"
-    input_style = "arch_split"
+    input_style = "fixed"
     arity = 1
 
     moduli = [("ffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
@@ -164,7 +165,7 @@
          "fffffff0000000000000000fffffffe"),
 
         # Maximum canonical P384 multiplication result
-        ("0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
+        ("ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
          "fdfffffffe0000000000000001fffffffc0000000000000000000000000000000"
          "10000000200000000fffffffe000000020000000400000000fffffffc00000004"),
 
@@ -178,6 +179,16 @@
          "b68abef41dbd35183a0614fb7222606ffffffff84396eee542f18a9189d94396c"
          "784059c17a9f18f807214ef32f2f10ffffffff8a77fac20000000000000000"),
 
+        # Testing with overflow in A11 = A(23) + A(20) + A(19) - A(22)
+        ("783753f8a5afba6c1862eead1deb2fcdd907272be3ffd18542b24a71ee8b26ca"
+         "b0aa33513610ff973042bbe1637cc9fc99ad36c7f703514572cf4f5c3044469a"
+         "8f5be6312c19e5d3f8fc1ac6ffffffffffffffff8c86252400000000ffffffff"),
+
+        # Testing with underflow in A11 = A(23) + A(20) + A(19) - A(22)
+        ("65e1d2362fce922663b7fd517586e88842a9b4bd092e93e6251c9c69f278cbf8"
+         "285d99ae3b53da5ba36e56701e2b17c225f1239556c5f00117fa140218b46ebd8"
+         "e34f50d0018701fa8a0a5cc00000000000000004410bcb4ffffffff00000000"),
+
         # First 8 number generated by random.getrandbits(768) - seed(2,2)
         ("ffed9235288bc781ae66267594c9c9500925e4749b575bd13653f8dd9b1f282e"
          "4067c3584ee207f8da94e3e8ab73738fcf1822ffbc6887782b491044d5e34124"
@@ -213,9 +224,7 @@
 
     @property
     def arg_a(self) -> str:
-        hex_digits = bignum_common.hex_digits_for_limb((766 // self.bits_in_limb) + 1,
-                                                       self.bits_in_limb)
-        return super().format_arg('{:x}'.format(self.int_a)).zfill(hex_digits)
+        return super().format_arg('{:x}'.format(self.int_a)).zfill(2 * self.hex_digits)
 
     def result(self) -> List[str]:
         result = self.int_a % self.int_n
diff --git a/tests/suites/test_suite_ecp.function b/tests/suites/test_suite_ecp.function
index 440a1e3..78aca8a 100644
--- a/tests/suites/test_suite_ecp.function
+++ b/tests/suites/test_suite_ecp.function
@@ -1452,7 +1452,7 @@
     size_t limbs = limbs_N;
     size_t bytes = limbs * sizeof(mbedtls_mpi_uint);
 
-    TEST_EQUAL(limbs_X, (766 / biL) + 1);
+    TEST_EQUAL(limbs_X, 2 * limbs);
     TEST_EQUAL(limbs_res, limbs);
 
     TEST_EQUAL(mbedtls_mpi_mod_modulus_setup(