Basic support for Curve448, similar to the current level of support for Curve25519
diff --git a/library/ecp.c b/library/ecp.c
index b41baef..92a188b 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -26,6 +26,7 @@
  * GECC = Guide to Elliptic Curve Cryptography - Hankerson, Menezes, Vanstone
  * FIPS 186-3 http://csrc.nist.gov/publications/fips/fips186-3/fips_186-3.pdf
  * RFC 4492 for the related TLS structures and constants
+ * RFC 7748 for the Curve448 and Curve25519 curve definitions
  *
  * [Curve25519] http://cr.yp.to/ecdh/curve25519-20060209.pdf
  *
@@ -99,7 +100,8 @@
 #define ECP_SHORTWEIERSTRASS
 #endif
 
-#if defined(MBEDTLS_ECP_DP_CURVE25519_ENABLED)
+#if defined(MBEDTLS_ECP_DP_CURVE25519_ENABLED) || \
+    defined(MBEDTLS_ECP_DP_CURVE448_ENABLED)
 #define ECP_MONTGOMERY
 #endif
 
@@ -1852,6 +1854,8 @@
 static int ecp_check_pubkey_mx( const mbedtls_ecp_group *grp, const mbedtls_ecp_point *pt )
 {
     /* [Curve25519 p. 5] Just check X is the correct number of bytes */
+    /* Allow any public value, if it's too big then we'll just reduce it mod p
+     * (RFC 7748 sec. 5 para. 3). */
     if( mbedtls_mpi_size( &pt->X ) > ( grp->nbits + 7 ) / 8 )
         return( MBEDTLS_ERR_ECP_INVALID_KEY );
 
@@ -1887,14 +1891,18 @@
 #if defined(ECP_MONTGOMERY)
     if( ecp_get_type( grp ) == ECP_TYPE_MONTGOMERY )
     {
-        /* see [Curve25519] page 5 */
+        /* see RFC 7748 sec. 5 para. 5 */
         if( mbedtls_mpi_get_bit( d, 0 ) != 0 ||
             mbedtls_mpi_get_bit( d, 1 ) != 0 ||
-            mbedtls_mpi_get_bit( d, 2 ) != 0 ||
             mbedtls_mpi_bitlen( d ) - 1 != grp->nbits ) /* mbedtls_mpi_bitlen is one-based! */
             return( MBEDTLS_ERR_ECP_INVALID_KEY );
         else
-            return( 0 );
+
+        /* see [Curve25519] page 5 */
+        if( grp->nbits == 254 && mbedtls_mpi_get_bit( d, 2 ) != 0 )
+            return( MBEDTLS_ERR_ECP_INVALID_KEY );
+
+        return( 0 );
     }
 #endif /* ECP_MONTGOMERY */
 #if defined(ECP_SHORTWEIERSTRASS)
@@ -1941,10 +1949,14 @@
         else
             MBEDTLS_MPI_CHK( mbedtls_mpi_set_bit( d, grp->nbits, 1 ) );
 
-        /* Make sure the last three bits are unset */
+        /* Make sure the last two bits are unset for Curve448, three bits for
+           Curve25519 */
         MBEDTLS_MPI_CHK( mbedtls_mpi_set_bit( d, 0, 0 ) );
         MBEDTLS_MPI_CHK( mbedtls_mpi_set_bit( d, 1, 0 ) );
-        MBEDTLS_MPI_CHK( mbedtls_mpi_set_bit( d, 2, 0 ) );
+        if( grp->nbits == 254 )
+        {
+            MBEDTLS_MPI_CHK( mbedtls_mpi_set_bit( d, 2, 0 ) );
+        }
     }
     else
 #endif /* ECP_MONTGOMERY */
diff --git a/library/ecp_curves.c b/library/ecp_curves.c
index df5ac3e..58630e3 100644
--- a/library/ecp_curves.c
+++ b/library/ecp_curves.c
@@ -627,6 +627,9 @@
 #if defined(MBEDTLS_ECP_DP_CURVE25519_ENABLED)
 static int ecp_mod_p255( mbedtls_mpi * );
 #endif
+#if defined(MBEDTLS_ECP_DP_CURVE448_ENABLED)
+static int ecp_mod_p448( mbedtls_mpi * );
+#endif
 #if defined(MBEDTLS_ECP_DP_SECP192K1_ENABLED)
 static int ecp_mod_p192k1( mbedtls_mpi * );
 #endif
@@ -687,6 +690,52 @@
 }
 #endif /* MBEDTLS_ECP_DP_CURVE25519_ENABLED */
 
+#if defined(MBEDTLS_ECP_DP_CURVE448_ENABLED)
+/*
+ * Specialized function for creating the Curve448 group
+ */
+static int ecp_use_curve448( mbedtls_ecp_group *grp )
+{
+    mbedtls_mpi Ns;
+    int ret;
+
+    mbedtls_mpi_init( &Ns );
+
+    /* Actually ( A + 2 ) / 4 */
+    MBEDTLS_MPI_CHK( mbedtls_mpi_read_string( &grp->A, 16, "98AA" ) );
+
+    /* P = 2^448 - 2^224 - 1 */
+    MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &grp->P, 1 ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_shift_l( &grp->P, 224 ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_sub_int( &grp->P, &grp->P, 1 ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_shift_l( &grp->P, 224 ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_sub_int( &grp->P, &grp->P, 1 ) );
+    grp->pbits = mbedtls_mpi_bitlen( &grp->P );
+
+    /* Y intentionally not set, since we use x/z coordinates.
+     * This is used as a marker to identify Montgomery curves! */
+    MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &grp->G.X, 5 ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &grp->G.Z, 1 ) );
+    mbedtls_mpi_free( &grp->G.Y );
+
+    /* N = 2^446 - 13818066809895115352007386748515426880336692474882178609894547503885 */
+    MBEDTLS_MPI_CHK( mbedtls_mpi_set_bit( &grp->N, 446, 1 ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_read_string( &Ns, 16,
+                                              "8335DC163BB124B65129C96FDE933D8D723A70AADC873D6D54A7BB0D" ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_sub_mpi( &grp->N, &grp->N, &Ns ) );
+
+    /* Actually, the required msb for private keys */
+    grp->nbits = 447;
+
+cleanup:
+    mbedtls_mpi_free( &Ns );
+    if( ret != 0 )
+        mbedtls_ecp_group_free( grp );
+
+    return( ret );
+}
+#endif /* MBEDTLS_ECP_DP_CURVE448_ENABLED */
+
 /*
  * Set a group using well-known domain parameters
  */
@@ -767,6 +816,12 @@
             return( ecp_use_curve25519( grp ) );
 #endif /* MBEDTLS_ECP_DP_CURVE25519_ENABLED */
 
+#if defined(MBEDTLS_ECP_DP_CURVE448_ENABLED)
+        case MBEDTLS_ECP_DP_CURVE448:
+            grp->modp = ecp_mod_p448;
+            return( ecp_use_curve448( grp ) );
+#endif /* MBEDTLS_ECP_DP_CURVE448_ENABLED */
+
         default:
             mbedtls_ecp_group_free( grp );
             return( MBEDTLS_ERR_ECP_FEATURE_UNAVAILABLE );
@@ -1176,7 +1231,7 @@
     M.s = 1;
     M.n = N->n - ( P255_WIDTH - 1 );
     if( M.n > P255_WIDTH + 1 )
-        M.n = P255_WIDTH + 1;
+        return( MBEDTLS_ERR_ECP_BAD_INPUT_DATA );
     M.p = Mp;
     memset( Mp, 0, sizeof Mp );
     memcpy( Mp, N->p + P255_WIDTH - 1, M.n * sizeof( mbedtls_mpi_uint ) );
@@ -1197,6 +1252,77 @@
 }
 #endif /* MBEDTLS_ECP_DP_CURVE25519_ENABLED */
 
+#if defined(MBEDTLS_ECP_DP_CURVE448_ENABLED)
+
+/* Size of p448 in terms of mbedtls_mpi_uint */
+#define P448_WIDTH      ( 448 / 8 / sizeof( mbedtls_mpi_uint ) )
+
+/* Number of limbs fully occupied by 2^224 (max), and limbs used by it (min) */
+#define DIV_ROUND_UP( X, Y ) ( ( ( X ) + ( Y ) - 1 ) / ( Y ) )
+#define P224_WIDTH_MIN   ( 28 / sizeof( mbedtls_mpi_uint ) )
+#define P224_WIDTH_MAX   DIV_ROUND_UP( 28, sizeof( mbedtls_mpi_uint ) )
+#define P224_UNUSED_BITS ( ( P224_WIDTH_MAX * sizeof( mbedtls_mpi_uint ) * 8 ) - 224 )
+
+/*
+ * Fast quasi-reduction modulo p448 = 2^448 - 2^224 - 1
+ * Write N as A0 + 2^448 A1 and A1 as B0 + 2^224 B1, and return
+ * A0 + A1 + B1 + (B0 + B1) * 2^224.  This is different to the reference
+ * implementation of Curve448, which uses its own special 56-bit limbs rather
+ * than a generic bignum library.  We could squeeze some extra speed out on
+ * 32-bit machines by splitting N up into 32-bit limbs and doing the
+ * arithmetic using the limbs directly as we do for the NIST primes above,
+ * but for 64-bit targets it should use half the number of operations if we do
+ * the reduction with 224-bit limbs, since mpi_add_mpi will then use 64-bit adds.
+ */
+static int ecp_mod_p448( mbedtls_mpi *N )
+{
+    int ret;
+    size_t i;
+    mbedtls_mpi M, Q;
+    mbedtls_mpi_uint Mp[P448_WIDTH + 1], Qp[P448_WIDTH];
+
+    if( N->n <= P448_WIDTH )
+        return( 0 );
+
+    /* M = A1 */
+    M.s = 1;
+    M.n = N->n - ( P448_WIDTH );
+    if( M.n > P448_WIDTH )
+        /* Shouldn't be called with N larger than 2^896! */
+        return( MBEDTLS_ERR_ECP_BAD_INPUT_DATA );
+    M.p = Mp;
+    memset( Mp, 0, sizeof( Mp ) );
+    memcpy( Mp, N->p + P448_WIDTH, M.n * sizeof( mbedtls_mpi_uint ) );
+
+    /* N = A0 */
+    for( i = P448_WIDTH; i < N->n; i++ )
+        N->p[i] = 0;
+
+    /* N += A1 */
+    MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( N, N, &M ) );
+
+    /* Q = B1, N += B1 */
+    Q = M;
+    Q.p = Qp;
+    memcpy( Qp, Mp, sizeof( Qp ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_shift_r( &Q, 224 ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( N, N, &Q ) );
+
+    /* M = (B0 + B1) * 2^224, N += M */
+    if( sizeof( mbedtls_mpi_uint ) > 4 )
+        Mp[P224_WIDTH_MIN] &= ( (mbedtls_mpi_uint)-1 ) >> ( P224_UNUSED_BITS );
+    for( i = P224_WIDTH_MAX; i < M.n; ++i )
+        Mp[i] = 0;
+    MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( &M, &M, &Q ) );
+    M.n = P448_WIDTH + 1; /* Make room for shifted carry bit from the addition */
+    MBEDTLS_MPI_CHK( mbedtls_mpi_shift_l( &M, 224 ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( N, N, &M ) );
+
+cleanup:
+    return( ret );
+}
+#endif /* MBEDTLS_ECP_DP_CURVE448_ENABLED */
+
 #if defined(MBEDTLS_ECP_DP_SECP192K1_ENABLED) ||   \
     defined(MBEDTLS_ECP_DP_SECP224K1_ENABLED) ||   \
     defined(MBEDTLS_ECP_DP_SECP256K1_ENABLED)
diff --git a/library/version_features.c b/library/version_features.c
index 1b06ff3..a452caf 100644
--- a/library/version_features.c
+++ b/library/version_features.c
@@ -309,6 +309,9 @@
 #if defined(MBEDTLS_ECP_DP_CURVE25519_ENABLED)
     "MBEDTLS_ECP_DP_CURVE25519_ENABLED",
 #endif /* MBEDTLS_ECP_DP_CURVE25519_ENABLED */
+#if defined(MBEDTLS_ECP_DP_CURVE448_ENABLED)
+    "MBEDTLS_ECP_DP_CURVE448_ENABLED",
+#endif /* MBEDTLS_ECP_DP_CURVE448_ENABLED */
 #if defined(MBEDTLS_ECP_NIST_OPTIM)
     "MBEDTLS_ECP_NIST_OPTIM",
 #endif /* MBEDTLS_ECP_NIST_OPTIM */