Merge pull request #5637 from ronald-cron-arm/version-negotiation-1

TLS 1.2/1.3 version negotiation - 1
diff --git a/ChangeLog.d/add_handshake_completion_accessor b/ChangeLog.d/add_handshake_completion_accessor
new file mode 100644
index 0000000..e2b28cf
--- /dev/null
+++ b/ChangeLog.d/add_handshake_completion_accessor
@@ -0,0 +1,4 @@
+Features
+   * Add function mbedtls_ssl_is_handshake_over() to enable querying if the SSL
+     Handshake has completed or not, and thus whether to continue calling
+     mbedtls_ssl_handshake_step(), requested in #4383
diff --git a/ChangeLog.d/mbedtls_ecp_export.txt b/ChangeLog.d/mbedtls_ecp_export.txt
new file mode 100644
index 0000000..4b5d7d4
--- /dev/null
+++ b/ChangeLog.d/mbedtls_ecp_export.txt
@@ -0,0 +1,3 @@
+Features
+   * Add mbedtls_ecp_export() function to export ECP
+     keypair parameters. Fixes #4838.
diff --git a/ChangeLog.d/mbedtls_sha512_a64_crypto_acceleration.txt b/ChangeLog.d/mbedtls_sha512_a64_crypto_acceleration.txt
new file mode 100644
index 0000000..01be0b3
--- /dev/null
+++ b/ChangeLog.d/mbedtls_sha512_a64_crypto_acceleration.txt
@@ -0,0 +1,2 @@
+Features
+   * A64 crypto extension support for SHA-512
diff --git a/include/mbedtls/check_config.h b/include/mbedtls/check_config.h
index 06ba6b7..45f4cc5 100644
--- a/include/mbedtls/check_config.h
+++ b/include/mbedtls/check_config.h
@@ -605,6 +605,61 @@
 #error "MBEDTLS_SHA384_C defined without MBEDTLS_SHA512_C"
 #endif
 
+#if defined(MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT) && \
+    defined(MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY)
+#error "Must only define one of MBEDTLS_SHA512_USE_A64_CRYPTO_*"
+#endif
+
+#if defined(MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT) || \
+    defined(MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY)
+#if !defined(MBEDTLS_SHA512_C)
+#error "MBEDTLS_SHA512_USE_A64_CRYPTO_* defined without MBEDTLS_SHA512_C"
+#endif
+#if defined(MBEDTLS_SHA512_ALT) || defined(MBEDTLS_SHA512_PROCESS_ALT)
+#error "MBEDTLS_SHA512_*ALT can't be used with MBEDTLS_SHA512_USE_A64_CRYPTO_*"
+#endif
+/*
+ * Best performance comes from most recent compilers, with intrinsics and -O3.
+ * Must compile with -march=armv8.2-a+sha3, but we can't detect armv8.2-a, and
+ * can't always detect __ARM_FEATURE_SHA512 (notably clang 7-12).
+ *
+ * GCC < 8 won't work at all (lacks the sha512 instructions)
+ * GCC >= 8 uses intrinsics, sets __ARM_FEATURE_SHA512
+ *
+ * Clang < 7 won't work at all (lacks the sha512 instructions)
+ * Clang 7-12 don't have intrinsics (but we work around that with inline
+ *            assembler) or __ARM_FEATURE_SHA512
+ * Clang == 13.0.0 same as clang 12 (only seen on macOS)
+ * Clang >= 13.0.1 has __ARM_FEATURE_SHA512 and intrinsics
+ */
+#if defined(__aarch64__) && !defined(__ARM_FEATURE_SHA512)
+   /* Test Clang first, as it defines __GNUC__ */
+#  if defined(__clang__)
+#    if __clang_major__ < 7
+#      error "A more recent Clang is required for MBEDTLS_SHA512_USE_A64_CRYPTO_*"
+#    elif __clang_major__ < 13 || \
+         (__clang_major__ == 13 && __clang_minor__ == 0 && __clang_patchlevel__ == 0)
+       /* We implement the intrinsics with inline assembler, so don't error */
+#    else
+#      error "Must use minimum -march=armv8.2-a+sha3 for MBEDTLS_SHA512_USE_A64_CRYPTO_*"
+#    endif
+#  elif defined(__GNUC__)
+#    if __GNUC__ < 8
+#      error "A more recent GCC is required for MBEDTLS_SHA512_USE_A64_CRYPTO_*"
+#    else
+#      error "Must use minimum -march=armv8.2-a+sha3 for MBEDTLS_SHA512_USE_A64_CRYPTO_*"
+#    endif
+#  else
+#    error "Only GCC and Clang supported for MBEDTLS_SHA512_USE_A64_CRYPTO_*"
+#  endif
+#endif
+
+#endif /* MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT || MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY */
+
+#if defined(MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY) && !defined(__aarch64__)
+#error "MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY defined on non-Aarch64 system"
+#endif
+
 #if defined(MBEDTLS_SHA224_C) && !defined(MBEDTLS_SHA256_C)
 #error "MBEDTLS_SHA224_C defined without MBEDTLS_SHA256_C"
 #endif
diff --git a/include/mbedtls/ecp.h b/include/mbedtls/ecp.h
index 9895573..395db14 100644
--- a/include/mbedtls/ecp.h
+++ b/include/mbedtls/ecp.h
@@ -1278,6 +1278,26 @@
         const mbedtls_ecp_keypair *pub, const mbedtls_ecp_keypair *prv,
         int (*f_rng)(void *, unsigned char *, size_t), void *p_rng );
 
+/**
+ * \brief           This function exports generic key-pair parameters.
+ *
+ * \param key       The key pair to export from.
+ * \param grp       Slot for exported ECP group.
+ *                  It must point to an initialized ECP group.
+ * \param d         Slot for the exported secret value.
+ *                  It must point to an initialized mpi.
+ * \param Q         Slot for the exported public value.
+ *                  It must point to an initialized ECP point.
+ *
+ * \return          \c 0 on success,
+ * \return          #MBEDTLS_ERR_MPI_ALLOC_FAILED on memory-allocation failure.
+ * \return          #MBEDTLS_ERR_ECP_FEATURE_UNAVAILABLE if key id doesn't
+ *                  correspond to a known group.
+ * \return          Another negative error code on other kinds of failure.
+ */
+int mbedtls_ecp_export(const mbedtls_ecp_keypair *key, mbedtls_ecp_group *grp,
+                       mbedtls_mpi *d, mbedtls_ecp_point *Q);
+
 #if defined(MBEDTLS_SELF_TEST)
 
 /**
diff --git a/include/mbedtls/mbedtls_config.h b/include/mbedtls/mbedtls_config.h
index 996d9f6..2e1d04b 100644
--- a/include/mbedtls/mbedtls_config.h
+++ b/include/mbedtls/mbedtls_config.h
@@ -2847,6 +2847,60 @@
 #define MBEDTLS_SHA512_C
 
 /**
+ * \def MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT
+ *
+ * Enable acceleration of the SHA-512 cryptographic hash algorithm with the
+ * Arm A64 cryptographic extensions if they are available at runtime. If not,
+ * it will fall back to the C implementation.
+ *
+ * \note If MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT is defined when building
+ * for a non-Aarch64 build it will be silently ignored.
+ *
+ * \note The code uses the SHA-512 Neon intrinsics, so requires GCC >= 8 or
+ * Clang >= 7, and \c CFLAGS must be set to a minimum of
+ * \c -march=armv8.2-a+sha3. An optimisation level of \c -O3 generates the
+ * fastest code.
+ *
+ * \warning MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT cannot be defined at the
+ * same time as MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY.
+ *
+ * Requires: MBEDTLS_SHA512_C.
+ *
+ * Module:  library/sha512.c
+ *
+ * Uncomment to have the library check for the A64 SHA-512 crypto extensions
+ * and use them if available.
+ */
+//#define MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT
+
+/**
+ * \def MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY
+ *
+ * Enable acceleration of the SHA-512 cryptographic hash algorithm with the
+ * Arm A64 cryptographic extensions, which must be available at runtime (or
+ * an illegal instruction fault will occur).
+ *
+ * \note This allows builds with a smaller code size than with
+ * MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT
+ *
+ * \note The code uses the SHA-512 Neon intrinsics, so requires GCC >= 8 or
+ * Clang >= 7, and \c CFLAGS must be set to a minimum of
+ * \c -march=armv8.2-a+sha3. An optimisation level of \c -O3 generates the
+ * fastest code.
+ *
+ * \warning MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY cannot be defined at the same
+ * time as MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT.
+ *
+ * Requires: MBEDTLS_SHA512_C.
+ *
+ * Module:  library/sha512.c
+ *
+ * Uncomment to have the library use the A64 SHA-512 crypto extensions
+ * unconditionally.
+ */
+//#define MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY
+
+/**
  * \def MBEDTLS_SSL_CACHE_C
  *
  * Enable simple SSL cache implementation.
diff --git a/include/mbedtls/ssl.h b/include/mbedtls/ssl.h
index a16c8e6..7e9985e 100644
--- a/include/mbedtls/ssl.h
+++ b/include/mbedtls/ssl.h
@@ -4357,11 +4357,40 @@
 int mbedtls_ssl_handshake( mbedtls_ssl_context *ssl );
 
 /**
+ * \brief          After calling mbedtls_ssl_handshake() to start the SSL
+ *                 handshake you can call this function to check whether the
+ *                 handshake is over for a given SSL context. This function
+ *                 should be also used to determine when to stop calling
+ *                 mbedtls_handshake_step() for that context.
+ *
+ * \param ssl      SSL context
+ *
+ * \return         \c 1 if handshake is over, \c 0 if it is still ongoing.
+ */
+static inline int mbedtls_ssl_is_handshake_over( mbedtls_ssl_context *ssl )
+{
+    return( ssl->MBEDTLS_PRIVATE( state ) == MBEDTLS_SSL_HANDSHAKE_OVER );
+}
+
+/**
  * \brief          Perform a single step of the SSL handshake
  *
  * \note           The state of the context (ssl->state) will be at
  *                 the next state after this function returns \c 0. Do not
- *                 call this function if state is MBEDTLS_SSL_HANDSHAKE_OVER.
+ *                 call this function if mbedtls_ssl_is_handshake_over()
+ *                 returns \c 1.
+ *
+ * \warning        Whilst in the past you may have used direct access to the
+ *                 context state (ssl->state) in order to ascertain when to
+ *                 stop calling this function and although you can still do
+ *                 so with something like ssl->MBEDTLS_PRIVATE(state) or by
+ *                 defining MBEDTLS_ALLOW_PRIVATE_ACCESS, this is now
+ *                 considered deprecated and could be broken in any future
+ *                 release. If you still find you have good reason for such
+ *                 direct access, then please do contact the team to explain
+ *                 this (raise an issue or post to the mailing list), so that
+ *                 we can add a solution to your problem that will be
+ *                 guaranteed to work in the future.
  *
  * \param ssl      SSL context
  *
diff --git a/library/ecp.c b/library/ecp.c
index ba76abb..f39cb02 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -3356,6 +3356,26 @@
     return( ret );
 }
 
+/*
+ * Export generic key-pair parameters.
+ */
+int mbedtls_ecp_export(const mbedtls_ecp_keypair *key, mbedtls_ecp_group *grp,
+                       mbedtls_mpi *d, mbedtls_ecp_point *Q)
+{
+    int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
+
+    if( ( ret = mbedtls_ecp_group_copy( grp, &key->grp ) ) != 0 )
+        return ret;
+
+    if( ( ret = mbedtls_mpi_copy( d, &key->d ) ) != 0 )
+        return ret;
+
+    if( ( ret = mbedtls_ecp_copy( Q, &key->Q ) ) != 0 )
+        return ret;
+
+    return 0;
+}
+
 #if defined(MBEDTLS_SELF_TEST)
 
 /*
diff --git a/library/sha256.c b/library/sha256.c
index ff62802..bdc396a 100644
--- a/library/sha256.c
+++ b/library/sha256.c
@@ -49,8 +49,15 @@
       defined(MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY)
 #    include <arm_neon.h>
 #  endif
-#  if defined(MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT) && defined(__linux__)
-#    include <sys/auxv.h>
+#  if defined(MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT)
+#    if defined(__unix__)
+#      if defined(__linux__)
+         /* Our preferred method of detection is getauxval() */
+#        include <sys/auxv.h>
+#      endif
+       /* Use SIGILL on Unix, and fall back to it on Linux */
+#      include <signal.h>
+#    endif
 #  endif
 #elif defined(_M_ARM64)
 #  if defined(MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT) || \
@@ -272,10 +279,10 @@
         uint32x4_t abcd_orig = abcd;
         uint32x4_t efgh_orig = efgh;
 
-        uint32x4_t sched0 = vld1q_u32( (const uint32_t *)( msg + 16 * 0 ) );
-        uint32x4_t sched1 = vld1q_u32( (const uint32_t *)( msg + 16 * 1 ) );
-        uint32x4_t sched2 = vld1q_u32( (const uint32_t *)( msg + 16 * 2 ) );
-        uint32x4_t sched3 = vld1q_u32( (const uint32_t *)( msg + 16 * 3 ) );
+        uint32x4_t sched0 = (uint32x4_t) vld1q_u8( msg + 16 * 0 );
+        uint32x4_t sched1 = (uint32x4_t) vld1q_u8( msg + 16 * 1 );
+        uint32x4_t sched2 = (uint32x4_t) vld1q_u8( msg + 16 * 2 );
+        uint32x4_t sched3 = (uint32x4_t) vld1q_u8( msg + 16 * 3 );
 
 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__  /* Will be true if not defined */
                                                /* Untested on BE */
diff --git a/library/sha512.c b/library/sha512.c
index 2b4cc54..71fbff0 100644
--- a/library/sha512.c
+++ b/library/sha512.c
@@ -50,12 +50,128 @@
 #endif /* MBEDTLS_PLATFORM_C */
 #endif /* MBEDTLS_SELF_TEST */
 
+#if defined(__aarch64__)
+#  if defined(MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT) || \
+      defined(MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY)
+#    include <arm_neon.h>
+#  endif
+#  if defined(MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT)
+#    if defined(__unix__)
+#      if defined(__linux__)
+         /* Our preferred method of detection is getauxval() */
+#        include <sys/auxv.h>
+#      endif
+       /* Use SIGILL on Unix, and fall back to it on Linux */
+#      include <signal.h>
+#    endif
+#  endif
+#elif defined(_M_ARM64)
+#  if defined(MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT) || \
+      defined(MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY)
+#    include <arm64_neon.h>
+#  endif
+#else
+#  undef MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY
+#  undef MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT
+#endif
+
+#if defined(MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT)
+/*
+ * Capability detection code comes early, so we can disable
+ * MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT if no detection mechanism found
+ */
+#if defined(HWCAP_SHA512)
+static int mbedtls_a64_crypto_sha512_determine_support( void )
+{
+    return( ( getauxval( AT_HWCAP ) & HWCAP_SHA512 ) ? 1 : 0 );
+}
+#elif defined(__APPLE__)
+#include <sys/types.h>
+#include <sys/sysctl.h>
+
+static int mbedtls_a64_crypto_sha512_determine_support( void )
+{
+    int value = 0;
+    size_t value_len = sizeof(value);
+
+    int ret = sysctlbyname( "hw.optional.armv8_2_sha512", &value, &value_len,
+                            NULL, 0 );
+    return( ret == 0 && value != 0 );
+}
+#elif defined(_M_ARM64)
+/*
+ * As of March 2022, there don't appear to be any PF_ARM_V8_* flags
+ * available to pass to IsProcessorFeaturePresent() to check for
+ * SHA-512 support. So we fall back to the C code only.
+ */
+#if defined(_MSC_VER)
+#pragma message "No mechanism to detect A64_CRYPTO found, using C code only"
+#else
+#warning "No mechanism to detect A64_CRYPTO found, using C code only"
+#endif
+#elif defined(__unix__) && defined(SIG_SETMASK)
+/* Detection with SIGILL, setjmp() and longjmp() */
+#include <signal.h>
+#include <setjmp.h>
+
+#ifndef asm
+#define asm __asm__
+#endif
+
+static jmp_buf return_from_sigill;
+
+/*
+ * A64 SHA512 support detection via SIGILL
+ */
+static void sigill_handler( int signal )
+{
+    (void) signal;
+    longjmp( return_from_sigill, 1 );
+}
+
+static int mbedtls_a64_crypto_sha512_determine_support( void )
+{
+    struct sigaction old_action, new_action;
+
+    sigset_t old_mask;
+    if( sigprocmask( 0, NULL, &old_mask ) )
+        return( 0 );
+
+    sigemptyset( &new_action.sa_mask );
+    new_action.sa_flags = 0;
+    new_action.sa_handler = sigill_handler;
+
+    sigaction( SIGILL, &new_action, &old_action );
+
+    static int ret = 0;
+
+    if( setjmp( return_from_sigill ) == 0 )        /* First return only */
+    {
+        /* If this traps, we will return a second time from setjmp() with 1 */
+        asm( "sha512h q0, q0, v0.2d" : : : "v0" );
+        ret = 1;
+    }
+
+    sigaction( SIGILL, &old_action, NULL );
+    sigprocmask( SIG_SETMASK, &old_mask, NULL );
+
+    return( ret );
+}
+#else
+#warning "No mechanism to detect A64_CRYPTO found, using C code only"
+#undef MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT
+#endif  /* HWCAP_SHA512, __APPLE__, __unix__ && SIG_SETMASK */
+
+#endif  /* MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT */
+
 #define SHA512_VALIDATE_RET(cond)                           \
     MBEDTLS_INTERNAL_VALIDATE_RET( cond, MBEDTLS_ERR_SHA512_BAD_INPUT_DATA )
 #define SHA512_VALIDATE(cond)  MBEDTLS_INTERNAL_VALIDATE( cond )
 
 #if !defined(MBEDTLS_SHA512_ALT)
 
+#define SHA512_BLOCK_SIZE 128
+
 #if defined(MBEDTLS_SHA512_SMALLER)
 static void sha512_put_uint64_be( uint64_t n, unsigned char *b, uint8_t i )
 {
@@ -188,9 +304,249 @@
     UL64(0x4CC5D4BECB3E42B6),  UL64(0x597F299CFC657E2A),
     UL64(0x5FCB6FAB3AD6FAEC),  UL64(0x6C44198C4A475817)
 };
+#endif
 
-int mbedtls_internal_sha512_process( mbedtls_sha512_context *ctx,
-                                     const unsigned char data[128] )
+#if defined(MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT) || \
+    defined(MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY)
+
+#if defined(MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY)
+#  define mbedtls_internal_sha512_process_many_a64_crypto mbedtls_internal_sha512_process_many
+#  define mbedtls_internal_sha512_process_a64_crypto      mbedtls_internal_sha512_process
+#endif
+
+#ifndef asm
+#define asm __asm__
+#endif
+
+/* Accelerated SHA-512 implementation originally written by Simon Tatham for PuTTY,
+ * under the MIT licence; dual-licensed as Apache 2 with his kind permission.
+ */
+
+#if defined(__clang__) && \
+           (__clang_major__ < 13 || \
+           (__clang_major__ == 13 && __clang_minor__ == 0 && __clang_patchlevel__ == 0))
+static inline uint64x2_t vsha512su0q_u64(uint64x2_t x, uint64x2_t y)
+{
+    asm( "sha512su0 %0.2D,%1.2D" : "+w" (x) : "w" (y) );
+    return( x );
+}
+static inline uint64x2_t vsha512su1q_u64(uint64x2_t x, uint64x2_t y, uint64x2_t z)
+{
+    asm( "sha512su1 %0.2D,%1.2D,%2.2D" : "+w" (x) : "w" (y), "w" (z) );
+    return( x );
+}
+static inline uint64x2_t vsha512hq_u64(uint64x2_t x, uint64x2_t y, uint64x2_t z)
+{
+    asm( "sha512h %0,%1,%2.2D" : "+w" (x) : "w" (y), "w" (z) );
+    return( x );
+}
+static inline uint64x2_t vsha512h2q_u64(uint64x2_t x, uint64x2_t y, uint64x2_t z)
+{
+    asm( "sha512h2 %0,%1,%2.2D" : "+w" (x) : "w" (y), "w" (z) );
+    return( x );
+}
+#endif  /* __clang__ etc */
+
+static size_t mbedtls_internal_sha512_process_many_a64_crypto(
+                  mbedtls_sha512_context *ctx, const uint8_t *msg, size_t len )
+{
+    uint64x2_t ab = vld1q_u64( &ctx->state[0] );
+    uint64x2_t cd = vld1q_u64( &ctx->state[2] );
+    uint64x2_t ef = vld1q_u64( &ctx->state[4] );
+    uint64x2_t gh = vld1q_u64( &ctx->state[6] );
+
+    size_t processed = 0;
+
+    for ( ;
+          len >= SHA512_BLOCK_SIZE;
+          processed += SHA512_BLOCK_SIZE,
+                msg += SHA512_BLOCK_SIZE,
+                len -= SHA512_BLOCK_SIZE )
+    {
+        uint64x2_t initial_sum, sum, intermed;
+
+        uint64x2_t ab_orig = ab;
+        uint64x2_t cd_orig = cd;
+        uint64x2_t ef_orig = ef;
+        uint64x2_t gh_orig = gh;
+
+        uint64x2_t s0 = (uint64x2_t) vld1q_u8( msg + 16 * 0 );
+        uint64x2_t s1 = (uint64x2_t) vld1q_u8( msg + 16 * 1 );
+        uint64x2_t s2 = (uint64x2_t) vld1q_u8( msg + 16 * 2 );
+        uint64x2_t s3 = (uint64x2_t) vld1q_u8( msg + 16 * 3 );
+        uint64x2_t s4 = (uint64x2_t) vld1q_u8( msg + 16 * 4 );
+        uint64x2_t s5 = (uint64x2_t) vld1q_u8( msg + 16 * 5 );
+        uint64x2_t s6 = (uint64x2_t) vld1q_u8( msg + 16 * 6 );
+        uint64x2_t s7 = (uint64x2_t) vld1q_u8( msg + 16 * 7 );
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__  /* assume LE if these not defined; untested on BE */
+        s0 = vreinterpretq_u64_u8( vrev64q_u8( vreinterpretq_u8_u64( s0 ) ) );
+        s1 = vreinterpretq_u64_u8( vrev64q_u8( vreinterpretq_u8_u64( s1 ) ) );
+        s2 = vreinterpretq_u64_u8( vrev64q_u8( vreinterpretq_u8_u64( s2 ) ) );
+        s3 = vreinterpretq_u64_u8( vrev64q_u8( vreinterpretq_u8_u64( s3 ) ) );
+        s4 = vreinterpretq_u64_u8( vrev64q_u8( vreinterpretq_u8_u64( s4 ) ) );
+        s5 = vreinterpretq_u64_u8( vrev64q_u8( vreinterpretq_u8_u64( s5 ) ) );
+        s6 = vreinterpretq_u64_u8( vrev64q_u8( vreinterpretq_u8_u64( s6 ) ) );
+        s7 = vreinterpretq_u64_u8( vrev64q_u8( vreinterpretq_u8_u64( s7 ) ) );
+#endif
+
+        /* Rounds 0 and 1 */
+        initial_sum = vaddq_u64( s0, vld1q_u64( &K[0] ) );
+        sum = vaddq_u64( vextq_u64( initial_sum, initial_sum, 1 ), gh );
+        intermed = vsha512hq_u64( sum, vextq_u64( ef, gh, 1 ), vextq_u64( cd, ef, 1 ) );
+        gh = vsha512h2q_u64( intermed, cd, ab );
+        cd = vaddq_u64( cd, intermed );
+
+        /* Rounds 2 and 3 */
+        initial_sum = vaddq_u64( s1, vld1q_u64( &K[2] ) );
+        sum = vaddq_u64( vextq_u64( initial_sum, initial_sum, 1 ), ef );
+        intermed = vsha512hq_u64( sum, vextq_u64( cd, ef, 1 ), vextq_u64( ab, cd, 1 ) );
+        ef = vsha512h2q_u64( intermed, ab, gh );
+        ab = vaddq_u64( ab, intermed );
+
+        /* Rounds 4 and 5 */
+        initial_sum = vaddq_u64( s2, vld1q_u64( &K[4] ) );
+        sum = vaddq_u64( vextq_u64( initial_sum, initial_sum, 1 ), cd );
+        intermed = vsha512hq_u64( sum, vextq_u64( ab, cd, 1 ), vextq_u64( gh, ab, 1 ) );
+        cd = vsha512h2q_u64( intermed, gh, ef );
+        gh = vaddq_u64( gh, intermed );
+
+        /* Rounds 6 and 7 */
+        initial_sum = vaddq_u64( s3, vld1q_u64( &K[6] ) );
+        sum = vaddq_u64( vextq_u64( initial_sum, initial_sum, 1 ), ab );
+        intermed = vsha512hq_u64( sum, vextq_u64( gh, ab, 1 ), vextq_u64( ef, gh, 1 ) );
+        ab = vsha512h2q_u64( intermed, ef, cd );
+        ef = vaddq_u64( ef, intermed );
+
+        /* Rounds 8 and 9 */
+        initial_sum = vaddq_u64( s4, vld1q_u64( &K[8] ) );
+        sum = vaddq_u64( vextq_u64( initial_sum, initial_sum, 1 ), gh );
+        intermed = vsha512hq_u64( sum, vextq_u64( ef, gh, 1 ), vextq_u64( cd, ef, 1 ) );
+        gh = vsha512h2q_u64( intermed, cd, ab );
+        cd = vaddq_u64( cd, intermed );
+
+        /* Rounds 10 and 11 */
+        initial_sum = vaddq_u64( s5, vld1q_u64( &K[10] ) );
+        sum = vaddq_u64( vextq_u64( initial_sum, initial_sum, 1 ), ef );
+        intermed = vsha512hq_u64( sum, vextq_u64( cd, ef, 1 ), vextq_u64( ab, cd, 1 ) );
+        ef = vsha512h2q_u64( intermed, ab, gh );
+        ab = vaddq_u64( ab, intermed );
+
+        /* Rounds 12 and 13 */
+        initial_sum = vaddq_u64( s6, vld1q_u64( &K[12] ) );
+        sum = vaddq_u64( vextq_u64( initial_sum, initial_sum, 1 ), cd );
+        intermed = vsha512hq_u64( sum, vextq_u64( ab, cd, 1 ), vextq_u64( gh, ab, 1 ) );
+        cd = vsha512h2q_u64( intermed, gh, ef );
+        gh = vaddq_u64( gh, intermed );
+
+        /* Rounds 14 and 15 */
+        initial_sum = vaddq_u64( s7, vld1q_u64( &K[14] ) );
+        sum = vaddq_u64( vextq_u64( initial_sum, initial_sum, 1 ), ab );
+        intermed = vsha512hq_u64( sum, vextq_u64( gh, ab, 1 ), vextq_u64( ef, gh, 1 ) );
+        ab = vsha512h2q_u64( intermed, ef, cd );
+        ef = vaddq_u64( ef, intermed );
+
+        for ( unsigned int t = 16; t < 80; t += 16 )
+        {
+            /* Rounds t and t + 1 */
+            s0 = vsha512su1q_u64( vsha512su0q_u64( s0, s1 ), s7, vextq_u64( s4, s5, 1 ) );
+            initial_sum = vaddq_u64( s0, vld1q_u64( &K[t] ) );
+            sum = vaddq_u64( vextq_u64( initial_sum, initial_sum, 1 ), gh );
+            intermed = vsha512hq_u64( sum, vextq_u64( ef, gh, 1 ), vextq_u64( cd, ef, 1 ) );
+            gh = vsha512h2q_u64( intermed, cd, ab );
+            cd = vaddq_u64( cd, intermed );
+
+            /* Rounds t + 2 and t + 3 */
+            s1 = vsha512su1q_u64( vsha512su0q_u64( s1, s2 ), s0, vextq_u64( s5, s6, 1 ) );
+            initial_sum = vaddq_u64( s1, vld1q_u64( &K[t + 2] ) );
+            sum = vaddq_u64( vextq_u64( initial_sum, initial_sum, 1 ), ef );
+            intermed = vsha512hq_u64( sum, vextq_u64( cd, ef, 1 ), vextq_u64( ab, cd, 1 ) );
+            ef = vsha512h2q_u64( intermed, ab, gh );
+            ab = vaddq_u64( ab, intermed );
+
+            /* Rounds t + 4 and t + 5 */
+            s2 = vsha512su1q_u64( vsha512su0q_u64( s2, s3 ), s1, vextq_u64( s6, s7, 1 ) );
+            initial_sum = vaddq_u64( s2, vld1q_u64( &K[t + 4] ) );
+            sum = vaddq_u64( vextq_u64( initial_sum, initial_sum, 1 ), cd );
+            intermed = vsha512hq_u64( sum, vextq_u64( ab, cd, 1 ), vextq_u64( gh, ab, 1 ) );
+            cd = vsha512h2q_u64( intermed, gh, ef );
+            gh = vaddq_u64( gh, intermed );
+
+            /* Rounds t + 6 and t + 7 */
+            s3 = vsha512su1q_u64( vsha512su0q_u64( s3, s4 ), s2, vextq_u64( s7, s0, 1 ) );
+            initial_sum = vaddq_u64( s3, vld1q_u64( &K[t + 6] ) );
+            sum = vaddq_u64( vextq_u64( initial_sum, initial_sum, 1 ), ab );
+            intermed = vsha512hq_u64( sum, vextq_u64( gh, ab, 1 ), vextq_u64( ef, gh, 1 ) );
+            ab = vsha512h2q_u64( intermed, ef, cd );
+            ef = vaddq_u64( ef, intermed );
+
+            /* Rounds t + 8 and t + 9 */
+            s4 = vsha512su1q_u64( vsha512su0q_u64( s4, s5 ), s3, vextq_u64( s0, s1, 1 ) );
+            initial_sum = vaddq_u64( s4, vld1q_u64( &K[t + 8] ) );
+            sum = vaddq_u64( vextq_u64( initial_sum, initial_sum, 1 ), gh );
+            intermed = vsha512hq_u64( sum, vextq_u64( ef, gh, 1 ), vextq_u64( cd, ef, 1 ) );
+            gh = vsha512h2q_u64( intermed, cd, ab );
+            cd = vaddq_u64( cd, intermed );
+
+            /* Rounds t + 10 and t + 11 */
+            s5 = vsha512su1q_u64( vsha512su0q_u64( s5, s6 ), s4, vextq_u64( s1, s2, 1 ) );
+            initial_sum = vaddq_u64( s5, vld1q_u64( &K[t + 10] ) );
+            sum = vaddq_u64( vextq_u64( initial_sum, initial_sum, 1 ), ef );
+            intermed = vsha512hq_u64( sum, vextq_u64( cd, ef, 1 ), vextq_u64( ab, cd, 1 ) );
+            ef = vsha512h2q_u64( intermed, ab, gh );
+            ab = vaddq_u64( ab, intermed );
+
+            /* Rounds t + 12 and t + 13 */
+            s6 = vsha512su1q_u64( vsha512su0q_u64( s6, s7 ), s5, vextq_u64( s2, s3, 1 ) );
+            initial_sum = vaddq_u64( s6, vld1q_u64( &K[t + 12] ) );
+            sum = vaddq_u64( vextq_u64( initial_sum, initial_sum, 1 ), cd );
+            intermed = vsha512hq_u64( sum, vextq_u64( ab, cd, 1 ), vextq_u64( gh, ab, 1 ) );
+            cd = vsha512h2q_u64( intermed, gh, ef );
+            gh = vaddq_u64( gh, intermed );
+
+            /* Rounds t + 14 and t + 15 */
+            s7 = vsha512su1q_u64( vsha512su0q_u64( s7, s0 ), s6, vextq_u64( s3, s4, 1 ) );
+            initial_sum = vaddq_u64( s7, vld1q_u64( &K[t + 14] ) );
+            sum = vaddq_u64( vextq_u64( initial_sum, initial_sum, 1 ), ab );
+            intermed = vsha512hq_u64( sum, vextq_u64( gh, ab, 1 ), vextq_u64( ef, gh, 1 ) );
+            ab = vsha512h2q_u64( intermed, ef, cd );
+            ef = vaddq_u64( ef, intermed );
+        }
+
+        ab = vaddq_u64( ab, ab_orig );
+        cd = vaddq_u64( cd, cd_orig );
+        ef = vaddq_u64( ef, ef_orig );
+        gh = vaddq_u64( gh, gh_orig );
+    }
+
+    vst1q_u64( &ctx->state[0], ab );
+    vst1q_u64( &ctx->state[2], cd );
+    vst1q_u64( &ctx->state[4], ef );
+    vst1q_u64( &ctx->state[6], gh );
+
+    return( processed );
+}
+
+int mbedtls_internal_sha512_process_a64_crypto( mbedtls_sha512_context *ctx,
+                              const unsigned char data[SHA512_BLOCK_SIZE] )
+{
+    return( mbedtls_internal_sha512_process_many_a64_crypto( ctx, data,
+                SHA512_BLOCK_SIZE ) == SHA512_BLOCK_SIZE ) ? 0 : -1;
+}
+
+#endif /* MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT || MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY */
+
+
+#if !defined(MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT)
+#define mbedtls_internal_sha512_process_many_c mbedtls_internal_sha512_process_many
+#define mbedtls_internal_sha512_process_c      mbedtls_internal_sha512_process
+#endif
+
+
+#if !defined(MBEDTLS_SHA512_PROCESS_ALT) && !defined(MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY)
+
+int mbedtls_internal_sha512_process_c( mbedtls_sha512_context *ctx,
+                                       const unsigned char data[SHA512_BLOCK_SIZE] )
 {
     int i;
     struct
@@ -291,7 +647,68 @@
     return( 0 );
 }
 
-#endif /* !MBEDTLS_SHA512_PROCESS_ALT */
+#endif /* !MBEDTLS_SHA512_PROCESS_ALT && !MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY */
+
+
+#if !defined(MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY)
+
+static size_t mbedtls_internal_sha512_process_many_c(
+                  mbedtls_sha512_context *ctx, const uint8_t *data, size_t len)
+{
+    size_t processed = 0;
+
+    while( len >= SHA512_BLOCK_SIZE )
+    {
+        if( mbedtls_internal_sha512_process_c( ctx, data ) != 0)
+            return( 0 );
+
+        data += SHA512_BLOCK_SIZE;
+        len  -= SHA512_BLOCK_SIZE;
+
+        processed += SHA512_BLOCK_SIZE;
+    }
+
+    return( processed );
+}
+
+#endif /* !MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY */
+
+
+#if defined(MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT)
+
+int mbedtls_a64_crypto_sha512_has_support( void )
+{
+    static int done = 0;
+    static int supported = 0;
+
+    if( !done )
+    {
+        supported = mbedtls_a64_crypto_sha512_determine_support();
+        done = 1;
+    }
+
+    return( supported );
+}
+
+static size_t mbedtls_internal_sha512_process_many( mbedtls_sha512_context *ctx,
+                  const uint8_t *msg, size_t len )
+{
+    if( mbedtls_a64_crypto_sha512_has_support() )
+        return( mbedtls_internal_sha512_process_many_a64_crypto( ctx, msg, len ) );
+    else
+        return( mbedtls_internal_sha512_process_many_c( ctx, msg, len ) );
+}
+
+int mbedtls_internal_sha512_process( mbedtls_sha512_context *ctx,
+        const unsigned char data[SHA512_BLOCK_SIZE] )
+{
+    if( mbedtls_a64_crypto_sha512_has_support() )
+        return( mbedtls_internal_sha512_process_a64_crypto( ctx, data ) );
+    else
+        return( mbedtls_internal_sha512_process_c( ctx, data ) );
+}
+
+#endif /* MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT */
 
 /*
  * SHA-512 process buffer
@@ -311,7 +728,7 @@
         return( 0 );
 
     left = (unsigned int) (ctx->total[0] & 0x7F);
-    fill = 128 - left;
+    fill = SHA512_BLOCK_SIZE - left;
 
     ctx->total[0] += (uint64_t) ilen;
 
@@ -330,13 +747,15 @@
         left = 0;
     }
 
-    while( ilen >= 128 )
+    while( ilen >= SHA512_BLOCK_SIZE )
     {
-        if( ( ret = mbedtls_internal_sha512_process( ctx, input ) ) != 0 )
-            return( ret );
+        size_t processed =
+            mbedtls_internal_sha512_process_many( ctx, input, ilen );
+        if( processed < SHA512_BLOCK_SIZE )
+            return( MBEDTLS_ERR_ERROR_GENERIC_ERROR );
 
-        input += 128;
-        ilen  -= 128;
+        input += processed;
+        ilen  -= processed;
     }
 
     if( ilen > 0 )
@@ -373,7 +792,7 @@
     else
     {
         /* We'll need an extra block */
-        memset( ctx->buffer + used, 0, 128 - used );
+        memset( ctx->buffer + used, 0, SHA512_BLOCK_SIZE - used );
 
         if( ( ret = mbedtls_internal_sha512_process( ctx, ctx->buffer ) ) != 0 )
             return( ret );
diff --git a/library/ssl_msg.c b/library/ssl_msg.c
index ae3dc13..4eac24b 100644
--- a/library/ssl_msg.c
+++ b/library/ssl_msg.c
@@ -1921,7 +1921,7 @@
         {
             len = in_buf_len - ( ssl->in_hdr - ssl->in_buf );
 
-            if( ssl->state != MBEDTLS_SSL_HANDSHAKE_OVER )
+            if( mbedtls_ssl_is_handshake_over( ssl ) == 0 )
                 timeout = ssl->handshake->retransmit_timeout;
             else
                 timeout = ssl->conf->read_timeout;
@@ -1945,7 +1945,7 @@
             MBEDTLS_SSL_DEBUG_MSG( 2, ( "timeout" ) );
             mbedtls_ssl_set_timer( ssl, 0 );
 
-            if( ssl->state != MBEDTLS_SSL_HANDSHAKE_OVER )
+            if( mbedtls_ssl_is_handshake_over( ssl ) == 0 )
             {
                 if( ssl_double_retransmit_timeout( ssl ) != 0 )
                 {
@@ -2380,7 +2380,7 @@
         return( ret );
 
     /* Update state and set timer */
-    if( ssl->state == MBEDTLS_SSL_HANDSHAKE_OVER )
+    if( mbedtls_ssl_is_handshake_over( ssl ) == 1 )
         ssl->handshake->retransmit_state = MBEDTLS_SSL_RETRANS_FINISHED;
     else
     {
@@ -2971,9 +2971,9 @@
         }
 
         if( ssl->handshake != NULL &&
-            ( ( ssl->state   != MBEDTLS_SSL_HANDSHAKE_OVER &&
+           ( ( mbedtls_ssl_is_handshake_over( ssl ) == 0 &&
                 recv_msg_seq != ssl->handshake->in_msg_seq ) ||
-              ( ssl->state  == MBEDTLS_SSL_HANDSHAKE_OVER &&
+             ( mbedtls_ssl_is_handshake_over( ssl ) == 1 &&
                 ssl->in_msg[0] != MBEDTLS_SSL_HS_CLIENT_HELLO ) ) )
         {
             if( recv_msg_seq > ssl->handshake->in_msg_seq )
@@ -3039,7 +3039,7 @@
 {
     mbedtls_ssl_handshake_params * const hs = ssl->handshake;
 
-    if( ssl->state != MBEDTLS_SSL_HANDSHAKE_OVER && hs != NULL )
+    if( mbedtls_ssl_is_handshake_over( ssl ) == 0 && hs != NULL )
     {
         ssl->handshake->update_checksum( ssl, ssl->in_msg, ssl->in_hslen );
     }
@@ -3660,7 +3660,7 @@
      */
     if( rec_epoch == 0 &&
         ssl->conf->endpoint == MBEDTLS_SSL_IS_SERVER &&
-        ssl->state == MBEDTLS_SSL_HANDSHAKE_OVER &&
+        mbedtls_ssl_is_handshake_over( ssl ) == 1 &&
         ssl->in_msgtype == MBEDTLS_SSL_MSG_HANDSHAKE &&
         ssl->in_left > 13 &&
         ssl->in_buf[13] == MBEDTLS_SSL_HS_CLIENT_HELLO )
@@ -4817,7 +4817,7 @@
         /* Drop unexpected ApplicationData records,
          * except at the beginning of renegotiations */
         if( ssl->in_msgtype == MBEDTLS_SSL_MSG_APPLICATION_DATA &&
-            ssl->state != MBEDTLS_SSL_HANDSHAKE_OVER
+            mbedtls_ssl_is_handshake_over( ssl ) == 0
 #if defined(MBEDTLS_SSL_RENEGOTIATION)
             && ! ( ssl->renego_status == MBEDTLS_SSL_RENEGOTIATION_IN_PROGRESS &&
                    ssl->state == MBEDTLS_SSL_SERVER_HELLO )
@@ -4829,7 +4829,7 @@
         }
 
         if( ssl->handshake != NULL &&
-            ssl->state == MBEDTLS_SSL_HANDSHAKE_OVER  )
+            mbedtls_ssl_is_handshake_over( ssl ) == 1 )
         {
             mbedtls_ssl_handshake_wrapup_free_hs_transform( ssl );
         }
@@ -5253,7 +5253,7 @@
     int in_ctr_cmp;
     int out_ctr_cmp;
 
-    if( ssl->state != MBEDTLS_SSL_HANDSHAKE_OVER ||
+    if( mbedtls_ssl_is_handshake_over( ssl ) == 0 ||
         ssl->renego_status == MBEDTLS_SSL_RENEGOTIATION_PENDING ||
         ssl->conf->disable_renegotiation == MBEDTLS_SSL_RENEGOTIATION_DISABLED )
     {
@@ -5431,7 +5431,7 @@
     }
 #endif
 
-    if( ssl->state != MBEDTLS_SSL_HANDSHAKE_OVER )
+    if( mbedtls_ssl_is_handshake_over( ssl ) == 0 )
     {
         ret = mbedtls_ssl_handshake( ssl );
         if( ret != MBEDTLS_ERR_SSL_WAITING_SERVER_HELLO_RENEGO &&
@@ -5542,7 +5542,7 @@
 
         /* We're going to return something now, cancel timer,
          * except if handshake (renegotiation) is in progress */
-        if( ssl->state == MBEDTLS_SSL_HANDSHAKE_OVER )
+        if( mbedtls_ssl_is_handshake_over( ssl ) == 1 )
             mbedtls_ssl_set_timer( ssl, 0 );
 
 #if defined(MBEDTLS_SSL_PROTO_DTLS)
@@ -5686,7 +5686,7 @@
     }
 #endif
 
-    if( ssl->state != MBEDTLS_SSL_HANDSHAKE_OVER )
+    if( mbedtls_ssl_is_handshake_over( ssl ) == 0 )
     {
         if( ( ret = mbedtls_ssl_handshake( ssl ) ) != 0 )
         {
@@ -5717,7 +5717,7 @@
     if( ssl->out_left != 0 )
         return( mbedtls_ssl_flush_output( ssl ) );
 
-    if( ssl->state == MBEDTLS_SSL_HANDSHAKE_OVER )
+    if( mbedtls_ssl_is_handshake_over( ssl ) == 1 )
     {
         if( ( ret = mbedtls_ssl_send_alert_message( ssl,
                         MBEDTLS_SSL_ALERT_LEVEL_WARNING,
diff --git a/library/ssl_tls.c b/library/ssl_tls.c
index b148485..86445de 100644
--- a/library/ssl_tls.c
+++ b/library/ssl_tls.c
@@ -120,7 +120,7 @@
     *enabled = MBEDTLS_SSL_CID_DISABLED;
 
     if( ssl->conf->transport != MBEDTLS_SSL_TRANSPORT_DATAGRAM ||
-        ssl->state != MBEDTLS_SSL_HANDSHAKE_OVER )
+        mbedtls_ssl_is_handshake_over( ssl ) == 0 )
     {
         return( MBEDTLS_ERR_SSL_BAD_INPUT_DATA );
     }
@@ -2803,7 +2803,7 @@
     if( ssl            == NULL                       ||
         ssl->conf      == NULL                       ||
         ssl->handshake == NULL                       ||
-        ssl->state     == MBEDTLS_SSL_HANDSHAKE_OVER )
+        mbedtls_ssl_is_handshake_over( ssl ) == 1 )
     {
         return( MBEDTLS_ERR_SSL_BAD_INPUT_DATA );
     }
@@ -2886,7 +2886,7 @@
     MBEDTLS_SSL_DEBUG_MSG( 2, ( "=> handshake" ) );
 
     /* Main handshake loop */
-    while( ssl->state != MBEDTLS_SSL_HANDSHAKE_OVER )
+    while( mbedtls_ssl_is_handshake_over( ssl ) == 0 )
     {
         ret = mbedtls_ssl_handshake_step( ssl );
 
@@ -2986,7 +2986,7 @@
     /* On server, just send the request */
     if( ssl->conf->endpoint == MBEDTLS_SSL_IS_SERVER )
     {
-        if( ssl->state != MBEDTLS_SSL_HANDSHAKE_OVER )
+        if( mbedtls_ssl_is_handshake_over( ssl ) == 0 )
             return( MBEDTLS_ERR_SSL_BAD_INPUT_DATA );
 
         ssl->renego_status = MBEDTLS_SSL_RENEGOTIATION_PENDING;
@@ -3006,7 +3006,7 @@
      */
     if( ssl->renego_status != MBEDTLS_SSL_RENEGOTIATION_IN_PROGRESS )
     {
-        if( ssl->state != MBEDTLS_SSL_HANDSHAKE_OVER )
+        if( mbedtls_ssl_is_handshake_over( ssl ) == 0 )
             return( MBEDTLS_ERR_SSL_BAD_INPUT_DATA );
 
         if( ( ret = mbedtls_ssl_start_renegotiation( ssl ) ) != 0 )
@@ -3290,7 +3290,7 @@
      * (only DTLS) but are currently used to simplify the implementation.
      */
     /* The initial handshake must be over */
-    if( ssl->state != MBEDTLS_SSL_HANDSHAKE_OVER )
+    if( mbedtls_ssl_is_handshake_over( ssl ) == 0 )
     {
         MBEDTLS_SSL_DEBUG_MSG( 1, ( "Initial handshake isn't over" ) );
         return( MBEDTLS_ERR_SSL_BAD_INPUT_DATA );
diff --git a/library/threading.c b/library/threading.c
index bae6644..cd9942b 100644
--- a/library/threading.c
+++ b/library/threading.c
@@ -113,7 +113,7 @@
 int (*mbedtls_mutex_unlock)( mbedtls_threading_mutex_t * ) = threading_mutex_unlock_pthread;
 
 /*
- * With phtreads we can statically initialize mutexes
+ * With pthreads we can statically initialize mutexes
  */
 #define MUTEX_INIT  = { PTHREAD_MUTEX_INITIALIZER, 1 }
 
diff --git a/scripts/config.py b/scripts/config.py
index 0ab1e39..7395656 100755
--- a/scripts/config.py
+++ b/scripts/config.py
@@ -198,7 +198,8 @@
     'MBEDTLS_PSA_CRYPTO_SPM', # platform dependency (PSA SPM)
     'MBEDTLS_PSA_INJECT_ENTROPY', # build dependency (hook functions)
     'MBEDTLS_RSA_NO_CRT', # influences the use of RSA in X.509 and TLS
-    'MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY', # interacts with *_USE_A64_CRYPTO_ONLY
+    'MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY', # interacts with *_USE_A64_CRYPTO_IF_PRESENT
+    'MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY', # interacts with *_USE_A64_CRYPTO_IF_PRESENT
     'MBEDTLS_TEST_CONSTANT_FLOW_MEMSAN', # build dependency (clang+memsan)
     'MBEDTLS_TEST_CONSTANT_FLOW_VALGRIND', # build dependency (valgrind headers)
     'MBEDTLS_X509_REMOVE_INFO', # removes a feature
diff --git a/tests/scripts/all.sh b/tests/scripts/all.sh
index e732ae4..4d5e857 100755
--- a/tests/scripts/all.sh
+++ b/tests/scripts/all.sh
@@ -1558,6 +1558,9 @@
     # MBEDTLS_SHA256_*ALT can't be used with MBEDTLS_SHA256_USE_A64_CRYPTO_*
     scripts/config.py unset MBEDTLS_SHA256_USE_A64_CRYPTO_IF_PRESENT
     scripts/config.py unset MBEDTLS_SHA256_USE_A64_CRYPTO_ONLY
+    # MBEDTLS_SHA512_*ALT can't be used with MBEDTLS_SHA512_USE_A64_CRYPTO_*
+    scripts/config.py unset MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT
+    scripts/config.py unset MBEDTLS_SHA512_USE_A64_CRYPTO_ONLY
     # Enable all MBEDTLS_XXX_ALT for whole modules. Do not enable
     # MBEDTLS_XXX_YYY_ALT which are for single functions.
     scripts/config.py set-all 'MBEDTLS_([A-Z0-9]*|NIST_KW)_ALT'
@@ -2742,6 +2745,9 @@
 component_build_armcc () {
     msg "build: ARM Compiler 5"
     scripts/config.py baremetal
+    # armc[56] don't support SHA-512 intrinsics
+    scripts/config.py unset MBEDTLS_SHA512_USE_A64_CRYPTO_IF_PRESENT
+
     make CC="$ARMC5_CC" AR="$ARMC5_AR" WARNING_CFLAGS='--strict --c99' lib
 
     msg "size: ARM Compiler 5"
@@ -2761,7 +2767,7 @@
     # ARM Compiler 6 - Target ARMv8-M
     armc6_build_test "--target=arm-arm-none-eabi -march=armv8-m.main"
 
-    # ARM Compiler 6 - Target ARMv8-A - AArch64
+    # ARM Compiler 6 - Target ARMv8.2-A - AArch64
     armc6_build_test "--target=aarch64-arm-none-eabi -march=armv8.2-a+crypto"
 }
 
@@ -3105,7 +3111,7 @@
     local dd_cmd
     dd_cmd=(dd if=/dev/urandom of=./tests/seedfile bs=64 count=1)
     case $OSTYPE in
-        linux*|freebsd*|openbsd*|darwin*) dd_cmd+=(status=none)
+        linux*|freebsd*|openbsd*) dd_cmd+=(status=none)
     esac
     "${dd_cmd[@]}"
 
diff --git a/tests/suites/test_suite_ecp.data b/tests/suites/test_suite_ecp.data
index 0c30e4a..2eb8c2d 100644
--- a/tests/suites/test_suite_ecp.data
+++ b/tests/suites/test_suite_ecp.data
@@ -882,3 +882,11 @@
 # The first call to fix_negative in the test case of issue #4296.
 ECP fix_negative: #4296.1
 fix_negative:"8A4DD4C8B42C5EAED15FE4F4579F4CE513EC90A94010BF000000000000000000":-1:256
+
+ECP export key parameters #1 (OK)
+depends_on:MBEDTLS_ECP_DP_SECP256R1_ENABLED
+ecp_export:MBEDTLS_ECP_DP_SECP256R1:"37cc56d976091e5a723ec7592dff206eee7cf9069174d0ad14b5f76822596292":"4ee500d82311ffea2fd2345d5d16bd8a88c26b770d55cd8a2a0efa01c8b4edff":"00f12a1320760270a83cbffd53f6031ef76a5d86c8a204f2c30ca9ebf51f0f0ea7":0:0
+
+ECP export key parameters #2 (invalid group)
+depends_on:MBEDTLS_ECP_DP_SECP256R1_ENABLED
+ecp_export:MBEDTLS_ECP_DP_SECP256R1:"37cc56d976091e5a723ec7592dff206eee7cf9069174d0ad14b5f76822596292":"4ee500d82311ffea2fd2345d5d16bd8a88c26b770d55cd8a2a0efa01c8b4edff":"00f12a1320760270a83cbffd53f6031ef76a5d86c8a204f2c30ca9ebf51f0f0ea7":MBEDTLS_ERR_ECP_FEATURE_UNAVAILABLE:1
diff --git a/tests/suites/test_suite_ecp.function b/tests/suites/test_suite_ecp.function
index 2afc355..c3e6b05 100644
--- a/tests/suites/test_suite_ecp.function
+++ b/tests/suites/test_suite_ecp.function
@@ -16,6 +16,44 @@
     mbedtls_ecp_point_free( x );    \
     mbedtls_ecp_point_init( x );
 
+/* Auxiliary function to compare two mbedtls_ecp_group objects. */
+inline static int mbedtls_ecp_group_cmp( mbedtls_ecp_group *grp1,
+                                         mbedtls_ecp_group *grp2 )
+{
+    if( mbedtls_mpi_cmp_mpi( &grp1->P, &grp2->P ) != 0 )
+        return 1;
+    if( mbedtls_mpi_cmp_mpi( &grp1->A, &grp2->A ) != 0 )
+        return 1;
+    if( mbedtls_mpi_cmp_mpi( &grp1->B, &grp2->B ) != 0 )
+        return 1;
+    if( mbedtls_mpi_cmp_mpi( &grp1->N, &grp2->N ) != 0 )
+        return 1;
+    if( mbedtls_ecp_point_cmp( &grp1->G, &grp2->G ) != 0 )
+        return 1;
+    if( grp1->id != grp2->id )
+        return 1;
+    if( grp1->pbits != grp2->pbits )
+        return 1;
+    if( grp1->nbits != grp2->nbits )
+        return 1;
+    if( grp1->h != grp2->h )
+        return 1;
+    if( grp1->modp != grp2->modp )
+        return 1;
+    if( grp1->t_pre != grp2->t_pre )
+        return 1;
+    if( grp1->t_post != grp2->t_post )
+        return 1;
+    if( grp1->t_data != grp2->t_data )
+        return 1;
+    if( grp1->T_size != grp2->T_size )
+        return 1;
+    if( grp1->T != grp2->T )
+        return 1;
+
+    return 0;
+}
+
 /* END_HEADER */
 
 /* BEGIN_DEPENDENCIES
@@ -988,3 +1026,40 @@
     TEST_ASSERT( mbedtls_ecp_self_test( 1 ) == 0 );
 }
 /* END_CASE */
+
+/* BEGIN_CASE */
+void ecp_export( int id, char * Qx, char * Qy,char * d, int expected_ret, int invalid_grp )
+{
+    mbedtls_ecp_keypair key;
+    mbedtls_ecp_group export_grp;
+    mbedtls_mpi export_d;
+    mbedtls_ecp_point export_Q;
+
+    mbedtls_ecp_group_init( &export_grp );
+    mbedtls_ecp_group_init( &key.grp );
+    mbedtls_mpi_init( &export_d );
+    mbedtls_ecp_point_init( &export_Q );
+
+    mbedtls_ecp_keypair_init( &key );
+    if( invalid_grp == 0 )
+        TEST_ASSERT( mbedtls_ecp_group_load( &key.grp, id ) == 0 );
+    TEST_ASSERT( mbedtls_ecp_point_read_string( &key.Q, 16, Qx, Qy ) == 0 );
+    TEST_ASSERT( mbedtls_test_read_mpi( &key.d, 16, d ) == 0 );
+
+    TEST_EQUAL( mbedtls_ecp_export( &key, &export_grp,
+                                    &export_d, &export_Q ), expected_ret );
+
+    if( expected_ret == 0 )
+    {
+        TEST_EQUAL( mbedtls_ecp_point_cmp( &key.Q, &export_Q ), 0 );
+        TEST_EQUAL( mbedtls_mpi_cmp_mpi( &key.d, &export_d ), 0 );
+        TEST_EQUAL( mbedtls_ecp_group_cmp( &key.grp, &export_grp ), 0 );
+    }
+
+exit:
+    mbedtls_ecp_keypair_free( &key );
+    mbedtls_ecp_group_free( &export_grp );
+    mbedtls_mpi_free( &export_d );
+    mbedtls_ecp_point_free( &export_Q );
+}
+/* END_CASE */
diff --git a/tests/suites/test_suite_ssl.function b/tests/suites/test_suite_ssl.function
index ec50ae5..f71e154 100644
--- a/tests/suites/test_suite_ssl.function
+++ b/tests/suites/test_suite_ssl.function
@@ -1044,7 +1044,7 @@
     {
         /* If /p second_ssl ends the handshake procedure before /p ssl then
          * there is no need to call the next step */
-        if( second_ssl->state != MBEDTLS_SSL_HANDSHAKE_OVER )
+        if( !mbedtls_ssl_is_handshake_over( second_ssl ) )
         {
             ret = mbedtls_ssl_handshake_step( second_ssl );
             if( ret != 0 && ret != MBEDTLS_ERR_SSL_WANT_READ &&
@@ -2089,8 +2089,8 @@
         goto exit;
     }
 
-    TEST_ASSERT( client.ssl.state == MBEDTLS_SSL_HANDSHAKE_OVER );
-    TEST_ASSERT( server.ssl.state == MBEDTLS_SSL_HANDSHAKE_OVER );
+    TEST_ASSERT( mbedtls_ssl_is_handshake_over( &client.ssl ) == 1 );
+    TEST_ASSERT( mbedtls_ssl_is_handshake_over( &server.ssl ) == 1 );
 
     /* Check that both sides have negotiated the expected version. */
     mbedtls_test_set_step( 0 );