Import mbedtls-3.6.0

Imports Mbed TLS 3.6.0 from https://github.com/Mbed-TLS/mbedtls.git
tags mbedtls-3.6.0, v3.6.0

Files that are not needed are removed:

cd lib/libmbedtls
rm -rf mbedtls
cp -R path/to/mbedtls-3.6.0/mbedtls .
cd mbedtls
rm CMakeLists.txt DartConfiguration.tcl Makefile
rm .gitignore .travis.yml .pylintrc .globalrc .mypy.ini BRANCHES.md
rm include/.gitignore include/CMakeLists.txt library/.gitignore
rm library/CMakeLists.txt library/Makefile
rm -r cmake
rm -rf .git .github doxygen configs programs scripts tests visualc
rm -rf 3rdparty ChangeLog.d docs pkgconfig .gitmodules .readthedocs.yaml
rm library/mps_*
cd ..
git add mbedtls

This time we leave library/psa_* present to enable TLS 1.3 features.

This is a complete overwrite of previous code so earlier changes in the
previous branch import/mbedtls-3.4.0 will be added on top of this commit.

Signed-off-by: Tom Van Eyck <tom.vaneyck@kuleuven.be>
Acked-by: Jens Wiklander <jens.wiklander@linaro.org>
diff --git a/lib/libmbedtls/mbedtls/library/gcm.c b/lib/libmbedtls/mbedtls/library/gcm.c
index 71fcc35..5dfac23 100644
--- a/lib/libmbedtls/mbedtls/library/gcm.c
+++ b/lib/libmbedtls/mbedtls/library/gcm.c
@@ -2,19 +2,7 @@
  *  NIST SP800-38D compliant GCM implementation
  *
  *  Copyright The Mbed TLS Contributors
- *  SPDX-License-Identifier: Apache-2.0
- *
- *  Licensed under the Apache License, Version 2.0 (the "License"); you may
- *  not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
+ *  SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  */
 
 /*
@@ -35,6 +23,11 @@
 #include "mbedtls/platform.h"
 #include "mbedtls/platform_util.h"
 #include "mbedtls/error.h"
+#include "mbedtls/constant_time.h"
+
+#if defined(MBEDTLS_BLOCK_CIPHER_C)
+#include "block_cipher_internal.h"
+#endif
 
 #include <string.h>
 
@@ -48,6 +41,12 @@
 
 #if !defined(MBEDTLS_GCM_ALT)
 
+/* Used to select the acceleration mechanism */
+#define MBEDTLS_GCM_ACC_SMALLTABLE  0
+#define MBEDTLS_GCM_ACC_LARGETABLE  1
+#define MBEDTLS_GCM_ACC_AESNI       2
+#define MBEDTLS_GCM_ACC_AESCE       3
+
 /*
  * Initialize a context
  */
@@ -56,6 +55,39 @@
     memset(ctx, 0, sizeof(mbedtls_gcm_context));
 }
 
+static inline void gcm_set_acceleration(mbedtls_gcm_context *ctx)
+{
+#if defined(MBEDTLS_GCM_LARGE_TABLE)
+    ctx->acceleration = MBEDTLS_GCM_ACC_LARGETABLE;
+#else
+    ctx->acceleration = MBEDTLS_GCM_ACC_SMALLTABLE;
+#endif
+
+#if defined(MBEDTLS_AESNI_HAVE_CODE)
+    /* With CLMUL support, we need only h, not the rest of the table */
+    if (mbedtls_aesni_has_support(MBEDTLS_AESNI_CLMUL)) {
+        ctx->acceleration = MBEDTLS_GCM_ACC_AESNI;
+    }
+#endif
+
+#if defined(MBEDTLS_AESCE_HAVE_CODE)
+    if (MBEDTLS_AESCE_HAS_SUPPORT()) {
+        ctx->acceleration = MBEDTLS_GCM_ACC_AESCE;
+    }
+#endif
+}
+
+static inline void gcm_gen_table_rightshift(uint64_t dst[2], const uint64_t src[2])
+{
+    uint8_t *u8Dst = (uint8_t *) dst;
+    uint8_t *u8Src = (uint8_t *) src;
+
+    MBEDTLS_PUT_UINT64_BE(MBEDTLS_GET_UINT64_BE(&src[1], 0) >> 1, &dst[1], 0);
+    u8Dst[8] |= (u8Src[7] & 0x01) << 7;
+    MBEDTLS_PUT_UINT64_BE(MBEDTLS_GET_UINT64_BE(&src[0], 0) >> 1, &dst[0], 0);
+    u8Dst[0] ^= (u8Src[15] & 0x01) ? 0xE1 : 0;
+}
+
 /*
  * Precompute small multiples of H, that is set
  *      HH[i] || HL[i] = H times i,
@@ -67,63 +99,61 @@
 static int gcm_gen_table(mbedtls_gcm_context *ctx)
 {
     int ret, i, j;
-    uint64_t hi, lo;
-    uint64_t vl, vh;
-    unsigned char h[16];
-    size_t olen = 0;
+    uint64_t u64h[2] = { 0 };
+    uint8_t *h = (uint8_t *) u64h;
 
-    memset(h, 0, 16);
-    if ((ret = mbedtls_cipher_update(&ctx->cipher_ctx, h, 16, h, &olen)) != 0) {
+#if defined(MBEDTLS_BLOCK_CIPHER_C)
+    ret = mbedtls_block_cipher_encrypt(&ctx->block_cipher_ctx, h, h);
+#else
+    size_t olen = 0;
+    ret = mbedtls_cipher_update(&ctx->cipher_ctx, h, 16, h, &olen);
+#endif
+    if (ret != 0) {
         return ret;
     }
 
-    /* pack h as two 64-bits ints, big-endian */
-    hi = MBEDTLS_GET_UINT32_BE(h,  0);
-    lo = MBEDTLS_GET_UINT32_BE(h,  4);
-    vh = (uint64_t) hi << 32 | lo;
+    gcm_set_acceleration(ctx);
 
-    hi = MBEDTLS_GET_UINT32_BE(h,  8);
-    lo = MBEDTLS_GET_UINT32_BE(h,  12);
-    vl = (uint64_t) hi << 32 | lo;
+    /* MBEDTLS_GCM_HTABLE_SIZE/2 = 1000 corresponds to 1 in GF(2^128) */
+    ctx->H[MBEDTLS_GCM_HTABLE_SIZE/2][0] = u64h[0];
+    ctx->H[MBEDTLS_GCM_HTABLE_SIZE/2][1] = u64h[1];
 
-    /* 8 = 1000 corresponds to 1 in GF(2^128) */
-    ctx->HL[8] = vl;
-    ctx->HH[8] = vh;
-
+    switch (ctx->acceleration) {
 #if defined(MBEDTLS_AESNI_HAVE_CODE)
-    /* With CLMUL support, we need only h, not the rest of the table */
-    if (mbedtls_aesni_has_support(MBEDTLS_AESNI_CLMUL)) {
-        return 0;
-    }
+        case MBEDTLS_GCM_ACC_AESNI:
+            return 0;
 #endif
 
-#if defined(MBEDTLS_AESCE_C) && defined(MBEDTLS_HAVE_ARM64)
-    if (mbedtls_aesce_has_support()) {
-        return 0;
-    }
+#if defined(MBEDTLS_AESCE_HAVE_CODE)
+        case MBEDTLS_GCM_ACC_AESCE:
+            return 0;
 #endif
 
-    /* 0 corresponds to 0 in GF(2^128) */
-    ctx->HH[0] = 0;
-    ctx->HL[0] = 0;
+        default:
+            /* 0 corresponds to 0 in GF(2^128) */
+            ctx->H[0][0] = 0;
+            ctx->H[0][1] = 0;
 
-    for (i = 4; i > 0; i >>= 1) {
-        uint32_t T = (vl & 1) * 0xe1000000U;
-        vl  = (vh << 63) | (vl >> 1);
-        vh  = (vh >> 1) ^ ((uint64_t) T << 32);
+            for (i = MBEDTLS_GCM_HTABLE_SIZE/4; i > 0; i >>= 1) {
+                gcm_gen_table_rightshift(ctx->H[i], ctx->H[i*2]);
+            }
 
-        ctx->HL[i] = vl;
-        ctx->HH[i] = vh;
-    }
+#if !defined(MBEDTLS_GCM_LARGE_TABLE)
+            /* pack elements of H as 64-bits ints, big-endian */
+            for (i = MBEDTLS_GCM_HTABLE_SIZE/2; i > 0; i >>= 1) {
+                MBEDTLS_PUT_UINT64_BE(ctx->H[i][0], &ctx->H[i][0], 0);
+                MBEDTLS_PUT_UINT64_BE(ctx->H[i][1], &ctx->H[i][1], 0);
+            }
+#endif
 
-    for (i = 2; i <= 8; i *= 2) {
-        uint64_t *HiL = ctx->HL + i, *HiH = ctx->HH + i;
-        vh = *HiH;
-        vl = *HiL;
-        for (j = 1; j < i; j++) {
-            HiH[j] = vh ^ ctx->HH[j];
-            HiL[j] = vl ^ ctx->HL[j];
-        }
+            for (i = 2; i < MBEDTLS_GCM_HTABLE_SIZE; i <<= 1) {
+                for (j = 1; j < i; j++) {
+                    mbedtls_xor_no_simd((unsigned char *) ctx->H[i+j],
+                                        (unsigned char *) ctx->H[i],
+                                        (unsigned char *) ctx->H[j],
+                                        16);
+                }
+            }
     }
 
     return 0;
@@ -135,19 +165,31 @@
                        unsigned int keybits)
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
-    const mbedtls_cipher_info_t *cipher_info;
 
     if (keybits != 128 && keybits != 192 && keybits != 256) {
         return MBEDTLS_ERR_GCM_BAD_INPUT;
     }
 
+#if defined(MBEDTLS_BLOCK_CIPHER_C)
+    mbedtls_block_cipher_free(&ctx->block_cipher_ctx);
+
+    if ((ret = mbedtls_block_cipher_setup(&ctx->block_cipher_ctx, cipher)) != 0) {
+        return ret;
+    }
+
+    if ((ret = mbedtls_block_cipher_setkey(&ctx->block_cipher_ctx, key, keybits)) != 0) {
+        return ret;
+    }
+#else
+    const mbedtls_cipher_info_t *cipher_info;
+
     cipher_info = mbedtls_cipher_info_from_values(cipher, keybits,
                                                   MBEDTLS_MODE_ECB);
     if (cipher_info == NULL) {
         return MBEDTLS_ERR_GCM_BAD_INPUT;
     }
 
-    if (cipher_info->block_size != 16) {
+    if (mbedtls_cipher_info_get_block_size(cipher_info) != 16) {
         return MBEDTLS_ERR_GCM_BAD_INPUT;
     }
 
@@ -161,6 +203,7 @@
                                      MBEDTLS_ENCRYPT)) != 0) {
         return ret;
     }
+#endif
 
     if ((ret = gcm_gen_table(ctx)) != 0) {
         return ret;
@@ -169,12 +212,86 @@
     return 0;
 }
 
+#if defined(MBEDTLS_GCM_LARGE_TABLE)
+static const uint16_t last8[256] = {
+    0x0000, 0xc201, 0x8403, 0x4602, 0x0807, 0xca06, 0x8c04, 0x4e05,
+    0x100e, 0xd20f, 0x940d, 0x560c, 0x1809, 0xda08, 0x9c0a, 0x5e0b,
+    0x201c, 0xe21d, 0xa41f, 0x661e, 0x281b, 0xea1a, 0xac18, 0x6e19,
+    0x3012, 0xf213, 0xb411, 0x7610, 0x3815, 0xfa14, 0xbc16, 0x7e17,
+    0x4038, 0x8239, 0xc43b, 0x063a, 0x483f, 0x8a3e, 0xcc3c, 0x0e3d,
+    0x5036, 0x9237, 0xd435, 0x1634, 0x5831, 0x9a30, 0xdc32, 0x1e33,
+    0x6024, 0xa225, 0xe427, 0x2626, 0x6823, 0xaa22, 0xec20, 0x2e21,
+    0x702a, 0xb22b, 0xf429, 0x3628, 0x782d, 0xba2c, 0xfc2e, 0x3e2f,
+    0x8070, 0x4271, 0x0473, 0xc672, 0x8877, 0x4a76, 0x0c74, 0xce75,
+    0x907e, 0x527f, 0x147d, 0xd67c, 0x9879, 0x5a78, 0x1c7a, 0xde7b,
+    0xa06c, 0x626d, 0x246f, 0xe66e, 0xa86b, 0x6a6a, 0x2c68, 0xee69,
+    0xb062, 0x7263, 0x3461, 0xf660, 0xb865, 0x7a64, 0x3c66, 0xfe67,
+    0xc048, 0x0249, 0x444b, 0x864a, 0xc84f, 0x0a4e, 0x4c4c, 0x8e4d,
+    0xd046, 0x1247, 0x5445, 0x9644, 0xd841, 0x1a40, 0x5c42, 0x9e43,
+    0xe054, 0x2255, 0x6457, 0xa656, 0xe853, 0x2a52, 0x6c50, 0xae51,
+    0xf05a, 0x325b, 0x7459, 0xb658, 0xf85d, 0x3a5c, 0x7c5e, 0xbe5f,
+    0x00e1, 0xc2e0, 0x84e2, 0x46e3, 0x08e6, 0xcae7, 0x8ce5, 0x4ee4,
+    0x10ef, 0xd2ee, 0x94ec, 0x56ed, 0x18e8, 0xdae9, 0x9ceb, 0x5eea,
+    0x20fd, 0xe2fc, 0xa4fe, 0x66ff, 0x28fa, 0xeafb, 0xacf9, 0x6ef8,
+    0x30f3, 0xf2f2, 0xb4f0, 0x76f1, 0x38f4, 0xfaf5, 0xbcf7, 0x7ef6,
+    0x40d9, 0x82d8, 0xc4da, 0x06db, 0x48de, 0x8adf, 0xccdd, 0x0edc,
+    0x50d7, 0x92d6, 0xd4d4, 0x16d5, 0x58d0, 0x9ad1, 0xdcd3, 0x1ed2,
+    0x60c5, 0xa2c4, 0xe4c6, 0x26c7, 0x68c2, 0xaac3, 0xecc1, 0x2ec0,
+    0x70cb, 0xb2ca, 0xf4c8, 0x36c9, 0x78cc, 0xbacd, 0xfccf, 0x3ece,
+    0x8091, 0x4290, 0x0492, 0xc693, 0x8896, 0x4a97, 0x0c95, 0xce94,
+    0x909f, 0x529e, 0x149c, 0xd69d, 0x9898, 0x5a99, 0x1c9b, 0xde9a,
+    0xa08d, 0x628c, 0x248e, 0xe68f, 0xa88a, 0x6a8b, 0x2c89, 0xee88,
+    0xb083, 0x7282, 0x3480, 0xf681, 0xb884, 0x7a85, 0x3c87, 0xfe86,
+    0xc0a9, 0x02a8, 0x44aa, 0x86ab, 0xc8ae, 0x0aaf, 0x4cad, 0x8eac,
+    0xd0a7, 0x12a6, 0x54a4, 0x96a5, 0xd8a0, 0x1aa1, 0x5ca3, 0x9ea2,
+    0xe0b5, 0x22b4, 0x64b6, 0xa6b7, 0xe8b2, 0x2ab3, 0x6cb1, 0xaeb0,
+    0xf0bb, 0x32ba, 0x74b8, 0xb6b9, 0xf8bc, 0x3abd, 0x7cbf, 0xbebe
+};
+
+static void gcm_mult_largetable(uint8_t *output, const uint8_t *x, uint64_t H[256][2])
+{
+    int i;
+    uint64_t u64z[2];
+    uint16_t *u16z = (uint16_t *) u64z;
+    uint8_t *u8z = (uint8_t *) u64z;
+    uint8_t rem;
+
+    u64z[0] = 0;
+    u64z[1] = 0;
+
+    if (MBEDTLS_IS_BIG_ENDIAN) {
+        for (i = 15; i > 0; i--) {
+            mbedtls_xor_no_simd(u8z, u8z, (uint8_t *) H[x[i]], 16);
+            rem = u8z[15];
+
+            u64z[1] >>= 8;
+            u8z[8] = u8z[7];
+            u64z[0] >>= 8;
+
+            u16z[0] ^= MBEDTLS_GET_UINT16_LE(&last8[rem], 0);
+        }
+    } else {
+        for (i = 15; i > 0; i--) {
+            mbedtls_xor_no_simd(u8z, u8z, (uint8_t *) H[x[i]], 16);
+            rem = u8z[15];
+
+            u64z[1] <<= 8;
+            u8z[8] = u8z[7];
+            u64z[0] <<= 8;
+
+            u16z[0] ^= last8[rem];
+        }
+    }
+
+    mbedtls_xor_no_simd(output, u8z, (uint8_t *) H[x[0]], 16);
+}
+#else
 /*
  * Shoup's method for multiplication use this table with
  *      last4[x] = x times P^128
  * where x and last4[x] are seen as elements of GF(2^128) as in [MGV]
  */
-static const uint64_t last4[16] =
+static const uint16_t last4[16] =
 {
     0x0000, 0x1c20, 0x3840, 0x2460,
     0x7080, 0x6ca0, 0x48c0, 0x54e0,
@@ -182,6 +299,47 @@
     0x9180, 0x8da0, 0xa9c0, 0xb5e0
 };
 
+static void gcm_mult_smalltable(uint8_t *output, const uint8_t *x, uint64_t H[16][2])
+{
+    int i = 0;
+    unsigned char lo, hi, rem;
+    uint64_t u64z[2];
+    const uint64_t *pu64z = NULL;
+    uint8_t *u8z = (uint8_t *) u64z;
+
+    lo = x[15] & 0xf;
+    hi = (x[15] >> 4) & 0xf;
+
+    pu64z = H[lo];
+
+    rem = (unsigned char) pu64z[1] & 0xf;
+    u64z[1] = (pu64z[0] << 60) | (pu64z[1] >> 4);
+    u64z[0] = (pu64z[0] >> 4);
+    u64z[0] ^= (uint64_t) last4[rem] << 48;
+    mbedtls_xor_no_simd(u8z, u8z, (uint8_t *) H[hi], 16);
+
+    for (i = 14; i >= 0; i--) {
+        lo = x[i] & 0xf;
+        hi = (x[i] >> 4) & 0xf;
+
+        rem = (unsigned char) u64z[1] & 0xf;
+        u64z[1] = (u64z[0] << 60) | (u64z[1] >> 4);
+        u64z[0] = (u64z[0] >> 4);
+        u64z[0] ^= (uint64_t) last4[rem] << 48;
+        mbedtls_xor_no_simd(u8z, u8z, (uint8_t *) H[lo], 16);
+
+        rem = (unsigned char) u64z[1] & 0xf;
+        u64z[1] = (u64z[0] << 60) | (u64z[1] >> 4);
+        u64z[0] = (u64z[0] >> 4);
+        u64z[0] ^= (uint64_t) last4[rem] << 48;
+        mbedtls_xor_no_simd(u8z, u8z, (uint8_t *) H[hi], 16);
+    }
+
+    MBEDTLS_PUT_UINT64_BE(u64z[0], output, 0);
+    MBEDTLS_PUT_UINT64_BE(u64z[1], output, 8);
+}
+#endif
+
 /*
  * Sets output to x times H using the precomputed tables.
  * x and output are seen as elements of GF(2^128) as in [MGV].
@@ -189,71 +347,31 @@
 static void gcm_mult(mbedtls_gcm_context *ctx, const unsigned char x[16],
                      unsigned char output[16])
 {
-    int i = 0;
-    unsigned char lo, hi, rem;
-    uint64_t zh, zl;
-
+    switch (ctx->acceleration) {
 #if defined(MBEDTLS_AESNI_HAVE_CODE)
-    if (mbedtls_aesni_has_support(MBEDTLS_AESNI_CLMUL)) {
-        unsigned char h[16];
-
-        /* mbedtls_aesni_gcm_mult needs big-endian input */
-        MBEDTLS_PUT_UINT32_BE(ctx->HH[8] >> 32, h,  0);
-        MBEDTLS_PUT_UINT32_BE(ctx->HH[8],       h,  4);
-        MBEDTLS_PUT_UINT32_BE(ctx->HL[8] >> 32, h,  8);
-        MBEDTLS_PUT_UINT32_BE(ctx->HL[8],       h, 12);
-
-        mbedtls_aesni_gcm_mult(output, x, h);
-        return;
-    }
-#endif /* MBEDTLS_AESNI_HAVE_CODE */
-
-#if defined(MBEDTLS_AESCE_C) && defined(MBEDTLS_HAVE_ARM64)
-    if (mbedtls_aesce_has_support()) {
-        unsigned char h[16];
-
-        /* mbedtls_aesce_gcm_mult needs big-endian input */
-        MBEDTLS_PUT_UINT32_BE(ctx->HH[8] >> 32, h,  0);
-        MBEDTLS_PUT_UINT32_BE(ctx->HH[8],       h,  4);
-        MBEDTLS_PUT_UINT32_BE(ctx->HL[8] >> 32, h,  8);
-        MBEDTLS_PUT_UINT32_BE(ctx->HL[8],       h, 12);
-
-        mbedtls_aesce_gcm_mult(output, x, h);
-        return;
-    }
+        case MBEDTLS_GCM_ACC_AESNI:
+            mbedtls_aesni_gcm_mult(output, x, (uint8_t *) ctx->H[MBEDTLS_GCM_HTABLE_SIZE/2]);
+            break;
 #endif
 
-    lo = x[15] & 0xf;
+#if defined(MBEDTLS_AESCE_HAVE_CODE)
+        case MBEDTLS_GCM_ACC_AESCE:
+            mbedtls_aesce_gcm_mult(output, x, (uint8_t *) ctx->H[MBEDTLS_GCM_HTABLE_SIZE/2]);
+            break;
+#endif
 
-    zh = ctx->HH[lo];
-    zl = ctx->HL[lo];
-
-    for (i = 15; i >= 0; i--) {
-        lo = x[i] & 0xf;
-        hi = (x[i] >> 4) & 0xf;
-
-        if (i != 15) {
-            rem = (unsigned char) zl & 0xf;
-            zl = (zh << 60) | (zl >> 4);
-            zh = (zh >> 4);
-            zh ^= (uint64_t) last4[rem] << 48;
-            zh ^= ctx->HH[lo];
-            zl ^= ctx->HL[lo];
-
-        }
-
-        rem = (unsigned char) zl & 0xf;
-        zl = (zh << 60) | (zl >> 4);
-        zh = (zh >> 4);
-        zh ^= (uint64_t) last4[rem] << 48;
-        zh ^= ctx->HH[hi];
-        zl ^= ctx->HL[hi];
+#if defined(MBEDTLS_GCM_LARGE_TABLE)
+        case MBEDTLS_GCM_ACC_LARGETABLE:
+            gcm_mult_largetable(output, x, ctx->H);
+            break;
+#else
+        case MBEDTLS_GCM_ACC_SMALLTABLE:
+            gcm_mult_smalltable(output, x, ctx->H);
+            break;
+#endif
     }
 
-    MBEDTLS_PUT_UINT32_BE(zh >> 32, output, 0);
-    MBEDTLS_PUT_UINT32_BE(zh, output, 4);
-    MBEDTLS_PUT_UINT32_BE(zl >> 32, output, 8);
-    MBEDTLS_PUT_UINT32_BE(zl, output, 12);
+    return;
 }
 
 int mbedtls_gcm_starts(mbedtls_gcm_context *ctx,
@@ -263,8 +381,11 @@
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     unsigned char work_buf[16];
     const unsigned char *p;
-    size_t use_len, olen = 0;
+    size_t use_len;
     uint64_t iv_bits;
+#if !defined(MBEDTLS_BLOCK_CIPHER_C)
+    size_t olen = 0;
+#endif
 
     /* IV is limited to 2^64 bits, so 2^61 bytes */
     /* IV is not allowed to be zero length */
@@ -291,8 +412,17 @@
         while (iv_len > 0) {
             use_len = (iv_len < 16) ? iv_len : 16;
 
+#if defined(MBEDTLS_COMPILER_IS_GCC) && (MBEDTLS_GCC_VERSION >= 70110)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic warning "-Wstringop-overflow=0"
+#endif
+
             mbedtls_xor(ctx->y, ctx->y, p, use_len);
 
+#if defined(MBEDTLS_COMPILER_IS_GCC) && (MBEDTLS_GCC_VERSION >= 70110)
+#pragma GCC diagnostic pop
+#endif
+
             gcm_mult(ctx, ctx->y, ctx->y);
 
             iv_len -= use_len;
@@ -304,8 +434,13 @@
         gcm_mult(ctx, ctx->y, ctx->y);
     }
 
-    if ((ret = mbedtls_cipher_update(&ctx->cipher_ctx, ctx->y, 16,
-                                     ctx->base_ectr, &olen)) != 0) {
+
+#if defined(MBEDTLS_BLOCK_CIPHER_C)
+    ret = mbedtls_block_cipher_encrypt(&ctx->block_cipher_ctx, ctx->y, ctx->base_ectr);
+#else
+    ret = mbedtls_cipher_update(&ctx->cipher_ctx, ctx->y, 16, ctx->base_ectr, &olen);
+#endif
+    if (ret != 0) {
         return ret;
     }
 
@@ -334,9 +469,17 @@
 {
     const unsigned char *p;
     size_t use_len, offset;
+    uint64_t new_add_len;
 
-    /* IV is limited to 2^64 bits, so 2^61 bytes */
-    if ((uint64_t) add_len >> 61 != 0) {
+    /* AD is limited to 2^64 bits, ie 2^61 bytes
+     * Also check for possible overflow */
+#if SIZE_MAX > 0xFFFFFFFFFFFFFFFFULL
+    if (add_len > 0xFFFFFFFFFFFFFFFFULL) {
+        return MBEDTLS_ERR_GCM_BAD_INPUT;
+    }
+#endif
+    new_add_len = ctx->add_len + (uint64_t) add_len;
+    if (new_add_len < ctx->add_len || new_add_len >> 61 != 0) {
         return MBEDTLS_ERR_GCM_BAD_INPUT;
     }
 
@@ -381,12 +524,9 @@
 /* Increment the counter. */
 static void gcm_incr(unsigned char y[16])
 {
-    size_t i;
-    for (i = 16; i > 12; i--) {
-        if (++y[i - 1] != 0) {
-            break;
-        }
-    }
+    uint32_t x = MBEDTLS_GET_UINT32_BE(y, 12);
+    x++;
+    MBEDTLS_PUT_UINT32_BE(x, y, 12);
 }
 
 /* Calculate and apply the encryption mask. Process use_len bytes of data,
@@ -397,11 +537,15 @@
                     const unsigned char *input,
                     unsigned char *output)
 {
-    size_t olen = 0;
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
 
-    if ((ret = mbedtls_cipher_update(&ctx->cipher_ctx, ctx->y, 16, ectr,
-                                     &olen)) != 0) {
+#if defined(MBEDTLS_BLOCK_CIPHER_C)
+    ret = mbedtls_block_cipher_encrypt(&ctx->block_cipher_ctx, ctx->y, ectr);
+#else
+    size_t olen = 0;
+    ret = mbedtls_cipher_update(&ctx->cipher_ctx, ctx->y, 16, ectr, &olen);
+#endif
+    if (ret != 0) {
         mbedtls_platform_zeroize(ectr, 16);
         return ret;
     }
@@ -518,6 +662,9 @@
     (void) output_size;
     *output_length = 0;
 
+    /* Total length is restricted to 2^39 - 256 bits, ie 2^36 - 2^5 bytes
+     * and AD length is restricted to 2^64 bits, ie 2^61 bytes so neither of
+     * the two multiplications would overflow. */
     orig_len = ctx->len * 8;
     orig_add_len = ctx->add_len * 8;
 
@@ -601,7 +748,6 @@
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     unsigned char check_tag[16];
-    size_t i;
     int diff;
 
     if ((ret = mbedtls_gcm_crypt_and_tag(ctx, MBEDTLS_GCM_DECRYPT, length,
@@ -611,9 +757,7 @@
     }
 
     /* Check tag in "constant-time" */
-    for (diff = 0, i = 0; i < tag_len; i++) {
-        diff |= tag[i] ^ check_tag[i];
-    }
+    diff = mbedtls_ct_memcmp(tag, check_tag, tag_len);
 
     if (diff != 0) {
         mbedtls_platform_zeroize(output, length);
@@ -628,13 +772,17 @@
     if (ctx == NULL) {
         return;
     }
+#if defined(MBEDTLS_BLOCK_CIPHER_C)
+    mbedtls_block_cipher_free(&ctx->block_cipher_ctx);
+#else
     mbedtls_cipher_free(&ctx->cipher_ctx);
+#endif
     mbedtls_platform_zeroize(ctx, sizeof(mbedtls_gcm_context));
 }
 
 #endif /* !MBEDTLS_GCM_ALT */
 
-#if defined(MBEDTLS_SELF_TEST) && defined(MBEDTLS_AES_C)
+#if defined(MBEDTLS_SELF_TEST) && defined(MBEDTLS_CCM_GCM_CAN_AES)
 /*
  * AES-GCM test vectors from:
  *
@@ -645,7 +793,7 @@
 static const int key_index_test_data[MAX_TESTS] =
 { 0, 0, 1, 1, 1, 1 };
 
-static const unsigned char key_test_data[MAX_TESTS][32] =
+static const unsigned char key_test_data[][32] =
 {
     { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -663,7 +811,7 @@
 static const int iv_index_test_data[MAX_TESTS] =
 { 0, 0, 1, 1, 1, 2 };
 
-static const unsigned char iv_test_data[MAX_TESTS][64] =
+static const unsigned char iv_test_data[][64] =
 {
     { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
       0x00, 0x00, 0x00, 0x00 },
@@ -685,7 +833,7 @@
 static const int add_index_test_data[MAX_TESTS] =
 { 0, 0, 0, 1, 1, 1 };
 
-static const unsigned char additional_test_data[MAX_TESTS][64] =
+static const unsigned char additional_test_data[][64] =
 {
     { 0x00 },
     { 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
@@ -699,7 +847,7 @@
 static const int pt_index_test_data[MAX_TESTS] =
 { 0, 0, 1, 1, 1, 1 };
 
-static const unsigned char pt_test_data[MAX_TESTS][64] =
+static const unsigned char pt_test_data[][64] =
 {
     { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
@@ -713,7 +861,7 @@
       0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55 },
 };
 
-static const unsigned char ct_test_data[MAX_TESTS * 3][64] =
+static const unsigned char ct_test_data[][64] =
 {
     { 0x00 },
     { 0x03, 0x88, 0xda, 0xce, 0x60, 0xb6, 0xa3, 0x92,
@@ -750,6 +898,7 @@
       0xcc, 0xdc, 0xb2, 0x81, 0xd4, 0x8c, 0x7c, 0x6f,
       0xd6, 0x28, 0x75, 0xd2, 0xac, 0xa4, 0x17, 0x03,
       0x4c, 0x34, 0xae, 0xe5 },
+#if !defined(MBEDTLS_AES_ONLY_128_BIT_KEY_LENGTH)
     { 0x00 },
     { 0x98, 0xe7, 0x24, 0x7c, 0x07, 0xf0, 0xfe, 0x41,
       0x1c, 0x26, 0x7e, 0x43, 0x84, 0xb0, 0xf6, 0x00 },
@@ -820,9 +969,10 @@
       0x2d, 0xa3, 0xeb, 0xf1, 0xc5, 0xd8, 0x2c, 0xde,
       0xa2, 0x41, 0x89, 0x97, 0x20, 0x0e, 0xf8, 0x2e,
       0x44, 0xae, 0x7e, 0x3f },
+#endif /* !MBEDTLS_AES_ONLY_128_BIT_KEY_LENGTH */
 };
 
-static const unsigned char tag_test_data[MAX_TESTS * 3][16] =
+static const unsigned char tag_test_data[][16] =
 {
     { 0x58, 0xe2, 0xfc, 0xce, 0xfa, 0x7e, 0x30, 0x61,
       0x36, 0x7f, 0x1d, 0x57, 0xa4, 0xe7, 0x45, 0x5a },
@@ -836,6 +986,7 @@
       0x56, 0x1b, 0xe1, 0x4a, 0xac, 0xa2, 0xfc, 0xcb },
     { 0x61, 0x9c, 0xc5, 0xae, 0xff, 0xfe, 0x0b, 0xfa,
       0x46, 0x2a, 0xf4, 0x3c, 0x16, 0x99, 0xd0, 0x50 },
+#if !defined(MBEDTLS_AES_ONLY_128_BIT_KEY_LENGTH)
     { 0xcd, 0x33, 0xb2, 0x8a, 0xc7, 0x73, 0xf7, 0x4b,
       0xa0, 0x0e, 0xd1, 0xf3, 0x12, 0x57, 0x24, 0x35 },
     { 0x2f, 0xf5, 0x8d, 0x80, 0x03, 0x39, 0x27, 0xab,
@@ -860,6 +1011,7 @@
       0x5e, 0x45, 0x49, 0x13, 0xfe, 0x2e, 0xa8, 0xf2 },
     { 0xa4, 0x4a, 0x82, 0x66, 0xee, 0x1c, 0x8e, 0xb0,
       0xc8, 0xb5, 0xd4, 0xcf, 0x5a, 0xe9, 0xf1, 0x9a },
+#endif /* !MBEDTLS_AES_ONLY_128_BIT_KEY_LENGTH */
 };
 
 int mbedtls_gcm_self_test(int verbose)
@@ -880,21 +1032,31 @@
             mbedtls_printf("  GCM note: using AESNI.\n");
         } else
 #endif
+
+#if defined(MBEDTLS_AESCE_HAVE_CODE)
+        if (MBEDTLS_AESCE_HAS_SUPPORT()) {
+            mbedtls_printf("  GCM note: using AESCE.\n");
+        } else
+#endif
+
         mbedtls_printf("  GCM note: built-in implementation.\n");
 #endif /* MBEDTLS_GCM_ALT */
     }
 
-    for (j = 0; j < 3; j++) {
+    static const int loop_limit =
+        (sizeof(ct_test_data) / sizeof(*ct_test_data)) / MAX_TESTS;
+
+    for (j = 0; j < loop_limit; j++) {
         int key_len = 128 + 64 * j;
 
         for (i = 0; i < MAX_TESTS; i++) {
-            mbedtls_gcm_init(&ctx);
-
             if (verbose != 0) {
                 mbedtls_printf("  AES-GCM-%3d #%d (%s): ",
                                key_len, i, "enc");
             }
 
+            mbedtls_gcm_init(&ctx);
+
             ret = mbedtls_gcm_setkey(&ctx, cipher,
                                      key_test_data[key_index_test_data[i]],
                                      key_len);