Enhance GCM throughput using larger precalculated tables. Also refactored the code for shorter tables and moved the check for available accelerators to the context initialization code.

Signed-off-by: Matthias Schulz <mschulz@hilscher.com>
diff --git a/include/mbedtls/gcm.h b/include/mbedtls/gcm.h
index 631b392..f475710 100644
--- a/include/mbedtls/gcm.h
+++ b/include/mbedtls/gcm.h
@@ -33,6 +33,11 @@
 #define MBEDTLS_GCM_ENCRYPT     1
 #define MBEDTLS_GCM_DECRYPT     0
 
+#define MBEDTLS_GCM_ACC_SMALLTABLE  0
+#define MBEDTLS_GCM_ACC_LARGETABLE  1
+#define MBEDTLS_GCM_ACC_AESNI       2
+#define MBEDTLS_GCM_ACC_AESCE       3
+
 /** Authenticated decryption failed. */
 #define MBEDTLS_ERR_GCM_AUTH_FAILED                       -0x0012
 /** Bad input parameters to function. */
@@ -46,6 +51,12 @@
 
 #if !defined(MBEDTLS_GCM_ALT)
 
+#if defined(MBEDTLS_GCM_LARGETABLE)
+#define MBEDTLS_GCM_HTABLE_SIZE 256
+#else
+#define MBEDTLS_GCM_HTABLE_SIZE 16
+#endif
+
 /**
  * \brief          The GCM context structure.
  */
@@ -53,18 +64,22 @@
 #if defined(MBEDTLS_BLOCK_CIPHER_C)
     mbedtls_block_cipher_context_t MBEDTLS_PRIVATE(block_cipher_ctx);  /*!< The cipher context used. */
 #else
-    mbedtls_cipher_context_t MBEDTLS_PRIVATE(cipher_ctx);  /*!< The cipher context used. */
+    mbedtls_cipher_context_t MBEDTLS_PRIVATE(cipher_ctx);    /*!< The cipher context used. */
 #endif
-    uint64_t MBEDTLS_PRIVATE(HL)[16];                      /*!< Precalculated HTable low. */
-    uint64_t MBEDTLS_PRIVATE(HH)[16];                      /*!< Precalculated HTable high. */
-    uint64_t MBEDTLS_PRIVATE(len);                         /*!< The total length of the encrypted data. */
-    uint64_t MBEDTLS_PRIVATE(add_len);                     /*!< The total length of the additional data. */
-    unsigned char MBEDTLS_PRIVATE(base_ectr)[16];          /*!< The first ECTR for tag. */
-    unsigned char MBEDTLS_PRIVATE(y)[16];                  /*!< The Y working value. */
-    unsigned char MBEDTLS_PRIVATE(buf)[16];                /*!< The buf working value. */
-    int MBEDTLS_PRIVATE(mode);                             /*!< The operation to perform:
-                                                            #MBEDTLS_GCM_ENCRYPT or
-                                                            #MBEDTLS_GCM_DECRYPT. */
+    uint64_t MBEDTLS_PRIVATE(H)[MBEDTLS_GCM_HTABLE_SIZE][2]; /*!< Precalculated HTable. */
+    uint64_t MBEDTLS_PRIVATE(len);                           /*!< The total length of the encrypted data. */
+    uint64_t MBEDTLS_PRIVATE(add_len);                       /*!< The total length of the additional data. */
+    unsigned char MBEDTLS_PRIVATE(base_ectr)[16];            /*!< The first ECTR for tag. */
+    unsigned char MBEDTLS_PRIVATE(y)[16];                    /*!< The Y working value. */
+    unsigned char MBEDTLS_PRIVATE(buf)[16];                  /*!< The buf working value. */
+    unsigned char MBEDTLS_PRIVATE(mode);                     /*!< The operation to perform:
+                                                              #MBEDTLS_GCM_ENCRYPT or
+                                                              #MBEDTLS_GCM_DECRYPT. */
+    unsigned char MBEDTLS_PRIVATE(acceleration);             /*!< The acceleration to use:
+                                                              #MBEDTLS_GCM_ACC_SMALLTABLE,
+                                                              #MBEDTLS_GCM_ACC_LARGETABLE,
+                                                              #MBEDTLS_GCM_ACC_AESNI,
+                                                              #MBEDTLS_GCM_ACC_AESCE */
 }
 mbedtls_gcm_context;
 
diff --git a/include/mbedtls/mbedtls_config.h b/include/mbedtls/mbedtls_config.h
index 6a5828c..f467737 100644
--- a/include/mbedtls/mbedtls_config.h
+++ b/include/mbedtls/mbedtls_config.h
@@ -2801,6 +2801,20 @@
 #define MBEDTLS_GCM_C
 
 /**
+ * \def MBEDTLS_GCM_LARGETABLE
+ *
+ * Enable large precomputed tables (4096 instead of 256 bytes per context)
+ * for Galois/Counter Mode (GCM). Can significantly increase throughput on
+ * systems without GCM hardware acceleration (e.g., AESNI, AESCE).
+ *
+ * Module:  library/gcm.c
+ *
+ * Requires: MBEDTLS_GCM_C
+ *
+ */
+//#define MBEDTLS_GCM_LARGETABLE
+
+/**
  * \def MBEDTLS_HKDF_C
  *
  * Enable the HKDF algorithm (RFC 5869).
diff --git a/library/gcm.c b/library/gcm.c
index 033cb59..4a01f5c 100644
--- a/library/gcm.c
+++ b/library/gcm.c
@@ -49,6 +49,39 @@
     memset(ctx, 0, sizeof(mbedtls_gcm_context));
 }
 
+static inline void gcm_set_acceleration(mbedtls_gcm_context *ctx)
+{
+#if defined(MBEDTLS_GCM_LARGETABLE)
+    ctx->acceleration = MBEDTLS_GCM_ACC_LARGETABLE;
+#else
+    ctx->acceleration = MBEDTLS_GCM_ACC_SMALLTABLE;
+#endif
+
+#if defined(MBEDTLS_AESNI_HAVE_CODE)
+    /* With CLMUL support, we need only h, not the rest of the table */
+    if (mbedtls_aesni_has_support(MBEDTLS_AESNI_CLMUL)) {
+        ctx->acceleration = MBEDTLS_GCM_ACC_AESNI;
+    }
+#endif
+
+#if defined(MBEDTLS_AESCE_HAVE_CODE)
+    if (MBEDTLS_AESCE_HAS_SUPPORT()) {
+        ctx->acceleration = MBEDTLS_GCM_ACC_AESCE;
+    }
+#endif
+}
+
+static inline void gcm_gen_table_rightshift(uint64_t dst[2], const uint64_t src[2])
+{
+    uint8_t *u8Dst = (uint8_t *) dst;
+    uint8_t *u8Src = (uint8_t *) src;
+
+    MBEDTLS_PUT_UINT64_BE(MBEDTLS_GET_UINT64_BE(&src[1], 0) >> 1, &dst[1], 0);
+    u8Dst[8] |= (u8Src[7] & 0x01) << 7;
+    MBEDTLS_PUT_UINT64_BE(MBEDTLS_GET_UINT64_BE(&src[0], 0) >> 1, &dst[0], 0);
+    u8Dst[0] ^= (u8Src[15] & 0x01) ? 0xE1 : 0;
+}
+
 /*
  * Precompute small multiples of H, that is set
  *      HH[i] || HL[i] = H times i,
@@ -60,11 +93,8 @@
 static int gcm_gen_table(mbedtls_gcm_context *ctx)
 {
     int ret, i, j;
-    uint64_t hi, lo;
-    uint64_t vl, vh;
-    unsigned char h[16];
-
-    memset(h, 0, 16);
+    uint64_t u64h[2] = { 0 };
+    uint8_t *h = (uint8_t *) u64h;
 
 #if defined(MBEDTLS_BLOCK_CIPHER_C)
     ret = mbedtls_block_cipher_encrypt(&ctx->block_cipher_ctx, h, h);
@@ -76,53 +106,48 @@
         return ret;
     }
 
-    /* pack h as two 64-bits ints, big-endian */
-    hi = MBEDTLS_GET_UINT32_BE(h,  0);
-    lo = MBEDTLS_GET_UINT32_BE(h,  4);
-    vh = (uint64_t) hi << 32 | lo;
+    gcm_set_acceleration(ctx);
 
-    hi = MBEDTLS_GET_UINT32_BE(h,  8);
-    lo = MBEDTLS_GET_UINT32_BE(h,  12);
-    vl = (uint64_t) hi << 32 | lo;
+    /* MBEDTLS_GCM_HTABLE_SIZE/2 (binary 1000 or 10000000) corresponds to 1 in GF(2^128) */
+    ctx->H[MBEDTLS_GCM_HTABLE_SIZE/2][0] = u64h[0];
+    ctx->H[MBEDTLS_GCM_HTABLE_SIZE/2][1] = u64h[1];
 
-    /* 8 = 1000 corresponds to 1 in GF(2^128) */
-    ctx->HL[8] = vl;
-    ctx->HH[8] = vh;
-
+    switch (ctx->acceleration) {
 #if defined(MBEDTLS_AESNI_HAVE_CODE)
-    /* With CLMUL support, we need only h, not the rest of the table */
-    if (mbedtls_aesni_has_support(MBEDTLS_AESNI_CLMUL)) {
-        return 0;
-    }
+        case MBEDTLS_GCM_ACC_AESNI:
+            return 0;
 #endif
 
 #if defined(MBEDTLS_AESCE_HAVE_CODE)
-    if (MBEDTLS_AESCE_HAS_SUPPORT()) {
-        return 0;
-    }
+        case MBEDTLS_GCM_ACC_AESCE:
+            return 0;
 #endif
 
-    /* 0 corresponds to 0 in GF(2^128) */
-    ctx->HH[0] = 0;
-    ctx->HL[0] = 0;
+        default:
+            /* 0 corresponds to 0 in GF(2^128) */
+            ctx->H[0][0] = 0;
+            ctx->H[0][1] = 0;
 
-    for (i = 4; i > 0; i >>= 1) {
-        uint32_t T = (vl & 1) * 0xe1000000U;
-        vl  = (vh << 63) | (vl >> 1);
-        vh  = (vh >> 1) ^ ((uint64_t) T << 32);
+            for (i = MBEDTLS_GCM_HTABLE_SIZE/4; i > 0; i >>= 1) {
+                gcm_gen_table_rightshift(ctx->H[i], ctx->H[i*2]);
+            }
 
-        ctx->HL[i] = vl;
-        ctx->HH[i] = vh;
-    }
+#if !defined(MBEDTLS_GCM_LARGETABLE)
+            /* repack the power-of-two entries of H as native 64-bit ints (from big-endian) */
+            for (i = MBEDTLS_GCM_HTABLE_SIZE/2; i > 0; i >>= 1) {
+                MBEDTLS_PUT_UINT64_BE(ctx->H[i][0], &ctx->H[i][0], 0);
+                MBEDTLS_PUT_UINT64_BE(ctx->H[i][1], &ctx->H[i][1], 0);
+            }
+#endif
 
-    for (i = 2; i <= 8; i *= 2) {
-        uint64_t *HiL = ctx->HL + i, *HiH = ctx->HH + i;
-        vh = *HiH;
-        vl = *HiL;
-        for (j = 1; j < i; j++) {
-            HiH[j] = vh ^ ctx->HH[j];
-            HiL[j] = vl ^ ctx->HL[j];
-        }
+            for (i = 2; i < MBEDTLS_GCM_HTABLE_SIZE; i <<= 1) {
+                for (j = 1; j < i; j++) {
+                    mbedtls_xor_no_simd((unsigned char *) ctx->H[i+j],
+                                        (unsigned char *) ctx->H[i],
+                                        (unsigned char *) ctx->H[j],
+                                        16);
+                }
+            }
     }
 
     return 0;
@@ -181,6 +206,69 @@
     return 0;
 }
 
+#if defined(MBEDTLS_GCM_LARGETABLE)
+static const uint16_t last8[256] = {
+    0x0000, 0xc201, 0x8403, 0x4602, 0x0807, 0xca06, 0x8c04, 0x4e05,
+    0x100e, 0xd20f, 0x940d, 0x560c, 0x1809, 0xda08, 0x9c0a, 0x5e0b,
+    0x201c, 0xe21d, 0xa41f, 0x661e, 0x281b, 0xea1a, 0xac18, 0x6e19,
+    0x3012, 0xf213, 0xb411, 0x7610, 0x3815, 0xfa14, 0xbc16, 0x7e17,
+    0x4038, 0x8239, 0xc43b, 0x063a, 0x483f, 0x8a3e, 0xcc3c, 0x0e3d,
+    0x5036, 0x9237, 0xd435, 0x1634, 0x5831, 0x9a30, 0xdc32, 0x1e33,
+    0x6024, 0xa225, 0xe427, 0x2626, 0x6823, 0xaa22, 0xec20, 0x2e21,
+    0x702a, 0xb22b, 0xf429, 0x3628, 0x782d, 0xba2c, 0xfc2e, 0x3e2f,
+    0x8070, 0x4271, 0x0473, 0xc672, 0x8877, 0x4a76, 0x0c74, 0xce75,
+    0x907e, 0x527f, 0x147d, 0xd67c, 0x9879, 0x5a78, 0x1c7a, 0xde7b,
+    0xa06c, 0x626d, 0x246f, 0xe66e, 0xa86b, 0x6a6a, 0x2c68, 0xee69,
+    0xb062, 0x7263, 0x3461, 0xf660, 0xb865, 0x7a64, 0x3c66, 0xfe67,
+    0xc048, 0x0249, 0x444b, 0x864a, 0xc84f, 0x0a4e, 0x4c4c, 0x8e4d,
+    0xd046, 0x1247, 0x5445, 0x9644, 0xd841, 0x1a40, 0x5c42, 0x9e43,
+    0xe054, 0x2255, 0x6457, 0xa656, 0xe853, 0x2a52, 0x6c50, 0xae51,
+    0xf05a, 0x325b, 0x7459, 0xb658, 0xf85d, 0x3a5c, 0x7c5e, 0xbe5f,
+    0x00e1, 0xc2e0, 0x84e2, 0x46e3, 0x08e6, 0xcae7, 0x8ce5, 0x4ee4,
+    0x10ef, 0xd2ee, 0x94ec, 0x56ed, 0x18e8, 0xdae9, 0x9ceb, 0x5eea,
+    0x20fd, 0xe2fc, 0xa4fe, 0x66ff, 0x28fa, 0xeafb, 0xacf9, 0x6ef8,
+    0x30f3, 0xf2f2, 0xb4f0, 0x76f1, 0x38f4, 0xfaf5, 0xbcf7, 0x7ef6,
+    0x40d9, 0x82d8, 0xc4da, 0x06db, 0x48de, 0x8adf, 0xccdd, 0x0edc,
+    0x50d7, 0x92d6, 0xd4d4, 0x16d5, 0x58d0, 0x9ad1, 0xdcd3, 0x1ed2,
+    0x60c5, 0xa2c4, 0xe4c6, 0x26c7, 0x68c2, 0xaac3, 0xecc1, 0x2ec0,
+    0x70cb, 0xb2ca, 0xf4c8, 0x36c9, 0x78cc, 0xbacd, 0xfccf, 0x3ece,
+    0x8091, 0x4290, 0x0492, 0xc693, 0x8896, 0x4a97, 0x0c95, 0xce94,
+    0x909f, 0x529e, 0x149c, 0xd69d, 0x9898, 0x5a99, 0x1c9b, 0xde9a,
+    0xa08d, 0x628c, 0x248e, 0xe68f, 0xa88a, 0x6a8b, 0x2c89, 0xee88,
+    0xb083, 0x7282, 0x3480, 0xf681, 0xb884, 0x7a85, 0x3c87, 0xfe86,
+    0xc0a9, 0x02a8, 0x44aa, 0x86ab, 0xc8ae, 0x0aaf, 0x4cad, 0x8eac,
+    0xd0a7, 0x12a6, 0x54a4, 0x96a5, 0xd8a0, 0x1aa1, 0x5ca3, 0x9ea2,
+    0xe0b5, 0x22b4, 0x64b6, 0xa6b7, 0xe8b2, 0x2ab3, 0x6cb1, 0xaeb0,
+    0xf0bb, 0x32ba, 0x74b8, 0xb6b9, 0xf8bc, 0x3abd, 0x7cbf, 0xbebe
+};
+
+static void gcm_mult_largetable(uint8_t *output, const uint8_t *x, uint64_t H[256][2])
+{
+    int i;
+    uint64_t u64z[2];
+    uint16_t *u16z = (uint16_t *) u64z;
+    uint8_t *u8z = (uint8_t *) u64z;
+    uint8_t rem;
+
+    u64z[0] = 0;
+    u64z[1] = 0;
+
+    for (i = 15; i > 0; i--) {
+        mbedtls_xor_no_simd(u8z, u8z, (uint8_t *) H[x[i]], 16);
+
+        rem = u8z[15];
+
+        u64z[1] <<= 8;
+        u8z[8] = u8z[7];
+        u64z[0] <<= 8;
+
+        u8z[0] = 0;
+        u16z[0] ^= last8[rem];
+    }
+
+    mbedtls_xor_no_simd(output, u8z, (uint8_t *) H[x[0]], 16);
+}
+#else
 /*
  * Shoup's method for multiplication use this table with
  *      last4[x] = x times P^128
@@ -194,6 +282,49 @@
     0x9180, 0x8da0, 0xa9c0, 0xb5e0
 };
 
+static void gcm_mult_smalltable(uint8_t *output, const uint8_t *x, uint64_t H[16][2])
+{
+    int i = 0;
+    unsigned char lo, hi, rem;
+    uint64_t u64z[2];
+    const uint64_t *pu64z = 0;
+    uint8_t *u8z = (uint8_t *) u64z;
+
+    lo = x[15] & 0xf;
+    hi = (x[15] >> 4) & 0xf;
+
+    pu64z = H[lo];
+
+    rem = (unsigned char) pu64z[1] & 0xf;
+    u64z[1] = (pu64z[0] << 60) | (pu64z[1] >> 4);
+    u64z[0] = (pu64z[0] >> 4);
+    u64z[0] ^= (uint64_t) last4[rem] << 48;
+    mbedtls_xor_no_simd(u8z, u8z, (uint8_t *) H[hi], 16);
+
+    for (i = 14; i >= 0; i--) {
+        lo = x[i] & 0xf;
+        hi = (x[i] >> 4) & 0xf;
+
+        rem = (unsigned char) u64z[1] & 0xf;
+        u64z[1] = (u64z[0] << 60) | (u64z[1] >> 4);
+        u64z[0] = (u64z[0] >> 4);
+        u64z[0] ^= (uint64_t) last4[rem] << 48;
+        mbedtls_xor_no_simd(u8z, u8z, (uint8_t *) H[lo], 16);
+
+        rem = (unsigned char) u64z[1] & 0xf;
+        u64z[1] = (u64z[0] << 60) | (u64z[1] >> 4);
+        u64z[0] = (u64z[0] >> 4);
+        u64z[0] ^= (uint64_t) last4[rem] << 48;
+        mbedtls_xor_no_simd(u8z, u8z, (uint8_t *) H[hi], 16);
+    }
+
+    MBEDTLS_PUT_UINT32_BE(u64z[0] >> 32, output, 0);
+    MBEDTLS_PUT_UINT32_BE(u64z[0], output, 4);
+    MBEDTLS_PUT_UINT32_BE(u64z[1] >> 32, output, 8);
+    MBEDTLS_PUT_UINT32_BE(u64z[1], output, 12);
+}
+#endif
+
 /*
  * Sets output to x times H using the precomputed tables.
  * x and output are seen as elements of GF(2^128) as in [MGV].
@@ -201,71 +332,31 @@
 static void gcm_mult(mbedtls_gcm_context *ctx, const unsigned char x[16],
                      unsigned char output[16])
 {
-    int i = 0;
-    unsigned char lo, hi, rem;
-    uint64_t zh, zl;
-
+    switch (ctx->acceleration) {
 #if defined(MBEDTLS_AESNI_HAVE_CODE)
-    if (mbedtls_aesni_has_support(MBEDTLS_AESNI_CLMUL)) {
-        unsigned char h[16];
-
-        /* mbedtls_aesni_gcm_mult needs big-endian input */
-        MBEDTLS_PUT_UINT32_BE(ctx->HH[8] >> 32, h,  0);
-        MBEDTLS_PUT_UINT32_BE(ctx->HH[8],       h,  4);
-        MBEDTLS_PUT_UINT32_BE(ctx->HL[8] >> 32, h,  8);
-        MBEDTLS_PUT_UINT32_BE(ctx->HL[8],       h, 12);
-
-        mbedtls_aesni_gcm_mult(output, x, h);
-        return;
-    }
-#endif /* MBEDTLS_AESNI_HAVE_CODE */
-
-#if defined(MBEDTLS_AESCE_HAVE_CODE)
-    if (MBEDTLS_AESCE_HAS_SUPPORT()) {
-        unsigned char h[16];
-
-        /* mbedtls_aesce_gcm_mult needs big-endian input */
-        MBEDTLS_PUT_UINT32_BE(ctx->HH[8] >> 32, h,  0);
-        MBEDTLS_PUT_UINT32_BE(ctx->HH[8],       h,  4);
-        MBEDTLS_PUT_UINT32_BE(ctx->HL[8] >> 32, h,  8);
-        MBEDTLS_PUT_UINT32_BE(ctx->HL[8],       h, 12);
-
-        mbedtls_aesce_gcm_mult(output, x, h);
-        return;
-    }
+        case MBEDTLS_GCM_ACC_AESNI:
+            mbedtls_aesni_gcm_mult(output, x, (uint8_t *) ctx->H[MBEDTLS_GCM_HTABLE_SIZE/2]);
+            break;
 #endif
 
-    lo = x[15] & 0xf;
+#if defined(MBEDTLS_AESCE_HAVE_CODE)
+        case MBEDTLS_GCM_ACC_AESCE:
+            mbedtls_aesce_gcm_mult(output, x, (uint8_t *) ctx->H[MBEDTLS_GCM_HTABLE_SIZE/2]);
+            break;
+#endif
 
-    zh = ctx->HH[lo];
-    zl = ctx->HL[lo];
-
-    for (i = 15; i >= 0; i--) {
-        lo = x[i] & 0xf;
-        hi = (x[i] >> 4) & 0xf;
-
-        if (i != 15) {
-            rem = (unsigned char) zl & 0xf;
-            zl = (zh << 60) | (zl >> 4);
-            zh = (zh >> 4);
-            zh ^= (uint64_t) last4[rem] << 48;
-            zh ^= ctx->HH[lo];
-            zl ^= ctx->HL[lo];
-
-        }
-
-        rem = (unsigned char) zl & 0xf;
-        zl = (zh << 60) | (zl >> 4);
-        zh = (zh >> 4);
-        zh ^= (uint64_t) last4[rem] << 48;
-        zh ^= ctx->HH[hi];
-        zl ^= ctx->HL[hi];
+#if defined(MBEDTLS_GCM_LARGETABLE)
+        case MBEDTLS_GCM_ACC_LARGETABLE:
+            gcm_mult_largetable(output, x, ctx->H);
+            break;
+#else
+        case MBEDTLS_GCM_ACC_SMALLTABLE:
+            gcm_mult_smalltable(output, x, ctx->H);
+            break;
+#endif
     }
 
-    MBEDTLS_PUT_UINT32_BE(zh >> 32, output, 0);
-    MBEDTLS_PUT_UINT32_BE(zh, output, 4);
-    MBEDTLS_PUT_UINT32_BE(zl >> 32, output, 8);
-    MBEDTLS_PUT_UINT32_BE(zl, output, 12);
+    return;
 }
 
 int mbedtls_gcm_starts(mbedtls_gcm_context *ctx,
diff --git a/tests/scripts/all.sh b/tests/scripts/all.sh
index 8d3b46e..a21566c 100755
--- a/tests/scripts/all.sh
+++ b/tests/scripts/all.sh
@@ -4941,6 +4941,58 @@
     programs/test/selftest
 }
 
+component_test_gcm_largetable () {
+    msg "build: default config + GCM_LARGETABLE - AESNI_C - AESCE_C"
+    scripts/config.py set MBEDTLS_GCM_LARGETABLE
+    scripts/config.py unset MBEDTLS_PADLOCK_C
+    scripts/config.py unset MBEDTLS_AESNI_C
+    scripts/config.py unset MBEDTLS_AESCE_C
+
+    make CFLAGS='-O2 -Werror -Wall -Wextra'
+
+    msg "test: default config + GCM_LARGETABLE - AESNI_C - AESCE_C"
+    make test
+}
+
+component_test_gcm_largetable_gcc () {
+    msg "build: default config + GCM_LARGETABLE - AESNI_C - AESCE_C"
+    scripts/config.py set MBEDTLS_GCM_LARGETABLE
+    scripts/config.py unset MBEDTLS_PADLOCK_C
+    scripts/config.py unset MBEDTLS_AESNI_C
+    scripts/config.py unset MBEDTLS_AESCE_C
+
+    make CC=gcc CFLAGS='-O2 -Werror -Wall -Wextra'
+
+    msg "test: default config + GCM_LARGETABLE - AESNI_C - AESCE_C"
+    make test
+}
+
+component_test_gcm_smalltable () {
+    msg "build: default config - GCM_LARGETABLE - AESNI_C - AESCE_C"
+    scripts/config.py unset MBEDTLS_GCM_LARGETABLE
+    scripts/config.py unset MBEDTLS_PADLOCK_C
+    scripts/config.py unset MBEDTLS_AESNI_C
+    scripts/config.py unset MBEDTLS_AESCE_C
+
+    make CFLAGS='-O2 -Werror -Wall -Wextra'
+
+    msg "test: default config - GCM_LARGETABLE - AESNI_C - AESCE_C"
+    make test
+}
+
+component_test_gcm_smalltable_gcc () {
+    msg "build: default config - GCM_LARGETABLE - AESNI_C - AESCE_C"
+    scripts/config.py unset MBEDTLS_GCM_LARGETABLE
+    scripts/config.py unset MBEDTLS_PADLOCK_C
+    scripts/config.py unset MBEDTLS_AESNI_C
+    scripts/config.py unset MBEDTLS_AESCE_C
+
+    make CC=gcc CFLAGS='-O2 -Werror -Wall -Wextra'
+
+    msg "test: default config - GCM_LARGETABLE - AESNI_C - AESCE_C"
+    make test
+}
+
 component_test_aes_fewer_tables () {
     msg "build: default config with AES_FEWER_TABLES enabled"
     scripts/config.py set MBEDTLS_AES_FEWER_TABLES