Add GCM support
Signed-off-by: Jerry Yu <jerry.h.yu@arm.com>
diff --git a/library/aesce.c b/library/aesce.c
index acfac23..011c989 100644
--- a/library/aesce.c
+++ b/library/aesce.c
@@ -276,6 +276,69 @@
return 0;
}
+#if defined(MBEDTLS_GCM_C)
+
+static inline uint8x16_t pmull_low(uint8x16_t a, uint8x16_t b)
+{
+ return vreinterpretq_u8_p128(
+ vmull_p64(
+ (poly64_t) vget_low_p64(vreinterpretq_p64_u8(a)),
+ (poly64_t) vget_low_p64(vreinterpretq_p64_u8(b))));
+}
+
+static inline uint8x16_t pmull_high(uint8x16_t a, uint8x16_t b)
+{
+ return vreinterpretq_u8_p128(
+ vmull_high_p64(vreinterpretq_p64_u8(a),
+ vreinterpretq_p64_u8(b)));
+}
+
+static inline uint8x16x3_t poly_mult_128(uint8x16_t a, uint8x16_t b)
+{
+ uint8x16x3_t ret;
+ uint8x16_t c = vextq_u8(b, b, 8);
+ ret.val[0] = pmull_high(a, b); /* a1*b1 */
+ ret.val[1] = veorq_u8(pmull_high(a, c), /* a1*b0 + a0*b1 */
+ pmull_low(a, c));
+ ret.val[2] = pmull_low(a, b); /* a0*b0 */
+ return ret;
+}
+
+static inline uint8x16_t poly_mult_reduce(uint8x16x3_t a)
+{
+ uint8x16_t const Z = vdupq_n_u8(0);
+ /* use 'asm' as an optimisation barrier to prevent loading R from memory */
+ uint64x2_t r = vreinterpretq_u64_u8(vdupq_n_u8(0x87));
+ asm ("" : "+w" (r));
+ uint8x16_t const R = vreinterpretq_u8_u64(vshrq_n_u64(r, 64 - 8));
+ uint8x16_t d = a.val[0]; /* d3:d2:00:00 */
+ uint8x16_t j = a.val[1]; /* j2:j1:00 */
+ uint8x16_t g = a.val[2]; /* g1:g0 = a0*b0 */
+ uint8x16_t h = pmull_high(d, R); /* h2:h1:00 = reduction of d3 */
+ uint8x16_t i = pmull_low(d, R); /* i1:i0 = reduction of d2 */
+ uint8x16_t k = veorq_u8(j, h); /* k2:k1:00 = j2:j1 + h2:h1 */
+ uint8x16_t l = pmull_high(k, R); /* l1:l0 = reduction of k2 */
+ uint8x16_t m = vextq_u8(Z, k, 8); /* m1:00 = k1:00 */
+ uint8x16_t n = veorq_u8(g, i); /* n1:n0 = g1:g0 + i1:i0 */
+ uint8x16_t o = veorq_u8(n, l); /* o1:o0 = l1:l0 + n1:n0 */
+ return veorq_u8(o, m); /* = o1:o0 + m1:00 */
+}
+
+/*
+ * GCM multiplication: c = a times b in GF(2^128)
+ */
+void mbedtls_aesce_gcm_mult(unsigned char c[16],
+ const unsigned char a[16],
+ const unsigned char b[16])
+{
+ uint8x16_t va, vb, vc;
+ va = vrbitq_u8(vld1q_u8(&a[0]));
+ vb = vrbitq_u8(vld1q_u8(&b[0]));
+ vc = vrbitq_u8(poly_mult_reduce(poly_mult_128(va, vb)));
+ vst1q_u8(&c[0], vc);
+}
+
+#endif /* MBEDTLS_GCM_C */
#if defined(MBEDTLS_POP_TARGET_PRAGMA)
#if defined(__clang__)
diff --git a/library/aesce.h b/library/aesce.h
index da42446..1b3f816 100644
--- a/library/aesce.h
+++ b/library/aesce.h
@@ -65,6 +65,24 @@
unsigned char output[16]);
/**
+ * \brief Internal GCM multiplication: c = a * b in GF(2^128)
+ *
+ * \note This function is only for internal use by other library
+ * functions; you must not call it directly.
+ *
+ * \param c Result
+ * \param a First operand
+ * \param b Second operand
+ *
+ * \note Both operands and result are bit strings interpreted as
+ * elements of GF(2^128) as per the GCM spec.
+ */
+void mbedtls_aesce_gcm_mult(unsigned char c[16],
+ const unsigned char a[16],
+ const unsigned char b[16]);
+
+
+/**
* \brief Internal round key inversion. This function computes
* decryption round keys from the encryption round keys.
*
diff --git a/library/gcm.c b/library/gcm.c
index 6d4495f..0fa0008 100644
--- a/library/gcm.c
+++ b/library/gcm.c
@@ -42,6 +42,10 @@
#include "aesni.h"
#endif
+#if defined(MBEDTLS_AESCE_C)
+#include "aesce.h"
+#endif
+
#if !defined(MBEDTLS_GCM_ALT)
/*
@@ -93,6 +97,12 @@
}
#endif
+#if defined(MBEDTLS_AESCE_C) && defined(MBEDTLS_HAVE_ARM64)
+ if (mbedtls_aesce_has_support()) {
+ return 0;
+ }
+#endif
+
/* 0 corresponds to 0 in GF(2^128) */
ctx->HH[0] = 0;
ctx->HL[0] = 0;
@@ -197,6 +207,20 @@
}
#endif /* MBEDTLS_AESNI_C && MBEDTLS_HAVE_X86_64 */
+#if defined(MBEDTLS_AESCE_C) && defined(MBEDTLS_HAVE_ARM64)
+ if (mbedtls_aesce_has_support()) {
+ unsigned char h[16];
+
+ MBEDTLS_PUT_UINT32_BE(ctx->HH[8] >> 32, h, 0);
+ MBEDTLS_PUT_UINT32_BE(ctx->HH[8], h, 4);
+ MBEDTLS_PUT_UINT32_BE(ctx->HL[8] >> 32, h, 8);
+ MBEDTLS_PUT_UINT32_BE(ctx->HL[8], h, 12);
+
+ mbedtls_aesce_gcm_mult(output, x, h);
+ return;
+ }
+#endif
+
lo = x[15] & 0xf;
zh = ctx->HH[lo];