Consume input in 8-byte chunks

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
diff --git a/library/sha3.c b/library/sha3.c
index 959928e..d875b81 100644
--- a/library/sha3.c
+++ b/library/sha3.c
@@ -70,6 +70,8 @@
 #define ROT64(x, y) (((x) << (y)) | ((x) >> (64U - (y))))
 #define ABSORB(ctx, idx, v) do { ctx->state[(idx) >> 3] ^= ((uint64_t) (v)) << (((idx) & 0x7) << 3); \
 } while (0)
+#define ABSORB8(ctx, idx, v) do { ctx->state[(idx) >> 3] ^= ((uint64_t) (v)) << ((idx) << 3); \
+} while (0)
 #define SQUEEZE(ctx, idx) ((uint8_t) (ctx->state[(idx) >> 3] >> (((idx) & 0x7) << 3)))
 #define SWAP(x, y) do { uint64_t tmp = (x); (x) = (y); (y) = tmp; } while (0)
 
@@ -232,6 +234,32 @@
         return 0;
     }
 
+    if (ilen >= 8) {
+        // 8-byte align index
+        int align_bytes = 8 - (ctx->index % 8);
+        if (align_bytes) {
+            for (; align_bytes > 0; align_bytes--) {
+                ABSORB(ctx, ctx->index, *input++);
+                ilen--;
+                ctx->index++;
+            }
+            if ((ctx->index = ctx->index % ctx->max_block_size) == 0) {
+                keccak_f1600(ctx);
+            }
+        }
+
+        // process input in 8-byte chunks
+        while (ilen >= 8) {
+            ABSORB8(ctx, ctx->index, mbedtls_get_unaligned_uint64(input));
+            input += 8;
+            ilen -= 8;
+            if ((ctx->index = (ctx->index + 8) % ctx->max_block_size) == 0) {
+                keccak_f1600(ctx);
+            }
+        }
+    }
+
+    // handle remaining bytes
     while (ilen-- > 0) {
         ABSORB(ctx, ctx->index, *input++);
         if ((ctx->index = (ctx->index + 1) % ctx->max_block_size) == 0) {