Improve readability of unrolled AESCE code
Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
diff --git a/library/aesce.c b/library/aesce.c
index 6b493a2..600326a 100644
--- a/library/aesce.c
+++ b/library/aesce.c
@@ -101,59 +101,36 @@
#endif
}
+/* Single round of AESCE encryption */
+#define AESCE_ENCRYPT_ROUND \
+ block = vaeseq_u8(block, vld1q_u8(keys)); \
+ block = vaesmcq_u8(block); \
+ keys += 16
+/* Two rounds of AESCE encryption */
+#define AESCE_ENCRYPT_ROUND_X2 AESCE_ENCRYPT_ROUND; AESCE_ENCRYPT_ROUND
+
MBEDTLS_OPTIMIZE_ALWAYS
static uint8x16_t aesce_encrypt_block(uint8x16_t block,
unsigned char *keys,
int rounds)
{
- /* Assume either 10, 12 or 14 rounds */
+ /* Assume either 10, 12 or 14 rounds.
+ * Skip the first 4 or 2 rounds if doing 10 or 12 rounds */
if (rounds == 10) {
goto rounds_10;
}
if (rounds == 12) {
goto rounds_12;
}
- block = vaeseq_u8(block, vld1q_u8(keys));
- block = vaesmcq_u8(block);
- keys += 16;
- block = vaeseq_u8(block, vld1q_u8(keys));
- block = vaesmcq_u8(block);
- keys += 16;
+ AESCE_ENCRYPT_ROUND_X2;
rounds_12:
- block = vaeseq_u8(block, vld1q_u8(keys));
- block = vaesmcq_u8(block);
- keys += 16;
- block = vaeseq_u8(block, vld1q_u8(keys));
- block = vaesmcq_u8(block);
- keys += 16;
+ AESCE_ENCRYPT_ROUND_X2;
rounds_10:
- block = vaeseq_u8(block, vld1q_u8(keys));
- block = vaesmcq_u8(block);
- keys += 16;
- block = vaeseq_u8(block, vld1q_u8(keys));
- block = vaesmcq_u8(block);
- keys += 16;
- block = vaeseq_u8(block, vld1q_u8(keys));
- block = vaesmcq_u8(block);
- keys += 16;
- block = vaeseq_u8(block, vld1q_u8(keys));
- block = vaesmcq_u8(block);
- keys += 16;
- block = vaeseq_u8(block, vld1q_u8(keys));
- block = vaesmcq_u8(block);
- keys += 16;
- block = vaeseq_u8(block, vld1q_u8(keys));
- block = vaesmcq_u8(block);
- keys += 16;
- block = vaeseq_u8(block, vld1q_u8(keys));
- block = vaesmcq_u8(block);
- keys += 16;
- block = vaeseq_u8(block, vld1q_u8(keys));
- block = vaesmcq_u8(block);
- keys += 16;
- block = vaeseq_u8(block, vld1q_u8(keys));
- block = vaesmcq_u8(block);
- keys += 16;
+ AESCE_ENCRYPT_ROUND_X2;
+ AESCE_ENCRYPT_ROUND_X2;
+ AESCE_ENCRYPT_ROUND_X2;
+ AESCE_ENCRYPT_ROUND_X2;
+ AESCE_ENCRYPT_ROUND;
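+ /* 13, 11 or 9 full rounds have now been done for 14-, 12- or 10-round
+ * keys respectively; only the final round (without MixColumns) remains. */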
/* AES AddRoundKey for the previous round.
* SubBytes, ShiftRows for the final round. */
@@ -168,74 +145,56 @@
return block;
}
+/* Single round of AESCE decryption
+ *
+ * AES AddRoundKey, SubBytes, ShiftRows
+ *
+ * block = vaesdq_u8(block, vld1q_u8(keys));
+ *
+ * AES inverse MixColumns for the next round.
+ *
+ * This means that we switch the order of the inverse AddRoundKey and
+ * inverse MixColumns operations. We have to do this as AddRoundKey is
+ * done in an atomic instruction together with the inverses of SubBytes
+ * and ShiftRows.
+ *
+ * It works because MixColumns is a linear operation over GF(2^8) and
+ * AddRoundKey is an exclusive or, which is equivalent to addition over
+ * GF(2^8). (The inverse of MixColumns needs to be applied to the
+ * affected round keys separately; this was done when the
+ * decryption round keys were calculated.)
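+ *
+ * In symbols, for state S and round key K (a sketch of the identity
+ * relied on, not code from this file):
+ *
+ *   InvMixColumns(S ^ K) == InvMixColumns(S) ^ InvMixColumns(K)
+ *
+ * so using the pre-transformed decryption round keys gives the same
+ * result as the standard order of operations.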
+ *
+ * block = vaesimcq_u8(block);
+ */
+#define AESCE_DECRYPT_ROUND \
+ block = vaesdq_u8(block, vld1q_u8(keys)); \
+ block = vaesimcq_u8(block); \
+ keys += 16
+/* Two rounds of AESCE decryption */
+#define AESCE_DECRYPT_ROUND_X2 AESCE_DECRYPT_ROUND; AESCE_DECRYPT_ROUND
+
MBEDTLS_OPTIMIZE_ALWAYS
static uint8x16_t aesce_decrypt_block(uint8x16_t block,
unsigned char *keys,
int rounds)
{
- /* Assume either 10, 12 or 14 rounds */
+ /* Assume either 10, 12 or 14 rounds.
+ * Skip the first 4 or 2 rounds if doing 10 or 12 rounds */
if (rounds == 10) {
goto rounds_10;
}
if (rounds == 12) {
goto rounds_12;
}
-
- /* AES AddRoundKey, SubBytes, ShiftRows */
- block = vaesdq_u8(block, vld1q_u8(keys));
- /* AES inverse MixColumns for the next round.
- *
- * This means that we switch the order of the inverse AddRoundKey and
- * inverse MixColumns operations. We have to do this as AddRoundKey is
- * done in an atomic instruction together with the inverses of SubBytes
- * and ShiftRows.
- *
- * It works because MixColumns is a linear operation over GF(2^8) and
- * AddRoundKey is an exclusive or, which is equivalent to addition over
- * GF(2^8). (The inverse of MixColumns needs to be applied to the
- * affected round keys separately which has been done when the
- * decryption round keys were calculated.) */
- block = vaesimcq_u8(block);
- keys += 16;
-
- block = vaesdq_u8(block, vld1q_u8(keys));
- block = vaesimcq_u8(block);
- keys += 16;
+ AESCE_DECRYPT_ROUND_X2;
rounds_12:
- block = vaesdq_u8(block, vld1q_u8(keys));
- block = vaesimcq_u8(block);
- keys += 16;
- block = vaesdq_u8(block, vld1q_u8(keys));
- block = vaesimcq_u8(block);
- keys += 16;
+ AESCE_DECRYPT_ROUND_X2;
rounds_10:
- block = vaesdq_u8(block, vld1q_u8(keys));
- block = vaesimcq_u8(block);
- keys += 16;
- block = vaesdq_u8(block, vld1q_u8(keys));
- block = vaesimcq_u8(block);
- keys += 16;
- block = vaesdq_u8(block, vld1q_u8(keys));
- block = vaesimcq_u8(block);
- keys += 16;
- block = vaesdq_u8(block, vld1q_u8(keys));
- block = vaesimcq_u8(block);
- keys += 16;
- block = vaesdq_u8(block, vld1q_u8(keys));
- block = vaesimcq_u8(block);
- keys += 16;
- block = vaesdq_u8(block, vld1q_u8(keys));
- block = vaesimcq_u8(block);
- keys += 16;
- block = vaesdq_u8(block, vld1q_u8(keys));
- block = vaesimcq_u8(block);
- keys += 16;
- block = vaesdq_u8(block, vld1q_u8(keys));
- block = vaesimcq_u8(block);
- keys += 16;
- block = vaesdq_u8(block, vld1q_u8(keys));
- block = vaesimcq_u8(block);
- keys += 16;
+ AESCE_DECRYPT_ROUND_X2;
+ AESCE_DECRYPT_ROUND_X2;
+ AESCE_DECRYPT_ROUND_X2;
+ AESCE_DECRYPT_ROUND_X2;
+ AESCE_DECRYPT_ROUND;
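+ /* As in aesce_encrypt_block(): 13, 11 or 9 full rounds have been done
+ * for 14-, 12- or 10-round keys; only the final round remains. */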
/* The inverses of AES AddRoundKey, SubBytes, ShiftRows finishing up the
* last full round. */