Unroll aesce_decrypt_block

Fully unroll the round loop in aesce_decrypt_block, with direct entry
points for 10-, 12- and 14-round key schedules (AES-128, AES-192 and
AES-256 respectively).

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
diff --git a/library/aesce.c b/library/aesce.c
index abd47b1..e21e3b3 100644
--- a/library/aesce.c
+++ b/library/aesce.c
@@ -171,31 +171,77 @@
unsigned char *keys,
int rounds)
{
-
- for (int i = 0; i < rounds - 1; i++) {
- /* AES AddRoundKey, SubBytes, ShiftRows */
- block = vaesdq_u8(block, vld1q_u8(keys + i * 16));
- /* AES inverse MixColumns for the next round.
- *
- * This means that we switch the order of the inverse AddRoundKey and
- * inverse MixColumns operations. We have to do this as AddRoundKey is
- * done in an atomic instruction together with the inverses of SubBytes
- * and ShiftRows.
- *
- * It works because MixColumns is a linear operation over GF(2^8) and
- * AddRoundKey is an exclusive or, which is equivalent to addition over
- * GF(2^8). (The inverse of MixColumns needs to be applied to the
- * affected round keys separately which has been done when the
- * decryption round keys were calculated.) */
- block = vaesimcq_u8(block);
+ /* Assume either 10, 12 or 14 rounds (AES-128, AES-192 or AES-256) */
+ if (rounds == 10) {
+ goto rounds_10;
}
+ if (rounds == 12) {
+ goto rounds_12;
+ }
+
+ /* AES AddRoundKey, SubBytes, ShiftRows */
+ block = vaesdq_u8(block, vld1q_u8(keys));
+ /* AES inverse MixColumns for the next round.
+ *
+ * This means that we switch the order of the inverse AddRoundKey and
+ * inverse MixColumns operations. We have to do this because AddRoundKey is
+ * performed in a single instruction (AESD, i.e. vaesdq_u8) together with the
+ * inverses of SubBytes and ShiftRows.
+ *
+ * It works because MixColumns is a linear operation over GF(2^8) and
+ * AddRoundKey is an exclusive or, which is equivalent to addition over
+ * GF(2^8). (The inverse of MixColumns needs to be applied to the
+ * affected round keys separately; this was done when the decryption
+ * round keys were calculated.) */
+ block = vaesimcq_u8(block);
+ keys += 16;
+
+ block = vaesdq_u8(block, vld1q_u8(keys));
+ block = vaesimcq_u8(block);
+ keys += 16;
+rounds_12:
+ block = vaesdq_u8(block, vld1q_u8(keys));
+ block = vaesimcq_u8(block);
+ keys += 16;
+ block = vaesdq_u8(block, vld1q_u8(keys));
+ block = vaesimcq_u8(block);
+ keys += 16;
+rounds_10:
+ block = vaesdq_u8(block, vld1q_u8(keys));
+ block = vaesimcq_u8(block);
+ keys += 16;
+ block = vaesdq_u8(block, vld1q_u8(keys));
+ block = vaesimcq_u8(block);
+ keys += 16;
+ block = vaesdq_u8(block, vld1q_u8(keys));
+ block = vaesimcq_u8(block);
+ keys += 16;
+ block = vaesdq_u8(block, vld1q_u8(keys));
+ block = vaesimcq_u8(block);
+ keys += 16;
+ block = vaesdq_u8(block, vld1q_u8(keys));
+ block = vaesimcq_u8(block);
+ keys += 16;
+ block = vaesdq_u8(block, vld1q_u8(keys));
+ block = vaesimcq_u8(block);
+ keys += 16;
+ block = vaesdq_u8(block, vld1q_u8(keys));
+ block = vaesimcq_u8(block);
+ keys += 16;
+ block = vaesdq_u8(block, vld1q_u8(keys));
+ block = vaesimcq_u8(block);
+ keys += 16;
+ block = vaesdq_u8(block, vld1q_u8(keys));
+ block = vaesimcq_u8(block);
+ keys += 16;
/* The inverses of AES AddRoundKey, SubBytes, ShiftRows finishing up the
* last full round. */
- block = vaesdq_u8(block, vld1q_u8(keys + (rounds - 1) * 16));
+ block = vaesdq_u8(block, vld1q_u8(keys));
+ keys += 16;
/* Inverse AddRoundKey for inverting the initial round key addition. */
- block = veorq_u8(block, vld1q_u8(keys + rounds * 16));
+ block = veorq_u8(block, vld1q_u8(keys));
return block;
}
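
For reference, the reordering described in the comment above relies on inverse
MixColumns being linear over GF(2^8): InvMixColumns(state XOR round_key) equals
InvMixColumns(state) XOR InvMixColumns(round_key). A minimal standalone sketch,
not part of the patch, that checks this identity with the same intrinsics; it
assumes an AArch64 compiler with the Cryptographic Extension enabled (e.g.
-march=armv8-a+crypto):

#include <arm_neon.h>
#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Check InvMixColumns(x ^ k) == InvMixColumns(x) ^ InvMixColumns(k) for one
 * pair of 16-byte blocks. vaesimcq_u8 is AES inverse MixColumns and veorq_u8
 * is XOR, i.e. AddRoundKey. */
static void check_invmixcolumns_is_linear(const uint8_t x_bytes[16],
                                          const uint8_t k_bytes[16])
{
    uint8x16_t x = vld1q_u8(x_bytes);
    uint8x16_t k = vld1q_u8(k_bytes);

    uint8x16_t lhs = vaesimcq_u8(veorq_u8(x, k));
    uint8x16_t rhs = veorq_u8(vaesimcq_u8(x), vaesimcq_u8(k));

    uint8_t a[16], b[16];
    vst1q_u8(a, lhs);
    vst1q_u8(b, rhs);
    assert(memcmp(a, b, 16) == 0);
}

int main(void)
{
    uint8_t x[16], k[16];
    for (int i = 0; i < 16; i++) {
        x[i] = (uint8_t) (0x3c ^ (i * 7));   /* arbitrary test bytes */
        k[i] = (uint8_t) (0xa5 + i * 13);
    }
    check_invmixcolumns_is_linear(x, k);
    return 0;
}

Because the identity holds, applying vaesimcq_u8() to the state right after
vaesdq_u8() is equivalent to applying it before the next round's AddRoundKey,
provided the round keys themselves have had InvMixColumns applied in advance.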
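
The parenthetical about applying the inverse of MixColumns "to the affected
round keys separately" refers to the equivalent inverse cipher key schedule:
every decryption round key except the first and last gets InvMixColumns
pre-applied when it is derived from the encryption schedule. A minimal sketch
of that derivation, under the same build assumptions; the helper name
invert_round_keys is illustrative and is not the function aesce.c uses:

#include <arm_neon.h>

/* Build the decryption round keys from the encryption round keys:
 * reverse their order and pre-apply InvMixColumns to all but the
 * first and last. Both buffers hold (rounds + 1) keys of 16 bytes. */
static void invert_round_keys(unsigned char *dec_keys,
                              const unsigned char *enc_keys,
                              int rounds)
{
    for (int i = 0; i <= rounds; i++) {
        uint8x16_t rk = vld1q_u8(enc_keys + (rounds - i) * 16);
        if (i != 0 && i != rounds) {
            rk = vaesimcq_u8(rk);
        }
        vst1q_u8(dec_keys + i * 16, rk);
    }
}

With the keys laid out this way, each unrolled round above can consume the next
16 bytes in order, which is why the pointer is simply advanced by 16 after every
vaesdq_u8()/vaesimcq_u8() pair.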