Clean up AES context alignment code

Use a single auxiliary function to determine rk_offset, covering both
setkey_enc and setkey_dec, covering both AESNI and PADLOCK. For AESNI, only
build this when using the intrinsics-based implementation, since the
assembly implementation supports unaligned access.

Simplify "do we need to realign?" to "is the desired offset now equal to
the current offset?".

Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
diff --git a/library/aes.c b/library/aes.c
index b4b3423..91b6d43 100644
--- a/library/aes.c
+++ b/library/aes.c
@@ -504,6 +504,53 @@
 }
 #endif /* MBEDTLS_CIPHER_MODE_XTS */
 
+/* Some implementations need the round keys to be aligned.
+ * Return an offset to be added to buf, such that (buf + offset) is
+ * correctly aligned.
+ * Note that the offset is in units of elements of buf, i.e. 32-bit words,
+ * i.e. an offset of 1 means 4 bytes and so on.
+ */
+#if (defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86)) ||        \
+    defined(MBEDTLS_HAVE_AESNI_INTRINSICS)
+#define MAY_NEED_TO_ALIGN
+#endif
+static unsigned mbedtls_aes_rk_offset(uint32_t *buf)
+{
+#if defined(MAY_NEED_TO_ALIGN)
+    int align_16_bytes = 0;
+
+#if defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86)
+    if (aes_padlock_ace == -1) {
+        aes_padlock_ace = mbedtls_padlock_has_support(MBEDTLS_PADLOCK_ACE);
+    }
+    if (aes_padlock_ace) {
+        align_16_bytes = 1;
+    }
+#endif
+
+#if defined(MBEDTLS_AESNI_C) && defined(MBEDTLS_HAVE_AESNI_INTRINSICS)
+    if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) {
+        align_16_bytes = 1;
+    }
+#endif
+
+    if (align_16_bytes) {
+        /* These implementations needs 16-byte alignment
+         * for the round key array. */
+        unsigned delta = ((uintptr_t) buf & 0x0000000fU) / 4;
+        if (delta == 0) {
+            return 0;
+        } else {
+            return 4 - delta; // 16 bytes = 4 uint32_t
+        }
+    }
+#else /* MAY_NEED_TO_ALIGN */
+    (void) buf;
+#endif /* MAY_NEED_TO_ALIGN */
+
+    return 0;
+}
+
 /*
  * AES key schedule (encryption)
  */
@@ -528,27 +575,11 @@
     }
 #endif
 
-    ctx->rk_offset = 0;
-#if defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86)
-    if (aes_padlock_ace == -1) {
-        aes_padlock_ace = mbedtls_padlock_has_support(MBEDTLS_PADLOCK_ACE);
-    }
-
-    if (aes_padlock_ace) {
-        ctx->rk_offset = MBEDTLS_PADLOCK_ALIGN16(ctx->buf) - ctx->buf;
-    }
-#endif
+    ctx->rk_offset = mbedtls_aes_rk_offset(ctx->buf);
     RK = ctx->buf + ctx->rk_offset;
 
 #if defined(MBEDTLS_AESNI_HAVE_CODE)
     if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) {
-        /* The intrinsics-based implementation needs 16-byte alignment
-         * for the round key array. */
-        unsigned delta = (uintptr_t) ctx->buf & 0x0000000f;
-        if (delta != 0) {
-            ctx->rk_offset = 4 - delta / 4; // 16 bytes = 4 uint32_t
-        }
-        RK = ctx->buf + ctx->rk_offset;
         return mbedtls_aesni_setkey_enc((unsigned char *) RK, key, keybits);
     }
 #endif
@@ -640,26 +671,7 @@
 
     mbedtls_aes_init(&cty);
 
-    ctx->rk_offset = 0;
-#if defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86)
-    if (aes_padlock_ace == -1) {
-        aes_padlock_ace = mbedtls_padlock_has_support(MBEDTLS_PADLOCK_ACE);
-    }
-
-    if (aes_padlock_ace) {
-        ctx->rk_offset = MBEDTLS_PADLOCK_ALIGN16(ctx->buf) - ctx->buf;
-    }
-#endif
-#if defined(MBEDTLS_AESNI_HAVE_CODE)
-    if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) {
-        /* The intrinsics-based implementation needs 16-byte alignment
-         * for the round key array. */
-        unsigned delta = (uintptr_t) ctx->buf & 0x0000000f;
-        if (delta != 0) {
-            ctx->rk_offset = 4 - delta / 4; // 16 bytes = 4 uint32_t
-        }
-    }
-#endif
+    ctx->rk_offset = mbedtls_aes_rk_offset(ctx->buf);
     RK = ctx->buf + ctx->rk_offset;
 
     /* Also checks keybits */
@@ -961,8 +973,7 @@
 }
 #endif /* !MBEDTLS_AES_DECRYPT_ALT */
 
-#if defined(MBEDTLS_AESNI_HAVE_CODE) || \
-    (defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86))
+#if defined(MAY_NEED_TO_ALIGN)
 /* VIA Padlock and our intrinsics-based implementation of AESNI require
  * the round keys to be aligned on a 16-byte boundary. We take care of this
  * before creating them, but the AES context may have moved (this can happen
@@ -972,16 +983,8 @@
  */
 static void aes_maybe_realign(mbedtls_aes_context *ctx)
 {
-    /* We want a 16-byte alignment. Note that buf is a pointer to uint32_t
-     * and rk_offset is in units of uint32_t words = 4 bytes. We want a
-     * 4-word alignment. */
-    uintptr_t current_address = (uintptr_t) (ctx->buf + ctx->rk_offset);
-    unsigned current_alignment = (current_address & 0x0000000f) / 4;
-    if (current_alignment != 0) {
-        unsigned new_offset = ctx->rk_offset + 4 - current_alignment;
-        if (new_offset >= 4) {
-            new_offset -= 4;
-        }
+    unsigned new_offset = mbedtls_aes_rk_offset(ctx->buf);
+    if (new_offset != ctx->rk_offset) {
         memmove(ctx->buf + new_offset,     // new address
                 ctx->buf + ctx->rk_offset, // current address
                 (ctx->nr + 1) * 16);       // number of round keys * bytes per rk
@@ -1002,9 +1005,12 @@
         return MBEDTLS_ERR_AES_BAD_INPUT_DATA;
     }
 
+#if defined(MAY_NEED_TO_ALIGN)
+    aes_maybe_realign(ctx);
+#endif
+
 #if defined(MBEDTLS_AESNI_HAVE_CODE)
     if (mbedtls_aesni_has_support(MBEDTLS_AESNI_AES)) {
-        aes_maybe_realign(ctx);
         return mbedtls_aesni_crypt_ecb(ctx, mode, input, output);
     }
 #endif
@@ -1017,7 +1023,6 @@
 
 #if defined(MBEDTLS_PADLOCK_C) && defined(MBEDTLS_HAVE_X86)
     if (aes_padlock_ace > 0) {
-        aes_maybe_realign(ctx);
         return mbedtls_padlock_xcryptecb(ctx, mode, input, output);
     }
 #endif