Double perf for AES-XEX
As seen from the first benchmark run, AES-XEX was running poorly (even
slower than AES-CBC). This commit doubles the performance of the
current implementation by replacing the byte-wise XOR loops with two
64-bit XORs per block.
diff --git a/library/aes.c b/library/aes.c
index 1f21587..1c69c97 100644
--- a/library/aes.c
+++ b/library/aes.c
@@ -999,36 +999,45 @@
const unsigned char *input,
unsigned char *output )
{
- int i;
- unsigned char t_buf[16];
- unsigned char scratch[16];
+ union xex_buf128 {
+ uint8_t u8[16];
+ uint64_t u64[2];
+ };
+
+ union xex_buf128 scratch;
+ union xex_buf128 t_buf;
+ union xex_buf128 *inbuf;
+ union xex_buf128 *outbuf;
+
+ inbuf = (union xex_buf128*)input;
+ outbuf = (union xex_buf128*)output;
if( length % 16 )
return( MBEDTLS_ERR_AES_INVALID_INPUT_LENGTH );
- mbedtls_aes_crypt_ecb( tweak_ctx, MBEDTLS_AES_ENCRYPT, iv, t_buf );
+ mbedtls_aes_crypt_ecb( tweak_ctx, MBEDTLS_AES_ENCRYPT, iv, t_buf.u8 );
goto first;
do
{
- mbedtls_gf128mul_x_ble( t_buf, t_buf );
+ mbedtls_gf128mul_x_ble( t_buf.u8, t_buf.u8 );
first:
/* PP <- T xor P */
- for( i = 0; i < 16; i++ )
- scratch[i] = (unsigned char)( input[i] ^ t_buf[i] );
+ scratch.u64[0] = (uint64_t)( inbuf->u64[0] ^ t_buf.u64[0] );
+ scratch.u64[1] = (uint64_t)( inbuf->u64[1] ^ t_buf.u64[1] );
/* CC <- E(Key2,PP) */
- mbedtls_aes_crypt_ecb( crypt_ctx, mode, scratch, output );
+ mbedtls_aes_crypt_ecb( crypt_ctx, mode, scratch.u8, outbuf->u8 );
/* C <- T xor CC */
- for( i = 0; i < 16; i++ )
- output[i] = (unsigned char)( output[i] ^ t_buf[i] );
+ outbuf->u64[0] = (uint64_t)( outbuf->u64[0] ^ t_buf.u64[0] );
+ outbuf->u64[1] = (uint64_t)( outbuf->u64[1] ^ t_buf.u64[1] );
- input += 16;
- output += 16;
+ inbuf += 1;
+ outbuf += 1;
length -= 16;
} while( length > 0 );