SHA-3: add compile-time switches to roll/unroll the Theta, Rho and Pi steps
Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
diff --git a/library/sha3.c b/library/sha3.c
index 6b14a84..99a8acb 100644
--- a/library/sha3.c
+++ b/library/sha3.c
@@ -10,6 +10,10 @@
* https://nvlpubs.nist.gov/nistpubs/fips/nist.fips.202.pdf
*/
+#undef MBEDTLS_SHA3_THETA_UNROLL /* Theta: left undefined, so the loop-based (rolled) variant is compiled */
+#define MBEDTLS_SHA3_RHO_UNROLL /* Rho: defined, so the four rotations per rho[] word are written out inline */
+#define MBEDTLS_SHA3_PI_UNROLL /* Pi: defined, so the swaps for pi[0]..pi[5] are written out inline */
+
#include "common.h"
#if defined(MBEDTLS_SHA3_C)
@@ -60,6 +64,15 @@
uint64_t t;
/* Theta */
+#if !defined(MBEDTLS_SHA3_THETA_UNROLL)
+ for (i = 0; i < 5; i++) {
+ lane[i] = s[i] ^ s[i + 5] ^ s[i + 10] ^ s[i + 15] ^ s[i + 20];
+ }
+ for (i = 0; i < 5; i++) {
+ t = lane[(i + 4) % 5] ^ ROTR64(lane[(i + 1) % 5], 63);
+ s[i] ^= t; s[i + 5] ^= t; s[i + 10] ^= t; s[i + 15] ^= t; s[i + 20] ^= t;
+ }
+#else
lane[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20];
lane[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21];
lane[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22];
@@ -80,19 +93,28 @@
t = lane[3] ^ ROTR64(lane[0], 63);
s[4] ^= t; s[9] ^= t; s[14] ^= t; s[19] ^= t; s[24] ^= t;
+#endif
/* Rho */
for (i = 1; i < 25; i += 4) {
uint32_t r = rho[(i - 1) >> 2];
+#if !defined(MBEDTLS_SHA3_RHO_UNROLL)
for (int j = i; j < i + 4; j++) {
uint8_t r8 = (uint8_t) (r >> 24);
r <<= 8;
s[j] = ROTR64(s[j], r8);
}
+#else
+ s[i + 0] = ROTR64(s[i + 0], MBEDTLS_BYTE_3(r));
+ s[i + 1] = ROTR64(s[i + 1], MBEDTLS_BYTE_2(r));
+ s[i + 2] = ROTR64(s[i + 2], MBEDTLS_BYTE_1(r));
+ s[i + 3] = ROTR64(s[i + 3], MBEDTLS_BYTE_0(r));
+#endif
}
/* Pi */
t = s[1];
+#if !defined(MBEDTLS_SHA3_PI_UNROLL)
for (i = 0; i < 24; i += 4) {
uint32_t p = pi[i >> 2];
for (unsigned j = 0; j < 4; j++) {
@@ -101,6 +123,26 @@
SWAP(s[p8], t);
}
}
+#else
+ uint32_t p = pi[0];
+ SWAP(s[MBEDTLS_BYTE_3(p)], t); SWAP(s[MBEDTLS_BYTE_2(p)], t);
+ SWAP(s[MBEDTLS_BYTE_1(p)], t); SWAP(s[MBEDTLS_BYTE_0(p)], t);
+ p = pi[1];
+ SWAP(s[MBEDTLS_BYTE_3(p)], t); SWAP(s[MBEDTLS_BYTE_2(p)], t);
+ SWAP(s[MBEDTLS_BYTE_1(p)], t); SWAP(s[MBEDTLS_BYTE_0(p)], t);
+ p = pi[2];
+ SWAP(s[MBEDTLS_BYTE_3(p)], t); SWAP(s[MBEDTLS_BYTE_2(p)], t);
+ SWAP(s[MBEDTLS_BYTE_1(p)], t); SWAP(s[MBEDTLS_BYTE_0(p)], t);
+ p = pi[3];
+ SWAP(s[MBEDTLS_BYTE_3(p)], t); SWAP(s[MBEDTLS_BYTE_2(p)], t);
+ SWAP(s[MBEDTLS_BYTE_1(p)], t); SWAP(s[MBEDTLS_BYTE_0(p)], t);
+ p = pi[4];
+ SWAP(s[MBEDTLS_BYTE_3(p)], t); SWAP(s[MBEDTLS_BYTE_2(p)], t);
+ SWAP(s[MBEDTLS_BYTE_1(p)], t); SWAP(s[MBEDTLS_BYTE_0(p)], t);
+ p = pi[5];
+ SWAP(s[MBEDTLS_BYTE_3(p)], t); SWAP(s[MBEDTLS_BYTE_2(p)], t);
+ SWAP(s[MBEDTLS_BYTE_1(p)], t); SWAP(s[MBEDTLS_BYTE_0(p)], t);
+#endif
/* Chi */
lane[0] = s[0]; lane[1] = s[1]; lane[2] = s[2]; lane[3] = s[3]; lane[4] = s[4];