SHA-3: roll up the Chi step into a loop on non-GCC compilers (e.g. Clang)
Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
diff --git a/library/sha3.c b/library/sha3.c
index 9585d01..97d45d3 100644
--- a/library/sha3.c
+++ b/library/sha3.c
@@ -23,7 +23,7 @@
#undef MBEDTLS_SHA3_THETA_UNROLL //no-check-names
#define MBEDTLS_SHA3_RHO_UNROLL //no-check-names
#define MBEDTLS_SHA3_PI_UNROLL //no-check-names
-
+#undef MBEDTLS_SHA3_CHI_UNROLL //no-check-names
#include "common.h"
@@ -156,6 +156,17 @@
#endif
/* Chi */
+#if !defined(MBEDTLS_SHA3_CHI_UNROLL) && !defined(MBEDTLS_COMPILER_IS_GCC)
+ /* Rolled-up Chi; not used on GCC, which performs poorly with this form, especially at -O2. */
+ for (i = 0; i <= 20; i += 5) {
+ lane[0] = s[i]; lane[1] = s[i + 1]; lane[2] = s[i + 2]; lane[3] = s[i + 3]; lane[4] = s[i + 4];
+ s[i + 0] ^= (~lane[1]) & lane[2];
+ s[i + 1] ^= (~lane[2]) & lane[3];
+ s[i + 2] ^= (~lane[3]) & lane[4];
+ s[i + 3] ^= (~lane[4]) & lane[0];
+ s[i + 4] ^= (~lane[0]) & lane[1];
+ }
+#else
lane[0] = s[0]; lane[1] = s[1]; lane[2] = s[2]; lane[3] = s[3]; lane[4] = s[4];
s[0] ^= (~lane[1]) & lane[2];
s[1] ^= (~lane[2]) & lane[3];
@@ -190,6 +201,7 @@
s[22] ^= (~lane[3]) & lane[4];
s[23] ^= (~lane[4]) & lane[0];
s[24] ^= (~lane[0]) & lane[1];
+#endif
/* Iota */
s[0] ^= rc[round];