Limit compiler hint to compilers that are known to benefit from it
Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
diff --git a/library/common.h b/library/common.h
index 2eb9170..937c802 100644
--- a/library/common.h
+++ b/library/common.h
@@ -199,30 +199,40 @@
uint8x16_t x = veorq_u8(v1, v2);
vst1q_u8(r + i, x);
}
+#if defined(__IAR_SYSTEMS_ICC__)
/* This if statement helps some compilers (e.g., IAR) optimise out the byte-by-byte tail case
* where n is a constant multiple of 16.
- * It makes no difference for others (e.g. recent gcc and clang) if n is a compile-time
- * constant, and very little difference if n is not a compile-time constant. */
- if (n % 16 != 0)
+ * For other compilers (e.g. recent gcc and clang) it makes no difference if n is a compile-time
+ * constant, and causes a very small performance regression otherwise. */
+ if (n % 16 == 0) {
+ return;
+ }
+#endif
#elif defined(MBEDTLS_ARCH_IS_X64) || defined(MBEDTLS_ARCH_IS_ARM64)
/* This codepath probably only makes sense on architectures with 64-bit registers */
for (; (i + 8) <= n; i += 8) {
uint64_t x = mbedtls_get_unaligned_uint64(a + i) ^ mbedtls_get_unaligned_uint64(b + i);
mbedtls_put_unaligned_uint64(r + i, x);
}
- if (n % 8 != 0)
+#if defined(__IAR_SYSTEMS_ICC__)
+ if (n % 8 == 0) {
+ return;
+ }
+#endif
#else
for (; (i + 4) <= n; i += 4) {
uint32_t x = mbedtls_get_unaligned_uint32(a + i) ^ mbedtls_get_unaligned_uint32(b + i);
mbedtls_put_unaligned_uint32(r + i, x);
}
- if (n % 4 != 0)
+#if defined(__IAR_SYSTEMS_ICC__)
+ if (n % 4 == 0) {
+ return;
+ }
#endif
#endif
- {
- for (; i < n; i++) {
- r[i] = a[i] ^ b[i];
- }
+#endif
+ for (; i < n; i++) {
+ r[i] = a[i] ^ b[i];
}
}
@@ -268,23 +278,29 @@
uint64_t x = mbedtls_get_unaligned_uint64(a + i) ^ mbedtls_get_unaligned_uint64(b + i);
mbedtls_put_unaligned_uint64(r + i, x);
}
+#if defined(__IAR_SYSTEMS_ICC__)
/* This if statement helps some compilers (e.g., IAR) optimise out the byte-by-byte tail case
* where n is a constant multiple of 16.
- * It makes no difference for others (e.g. recent gcc and clang) if n is a compile-time
- * constant, and very little difference if n is not a compile-time constant. */
- if (n % 8 != 0)
+ * For other compilers (e.g. recent gcc and clang) it makes no difference if n is a compile-time
+ * constant, and causes a very small performance regression otherwise. */
+ if (n % 8 == 0) {
+ return;
+ }
+#endif
#else
for (; (i + 4) <= n; i += 4) {
uint32_t x = mbedtls_get_unaligned_uint32(a + i) ^ mbedtls_get_unaligned_uint32(b + i);
mbedtls_put_unaligned_uint32(r + i, x);
}
- if (n % 4 != 0)
+#if defined(__IAR_SYSTEMS_ICC__)
+ if (n % 4 == 0) {
+ return;
+ }
#endif
#endif
- {
- for (; i < n; i++) {
- r[i] = a[i] ^ b[i];
- }
+#endif
+ for (; i < n; i++) {
+ r[i] = a[i] ^ b[i];
}
}