Improve mbedtls_xor for IAR

Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
diff --git a/library/common.h b/library/common.h
index e532777..5c73e8a 100644
--- a/library/common.h
+++ b/library/common.h
@@ -191,21 +191,30 @@
         uint8x16_t x = veorq_u8(v1, v2);
         vst1q_u8(r + i, x);
     }
+    // This if statement helps some compilers (e.g., IAR) optimise out the byte-by-byte tail loop
+    // when n is a compile-time constant multiple of 16.
+    // It makes no difference for others (e.g., recent gcc and clang) if n is a compile-time
+    // constant, and very little difference if n is not a compile-time constant.
+    if (n % 16 != 0)
 #elif defined(MBEDTLS_ARCH_IS_X64) || defined(MBEDTLS_ARCH_IS_ARM64)
     /* This codepath probably only makes sense on architectures with 64-bit registers */
     for (; (i + 8) <= n; i += 8) {
         uint64_t x = mbedtls_get_unaligned_uint64(a + i) ^ mbedtls_get_unaligned_uint64(b + i);
         mbedtls_put_unaligned_uint64(r + i, x);
     }
+    if (n % 8 != 0)
 #else
     for (; (i + 4) <= n; i += 4) {
         uint32_t x = mbedtls_get_unaligned_uint32(a + i) ^ mbedtls_get_unaligned_uint32(b + i);
         mbedtls_put_unaligned_uint32(r + i, x);
     }
+    if (n % 4 != 0)
 #endif
 #endif
-    for (; i < n; i++) {
-        r[i] = a[i] ^ b[i];
+    {
+        for (; i < n; i++) {
+            r[i] = a[i] ^ b[i];
+        }
     }
 }
 
@@ -236,15 +245,23 @@
         uint64_t x = mbedtls_get_unaligned_uint64(a + i) ^ mbedtls_get_unaligned_uint64(b + i);
         mbedtls_put_unaligned_uint64(r + i, x);
     }
+    // This if statement helps some compilers (e.g., IAR) optimise out the byte-by-byte tail loop
+    // when n is a compile-time constant multiple of 8.
+    // It makes no difference for others (e.g., recent gcc and clang) if n is a compile-time
+    // constant, and very little difference if n is not a compile-time constant.
+    if (n % 8 != 0)
 #else
     for (; (i + 4) <= n; i += 4) {
         uint32_t x = mbedtls_get_unaligned_uint32(a + i) ^ mbedtls_get_unaligned_uint32(b + i);
         mbedtls_put_unaligned_uint32(r + i, x);
     }
+    if (n % 4 != 0)
 #endif
 #endif
-    for (; i < n; i++) {
-        r[i] = a[i] ^ b[i];
+    {
+        for (; i < n; i++) {
+            r[i] = a[i] ^ b[i];
+        }
     }
 }