Fix XOR failure for large block sizes (NEON path ignored the loop offset)
Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
diff --git a/library/common.h b/library/common.h
index 9c09602..94b8c5d 100644
--- a/library/common.h
+++ b/library/common.h
@@ -131,10 +131,10 @@
#if defined(MBEDTLS_EFFICIENT_UNALIGNED_ACCESS)
#if defined(__aarch64__) && defined(__ARM_NEON)
for (; (i + 16) <= n; i += 16) {
- uint64x2_t v1 = vld1q_u64((uint64_t *) a);
- uint64x2_t v2 = vld1q_u64((uint64_t *) b);
+ uint64x2_t v1 = vld1q_u64((uint64_t *) (a + i));
+ uint64x2_t v2 = vld1q_u64((uint64_t *) (b + i));
uint64x2_t x = veorq_u64(v1, v2);
- vst1q_u64((uint64_t *) r, x);
+ vst1q_u64((uint64_t *) (r + i), x);
}
#elif defined(__amd64__) || defined(__x86_64__) || defined(__aarch64__)
/* This codepath probably only makes sense on architectures with 64-bit registers */