Fix unaligned access on old compilers
Add an alternative implementation of unaligned access that is efficient for IAR,
old versions of GCC, and recent GCC when efficient unaligned access is not enabled.
Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
diff --git a/library/alignment.h b/library/alignment.h
index 219f4f0..e7318c2 100644
--- a/library/alignment.h
+++ b/library/alignment.h
@@ -45,6 +45,46 @@
#define MBEDTLS_EFFICIENT_UNALIGNED_ACCESS
#endif
+#if defined(__IAR_SYSTEMS_ICC__) && \
+ (defined(MBEDTLS_ARCH_IS_ARM64) || defined(MBEDTLS_ARCH_IS_ARM32) \
+ || defined(__ICCRX__) || defined(__ICCRL78__) || defined(__ICCRISCV__))
+#pragma language=save
+#pragma language=extended
+#define MBEDTLS_POP_IAR_LANGUAGE_PRAGMA /* marks that language=save was issued here */
+/* IAR recommend this technique for accessing unaligned data in
+ * https://www.iar.com/knowledge/support/technical-notes/compiler/accessing-unaligned-data
+ * This results in a single load / store instruction (if unaligned access is supported).
+ * According to that document, this is only supported on certain architectures.
+ */
+ #define UINT_UNALIGNED /* the unaligned integer typedefs below are available */
+typedef uint16_t __packed mbedtls_uint16_unaligned_t;
+typedef uint32_t __packed mbedtls_uint32_unaligned_t;
+typedef uint64_t __packed mbedtls_uint64_unaligned_t;
+#elif defined(MBEDTLS_COMPILER_IS_GCC) && (MBEDTLS_GCC_VERSION >= 40504) && \
+ ((MBEDTLS_GCC_VERSION < 90300) || (!defined(MBEDTLS_EFFICIENT_UNALIGNED_ACCESS)))
+/*
+ * Old versions of gcc, depending on how the target is specified, may generate a branch to memcpy
+ * for calls like `memcpy(dest, src, 4)` rather than generating some LDR or LDRB instructions
+ * (similar for stores).
+ * Recent versions where unaligned access is not enabled also do this.
+ *
+ * For performance (and code size, in some cases), we want to avoid the branch and just generate
+ * some inline load/store instructions since the access is small and constant-size.
+ *
+ * The manual states:
+ * "The aligned attribute specifies a minimum alignment for the variable or structure field,
+ * measured in bytes."
+ * https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html
+ *
+ * Tested with several versions of GCC from 4.5.0 up to 9.3.0; older versions are untested and
+ * not enabled. NOTE(review): the guard above compares against 40504, not 4.5.0 - confirm.
+ */
+ #define UINT_UNALIGNED /* the unaligned integer typedefs below are available */
+typedef uint16_t __attribute__((__aligned__(1))) mbedtls_uint16_unaligned_t;
+typedef uint32_t __attribute__((__aligned__(1))) mbedtls_uint32_unaligned_t;
+typedef uint64_t __attribute__((__aligned__(1))) mbedtls_uint64_unaligned_t;
+ #endif /* compiler-specific unaligned integer types */
+
/**
* Read the unsigned 16 bits integer from the given address, which need not
* be aligned.
@@ -55,7 +95,12 @@
inline uint16_t mbedtls_get_unaligned_uint16(const void *p)
{
uint16_t r;
+#if defined(UINT_UNALIGNED) /* load directly through the compiler-specific unaligned type */
+ const mbedtls_uint16_unaligned_t *p16 = (const mbedtls_uint16_unaligned_t *) p;
+ r = *p16;
+#else /* no unaligned type available: copy the bytes with memcpy */
memcpy(&r, p, sizeof(r));
+#endif /* UINT_UNALIGNED */
return r;
}
@@ -68,7 +113,12 @@
*/
inline void mbedtls_put_unaligned_uint16(void *p, uint16_t x)
{
+#if defined(UINT_UNALIGNED) /* store directly through the compiler-specific unaligned type */
+ mbedtls_uint16_unaligned_t *p16 = (mbedtls_uint16_unaligned_t *) p;
+ *p16 = x;
+#else /* no unaligned type available: copy the bytes with memcpy */
memcpy(p, &x, sizeof(x));
+#endif /* UINT_UNALIGNED */
}
/**
@@ -81,7 +131,12 @@
inline uint32_t mbedtls_get_unaligned_uint32(const void *p)
{
uint32_t r;
+#if defined(UINT_UNALIGNED) /* load directly through the compiler-specific unaligned type */
+ const mbedtls_uint32_unaligned_t *p32 = (const mbedtls_uint32_unaligned_t *) p;
+ r = *p32;
+#else /* no unaligned type available: copy the bytes with memcpy */
memcpy(&r, p, sizeof(r));
+#endif /* UINT_UNALIGNED */
return r;
}
@@ -94,7 +149,12 @@
*/
inline void mbedtls_put_unaligned_uint32(void *p, uint32_t x)
{
+#if defined(UINT_UNALIGNED) /* store directly through the compiler-specific unaligned type */
+ mbedtls_uint32_unaligned_t *p32 = (mbedtls_uint32_unaligned_t *) p;
+ *p32 = x;
+#else /* no unaligned type available: copy the bytes with memcpy */
memcpy(p, &x, sizeof(x));
+#endif /* UINT_UNALIGNED */
}
/**
@@ -107,7 +167,12 @@
inline uint64_t mbedtls_get_unaligned_uint64(const void *p)
{
uint64_t r;
+#if defined(UINT_UNALIGNED) /* load directly through the compiler-specific unaligned type */
+ const mbedtls_uint64_unaligned_t *p64 = (const mbedtls_uint64_unaligned_t *) p;
+ r = *p64;
+#else /* no unaligned type available: copy the bytes with memcpy */
memcpy(&r, p, sizeof(r));
+#endif /* UINT_UNALIGNED */
return r;
}
@@ -120,9 +185,18 @@
*/
inline void mbedtls_put_unaligned_uint64(void *p, uint64_t x)
{
+#if defined(UINT_UNALIGNED) /* store directly through the compiler-specific unaligned type */
+ mbedtls_uint64_unaligned_t *p64 = (mbedtls_uint64_unaligned_t *) p;
+ *p64 = x;
+#else /* no unaligned type available: copy the bytes with memcpy */
memcpy(p, &x, sizeof(x));
+#endif /* UINT_UNALIGNED */
}
+#if defined(MBEDTLS_POP_IAR_LANGUAGE_PRAGMA) /* defined only after the IAR language=save above */
+#pragma language=restore
+#endif /* MBEDTLS_POP_IAR_LANGUAGE_PRAGMA */
+
/** Byte Reading Macros
*
* Given a multi-byte integer \p x, MBEDTLS_BYTE_n retrieves the n-th