SPM: Share 'memcpy' and 'memset' with partitions

TF-M applies isolation rule I3, which allows code-sharing between
cross-domain components. This makes separating fundamental APIs
such as memset/memcpy less significant. This patch shares these
two fundamental APIs memset/memcpy between SPM and partitions.

A slight optimization is made to the implementation: the
unnecessary half-word copying step is removed.

Change-Id: I7d4c931aefd94d56468806ab768048da156e4656
Signed-off-by: Ken Liu <Ken.Liu@arm.com>
diff --git a/secure_fw/shared/crt_memcpy.c b/secure_fw/shared/crt_memcpy.c
new file mode 100644
index 0000000..0675c2a
--- /dev/null
+++ b/secure_fw/shared/crt_memcpy.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2019-2022, Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ */
+
+#include "crt_impl_private.h"
+
+void *memcpy(void *dest, const void *src, size_t n)
+{
+    union composite_addr_t p_dst, p_src;
+
+    p_dst.uint_addr = (uintptr_t)dest;
+    p_src.uint_addr = (uintptr_t)src;
+
+    /* Byte copy while either address is word-unaligned and bytes remain. */
+    while (n && (ADDR_WORD_UNALIGNED(p_dst.uint_addr) ||
+                 ADDR_WORD_UNALIGNED(p_src.uint_addr))) {
+        *p_dst.p_byte++ = *p_src.p_byte++;
+        n--;
+    }
+
+    /* Copy one uint32_t per iteration while a full word still remains. */
+    while (n >= sizeof(uint32_t)) {
+        *(p_dst.p_word)++ = *(p_src.p_word)++;
+        n -= sizeof(uint32_t);
+    }
+
+    /* Copy the tail, which is shorter than one word, byte by byte. */
+    while (n--) {
+        *p_dst.p_byte++ = *p_src.p_byte++;
+    }
+
+    return dest;
+}