SPM: Restructure memory operation functions

SPM calls spm_memcpy()/spm_memset(), while SP calls the standard
memset(), memcpy(), memmove() and memcmp(). SPM has its own header file
for the spm_memcpy()/spm_memset() prototypes, while SP relies on the
toolchain header string.h for the prototypes.
As -fno-builtin is applied, our memcpy() replaces the function of the
same name in the toolchain library.

Change-Id: Iab240c96e06d55144daa0125b2d17574b648f9e1
Signed-off-by: Summer Qin <summer.qin@arm.com>
diff --git a/secure_fw/partitions/lib/sprt/CMakeLists.inc b/secure_fw/partitions/lib/sprt/CMakeLists.inc
index 3f2b798..e4f1bd7 100644
--- a/secure_fw/partitions/lib/sprt/CMakeLists.inc
+++ b/secure_fw/partitions/lib/sprt/CMakeLists.inc
@@ -25,9 +25,10 @@
 endif()
 
 set (LIBSPRT_C_SRC
-    "${LIBSPRT_DIR}/tfm_libsprt_c_memcpy.c"
-    "${LIBSPRT_DIR}/tfm_libsprt_c_memmove.c"
-    "${LIBSPRT_DIR}/tfm_libsprt_c_memcmp.c"
+    "${LIBSPRT_DIR}/crt_memcpy.c"
+    "${LIBSPRT_DIR}/crt_memmove.c"
+    "${LIBSPRT_DIR}/crt_memcmp.c"
+    "${LIBSPRT_DIR}/crt_memset.c"
     "${LIBSPRT_DIR}/service_api.c"
     "${TFM_ROOT_DIR}/interface/src/log/tfm_log_raw.c")
 
diff --git a/secure_fw/partitions/lib/sprt/crt_impl_private.h b/secure_fw/partitions/lib/sprt/crt_impl_private.h
new file mode 100644
index 0000000..b427f0c
--- /dev/null
+++ b/secure_fw/partitions/lib/sprt/crt_impl_private.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2020, Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ */
+
+#ifndef __CRT_IMPL_PRIVATE_H__
+#define __CRT_IMPL_PRIVATE_H__
+
+#include <stdint.h>
+#include <stddef.h>
+
+#define GET_MEM_ADDR_BIT0(x)        ((x) & 0x1)
+#define GET_MEM_ADDR_BIT1(x)        ((x) & 0x2)
+
+union tfm_mem_addr_t {
+    uintptr_t uint_addr;        /* Address as an integer value   */
+    uint8_t *p_byte;            /* Same address, 1-byte access   */
+    uint16_t *p_dbyte;          /* Same address, 2-byte access   */
+    uint32_t *p_qbyte;          /* Same address, 4-byte access   */
+};
+
+#endif /* __CRT_IMPL_PRIVATE_H__ */
diff --git a/secure_fw/partitions/lib/sprt/tfm_libsprt_c_memcmp.c b/secure_fw/partitions/lib/sprt/crt_memcmp.c
similarity index 76%
rename from secure_fw/partitions/lib/sprt/tfm_libsprt_c_memcmp.c
rename to secure_fw/partitions/lib/sprt/crt_memcmp.c
index 149e18c..0370ff0 100644
--- a/secure_fw/partitions/lib/sprt/tfm_libsprt_c_memcmp.c
+++ b/secure_fw/partitions/lib/sprt/crt_memcmp.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019, Arm Limited. All rights reserved.
+ * Copyright (c) 2019-2020, Arm Limited. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  *
@@ -8,7 +8,7 @@
 #include <stddef.h>
 #include <stdint.h>
 
-int tfm_sprt_c_memcmp(const void *s1, const void *s2, size_t n)
+int memcmp(const void *s1, const void *s2, size_t n)
 {
     int result = 0;
     const uint8_t *p1 = (const uint8_t *)s1;
diff --git a/secure_fw/partitions/lib/sprt/crt_memcpy.c b/secure_fw/partitions/lib/sprt/crt_memcpy.c
new file mode 100644
index 0000000..21a6301
--- /dev/null
+++ b/secure_fw/partitions/lib/sprt/crt_memcpy.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2019-2020, Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ */
+
+#include "crt_impl_private.h"
+
+void *memcpy(void *dest, const void *src, size_t n)
+{
+    union tfm_mem_addr_t p_dest, p_src;
+
+    p_dest.uint_addr = (uintptr_t)dest;
+    p_src.uint_addr = (uintptr_t)src;
+
+    /* Copy single bytes while either address is odd (bit 0 is set). */
+    while (n && (GET_MEM_ADDR_BIT0(p_dest.uint_addr) ||
+           GET_MEM_ADDR_BIT0(p_src.uint_addr))) {
+        *p_dest.p_byte++ = *p_src.p_byte++;
+        n--;
+    }
+
+    /*
+     * Copy 16-bit units while either (now even) address still has
+     * bit 1 set, i.e. is not yet a multiple of four.
+     */
+    while (n >= sizeof(uint16_t) && (GET_MEM_ADDR_BIT1(p_dest.uint_addr) ||
+           GET_MEM_ADDR_BIT1(p_src.uint_addr))) {
+        *(p_dest.p_dbyte)++ = *(p_src.p_dbyte)++;
+        n -= sizeof(uint16_t);
+    }
+
+    /* Copy 32-bit units; both addresses are 4-byte aligned here. */
+    while (n >= sizeof(uint32_t)) {
+        *(p_dest.p_qbyte)++ = *(p_src.p_qbyte)++;
+        n -= sizeof(uint32_t);
+    }
+
+    /* Copy the remaining (fewer than four) bytes one at a time. */
+    while (n--) {
+        *p_dest.p_byte++ = *p_src.p_byte++;
+    }
+
+    return dest;
+}
diff --git a/secure_fw/partitions/lib/sprt/crt_memmove.c b/secure_fw/partitions/lib/sprt/crt_memmove.c
new file mode 100644
index 0000000..b24a39d
--- /dev/null
+++ b/secure_fw/partitions/lib/sprt/crt_memmove.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2019-2020, Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ */
+
+#include <string.h>
+#include "crt_impl_private.h"
+
+static void *memcpy_r(void *dest, const void *src, size_t n)
+{
+    union tfm_mem_addr_t p_dest, p_src;
+
+    p_dest.uint_addr = (uintptr_t)dest + n;
+    p_src.uint_addr = (uintptr_t)src + n;
+
+    /* Copy bytes, from the end, while either end address is odd. */
+    while (n && (GET_MEM_ADDR_BIT0(p_dest.uint_addr) ||
+           GET_MEM_ADDR_BIT0(p_src.uint_addr))) {
+        *(--p_dest.p_byte) = *(--p_src.p_byte);
+        n--;
+    }
+
+    /* Copy 16-bit units, still moving down, while either address has
+     * bit 1 set, i.e. is not yet a multiple of four.
+     */
+    while (n >= sizeof(uint16_t) && (GET_MEM_ADDR_BIT1(p_dest.uint_addr) ||
+           GET_MEM_ADDR_BIT1(p_src.uint_addr))) {
+        *(--p_dest.p_dbyte) = *(--p_src.p_dbyte);
+        n -= sizeof(uint16_t);
+    }
+
+    /* Copy 32-bit units; both addresses are 4-byte aligned here. */
+    while (n >= sizeof(uint32_t)) {
+        *(--p_dest.p_qbyte) = *(--p_src.p_qbyte);
+        n -= sizeof(uint32_t);
+    }
+
+    /* Copy the remaining (fewer than four) bytes one at a time. */
+    while (n--) {
+        *(--p_dest.p_byte) = *(--p_src.p_byte);
+    }
+
+    return dest;
+}
+
+/*
+ * Copy direction is chosen from the pointer order:
+ * 1) src below dest: copy backwards, starting from the end.
+ * 2) otherwise: copy forwards with memcpy().
+ */
+void *memmove(void *dest, const void *src, size_t n)
+{
+    /*
+     * FixMe: Add an "assert(dest != NULL && src != NULL)" here
+     * after "assert()" for sprtl is implemented.
+     */
+    if (src >= dest) {
+        memcpy(dest, src, n);
+    } else {
+        memcpy_r(dest, src, n);
+    }
+
+    return dest;
+}
diff --git a/secure_fw/partitions/lib/sprt/crt_memset.c b/secure_fw/partitions/lib/sprt/crt_memset.c
new file mode 100644
index 0000000..92dd5a1
--- /dev/null
+++ b/secure_fw/partitions/lib/sprt/crt_memset.c
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2020, Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ */
+
+#include "crt_impl_private.h"
+
+void *memset(void *s, int c, size_t n)
+{
+    union tfm_mem_addr_t p_mem;
+    uint32_t quad_pattern;
+
+    p_mem.p_byte = (uint8_t *)s;
+    /* Replicate the byte by unsigned multiply; (int) << 24 could overflow. */
+    quad_pattern = (uint32_t)(uint8_t)c * UINT32_C(0x01010101);
+    /* Set single bytes until the address is 4-byte aligned. */
+    while (n && (p_mem.uint_addr & (sizeof(uint32_t) - 1))) {
+        *p_mem.p_byte++ = (uint8_t)c;
+        n--;
+    }
+    /* Store the replicated pattern one 32-bit unit at a time. */
+    while (n >= sizeof(uint32_t)) {
+        *p_mem.p_qbyte++ = quad_pattern;
+        n -= sizeof(uint32_t);
+    }
+    /* Set the remaining (fewer than four) bytes one at a time. */
+    while (n--) {
+        *p_mem.p_byte++ = (uint8_t)c;
+    }
+
+    return s;
+}
diff --git a/secure_fw/partitions/lib/sprt/tfm_libsprt_c.h b/secure_fw/partitions/lib/sprt/tfm_libsprt_c.h
deleted file mode 100644
index f303dc4..0000000
--- a/secure_fw/partitions/lib/sprt/tfm_libsprt_c.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2019, Arm Limited. All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- *
- */
-
-#ifndef __TFM_LIBSPRT_C_H__
-#define __TFM_LIBSPRT_C_H__
-
-#include <stddef.h>
-
-/**
- * \brief   This function moves 'n' bytes from 'src' to 'dest'.
- *
- * \param[out]  dest        Destination address
- * \param[in]   src         Source address
- * \param[in]   n           Number of bytes to be moved
- *
- * \retval      dest        Destination address
- * \note                    Memory overlap has been taken into consideration
- *                          and processed properly in the function.
- */
-void *tfm_sprt_c_memmove(void *dest, const void *src, size_t n);
-
-/**
- * \brief   This function copies 'n' bytes from 'src' to 'dest'.
- *
- * \param[out]  dest        Destination address
- * \param[in]   src         Source address
- * \param[in]   n           Number of bytes to be copied
- *
- * \retval      dest        Destination address
- * \note                    It has the same effect as tfm_sprt_c_memmove().
- */
-void *tfm_sprt_c_memcpy(void *dest, const void *src, size_t n);
-
-/**
- * \brief   Compare the first 'n' bytes of the memory areas 's1' and 's2'.
- *
- * \param[in]   s1          The address of the first memory area
- * \param[in]   s2          The address of the second memory area
- * \param[in]   n           The size(Byte) to compare
- *
- * \retval > 0              The first n bytes of s1 great than the first n
- *                          bytes of s2
- * \retval < 0              The first n bytes of s1 less than the first n
- *                          bytes of s2
- * \retval = 0              The first n bytes of s1 equal to the first n
- *                          bytes of s2
- */
-int tfm_sprt_c_memcmp(const void *s1, const void *s2, size_t n);
-
-#endif /* __TFM_LIBSPRT_C_H__ */
diff --git a/secure_fw/partitions/lib/sprt/tfm_libsprt_c_memcpy.c b/secure_fw/partitions/lib/sprt/tfm_libsprt_c_memcpy.c
deleted file mode 100644
index 919d051..0000000
--- a/secure_fw/partitions/lib/sprt/tfm_libsprt_c_memcpy.c
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * Copyright (c) 2019, Arm Limited. All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- *
- */
-
-#include <stddef.h>
-#include "tfm_libsprt_c.h"
-
-void *tfm_sprt_c_memcpy(void *dest, const void *src, size_t n)
-{
-    return tfm_sprt_c_memmove(dest, src, n);
-}
diff --git a/secure_fw/partitions/lib/sprt/tfm_libsprt_c_memmove.c b/secure_fw/partitions/lib/sprt/tfm_libsprt_c_memmove.c
deleted file mode 100644
index e46747e..0000000
--- a/secure_fw/partitions/lib/sprt/tfm_libsprt_c_memmove.c
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (c) 2019, Arm Limited. All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- *
- */
-
-#include <stdint.h>
-#include <stddef.h>
-#include "tfm_libsprt_c.h"
-
-#define GET_MEM_ADDR_BIT0(x)        ((x) & 0x1)
-#define GET_MEM_ADDR_BIT1(x)        ((x) & 0x2)
-
-union tfm_mem_addr_t {
-    uintptr_t uint_addr;
-    uint8_t *p_byte;
-    uint16_t *p_dbyte;
-    uint32_t *p_qbyte;
-};
-
-/*
- * Consider 3 conditions.
- * 1) quad-byte copy (qbyte)
- * 2) double-byte copy (dbyte)
- * 3) byte copy
- *
- * And for overlapped memory area.
- * 1) overlapped: use backward memory move.
- * 2) non-overlapped: use forward memory move.
- */
-
-static void *tfm_memmove_forward(void *dest, const void *src, size_t n)
-{
-    union tfm_mem_addr_t p_dest, p_src;
-
-    p_dest.uint_addr = (uintptr_t)dest;
-    p_src.uint_addr = (uintptr_t)src;
-
-    /* byte copy for unaligned address. check the last bit of address. */
-    while (n && (GET_MEM_ADDR_BIT0(p_dest.uint_addr) ||
-                 GET_MEM_ADDR_BIT0(p_src.uint_addr))) {
-        *p_dest.p_byte++ = *p_src.p_byte++;
-        n--;
-    }
-
-    /* dbyte-copy for aligned address. check the 2nd last bit of address. */
-    while (n >= sizeof(uint16_t) && (GET_MEM_ADDR_BIT1(p_dest.uint_addr) ||
-                                     GET_MEM_ADDR_BIT1(p_src.uint_addr))) {
-        *(p_dest.p_dbyte)++ = *(p_src.p_dbyte)++;
-        n -= sizeof(uint16_t);
-    }
-
-    /* qbyte-copy for aligned address. */
-    while (n >= sizeof(uint32_t)) {
-        *(p_dest.p_qbyte)++ = *(p_src.p_qbyte)++;
-        n -= sizeof(uint32_t);
-    }
-
-    /* byte copy for the remaining bytes. */
-    while (n--) {
-        *p_dest.p_byte++ = *p_src.p_byte++;
-    }
-
-    return dest;
-}
-
-static void *tfm_memmove_backward(void *dest, const void *src, size_t n)
-{
-    union tfm_mem_addr_t p_dest, p_src;
-
-    p_dest.uint_addr = (uintptr_t)dest + n;
-    p_src.uint_addr = (uintptr_t)src + n;
-
-    /* byte copy for unaligned address. check the last bit of address. */
-    while (n && (GET_MEM_ADDR_BIT0(p_dest.uint_addr) ||
-                 GET_MEM_ADDR_BIT0(p_src.uint_addr))) {
-        *(--p_dest.p_byte) = *(--p_src.p_byte);
-        n--;
-    }
-
-    /* dbyte-copy for aligned address. check the 2nd last bit of address. */
-    while (n >= sizeof(uint16_t) && (GET_MEM_ADDR_BIT1(p_dest.uint_addr) ||
-                                     GET_MEM_ADDR_BIT1(p_src.uint_addr))) {
-        *(--p_dest.p_dbyte) = *(--p_src.p_dbyte);
-        n -= sizeof(uint16_t);
-    }
-
-    /* qbyte-copy for aligned address. */
-    while (n >= sizeof(uint32_t)) {
-        *(--p_dest.p_qbyte) = *(--p_src.p_qbyte);
-        n -= sizeof(uint32_t);
-    }
-
-    /* byte copy for the remaining bytes. */
-    while (n--) {
-        *(--p_dest.p_byte) = *(--p_src.p_byte);
-    }
-
-    return dest;
-}
-
-void *tfm_sprt_c_memmove(void *dest, const void *src, size_t n)
-{
-    /*
-     * FixMe: Add a "assert (dest == NULL || src == NULL)" here
-     * after "assert()" for sprtl is implemented.
-     */
-    if (src < dest) {
-        tfm_memmove_backward(dest, src, n);
-    } else {
-        tfm_memmove_forward(dest, src, n);
-    }
-
-    return dest;
-}
diff --git a/secure_fw/spm/cmsis_psa/arch/tfm_arch.c b/secure_fw/spm/cmsis_psa/arch/tfm_arch.c
index 99e6cbf..710038c 100644
--- a/secure_fw/spm/cmsis_psa/arch/tfm_arch.c
+++ b/secure_fw/spm/cmsis_psa/arch/tfm_arch.c
@@ -76,10 +76,10 @@
     p_stat_ctx--;
 
     /* State context is considerate at thread start.*/
-    tfm_core_util_memset(p_stat_ctx, 0, sizeof(*p_stat_ctx));
+    spm_memset(p_stat_ctx, 0, sizeof(*p_stat_ctx));
     tfm_arch_init_state_ctx(p_stat_ctx, param, pfn);
 
     /* Initialize architecture context */
-    tfm_core_util_memset(p_actx, 0, sizeof(*p_actx));
+    spm_memset(p_actx, 0, sizeof(*p_actx));
     tfm_arch_init_actx(p_actx, (uint32_t)p_stat_ctx, (uint32_t)stk_btm);
 }
diff --git a/secure_fw/spm/cmsis_psa/arch/tfm_arch_v8m_main.c b/secure_fw/spm/cmsis_psa/arch/tfm_arch_v8m_main.c
index 2ae8337..718dd91 100644
--- a/secure_fw/spm/cmsis_psa/arch/tfm_arch_v8m_main.c
+++ b/secure_fw/spm/cmsis_psa/arch/tfm_arch_v8m_main.c
@@ -116,9 +116,8 @@
          sp <=  (S_DATA_LIMIT - sizeof(tfm_fault_context)) + 1) ||
         (sp >= NS_DATA_START &&
          sp <= (NS_DATA_LIMIT - sizeof(tfm_fault_context)) + 1)) {
-        tfm_core_util_memcpy(&tfm_fault_context,
-                             (const void *)sp,
-                             sizeof(tfm_fault_context));
+        spm_memcpy(&tfm_fault_context, (const void *)sp,
+                   sizeof(tfm_fault_context));
     }
 
     ERROR_MSG("Oops... Secure fault!!! You're not going anywhere!");
diff --git a/secure_fw/spm/cmsis_psa/spm_ipc.c b/secure_fw/spm/cmsis_psa/spm_ipc.c
index dcb2980..6537c88 100644
--- a/secure_fw/spm/cmsis_psa/spm_ipc.c
+++ b/secure_fw/spm/cmsis_psa/spm_ipc.c
@@ -562,7 +562,7 @@
     TFM_CORE_ASSERT(in_len + out_len <= PSA_MAX_IOVEC);
 
     /* Clear message buffer before using it */
-    tfm_core_util_memset(msg, 0, sizeof(struct tfm_msg_body_t));
+    spm_memset(msg, 0, sizeof(struct tfm_msg_body_t));
 
     tfm_event_init(&msg->ack_evnt);
     msg->magic = TFM_MSG_MAGIC;
@@ -892,9 +892,7 @@
         tfm_core_panic();
     }
 
-    tfm_core_util_memcpy(&ctrl_param,
-                         (const void *)args[1],
-                         sizeof(ctrl_param));
+    spm_memcpy(&ctrl_param, (const void *)args[1], sizeof(ctrl_param));
 
     type = ctrl_param.type;
     in_num = ctrl_param.in_len;
@@ -1053,7 +1051,7 @@
                            struct tfm_conn_handle_t,
                            internal_msg))->status = TFM_HANDLE_STATUS_ACTIVE;
 
-    tfm_core_util_memcpy(msg, &tmp_msg->msg, sizeof(psa_msg_t));
+    spm_memcpy(msg, &tmp_msg->msg, sizeof(psa_msg_t));
 
     /*
      * There may be multiple messages for this RoT Service signal, do not clear
@@ -1150,7 +1148,7 @@
     bytes = num_bytes > msg->msg.in_size[invec_idx] ?
                         msg->msg.in_size[invec_idx] : num_bytes;
 
-    tfm_core_util_memcpy(buffer, msg->invec[invec_idx].base, bytes);
+    spm_memcpy(buffer, msg->invec[invec_idx].base, bytes);
 
     /* There maybe some remaining data */
     msg->invec[invec_idx].base = (char *)msg->invec[invec_idx].base + bytes;
@@ -1274,8 +1272,8 @@
         tfm_core_panic();
     }
 
-    tfm_core_util_memcpy((char *)msg->outvec[outvec_idx].base +
-                         msg->outvec[outvec_idx].len, buffer, num_bytes);
+    spm_memcpy((char *)msg->outvec[outvec_idx].base +
+               msg->outvec[outvec_idx].len, buffer, num_bytes);
 
     /* Update the write number */
     msg->outvec[outvec_idx].len += num_bytes;
diff --git a/secure_fw/spm/cmsis_psa/spm_psa_client_call.c b/secure_fw/spm/cmsis_psa/spm_psa_client_call.c
index 0f9603b..a45232a 100644
--- a/secure_fw/spm/cmsis_psa/spm_psa_client_call.c
+++ b/secure_fw/spm/cmsis_psa/spm_psa_client_call.c
@@ -181,12 +181,12 @@
         tfm_core_panic();
     }
 
-    tfm_core_util_memset(invecs, 0, sizeof(invecs));
-    tfm_core_util_memset(outvecs, 0, sizeof(outvecs));
+    spm_memset(invecs, 0, sizeof(invecs));
+    spm_memset(outvecs, 0, sizeof(outvecs));
 
     /* Copy the address out to avoid TOCTOU attacks. */
-    tfm_core_util_memcpy(invecs, inptr, in_num * sizeof(psa_invec));
-    tfm_core_util_memcpy(outvecs, outptr, out_num * sizeof(psa_outvec));
+    spm_memcpy(invecs, inptr, in_num * sizeof(psa_invec));
+    spm_memcpy(outvecs, outptr, out_num * sizeof(psa_outvec));
 
     /*
      * For client input vector, it is a fatal error if the provided payload
diff --git a/secure_fw/spm/cmsis_psa/tfm_pools.c b/secure_fw/spm/cmsis_psa/tfm_pools.c
index 2fa3d40..621d4cc 100644
--- a/secure_fw/spm/cmsis_psa/tfm_pools.c
+++ b/secure_fw/spm/cmsis_psa/tfm_pools.c
@@ -36,7 +36,7 @@
     }
 
     /* Buffer should be BSS cleared but clear it again */
-    tfm_core_util_memset(pool, 0, poolsz);
+    spm_memset(pool, 0, poolsz);
 
     /* Chain pool chunks */
     tfm_list_init(&pool->chunks_list);
diff --git a/secure_fw/spm/cmsis_psa/tfm_spe_mailbox.c b/secure_fw/spm/cmsis_psa/tfm_spe_mailbox.c
index c0affe1..3dc3f26 100644
--- a/secure_fw/spm/cmsis_psa/tfm_spe_mailbox.c
+++ b/secure_fw/spm/cmsis_psa/tfm_spe_mailbox.c
@@ -133,7 +133,7 @@
         return;
     }
 
-    tfm_core_util_memset(&spe_mailbox_queue.queue[idx], 0,
+    spm_memset(&spe_mailbox_queue.queue[idx], 0,
                          sizeof(spe_mailbox_queue.queue[idx]));
     set_spe_queue_empty_status(idx);
 }
@@ -158,8 +158,8 @@
 
     /* Get reply address */
     reply_ptr = get_nspe_reply_addr(idx);
-    tfm_core_util_memcpy(&reply_ptr->return_val, &ret_result,
-                         sizeof(reply_ptr->return_val));
+    spm_memcpy(&reply_ptr->return_val, &ret_result,
+               sizeof(reply_ptr->return_val));
 
     mailbox_clean_queue_slot(idx);
 
@@ -220,8 +220,7 @@
         spe_mailbox_queue.queue[idx].ns_slot_idx = idx;
 
         msg_ptr = &spe_mailbox_queue.queue[idx].msg;
-        tfm_core_util_memcpy(msg_ptr, &ns_queue->queue[idx].msg,
-                             sizeof(*msg_ptr));
+        spm_memcpy(msg_ptr, &ns_queue->queue[idx].msg, sizeof(*msg_ptr));
 
         if (check_mailbox_msg(msg_ptr) != MAILBOX_SUCCESS) {
             mailbox_clean_queue_slot(idx);
@@ -377,7 +376,7 @@
 {
     int32_t ret;
 
-    tfm_core_util_memset(&spe_mailbox_queue, 0, sizeof(spe_mailbox_queue));
+    spm_memset(&spe_mailbox_queue, 0, sizeof(spe_mailbox_queue));
 
     spe_mailbox_queue.empty_slots =
             (mailbox_queue_status_t)((1UL << (NUM_MAILBOX_QUEUE_SLOT - 1)) - 1);
diff --git a/secure_fw/spm/cmsis_psa/tfm_thread.c b/secure_fw/spm/cmsis_psa/tfm_thread.c
index 8e95a1f..3b815cb 100644
--- a/secure_fw/spm/cmsis_psa/tfm_thread.c
+++ b/secure_fw/spm/cmsis_psa/tfm_thread.c
@@ -161,8 +161,8 @@
      * First, update latest context into the current thread context.
      * Then, update background context with next thread's context.
      */
-    tfm_core_util_memcpy(&prev->arch_ctx, p_actx, sizeof(*p_actx));
-    tfm_core_util_memcpy(p_actx, &next->arch_ctx, sizeof(next->arch_ctx));
+    spm_memcpy(&prev->arch_ctx, p_actx, sizeof(*p_actx));
+    spm_memcpy(p_actx, &next->arch_ctx, sizeof(next->arch_ctx));
 
     /* Update current thread indicator */
     CURR_THRD = next;
diff --git a/secure_fw/spm/common/init/tfm_boot_data.c b/secure_fw/spm/common/init/tfm_boot_data.c
index af52db3..660382d 100644
--- a/secure_fw/spm/common/init/tfm_boot_data.c
+++ b/secure_fw/spm/common/init/tfm_boot_data.c
@@ -220,9 +220,8 @@
      */
     for (; offset < tlv_end; offset += next_tlv_offset) {
         /* Create local copy to avoid unaligned access */
-        (void)tfm_core_util_memcpy(&tlv_entry,
-                                   (const void *)offset,
-                                   SHARED_DATA_ENTRY_HEADER_SIZE);
+        (void)spm_memcpy(&tlv_entry, (const void *)offset,
+                         SHARED_DATA_ENTRY_HEADER_SIZE);
 #ifdef LEGACY_TFM_TLV_HEADER
         next_tlv_offset = tlv_entry.tlv_len;
 #else
@@ -235,8 +234,7 @@
                 return;
             }
 
-            (void)tfm_core_util_memcpy(ptr, (const void *)offset,
-                                       next_tlv_offset);
+            (void)spm_memcpy(ptr, (const void *)offset, next_tlv_offset);
             ptr += next_tlv_offset;
             boot_data->header.tlv_tot_len += next_tlv_offset;
         }
diff --git a/secure_fw/spm/common/runtime/tfm_core_utils.c b/secure_fw/spm/common/runtime/tfm_core_utils.c
index 341e75c..070b32e 100644
--- a/secure_fw/spm/common/runtime/tfm_core_utils.c
+++ b/secure_fw/spm/common/runtime/tfm_core_utils.c
@@ -7,45 +7,48 @@
 
 #include <stdint.h>
 #include "utilities.h"
-#include "tfm_core_utils.h"
 
-union tfm_core_addr_t {
-    uintptr_t uint_addr;
-    uint8_t *p_byte;
-    uint32_t *p_word;
+#define GET_MEM_ADDR_BIT0(x)        ((x) & 0x1)
+#define GET_MEM_ADDR_BIT1(x)        ((x) & 0x2)
+
+union tfm_mem_addr_t {
+    uintptr_t uint_addr;        /* Address as an integer value   */
+    uint8_t *p_byte;            /* Same address, 1-byte access   */
+    uint16_t *p_dbyte;          /* Same address, 2-byte access   */
+    uint32_t *p_qbyte;          /* Same address, 4-byte access   */
 };
 
-void *tfm_core_util_memcpy(void *dest, const void *src, size_t n)
+void *spm_memcpy(void *dest, const void *src, size_t n)
 {
-    union tfm_core_addr_t p_dest;
-    union tfm_core_addr_t p_src;
+    union tfm_mem_addr_t p_dest, p_src;
 
-    TFM_CORE_ASSERT(dest != src);
+    p_dest.uint_addr = (uintptr_t)dest;
+    p_src.uint_addr = (uintptr_t)src;
 
-    p_dest.p_byte = (uint8_t *)dest;
-    p_src.p_byte = (uint8_t *)src;
-
-    /*
-     * Check src and dest address value to see if word-copy is applicable.
-     * If applicable, use byte-copy for the first several unaligned bytes,
-     * and then, word-copy for aligned memory.
-     */
-    if (!((p_dest.uint_addr ^ p_src.uint_addr) & (sizeof(uint32_t) - 1))) {
-        while (n && (p_dest.uint_addr & (sizeof(uint32_t) - 1))) {
-            *p_dest.p_byte++ = *p_src.p_byte++;
-            n--;
-        }
-
-        while (n >= sizeof(uint32_t)) {
-            *p_dest.p_word++ = *p_src.p_word++;
-            n -= sizeof(uint32_t);
-        }
+    /* Copy single bytes while either address is odd (bit 0 is set). */
+    while (n && (GET_MEM_ADDR_BIT0(p_dest.uint_addr) ||
+           GET_MEM_ADDR_BIT0(p_src.uint_addr))) {
+        *p_dest.p_byte++ = *p_src.p_byte++;
+        n--;
     }
 
     /*
-     * Word-copy is not applicable, use byte-copy for the remaining
-     * unaligned memory.
+     * Copy 16-bit units while either (now even) address still has
+     * bit 1 set, i.e. is not yet a multiple of four.
      */
+    while (n >= sizeof(uint16_t) && (GET_MEM_ADDR_BIT1(p_dest.uint_addr) ||
+           GET_MEM_ADDR_BIT1(p_src.uint_addr))) {
+        *(p_dest.p_dbyte)++ = *(p_src.p_dbyte)++;
+        n -= sizeof(uint16_t);
+    }
+
+    /* Copy 32-bit units; both addresses are 4-byte aligned here. */
+    while (n >= sizeof(uint32_t)) {
+        *(p_dest.p_qbyte)++ = *(p_src.p_qbyte)++;
+        n -= sizeof(uint32_t);
+    }
+
+    /* Copy the remaining (fewer than four) bytes one at a time. */
     while (n--) {
         *p_dest.p_byte++ = *p_src.p_byte++;
     }
 
-void *tfm_core_util_memset(void *s, int c, size_t n)
+void *spm_memset(void *s, int c, size_t n)
 {
-    union tfm_core_addr_t p_mem;
+    union tfm_mem_addr_t p_mem;
     uint32_t quad_pattern;
 
     p_mem.p_byte = (uint8_t *)s;
@@ -68,7 +71,7 @@
     }
 
     while (n >= sizeof(uint32_t)) {
-        *p_mem.p_word++ = quad_pattern;
+        *p_mem.p_qbyte++ = quad_pattern;
         n -= sizeof(uint32_t);
     }
 
diff --git a/secure_fw/spm/include/tfm_core_utils.h b/secure_fw/spm/include/tfm_core_utils.h
index 680deec..e916842 100644
--- a/secure_fw/spm/include/tfm_core_utils.h
+++ b/secure_fw/spm/include/tfm_core_utils.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019, Arm Limited. All rights reserved.
+ * Copyright (c) 2019-2020, Arm Limited. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  *
@@ -22,7 +22,7 @@
  * \note                    The function is used for copying same-sized object
  *                          only.
  */
-void *tfm_core_util_memcpy(void *dest, const void *src, size_t n);
+void *spm_memcpy(void *dest, const void *src, size_t n);
 
 /**
  * \brief   Memory set function for TF-M core
@@ -33,6 +33,6 @@
  *
  * \retval                  Destination address of memory
  */
-void *tfm_core_util_memset(void *s, int c, size_t n);
+void *spm_memset(void *s, int c, size_t n);
 
 #endif /* __TFM_CORE_UTILS_H__ */