SPM: PSA API working under Thread mode when level 1

This patch redirects PSA API into a customized 'thread call' ABI to
make it execute in thread mode, when:

- Runtime backend IPC and Isolation Level 1.

This 'thread call' ABI happens in 'Thread mode', and:

- Switch to the SPM stack after a PSA API is called. The working stack
  re-uses the NS Agent stack to save runtime memory, or a dedicated
  stack is allocated if no TrustZone NS Agent is available. There is
  no need to make SPM work under 'Handler mode' in this case.

- Lock the scheduler to ensure the PSA API context is not nested because
  of scheduling. The locking is performed by setting a flag, and the
  scheduler entry checks this flag to decide whether to schedule.

- When the PSA API returns, switch back to the caller stack, and trigger
  the scheduler on the caller stack if scheduling is expected. This
  helps the scheduler context management.

- SPM functionalities other than PSA API are still using SVC-based
  implementation, such as SPM initialization and log info flushing.

The content of this patch:

- Define interfaces, ABI dispatcher and architecture modifications
  to support handling PSA API under thread mode.

- Related header file and source path adjustment to help the build pass.

- Adjust IDLE partition stack size to make runtime execution work, the
  original size is too compact to run under debug build of specific
  platforms.

Change-Id: I52add3f57ef0f958d25725efb4eb702617586907
Signed-off-by: Ken Liu <Ken.Liu@arm.com>
Co-authored-by: Mingyang Sun <Mingyang.Sun@arm.com>
diff --git a/interface/include/config_impl.h.template b/interface/include/config_impl.h.template
index 4210055..6ef1a2b 100644
--- a/interface/include/config_impl.h.template
+++ b/interface/include/config_impl.h.template
@@ -12,10 +12,16 @@
 
 {% if ipc_partition_num > 0 and sfn_partition_num == 0 %}
 #define {{"%-56s"|format("CONFIG_TFM_SPM_BACKEND_IPC")}} 1
+
+#if TFM_LVL > 1
 #define {{"%-56s"|format("CONFIG_TFM_PSA_API_SUPERVISOR_CALL")}} 1
+#else
+#define {{"%-56s"|format("CONFIG_TFM_PSA_API_THREAD_CALL")}} 1
+#define {{"%-56s"|format("CONFIG_TFM_SPM_THREAD_STACK_SIZE")}} 1024
+#endif
 
 {% elif sfn_partition_num > 0 and ipc_partition_num == 0 %}
-#define CONFIG_TFM_SPM_BACKEND_SFN                 1
+#define {{"%-56s"|format("CONFIG_TFM_SPM_BACKEND_SFN")}} 1
 
 #if TFM_LVL > 1
 #error "High isolation level SFN model is not supported."
diff --git a/interface/include/psa_interface_redirect.h b/interface/include/psa_interface_redirect.h
index 2f2bc8c..ac16008 100644
--- a/interface/include/psa_interface_redirect.h
+++ b/interface/include/psa_interface_redirect.h
@@ -7,7 +7,7 @@
 #ifndef __PSA_INTERFACE_REDIRECT_H__
 #define __PSA_INTERFACE_REDIRECT_H__
 
-#ifdef CONFIG_TFM_PSA_API_SUPERVISOR_CALL
+#if defined(CONFIG_TFM_PSA_API_SUPERVISOR_CALL)
 
 #define psa_framework_version    psa_framework_version_svc
 #define psa_version              psa_version_svc
@@ -30,6 +30,29 @@
 #define psa_reset_signal         psa_reset_signal_svc
 #define psa_rot_lifecycle_state  psa_rot_lifecycle_state_svc
 
+#elif defined(CONFIG_TFM_PSA_API_THREAD_CALL)
+
+#define psa_framework_version    psa_framework_version_thread
+#define psa_version              psa_version_thread
+#define psa_connect              psa_connect_thread
+#define tfm_psa_call_pack        tfm_psa_call_pack_thread
+#define psa_close                psa_close_thread
+#define psa_wait                 psa_wait_thread
+#define psa_get                  psa_get_thread
+#define psa_set_rhandle          psa_set_rhandle_thread
+#define psa_read                 psa_read_thread
+#define psa_skip                 psa_skip_thread
+#define psa_write                psa_write_thread
+#define psa_reply                psa_reply_thread
+#define psa_notify               psa_notify_thread
+#define psa_clear                psa_clear_thread
+#define psa_eoi                  psa_eoi_thread
+#define psa_panic                psa_panic_thread
+#define psa_irq_enable           psa_irq_enable_thread
+#define psa_irq_disable          psa_irq_disable_thread
+#define psa_reset_signal         psa_reset_signal_thread
+#define psa_rot_lifecycle_state  psa_rot_lifecycle_state_thread
+
 #endif
 
 #endif /* __PSA_INTERFACE_REDIRECT_H__ */
diff --git a/secure_fw/CMakeLists.txt b/secure_fw/CMakeLists.txt
index e43e31f..982bea8 100644
--- a/secure_fw/CMakeLists.txt
+++ b/secure_fw/CMakeLists.txt
@@ -93,6 +93,7 @@
 
 set_source_files_properties(
     ${CMAKE_SOURCE_DIR}/secure_fw/spm/cmsis_psa/psa_interface_svc.c
+    ${CMAKE_SOURCE_DIR}/secure_fw/spm/cmsis_psa/psa_interface_thread.c
     PROPERTIES
     COMPILE_FLAGS $<$<C_COMPILER_ID:GNU>:-Wno-unused-parameter>
     COMPILE_FLAGS $<$<C_COMPILER_ID:ARMClang>:-Wno-unused-parameter>
@@ -103,6 +104,7 @@
 target_sources(tfm_secure_api
     INTERFACE
         $<$<BOOL:${TFM_PSA_API}>:${CMAKE_SOURCE_DIR}/secure_fw/spm/cmsis_psa/psa_interface_svc.c>
+        $<$<BOOL:${TFM_PSA_API}>:${CMAKE_SOURCE_DIR}/secure_fw/spm/cmsis_psa/psa_interface_thread.c>
 )
 
 target_compile_definitions(tfm_secure_api
diff --git a/secure_fw/partitions/idle_partition/load_info_idle_sp.c b/secure_fw/partitions/idle_partition/load_info_idle_sp.c
index eba22b6..4457753 100644
--- a/secure_fw/partitions/idle_partition/load_info_idle_sp.c
+++ b/secure_fw/partitions/idle_partition/load_info_idle_sp.c
@@ -12,7 +12,7 @@
 #include "load/service_defs.h"
 #include "load/asset_defs.h"
 
-#define IDLE_SP_STACK_SIZE      (0x60)
+#define IDLE_SP_STACK_SIZE      (0x100)
 
 struct partition_tfm_sp_idle_load_info_t {
     /* common length load data */
diff --git a/secure_fw/spm/CMakeLists.txt b/secure_fw/spm/CMakeLists.txt
index 8831f40..8e30035 100755
--- a/secure_fw/spm/CMakeLists.txt
+++ b/secure_fw/spm/CMakeLists.txt
@@ -50,6 +50,7 @@
         $<$<BOOL:${TFM_PSA_API}>:cmsis_psa/arch/tfm_arch.c>
         $<$<BOOL:${TFM_PSA_API}>:cmsis_psa/main.c>
         $<$<BOOL:${TFM_PSA_API}>:cmsis_psa/spm_ipc.c>
+        $<$<BOOL:${TFM_PSA_API}>:cmsis_psa/spm_thread_call.c>
         $<$<BOOL:${TFM_PSA_API}>:cmsis_psa/static_load.c>
         $<$<BOOL:${TFM_PSA_API}>:ffm/psa_api.c>
         $<$<BOOL:${TFM_PSA_API}>:ffm/backend_ipc.c>
diff --git a/secure_fw/spm/cmsis_psa/arch/tfm_arch.c b/secure_fw/spm/cmsis_psa/arch/tfm_arch.c
index 823e293..ed0c74c 100644
--- a/secure_fw/spm/cmsis_psa/arch/tfm_arch.c
+++ b/secure_fw/spm/cmsis_psa/arch/tfm_arch.c
@@ -31,6 +31,21 @@
                                                        ->stat_ctx.r0 = ret_code;
 }
 
+/* Caution: Keep 'uint32_t' always for collecting thread return values! */
+__attribute__((naked)) uint32_t tfm_arch_trigger_pendsv(void)
+{   /* Writes the PENDSVSET mask to SCB->ICSR, then returns to the caller. */
+    __ASM volatile(
+#ifndef __ICCARM__
+        ".syntax unified                                 \n"
+#endif
+        "ldr     r0, =%a0                                \n" /* r0 = &SCB->ICSR */
+        "ldr     r1, ="M2S(SCB_ICSR_PENDSVSET_Msk)"      \n" /* r1 = mask       */
+        "str     r1, [r0, #0]                            \n" /* ICSR = mask     */
+        "bx      lr                                      \n" /* r0 not reloaded */
+        :: "i" (&(SCB->ICSR))
+    );
+}
+
 /*
  * Initializes the State Context. The Context is used to do Except Return to
  * Thread Mode to start a function.
diff --git a/secure_fw/spm/cmsis_psa/arch/tfm_arch_v6m_v7m.c b/secure_fw/spm/cmsis_psa/arch/tfm_arch_v6m_v7m.c
index b876049..ba7d65d 100644
--- a/secure_fw/spm/cmsis_psa/arch/tfm_arch_v6m_v7m.c
+++ b/secure_fw/spm/cmsis_psa/arch/tfm_arch_v6m_v7m.c
@@ -6,6 +6,7 @@
  */
 
 #include <inttypes.h>
+#include "compiler_ext_defs.h"
 #include "tfm_core_utils.h"
 #include "tfm_hal_device_header.h"
 #include "tfm_arch.h"
@@ -20,6 +21,50 @@
 
 extern uint32_t SVCHandler_main(uint32_t *svc_args, uint32_t lr);
 
+/* Declaration of the flag controlling the scheduling logic in PendSV. */
+static uint32_t pendsv_idling = 0;
+
+#ifdef CONFIG_TFM_PSA_API_THREAD_CALL
+
+__naked uint32_t arch_non_preempt_call(uintptr_t fn_addr, uintptr_t frame_addr,
+                                       uint32_t stk_base, uint32_t stk_limit)
+{   /* r0/r1 pass through to spcall_execute_c; stk_limit (r3) is not read. */
+    __asm volatile(
+#if !defined(__ICCARM__)
+        ".syntax unified                                \n"
+#endif
+        "   push   {r4, lr}                             \n"/* save r4, lr    */
+        "   cpsid  i                                    \n"/* set PRIMASK    */
+        "   cmp    r2, #0                               \n"/* stk_base == 0? */
+        "   beq    v6v7_lock_sched                      \n"/* yes: no switch */
+        "   mov    r4, sp                               \n"/* r4 = caller SP */
+        "   mov    sp, r2                               \n"/* SP = stk_base  */
+        "   mov    r2, r4                               \n"/* r2 = caller SP */
+        "v6v7_lock_sched:                               \n"/* lock pendsv    */
+        "   ldr    r3, =%a1                             \n"/* r3 = &flag     */
+        "   movs   r4, #0x1                             \n"/* flag = 1: lock */
+        "   str    r4, [r3, #0]                         \n"
+        "   cpsie  i                                    \n"/* clear PRIMASK  */
+        "   push   {r2, r3}                             \n"/* keep r2 (SP/0) */
+        "   bl     %a0                                  \n"/* args: r0, r1   */
+        "   pop    {r2, r3}                             \n"/* r2 = SP or 0   */
+        "   cpsid  i                                    \n"/* set PRIMASK    */
+        "   cmp    r2, #0                               \n"/* was switched?  */
+        "   beq    v6v7_release_sched                   \n"/* no: keep SP    */
+        "   mov    sp, r2                               \n"/* back to caller */
+        "v6v7_release_sched:                            \n"
+        "   ldr    r2, =%a1                             \n"/* release pendsv */
+        "   movs   r3, #0                               \n"/* flag = 0       */
+        "   str    r3, [r2, #0]                         \n"
+        "   cpsie  i                                    \n"/* clear PRIMASK  */
+        "   pop    {r4, pc}                             \n"/* restore, return*/
+        : : "i" (spcall_execute_c),
+            "i" (&pendsv_idling)
+    );
+}
+
+#endif /* CONFIG_TFM_PSA_API_THREAD_CALL */
+
 #if defined(__ICCARM__)
 #pragma required = do_schedule
 #endif
@@ -30,8 +75,12 @@
 #if !defined(__ICCARM__)
         ".syntax unified                    \n"
 #endif
+        "   ldr     r0, =%a1                \n" /* r0 = &pendsv_idling */
+        "   ldr     r0, [r0]                \n" /* r0 = pendsv_idling */
+        "   cmp     r0, #0                  \n" /* Is scheduler locked? */
+        "   bne     v6v7_pendsv_exit        \n" /* Yes, do not schedule */
         "   push    {r0, lr}                \n"
-        "   bl      do_schedule             \n"
+        "   bl      %a0                     \n"
         "   pop     {r2, r3}                \n"
         "   mov     lr, r3                  \n"
         "   cmp     r0, r1                  \n" /* ctx of curr and next thrd */
@@ -61,6 +110,8 @@
         "   msr     psp, r2                 \n"
         "v6v7_pendsv_exit:                  \n"
         "   bx      lr                      \n"
+        :: "i" (do_schedule),
+           "i" (&pendsv_idling)
     );
 }
 
diff --git a/secure_fw/spm/cmsis_psa/arch/tfm_arch_v8m_base.c b/secure_fw/spm/cmsis_psa/arch/tfm_arch_v8m_base.c
index c073eb6..32fe226 100644
--- a/secure_fw/spm/cmsis_psa/arch/tfm_arch_v8m_base.c
+++ b/secure_fw/spm/cmsis_psa/arch/tfm_arch_v8m_base.c
@@ -6,6 +6,7 @@
  */
 
 #include <inttypes.h>
+#include "compiler_ext_defs.h"
 #include "spm_ipc.h"
 #include "tfm_hal_device_header.h"
 #include "tfm_arch.h"
@@ -20,6 +21,56 @@
 #error "Unsupported ARM Architecture."
 #endif
 
+/* Declaration of the flag controlling the scheduling logic in PendSV. */
+static uint32_t pendsv_idling = EXC_RETURN_SECURE_STACK;
+
+#ifdef CONFIG_TFM_PSA_API_THREAD_CALL
+
+__naked uint32_t arch_non_preempt_call(uintptr_t fn_addr, uintptr_t frame_addr,
+                                       uint32_t stk_base, uint32_t stk_limit)
+{   /* r0/r1 pass through to spcall_execute_c; r4 != 0 means "SP switched". */
+    __asm volatile(
+#if !defined(__ICCARM__)
+        ".syntax unified                                \n"
+#endif
+        "   push   {r4-r6, lr}                          \n"
+        "   cpsid  i                                    \n"/*Set PRIMASK    */
+        "   mov    r4, r2                               \n"/*r4 = stk_base  */
+        "   cmp    r2, #0                               \n"
+        "   beq    v8b_lock_sched                       \n"/*0: no switch   */
+        "   mrs    r5, psplim                           \n"/*To callee stack*/
+        "   movs   r4, #0                               \n"
+        "   msr    psplim, r4                           \n"/*PSPLIM = 0     */
+        "   mov    r4, sp                               \n"/*r4 = caller SP */
+        "   mov    sp, r2                               \n"/*SP = stk_base  */
+        "   msr    psplim, r3                           \n"/*New stack limit*/
+        "v8b_lock_sched:                                \n"/*To lock sched  */
+        "   ldr    r2, =%a1                             \n"
+        "   movs   r3, #0x0                             \n"/*Flag = 0: lock */
+        "   str    r3, [r2, #0]                         \n"
+        "   cpsie  i                                    \n"/*Clear PRIMASK  */
+        "   bl     %a0                                  \n"
+        "   cpsid  i                                    \n"/*Set PRIMASK    */
+        "   cmp    r4, #0                               \n"/*Was switched?  */
+        "   beq    v8b_release_sched                    \n"
+        "   movs   r3, #0                               \n"/*To caller stack*/
+        "   msr    psplim, r3                           \n"/*PSPLIM = 0     */
+        "   mov    sp, r4                               \n"/*SP = caller SP */
+        "   msr    psplim, r5                           \n"/*Caller's limit */
+        "v8b_release_sched:                             \n"
+        "   ldr    r2, =%a1                             \n"/*To unlock sched*/
+        "   movs   r3, %2                               \n"/*Flag = operand2*/
+        "   str    r3, [r2, #0]                         \n"
+        "   cpsie  i                                    \n"/*Clear PRIMASK  */
+        "   pop    {r4-r6, pc}                          \n"
+        : : "i" (spcall_execute_c),
+            "i" (&pendsv_idling),
+            "I" (EXC_RETURN_SECURE_STACK)
+    );
+}
+
+#endif /* CONFIG_TFM_PSA_API_THREAD_CALL */
+
 #if defined(__ICCARM__)
 #pragma required = do_schedule
 #endif
@@ -30,12 +81,13 @@
 #if !defined(__ICCARM__)
         ".syntax unified                    \n"
 #endif
-        "   movs    r0, #0x40               \n"
+        "   ldr     r0, =%a1                \n"
+        "   ldr     r0, [r0]                \n"
         "   mov     r1, lr                  \n"
         "   tst     r0, r1                  \n" /* Was NS interrupted by S? */
         "   beq     v8b_pendsv_exit         \n" /* Yes, do not schedule */
         "   push    {r0, lr}                \n" /* Save dummy R0, LR */
-        "   bl      do_schedule             \n"
+        "   bl      %a0                     \n"
         "   pop     {r2, r3}                \n"
         "   mov     lr, r3                  \n"
         "   cmp     r0, r1                  \n" /* ctx of curr and next thrd */
@@ -67,6 +119,8 @@
         "   msr     psplim, r3              \n"
         "v8b_pendsv_exit:                   \n"
         "   bx      lr                      \n"
+        : : "i" (do_schedule),
+            "i" (&pendsv_idling)
     );
 }
 
diff --git a/secure_fw/spm/cmsis_psa/arch/tfm_arch_v8m_main.c b/secure_fw/spm/cmsis_psa/arch/tfm_arch_v8m_main.c
index 8ba02ac..57bb130 100644
--- a/secure_fw/spm/cmsis_psa/arch/tfm_arch_v8m_main.c
+++ b/secure_fw/spm/cmsis_psa/arch/tfm_arch_v8m_main.c
@@ -6,6 +6,7 @@
  */
 
 #include <inttypes.h>
+#include "compiler_ext_defs.h"
 #include "tfm_hal_device_header.h"
 #include "region_defs.h"
 #include "tfm_arch.h"
@@ -21,6 +22,54 @@
 #error "Unsupported ARM Architecture."
 #endif
 
+/* Declaration of the flag controlling the scheduling logic in PendSV. */
+static uint32_t pendsv_idling = EXC_RETURN_SECURE_STACK;
+
+#ifdef CONFIG_TFM_PSA_API_THREAD_CALL
+
+__naked uint32_t arch_non_preempt_call(uintptr_t fn_addr, uintptr_t frame_addr,
+                                       uint32_t stk_base, uint32_t stk_limit)
+{   /* r0/r1 pass through to spcall_execute_c; r4 != 0 means "SP switched". */
+    __asm volatile(
+#if !defined(__ICCARM__)
+        ".syntax unified                                \n"
+#endif
+        "   push   {r4-r6, lr}                          \n"
+        "   cpsid  i                                    \n"/*Set PRIMASK    */
+        "   mov    r4, r2                               \n"/*r4 = stk_base  */
+        "   mrs    r5, psplim                           \n"/*r5 = old limit */
+        "   movs   r12, #0                              \n"
+        "   cmp    r2, #0                               \n"/*0: no switch   */
+        "   itttt  ne                                   \n"/*To callee stack*/
+        "   msrne  psplim, r12                          \n"/*PSPLIM = 0     */
+        "   movne  r4, sp                               \n"/*r4 = caller SP */
+        "   movne  sp, r2                               \n"/*SP = stk_base  */
+        "   msrne  psplim, r3                           \n"/*New stack limit*/
+        "   ldr    r2, =%a1                             \n"/*To lock sched  */
+        "   movs   r3, #0x0                             \n"/*Flag = 0: lock */
+        "   str    r3, [r2, #0]                         \n"
+        "   cpsie  i                                    \n"/*Clear PRIMASK  */
+        "   bl     %a0                                  \n"
+        "   cpsid  i                                    \n"/*Set PRIMASK    */
+        "   movs   r12, #0                              \n"
+        "   cmp    r4, #0                               \n"/*Was switched?  */
+        "   ittt   ne                                   \n"/*To caller stack*/
+        "   msrne  psplim, r12                          \n"/*PSPLIM = 0     */
+        "   movne  sp, r4                               \n"/*SP = caller SP */
+        "   msrne  psplim, r5                           \n"/*Caller's limit */
+        "   ldr    r4, =%a1                             \n"/*To unlock sched*/
+        "   movs   r5, %2                               \n"/*Flag = operand2*/
+        "   str    r5, [r4, #0]                         \n"
+        "   cpsie  i                                    \n"/*Clear PRIMASK  */
+        "   pop    {r4-r6, pc}                          \n"
+        : : "i" (spcall_execute_c),
+            "i" (&pendsv_idling),
+            "I" (EXC_RETURN_SECURE_STACK)
+    );
+}
+
+#endif /* CONFIG_TFM_PSA_API_THREAD_CALL */
+
 #if defined(__ICCARM__)
 #pragma required = do_schedule
 #endif
@@ -28,10 +77,15 @@
 __attribute__((naked)) void PendSV_Handler(void)
 {
     __ASM volatile(
-        "   tst     lr, #0x40               \n" /* Was NS interrupted by S? */
+#if !defined(__ICCARM__)
+        ".syntax unified                    \n"
+#endif
+        "   ldr     r0, =%a1                \n"
+        "   ldr     r0, [r0]                \n"
+        "   ands    r0, lr                  \n"
         "   beq     v8m_pendsv_exit         \n" /* Yes, do not schedule */
         "   push    {r0, lr}                \n" /* Save dummy R0, LR */
-        "   bl      do_schedule             \n"
+        "   bl      %a0                     \n"
         "   pop     {r2, lr}                \n"
         "   cmp     r0, r1                  \n" /* ctx of curr and next thrd */
         "   beq     v8m_pendsv_exit         \n" /* No schedule if curr = next */
@@ -45,6 +99,8 @@
         "   msr     psplim, r3              \n"
         "v8m_pendsv_exit:                   \n"
         "   bx      lr                      \n"
+        :: "i" (do_schedule),
+           "i" (&pendsv_idling)
     );
 }
 
diff --git a/secure_fw/spm/cmsis_psa/psa_interface_svc.c b/secure_fw/spm/cmsis_psa/psa_interface_svc.c
index 3ff299d..0a1f146 100644
--- a/secure_fw/spm/cmsis_psa/psa_interface_svc.c
+++ b/secure_fw/spm/cmsis_psa/psa_interface_svc.c
@@ -13,7 +13,7 @@
 #include "psa/lifecycle.h"
 #include "psa/service.h"
 
-#ifdef CONFIG_TFM_PSA_API_SUPERVISOR_CALL
+#if defined(CONFIG_TFM_PSA_API_SUPERVISOR_CALL)
 
 __naked uint32_t psa_framework_version_svc(void)
 {
diff --git a/secure_fw/spm/cmsis_psa/psa_interface_thread.c b/secure_fw/spm/cmsis_psa/psa_interface_thread.c
new file mode 100644
index 0000000..451705a
--- /dev/null
+++ b/secure_fw/spm/cmsis_psa/psa_interface_thread.c
@@ -0,0 +1,339 @@
+/*
+ * Copyright (c) 2021, Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ */
+
+#include <stdint.h>
+#include "compiler_ext_defs.h"
+#include "ffm/psa_api.h"
+#include "spm_ipc.h"
+#include "svc_num.h"
+#include "tfm_psa_call_pack.h"
+#include "psa/client.h"
+#include "psa/lifecycle.h"
+#include "psa/service.h"
+
+#ifdef CONFIG_TFM_PSA_API_THREAD_CALL
+
+__naked static uint32_t psa_interface_unified_abi(uint32_t r0)
+{   /* Reached via 'b' from the psa_*_thread wrappers: r0 = fn, r1 = frame. */
+    __asm volatile(
+#if !defined(__ICCARM__)
+        ".syntax unified                                    \n"
+#endif
+        "movs   r2, #1                                      \n" /* 3rd arg */
+        "bl     %a0                                         \n"
+        "pop    {r0-r4, pc}                                 \n"
+        :: "i" (spm_interface_thread_dispatcher)
+    );  /* 'pop' reloads r0-r4 from the wrapper's frame and returns via its lr. */
+}
+
+__naked uint32_t psa_framework_version_thread(void)
+{
+    __asm volatile(
+#if !defined(__ICCARM__)
+        ".syntax unified                                    \n"
+#endif
+        "push   {r0-r4, lr}                                 \n"
+        "ldr    r0, =%a0                                    \n"
+        "mov    r1, sp                                      \n"
+        "b      %a1                                         \n"
+        :: "i" (tfm_spm_client_psa_framework_version),
+           "i" (psa_interface_unified_abi)
+    );
+}
+
+__naked uint32_t psa_version_thread(uint32_t sid)
+{
+    __asm volatile(
+#if !defined(__ICCARM__)
+        ".syntax unified                                    \n"
+#endif
+        "push   {r0-r4, lr}                                 \n"
+        "ldr    r0, =%a0                                    \n"
+        "mov    r1, sp                                      \n"
+        "b      %a1                                         \n"
+        :: "i" (tfm_spm_client_psa_version),
+           "i" (psa_interface_unified_abi)
+    );
+}
+
+__naked psa_handle_t psa_connect_thread(uint32_t sid, uint32_t version)
+{
+    __asm volatile(
+#if !defined(__ICCARM__)
+        ".syntax unified                                    \n"
+#endif
+        "push   {r0-r4, lr}                                 \n"
+        "ldr    r0, =%a0                                    \n"
+        "mov    r1, sp                                      \n"
+        "b      %a1                                         \n"
+        :: "i" (tfm_spm_client_psa_connect),
+           "i" (psa_interface_unified_abi)
+    );
+}
+
+__naked psa_status_t tfm_psa_call_pack_thread(psa_handle_t handle,
+                                              uint32_t ctrl_param,
+                                              const psa_invec *in_vec,
+                                              psa_outvec *out_vec)
+{
+    __asm volatile(
+#if !defined(__ICCARM__)
+        ".syntax unified                                    \n"
+#endif
+        "push   {r0-r4, lr}                                 \n"
+        "ldr    r0, =%a0                                    \n"
+        "mov    r1, sp                                      \n"
+        "b      %a1                                         \n"
+        :: "i" (tfm_spm_client_psa_call),
+           "i" (psa_interface_unified_abi)
+    );
+}
+
+__naked void psa_close_thread(psa_handle_t handle)
+{
+    __asm volatile(
+#if !defined(__ICCARM__)
+        ".syntax unified                                    \n"
+#endif
+        "push   {r0-r4, lr}                                 \n"
+        "ldr    r0, =%a0                                    \n"
+        "mov    r1, sp                                      \n"
+        "b      %a1                                         \n"
+        :: "i" (tfm_spm_client_psa_close),
+           "i" (psa_interface_unified_abi)
+    );
+}
+
+__naked psa_signal_t psa_wait_thread(psa_signal_t signal_mask,
+                                     uint32_t timeout)
+{
+    __asm volatile(
+#if !defined(__ICCARM__)
+        ".syntax unified                                    \n"
+#endif
+        "push   {r0-r4, lr}                                 \n"
+        "ldr    r0, =%a0                                    \n"
+        "mov    r1, sp                                      \n"
+        "b      %a1                                         \n"
+        :: "i" (tfm_spm_partition_psa_wait),
+           "i" (psa_interface_unified_abi)
+    );
+}
+
+__naked psa_status_t psa_get_thread(psa_signal_t signal, psa_msg_t *msg)
+{
+    __asm volatile(
+#if !defined(__ICCARM__)
+        ".syntax unified                                    \n"
+#endif
+        "push   {r0-r4, lr}                                 \n"
+        "ldr    r0, =%a0                                    \n"
+        "mov    r1, sp                                      \n"
+        "b      %a1                                         \n"
+        :: "i" (tfm_spm_partition_psa_get),
+           "i" (psa_interface_unified_abi)
+    );
+}
+
+__naked void psa_set_rhandle_thread(psa_handle_t msg_handle, void *rhandle)
+{   /* Build the ABI frame, then tail-branch to the unified ABI entry. */
+    __asm volatile(
+#if !defined(__ICCARM__)
+        ".syntax unified                                    \n"
+#endif
+        "push   {r0-r4, lr}                                 \n" /* frame   */
+        "ldr    r0, =%a0                                    \n" /* target  */
+        "mov    r1, sp                                      \n" /* &frame  */
+        "b      %a1                                         \n"
+        :: "i" (tfm_spm_partition_psa_set_rhandle),
+           "i" (psa_interface_unified_abi));
+}
+
+__naked size_t psa_read_thread(psa_handle_t msg_handle, uint32_t invec_idx,
+                               void *buffer, size_t num_bytes)
+{
+    __asm volatile(
+#if !defined(__ICCARM__)
+        ".syntax unified                                    \n"
+#endif
+        "push   {r0-r4, lr}                                 \n"
+        "ldr    r0, =%a0                                    \n"
+        "mov    r1, sp                                      \n"
+        "b      %a1                                         \n"
+        :: "i" (tfm_spm_partition_psa_read),
+           "i" (psa_interface_unified_abi)
+    );
+}
+
+__naked size_t psa_skip_thread(psa_handle_t msg_handle,
+                               uint32_t invec_idx, size_t num_bytes)
+{
+    __asm volatile(
+#if !defined(__ICCARM__)
+        ".syntax unified                                    \n"
+#endif
+        "push   {r0-r4, lr}                                 \n"
+        "ldr    r0, =%a0                                    \n"
+        "mov    r1, sp                                      \n"
+        "b      %a1                                         \n"
+        :: "i" (tfm_spm_partition_psa_skip),
+           "i" (psa_interface_unified_abi)
+    );
+}
+
+__naked void psa_write_thread(psa_handle_t msg_handle, uint32_t outvec_idx,
+                              const void *buffer, size_t num_bytes)
+{
+    __asm volatile(
+#if !defined(__ICCARM__)
+        ".syntax unified                                    \n"
+#endif
+        "push   {r0-r4, lr}                                 \n"
+        "ldr    r0, =%a0                                    \n"
+        "mov    r1, sp                                      \n"
+        "b      %a1                                         \n"
+        :: "i" (tfm_spm_partition_psa_write),
+           "i" (psa_interface_unified_abi)
+    );
+}
+
+__naked void psa_reply_thread(psa_handle_t msg_handle, psa_status_t status)
+{
+    __asm volatile(
+#if !defined(__ICCARM__)
+        ".syntax unified                                    \n"
+#endif
+        "push   {r0-r4, lr}                                 \n"
+        "ldr    r0, =%a0                                    \n"
+        "mov    r1, sp                                      \n"
+        "b      %a1                                         \n"
+        :: "i" (tfm_spm_partition_psa_reply),
+           "i" (psa_interface_unified_abi)
+    );
+}
+
+__naked void psa_notify_thread(int32_t partition_id)
+{
+    __asm volatile(
+#if !defined(__ICCARM__)
+        ".syntax unified                                    \n"
+#endif
+        "push   {r0-r4, lr}                                 \n"
+        "ldr    r0, =%a0                                    \n"
+        "mov    r1, sp                                      \n"
+        "b      %a1                                         \n"
+        :: "i" (tfm_spm_partition_psa_notify),
+           "i" (psa_interface_unified_abi)
+    );
+}
+
+__naked void psa_clear_thread(void)
+{
+    __asm volatile(
+#if !defined(__ICCARM__)
+        ".syntax unified                                    \n"
+#endif
+        "push   {r0-r4, lr}                                 \n"
+        "ldr    r0, =%a0                                    \n"
+        "mov    r1, sp                                      \n"
+        "b      %a1                                         \n"
+        :: "i" (tfm_spm_partition_psa_clear),
+           "i" (psa_interface_unified_abi)
+    );
+}
+
+__naked void psa_eoi_thread(psa_signal_t irq_signal)
+{
+    __asm volatile(
+#if !defined(__ICCARM__)
+        ".syntax unified                                    \n"
+#endif
+        "push   {r0-r4, lr}                                 \n"
+        "ldr    r0, =%a0                                    \n"
+        "mov    r1, sp                                      \n"
+        "b      %a1                                         \n"
+        :: "i" (tfm_spm_partition_psa_eoi),
+           "i" (psa_interface_unified_abi)
+    );
+}
+
+__naked void psa_panic_thread(void)
+{
+    __asm volatile(
+#if !defined(__ICCARM__)
+        ".syntax unified                                    \n"
+#endif
+        "push   {r0-r4, lr}                                 \n"
+        "ldr    r0, =%a0                                    \n"
+        "mov    r1, sp                                      \n"
+        "b      %a1                                         \n"
+        :: "i" (tfm_spm_partition_psa_panic),
+           "i" (psa_interface_unified_abi)
+    );
+}
+
+__naked psa_irq_status_t psa_irq_disable_thread(psa_signal_t irq_signal)
+{
+    __asm volatile(
+#if !defined(__ICCARM__)
+        ".syntax unified                                    \n"
+#endif
+        "push   {r0-r4, lr}                                 \n"
+        "ldr    r0, =%a0                                    \n"
+        "mov    r1, sp                                      \n"
+        "b      %a1                                         \n"
+        :: "i" (tfm_spm_partition_irq_disable),
+           "i" (psa_interface_unified_abi)
+    );
+}
+
+__naked void psa_irq_enable_thread(psa_signal_t irq_signal)
+{
+    __asm volatile(
+#if !defined(__ICCARM__)
+        ".syntax unified                                    \n"
+#endif
+        "push   {r0-r4, lr}                                 \n"
+        "ldr    r0, =%a0                                    \n"
+        "mov    r1, sp                                      \n"
+        "b      %a1                                         \n"
+        :: "i" (tfm_spm_partition_irq_enable),
+           "i" (psa_interface_unified_abi)
+    );
+}
+
+__naked void psa_reset_signal_thread(psa_signal_t irq_signal)
+{
+    __asm volatile(
+#if !defined(__ICCARM__)
+        ".syntax unified                                    \n"
+#endif
+        "push   {r0-r4, lr}                                 \n"
+        "ldr    r0, =%a0                                    \n"
+        "mov    r1, sp                                      \n"
+        "b      %a1                                         \n"
+        :: "i" (tfm_spm_partition_psa_reset_signal),
+           "i" (psa_interface_unified_abi)
+    );
+}
+
+__naked uint32_t psa_rot_lifecycle_state_thread(void)
+{
+    __asm volatile(
+#if !defined(__ICCARM__)
+        ".syntax unified                                    \n"
+#endif
+        "push   {r0-r4, lr}                                 \n"
+        "ldr    r0, =%a0                                    \n"
+        "mov    r1, sp                                      \n"
+        "b      %a1                                         \n"
+        :: "i" (tfm_spm_get_lifecycle_state),
+           "i" (psa_interface_unified_abi)
+    );
+}
+
+#endif /* CONFIG_TFM_PSA_API_THREAD_CALL */
diff --git a/secure_fw/spm/cmsis_psa/spm_ipc.h b/secure_fw/spm/cmsis_psa/spm_ipc.h
index cab522f..9a7a192 100644
--- a/secure_fw/spm/cmsis_psa/spm_ipc.h
+++ b/secure_fw/spm/cmsis_psa/spm_ipc.h
@@ -9,6 +9,7 @@
 #define __SPM_IPC_H__
 
 #include <stdint.h>
+#include "config_impl.h"
 #include "tfm_arch.h"
 #include "lists.h"
 #include "tfm_secure_api.h"
@@ -461,4 +462,25 @@
  */
 void spm_handle_interrupt(void *p_pt, struct irq_load_info_t *p_ildi);
 
+#ifdef CONFIG_TFM_PSA_API_THREAD_CALL
+
+/*
+ * SPM dispatcher to handle the API call under non-privileged model.
+ * This API runs on the caller's stack, switches to the SPM stack (when
+ * requested) to call 'fn_addr', then switches back to the caller's
+ * stack before returning to the caller.
+ *
+ * fn_addr      - address of the target function to be called.
+ * frame_addr   - address of the customized ABI frame for the call.
+ * switch_stack - non-zero to switch to the SPM stack, zero to keep stack.
+ */
+void spm_interface_thread_dispatcher(uintptr_t fn_addr,
+                                     uintptr_t frame_addr,
+                                     uint32_t  switch_stack);
+
+/* Execute a customized ABI function in C */
+void spcall_execute_c(uintptr_t fn_addr, uintptr_t frame_addr);
+
+#endif
+
 #endif /* __SPM_IPC_H__ */
diff --git a/secure_fw/spm/cmsis_psa/spm_thread_call.c b/secure_fw/spm/cmsis_psa/spm_thread_call.c
new file mode 100644
index 0000000..cdd7b95
--- /dev/null
+++ b/secure_fw/spm/cmsis_psa/spm_thread_call.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2021, Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ */
+
+#include <stdint.h>
+#include "config_impl.h"
+#include "compiler_ext_defs.h"
+#include "spm_ipc.h"
+#include "tfm_arch.h"
+#include "utilities.h"
+#include "ffm/backend.h"
+#include "psa/client.h"
+#include "psa/lifecycle.h"
+#include "psa/service.h"
+
+#ifdef CONFIG_TFM_PSA_API_THREAD_CALL
+
+/* Customized ABI format: mirrors the 'push {r0-r4, lr}' done by the callers */
+struct spcall_abi_frame_t {
+    uint32_t      a0;       /* Argument 0 in; overwritten with the result */
+    uint32_t      a1;       /* Argument 1 */
+    uint32_t      a2;       /* Argument 2 */
+    uint32_t      a3;       /* Argument 3 */
+    uint32_t      unused0;  /* Slot of the pushed r4; not used in this file */
+    uint32_t      unused1;  /* Slot of the pushed lr; not used in this file */
+};
+
+typedef uint32_t (*target_fn_t)(uint32_t a0, uint32_t a1,
+                                uint32_t a2, uint32_t a3);
+
+void spcall_execute_c(uintptr_t fn_addr, uintptr_t frame_addr)
+{
+    /* Feed frame slots a0-a3 to the target; its result replaces a0. */
+    struct spcall_abi_frame_t *frame = (struct spcall_abi_frame_t *)frame_addr;
+    target_fn_t target = (target_fn_t)fn_addr;
+
+    frame->a0 = target(frame->a0, frame->a1, frame->a2, frame->a3);
+}
+
+void spm_interface_thread_dispatcher(uintptr_t fn_addr,
+                                     uintptr_t frame_addr,
+                                     uint32_t  switch_stack)
+{
+    uint32_t stk_base  = switch_stack ? SPM_THREAD_CONTEXT->sp : 0;
+    uint32_t stk_limit = switch_stack ? SPM_THREAD_CONTEXT->sp_limit : 0;
+    struct spcall_abi_frame_t *frame = (struct spcall_abi_frame_t *)frame_addr;
+
+    arch_non_preempt_call(fn_addr, frame_addr, stk_base, stk_limit);
+    if (THRD_EXPECTING_SCHEDULE()) { /* a schedule is expected: trigger it */
+        frame->a0 = tfm_arch_trigger_pendsv();
+    }
+}
+
+#endif /* CONFIG_TFM_PSA_API_THREAD_CALL */
diff --git a/secure_fw/spm/cmsis_psa/tfm_psa_api_veneers.c b/secure_fw/spm/cmsis_psa/tfm_psa_api_veneers.c
index 12b11f8..a3af13b 100644
--- a/secure_fw/spm/cmsis_psa/tfm_psa_api_veneers.c
+++ b/secure_fw/spm/cmsis_psa/tfm_psa_api_veneers.c
@@ -7,13 +7,20 @@
 
 #include <stdbool.h>
 #include <stdio.h>
+#include "config_impl.h"
 #include "security_defs.h"
+#include "tfm_psa_call_pack.h"
 #include "tfm_arch.h"
 #include "tfm_secure_api.h"
 #include "tfm_api.h"
 #include "tfm_svcalls.h"
 #include "utilities.h"
 
+#ifdef CONFIG_TFM_PSA_API_THREAD_CALL
+#include "spm_ipc.h"
+#include "ffm/psa_api.h"
+#endif
+
 /*
  * Use assembly to:
  * - Explicit stack usage to perform re-entrant detection.
@@ -24,63 +31,138 @@
 uint32_t tfm_psa_framework_version_veneer(void)
 {
     __ASM volatile(
+#if !defined(__ICCARM__)
+            ".syntax unified                           \n"
+#endif
+
 #if !defined(__ARM_ARCH_8_1M_MAIN__)
             "   ldr    r2, [sp]                        \n"
             "   ldr    r3, ="M2S(STACK_SEAL_PATTERN)"  \n"
             "   cmp    r2, r3                          \n"
             "   bne    reent_panic1                    \n"
 #endif
+
+#ifdef CONFIG_TFM_PSA_API_THREAD_CALL /* call through the dispatcher instead of SVC */
+            "   push   {r0-r4, lr}                     \n" /* build frame: r0-r4, lr */
+            "   ldr    r0, =%a0                        \n" /* r0 = tfm_spm_client_psa_framework_version */
+            "   mov    r1, sp                          \n" /* r1 = frame address */
+            "   movs   r2, #0                          \n" /* r2 = switch_stack argument: 0 */
+            "   bl     %a2                             \n" /* call spm_interface_thread_dispatcher */
+            "   pop    {r0-r3}                         \n" /* r0-r3 = frame slots a0-a3 */
+            "   pop    {r2, r3}                        \n" /* r2 = saved r4, r3 = saved lr */
+            "   mov    lr, r3                          \n" /* restore the return address */
+#else
             "   svc    %0                              \n"
+#endif
+
             "   bxns   lr                              \n"
 #if !defined(__ARM_ARCH_8_1M_MAIN__)
             "reent_panic1:                             \n"
             "   svc    %1                              \n"
             "   b      .                               \n"
 #endif
-            : : "I" (TFM_SVC_PSA_FRAMEWORK_VERSION),
-                "I" (TFM_SVC_PSA_PANIC));
+            : :
+#ifdef CONFIG_TFM_PSA_API_THREAD_CALL
+            "i" (tfm_spm_client_psa_framework_version), /* %a0: target function */
+            "I" (TFM_SVC_PSA_PANIC),                    /* %1: panic SVC number */
+            "i" (spm_interface_thread_dispatcher)       /* %a2: dispatcher */
+#else
+            "I" (TFM_SVC_PSA_FRAMEWORK_VERSION),
+            "I" (TFM_SVC_PSA_PANIC)
+#endif
+    );
 }
 
 __tfm_psa_secure_gateway_attributes__
 uint32_t tfm_psa_version_veneer(uint32_t sid)
 {
     __ASM volatile(
+#if !defined(__ICCARM__)
+            ".syntax unified                           \n"
+#endif
+
 #if !defined(__ARM_ARCH_8_1M_MAIN__)
             "   ldr    r2, [sp]                        \n"
             "   ldr    r3, ="M2S(STACK_SEAL_PATTERN)"  \n"
             "   cmp    r2, r3                          \n"
             "   bne    reent_panic2                    \n"
 #endif
+
+#ifdef CONFIG_TFM_PSA_API_THREAD_CALL /* call through the dispatcher instead of SVC */
+            "   push   {r0-r4, lr}                     \n" /* build frame: r0 (sid), r1-r4, lr */
+            "   ldr    r0, =%a0                        \n" /* r0 = tfm_spm_client_psa_version */
+            "   mov    r1, sp                          \n" /* r1 = frame address */
+            "   movs   r2, #0                          \n" /* r2 = switch_stack argument: 0 */
+            "   bl     %a2                             \n" /* call spm_interface_thread_dispatcher */
+            "   pop    {r0-r3}                         \n" /* r0-r3 = frame slots a0-a3 */
+            "   pop    {r2, r3}                        \n" /* r2 = saved r4, r3 = saved lr */
+            "   mov    lr, r3                          \n" /* restore the return address */
+#else
             "   svc    %0                              \n"
+#endif
+
             "   bxns   lr                              \n"
 #if !defined(__ARM_ARCH_8_1M_MAIN__)
             "reent_panic2:                             \n"
             "   svc    %1                              \n"
             "   b      .                               \n"
 #endif
-            : : "I" (TFM_SVC_PSA_VERSION),
-                "I" (TFM_SVC_PSA_PANIC));
+            : :
+#ifdef CONFIG_TFM_PSA_API_THREAD_CALL
+            "i" (tfm_spm_client_psa_version),           /* %a0: target function */
+            "I" (TFM_SVC_PSA_PANIC),                    /* %1: panic SVC number */
+            "i" (spm_interface_thread_dispatcher)       /* %a2: dispatcher */
+#else
+            "I" (TFM_SVC_PSA_VERSION),
+            "I" (TFM_SVC_PSA_PANIC)
+#endif
+    );
 }
 
 __tfm_psa_secure_gateway_attributes__
 psa_handle_t tfm_psa_connect_veneer(uint32_t sid, uint32_t version)
 {
     __ASM volatile(
+#if !defined(__ICCARM__)
+            ".syntax unified                           \n"
+#endif
+
 #if !defined(__ARM_ARCH_8_1M_MAIN__)
             "   ldr    r2, [sp]                        \n"
             "   ldr    r3, ="M2S(STACK_SEAL_PATTERN)"  \n"
             "   cmp    r2, r3                          \n"
             "   bne    reent_panic3                    \n"
 #endif
+
+#ifdef CONFIG_TFM_PSA_API_THREAD_CALL /* call through the dispatcher instead of SVC */
+            "   push   {r0-r4, lr}                     \n" /* build frame: r0 (sid), r1 (version), r2-r4, lr */
+            "   ldr    r0, =%a0                        \n" /* r0 = tfm_spm_client_psa_connect */
+            "   mov    r1, sp                          \n" /* r1 = frame address */
+            "   movs   r2, #0                          \n" /* r2 = switch_stack argument: 0 */
+            "   bl     %a2                             \n" /* call spm_interface_thread_dispatcher */
+            "   pop    {r0-r3}                         \n" /* r0-r3 = frame slots a0-a3 */
+            "   pop    {r2, r3}                        \n" /* r2 = saved r4, r3 = saved lr */
+            "   mov    lr, r3                          \n" /* restore the return address */
+#else
             "   svc    %0                              \n"
+#endif
+
             "   bxns   lr                              \n"
 #if !defined(__ARM_ARCH_8_1M_MAIN__)
             "reent_panic3:                             \n"
             "   svc    %1                              \n"
             "   b      .                               \n"
 #endif
-            : : "I" (TFM_SVC_PSA_CONNECT),
-                "I" (TFM_SVC_PSA_PANIC));
+            : :
+#ifdef CONFIG_TFM_PSA_API_THREAD_CALL
+            "i" (tfm_spm_client_psa_connect),           /* %a0: target function */
+            "I" (TFM_SVC_PSA_PANIC),                    /* %1: panic SVC number */
+            "i" (spm_interface_thread_dispatcher)       /* %a2: dispatcher */
+#else
+            "I" (TFM_SVC_PSA_CONNECT),
+            "I" (TFM_SVC_PSA_PANIC)
+#endif
+    );
 }
 
 __tfm_psa_secure_gateway_attributes__
@@ -90,6 +172,10 @@
                                  psa_outvec *out_vec)
 {
     __ASM volatile(
+#if !defined(__ICCARM__)
+            ".syntax unified                           \n"
+#endif
+
 #if !defined(__ARM_ARCH_8_1M_MAIN__)
             "   push   {r2, r3}                        \n"
             "   ldr    r2, [sp, #8]                    \n"
@@ -98,34 +184,80 @@
             "   bne    reent_panic4                    \n"
             "   pop    {r2, r3}                        \n"
 #endif
+
+#ifdef CONFIG_TFM_PSA_API_THREAD_CALL
+            "   push   {r0-r4, lr}                     \n"
+            "   ldr    r0, =%a0                        \n"
+            "   mov    r1, sp                          \n"
+            "   movs   r2, #0                          \n"
+            "   bl     %a2                             \n"
+            "   pop    {r0-r3}                         \n"
+            "   pop    {r2, r3}                        \n"
+            "   mov    lr, r3                          \n"
+#else
             "   svc    %0                              \n"
+#endif
+
             "   bxns   lr                              \n"
 #if !defined(__ARM_ARCH_8_1M_MAIN__)
             "reent_panic4:                             \n"
             "   svc    %1                              \n"
             "   b      .                               \n"
 #endif
-            : : "I" (TFM_SVC_PSA_CALL),
-                "I" (TFM_SVC_PSA_PANIC));
+            : :
+#ifdef CONFIG_TFM_PSA_API_THREAD_CALL
+            "i" (tfm_spm_client_psa_call),
+            "I" (TFM_SVC_PSA_PANIC),
+            "i" (spm_interface_thread_dispatcher)
+#else
+            "I" (TFM_SVC_PSA_CALL),
+            "I" (TFM_SVC_PSA_PANIC)
+#endif
+    );
 }
 
 __tfm_psa_secure_gateway_attributes__
 void tfm_psa_close_veneer(psa_handle_t handle)
 {
     __ASM volatile(
+#if !defined(__ICCARM__)
+            ".syntax unified                           \n"
+#endif
+
 #if !defined(__ARM_ARCH_8_1M_MAIN__)
             "   ldr    r2, [sp]                        \n"
             "   ldr    r3, ="M2S(STACK_SEAL_PATTERN)"  \n"
             "   cmp    r2, r3                          \n"
             "   bne    reent_panic5                    \n"
 #endif
+
+#ifdef CONFIG_TFM_PSA_API_THREAD_CALL /* call through the dispatcher instead of SVC */
+            "   push   {r0-r4, lr}                     \n" /* build frame: r0 (handle), r1-r4, lr */
+            "   ldr    r0, =%a0                        \n" /* r0 = tfm_spm_client_psa_close */
+            "   mov    r1, sp                          \n" /* r1 = frame address */
+            "   movs   r2, #0                          \n" /* r2 = switch_stack argument: 0 */
+            "   bl     %a2                             \n" /* call spm_interface_thread_dispatcher */
+            "   pop    {r0-r3}                         \n" /* r0-r3 = frame slots a0-a3 */
+            "   pop    {r2, r3}                        \n" /* r2 = saved r4, r3 = saved lr */
+            "   mov    lr, r3                          \n" /* restore the return address */
+#else
             "   svc    %0                              \n"
+#endif
+
             "   bxns   lr                              \n"
 #if !defined(__ARM_ARCH_8_1M_MAIN__)
             "reent_panic5:                             \n"
             "   svc    %1                              \n"
             "   b      .                               \n"
 #endif
-            : : "I" (TFM_SVC_PSA_CLOSE),
-                "I" (TFM_SVC_PSA_PANIC));
+            : :
+#ifdef CONFIG_TFM_PSA_API_THREAD_CALL
+            "i" (tfm_spm_client_psa_close),             /* %a0: target function */
+            "I" (TFM_SVC_PSA_PANIC),                    /* %1: panic SVC number */
+            "i" (spm_interface_thread_dispatcher)       /* %a2: dispatcher */
+#else
+            "I" (TFM_SVC_PSA_CLOSE),
+            "I" (TFM_SVC_PSA_PANIC)
+#endif
+    );
 }
diff --git a/secure_fw/spm/ffm/backend_ipc.c b/secure_fw/spm/ffm/backend_ipc.c
index 3b23e1e..cd35e36 100644
--- a/secure_fw/spm/ffm/backend_ipc.c
+++ b/secure_fw/spm/ffm/backend_ipc.c
@@ -6,6 +6,7 @@
  */
 
 #include <stdint.h>
+#include "compiler_ext_defs.h"
 #include "spm_ipc.h"
 #include "tfm_hal_isolation.h"
 #include "tfm_rpc.h"
@@ -20,6 +21,24 @@
 /* Declare the global component list */
 struct partition_head_t partition_listhead;
 
+#ifdef CONFIG_TFM_PSA_API_THREAD_CALL
+/* Context (sp/sp_limit) read through SPM_THREAD_CONTEXT for SPM stack use. */
+#ifdef TFM_MULTI_CORE_TOPOLOGY
+/* Dedicated SPM stack. TODO: To be checked when RPC design updates. */
+static uint8_t spm_stack_local[CONFIG_TFM_SPM_THREAD_STACK_SIZE] __aligned(8);
+struct context_ctrl_t spm_thread_context = {
+    .sp       = (uint32_t)&spm_stack_local[CONFIG_TFM_SPM_THREAD_STACK_SIZE],
+    .sp_limit = (uint32_t)spm_stack_local,
+    .reserved = 0,
+    .exc_ret  = 0,
+};
+struct context_ctrl_t *p_spm_thread_context = &spm_thread_context;
+#else /* No static stack: pointer is assigned for the TFM_SP_NON_SECURE_ID partition. */
+struct context_ctrl_t *p_spm_thread_context;
+#endif /* TFM_MULTI_CORE_TOPOLOGY */
+
+#endif /* CONFIG_TFM_PSA_API_THREAD_CALL */
+
 /*
  * Send message and wake up the SP who is waiting on message queue, block the
  * current thread and triggere scheduler.
@@ -78,6 +97,11 @@
 
     if (p_pldi->pid == TFM_SP_NON_SECURE_ID) {
         p_param = (void *)tfm_spm_hal_get_ns_entry_point();
+
+#ifdef CONFIG_TFM_PSA_API_THREAD_CALL
+        SPM_THREAD_CONTEXT = &p_pt->ctx_ctrl;
+#endif
+
     }
 
     thrd_start(&p_pt->thrd,
diff --git a/secure_fw/spm/ffm/psa_api.c b/secure_fw/spm/ffm/psa_api.c
index c11fce5..62ccb7c 100644
--- a/secure_fw/spm/ffm/psa_api.c
+++ b/secure_fw/spm/ffm/psa_api.c
@@ -15,9 +15,9 @@
 #include "load/partition_defs.h"
 #include "load/service_defs.h"
 #include "load/interrupt_defs.h"
-#include "psa_api.h"
 #include "utilities.h"
 #include "ffm/backend.h"
+#include "ffm/psa_api.h"
 #include "ffm/spm_error_base.h"
 #include "tfm_rpc.h"
 #include "tfm_spm_hal.h"
diff --git a/secure_fw/spm/include/ffm/backend.h b/secure_fw/spm/include/ffm/backend.h
index c53c35c..09fa256 100644
--- a/secure_fw/spm/include/ffm/backend.h
+++ b/secure_fw/spm/include/ffm/backend.h
@@ -9,9 +9,10 @@
 #define __BACKEND_H__
 
 #include <stdint.h>
-#include "psa/error.h"
 #include "spm_ipc.h"
+#include "tfm_arch.h"
 #include "load/spm_load_api.h"
+#include "psa/error.h"
 
 /* BASIC TYPE DEFINITIONS */
 
@@ -45,4 +46,8 @@
 extern struct partition_head_t partition_listhead;
 #define PARTITION_LIST_ADDR (&partition_listhead)
 
+/* TODO: Put this into NS Agent related service when available. */
+extern struct context_ctrl_t *p_spm_thread_context;
+#define SPM_THREAD_CONTEXT p_spm_thread_context
+
 #endif /* __BACKEND_H__ */
diff --git a/secure_fw/spm/ffm/psa_api.h b/secure_fw/spm/include/ffm/psa_api.h
similarity index 100%
rename from secure_fw/spm/ffm/psa_api.h
rename to secure_fw/spm/include/ffm/psa_api.h
diff --git a/secure_fw/spm/include/tfm_arch.h b/secure_fw/spm/include/tfm_arch.h
index f7740c2..40d1199 100644
--- a/secure_fw/spm/include/tfm_arch.h
+++ b/secure_fw/spm/include/tfm_arch.h
@@ -69,12 +69,6 @@
     struct tfm_state_context_t state_ctx; /* ctx on SVC_PREPARE_DEPRIV_FLIH */
 };
 
-__attribute__ ((always_inline))
-__STATIC_INLINE void tfm_arch_trigger_pendsv(void)
-{
-    SCB->ICSR = SCB_ICSR_PENDSVSET_Msk;
-}
-
 /**
  * \brief Get Link Register
  * \details Returns the value of the Link Register (LR)
@@ -147,4 +141,18 @@
  */
 uint32_t tfm_arch_refresh_hardware_context(void *p_ctx_ctrl);
 
+/*
+ * Triggers scheduler. A return type is assigned in case
+ * SPM returns values by the context.
+ */
+uint32_t tfm_arch_trigger_pendsv(void);
+
+
+/*
+ * Switch to a new stack area, lock the scheduler and call the function.
+ * If 'stk_base' is ZERO, the stack is not switched and the caller's
+ * stack is re-used.
+ */
+uint32_t arch_non_preempt_call(uintptr_t fn_addr, uintptr_t frame_addr,
+                               uint32_t stk_base, uint32_t stk_limit);
+
 #endif