core: Handle S-EL1 SP on separate thread

S-EL1 SPs need to run in their own thread context.
This is needed so that the context can be saved and restored when an
FF-A message is received or sent.

Signed-off-by: Jelle Sels <jelle.sels@arm.com>
Change-Id: I2730ab6b7abfe723e284747717cacb6227a408cd
diff --git a/core/arch/arm/include/kernel/pseudo_sp.h b/core/arch/arm/include/kernel/pseudo_sp.h
index 85635da..a0c648f 100644
--- a/core/arch/arm/include/kernel/pseudo_sp.h
+++ b/core/arch/arm/include/kernel/pseudo_sp.h
@@ -1,30 +1,29 @@
 /* SPDX-License-Identifier: BSD-2-Clause */
 /*
- * Copyright (c) 2020, Arm Limited.
+ * Copyright (c) 2020-2021, Arm Limited.
  */
 #ifndef KERNEL_PSEUDO_SP_H
 #define KERNEL_PSEUDO_SP_H
 
 #include <kernel/sp.h>
 #include <kernel/tee_ta_manager.h>
+#include <kernel/thread.h>
 #include <scattered_array.h>
 
 struct pseudo_sp_head {
 	TEE_UUID uuid;
 	const char *name;
-	uint32_t (*invoke_command_entry_point)(uint64_t session_id,
-					       uint64_t src_id,
-					       struct thread_smc_args *args);
+	uint32_t (*invoke_command_entry_point)(uint32_t session_id);
 };
-
 #define pseudo_sp_register(...)	\
 	SCATTERED_ARRAY_DEFINE_PG_ITEM(pseudo_sps, struct pseudo_sp_head) = \
-		{ __VA_ARGS__ }
+		{ __VA_ARGS__  }
 
 struct pseudo_sp_ctx {
 	const struct pseudo_sp_head *pseudo_sp;
 	struct tee_ta_ctx ctx;
 	struct sp_session  *sp_s;
+	struct thread_ctx_regs thread;
 };
 
 bool is_pseudo_sp_ctx(struct tee_ta_ctx *ctx);
@@ -32,4 +31,17 @@
 TEE_Result pseudo_sp_init_user_session(const TEE_UUID *uuid,
 				       struct tee_ta_session *s);
 
+static inline struct pseudo_sp_ctx *to_pseudo_sp_ctx(struct tee_ta_ctx *ctx)
+{
+	assert(is_pseudo_sp_ctx(ctx)); /* ctx must belong to a pseudo SP */
+	return container_of(ctx, struct pseudo_sp_ctx, ctx);
+}
+
+void pseudo_sp_ffa(struct sp_session  *sp_s, struct thread_smc_args *args);
+void pseudo_sp_error(struct thread_smc_args *args, struct sp_session  *s,
+		     uint32_t error);
+void sel1_ffa(struct thread_ctx_regs *dst_thread,
+	      struct thread_ctx_regs *src_thread);
+
+void *pseudo_sp_retrieve_mem(uint64_t global_handle);
 #endif /* KERNEL_PSEUDO_SP_H */
diff --git a/core/arch/arm/kernel/asm-defines.c b/core/arch/arm/kernel/asm-defines.c
index 505410e..11d88ed 100644
--- a/core/arch/arm/kernel/asm-defines.c
+++ b/core/arch/arm/kernel/asm-defines.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: BSD-2-Clause
 /*
  * Copyright (c) 2016, Linaro Limited
+ * Copyright (c) 2021, Arm Limited.
  */
 
 #include <gen-asm-defines.h>
@@ -69,6 +70,8 @@
 
 	/* struct thread_ctx_regs */
 	DEFINE(THREAD_CTX_REGS_SP, offsetof(struct thread_ctx_regs, sp));
+	DEFINE(THREAD_CTX_REGS_PC, offsetof(struct thread_ctx_regs, pc));
+	DEFINE(THREAD_CTX_REGS_CPSR, offsetof(struct thread_ctx_regs, cpsr));
 	DEFINE(THREAD_CTX_REGS_X0, offsetof(struct thread_ctx_regs, x[0]));
 	DEFINE(THREAD_CTX_REGS_X1, offsetof(struct thread_ctx_regs, x[1]));
 	DEFINE(THREAD_CTX_REGS_X2, offsetof(struct thread_ctx_regs, x[2]));
diff --git a/core/arch/arm/kernel/pseudo_sp.c b/core/arch/arm/kernel/pseudo_sp.c
index 2ef0f31..bb211cc 100644
--- a/core/arch/arm/kernel/pseudo_sp.c
+++ b/core/arch/arm/kernel/pseudo_sp.c
@@ -1,20 +1,26 @@
 // SPDX-License-Identifier: BSD-2-Clause
 /*
- * Copyright (c) 2020, Arm Limited
+ * Copyright (c) 2020-2021, Arm Limited
  */
 #include <initcall.h>
 #include <kernel/linker.h>
+#include <kernel/misc.h>
 #include <kernel/panic.h>
 #include <kernel/pseudo_sp.h>
 #include <kernel/tee_ta_manager.h>
+#include <ffa.h>
+#include <ffa_sp.h>
 #include <mm/core_memprot.h>
 #include <mm/mobj.h>
 #include <stdlib.h>
 #include <string.h>
 #include <trace.h>
 #include <types_ext.h>
+#include "thread_private.h"
 
 const struct tee_ta_ops *_pseudo_sp_ops;
+struct thread_ctx_regs *calling_thread[CFG_TEE_CORE_NB_CORE];
+
 TEE_Result tee_init_pseudo_sp_sessions(void)
 {
 	const struct pseudo_sp_head *sp;
@@ -53,6 +59,127 @@
 	return TEE_SUCCESS;
 }
 
+void pseudo_sp_error(struct thread_smc_args *args, struct sp_session  *s,
+		     uint32_t error)
+{
+	args->a0 = FFA_ERROR;
+	args->a2 = error;
+	/* Send the FFA_ERROR message (error code in a2) via pseudo_sp_ffa() */
+	pseudo_sp_ffa(s, args);
+}
+
+static uint32_t pseudo_sp_invoke(uint64_t session_id, uint64_t src_id,
+				 struct thread_smc_args *args)
+{
+	struct sp_session *sp_s = sp_get_session(session_id);
+	struct sp_session *caller_sp = NULL;
+	struct pseudo_sp_ctx *stc = NULL;
+	struct thread_ctx_regs current_thread = { };
+	size_t core_num = get_core_pos();
+
+	assert(core_num < CFG_TEE_CORE_NB_CORE);
+	/* sp_get_session() may fail: never dereference a NULL session */
+	if (!sp_s)
+		panic("pseudo_sp_invoke: unknown session");
+	stc = to_pseudo_sp_ctx(sp_s->s->ctx);
+	/* pseudo_sp_ffa() switches back to this context on this core */
+	calling_thread[core_num] = &current_thread;
+
+	/* Hand the FF-A message to the SP in its saved x0-x7 */
+	stc->thread.x[0] = args->a0;
+	stc->thread.x[1] = args->a1;
+	stc->thread.x[2] = args->a2;
+	stc->thread.x[3] = args->a3;
+	stc->thread.x[4] = args->a4;
+	stc->thread.x[5] = args->a5;
+	stc->thread.x[6] = args->a6;
+	stc->thread.x[7] = args->a7;
+
+	tee_ta_pop_current_session();
+	sel1_ffa(&stc->thread, &current_thread);
+
+	/* Resumed by the SP: its saved x0-x7 now hold the response */
+	args->a0 = stc->thread.x[0];
+	args->a1 = stc->thread.x[1];
+	args->a2 = stc->thread.x[2];
+	args->a3 = stc->thread.x[3];
+	args->a4 = stc->thread.x[4];
+	args->a5 = stc->thread.x[5];
+	args->a6 = stc->thread.x[6];
+	args->a7 = stc->thread.x[7];
+
+	if (args->a0 == FFA_ERROR)
+		ffa_send_error(args, args->a2, src_id);
+	/* Bits [31:16] of a1 select the session to make current again */
+	caller_sp = sp_get_session(((args->a1 >> 16) & 0xffff));
+	if (!caller_sp)
+		panic("pseudo_sp_invoke: unknown caller session");
+	tee_ta_push_current_session(caller_sp->s);
+	return 0;
+}
+
+void *pseudo_sp_retrieve_mem(uint64_t global_handle)
+{
+	struct mobj *m = mobj_ffa_get_by_cookie(global_handle, 0);
+	paddr_t pa = 0;
+
+	/* Returns NULL if the handle is unknown or has no physical address */
+	if (!m)
+		return NULL;
+	if (m->ops->get_pa(m, 0, SMALL_PAGE_SIZE, &pa))
+		return NULL;
+	return phys_to_virt(pa, MEM_AREA_TA_VASPACE);
+}
+
+void pseudo_sp_ffa(struct sp_session  *sp_s, struct thread_smc_args *args)
+{
+	struct pseudo_sp_ctx *stc = NULL;
+	size_t core_num = get_core_pos();
+	struct sp_session *caller_sp = NULL;
+
+	assert(core_num < CFG_TEE_CORE_NB_CORE);
+	stc = to_pseudo_sp_ctx(sp_s->s->ctx);
+
+	/* Leave the outgoing FF-A message in the SP's saved x0-x7 */
+	stc->thread.x[0] = args->a0;
+	stc->thread.x[1] = args->a1;
+	stc->thread.x[2] = args->a2;
+	stc->thread.x[3] = args->a3;
+	stc->thread.x[4] = args->a4;
+	stc->thread.x[5] = args->a5;
+	stc->thread.x[6] = args->a6;
+	stc->thread.x[7] = args->a7;
+	tee_ta_pop_current_session();
+	sel1_ffa(calling_thread[core_num], &stc->thread);
+	/* Resumed: the saved x0-x7 now hold the incoming message */
+	args->a0 = stc->thread.x[0];
+	args->a1 = stc->thread.x[1];
+	args->a2 = stc->thread.x[2];
+	args->a3 = stc->thread.x[3];
+	args->a4 = stc->thread.x[4];
+	args->a5 = stc->thread.x[5];
+	args->a6 = stc->thread.x[6];
+	args->a7 = stc->thread.x[7];
+
+	caller_sp = sp_get_session(((args->a1 >> 16) & 0xffff));
+	if (!caller_sp)
+		panic("pseudo_sp_ffa: unknown caller session");
+	tee_ta_push_current_session(caller_sp->s);
+}
+
+static void start(struct pseudo_sp_ctx *stc, void *fn, uint32_t session_id)
+{
+	size_t core_num = get_core_pos();
+	struct thread_ctx_regs current_thread = { };
+
+	assert(core_num < CFG_TEE_CORE_NB_CORE);
+	/* Context that pseudo_sp_ffa() switches back to on this core */
+	calling_thread[core_num] = &current_thread;
+	thread_sel1_sp_alloc(&stc->thread, (uint64_t)fn);
+	stc->thread.x[0] = session_id;
+	sel1_ffa(&stc->thread, &current_thread);
+}
+
 TEE_Result pseudo_sp_init_user_session(const TEE_UUID *uuid,
 				       struct tee_ta_session *s)
 {
@@ -84,9 +211,9 @@
 	s->ctx->ops = _pseudo_sp_ops;
 
 	sp_s->s = s;
-	sp_s->enter_sp = sp->invoke_command_entry_point;
+	sp_s->enter_sp = pseudo_sp_invoke;
 	sp_s->response_id = UNDEFINED_SP;
-	sp_s->state = sp_busy;
+	sp_s->state = sp_idle;
 
 	s->ctx->ref_count = 1;
 	stc->pseudo_sp = sp;
@@ -96,6 +223,8 @@
 	sp_sessions_list_head(&open_sessions);
 	TAILQ_INSERT_TAIL(open_sessions, sp_s, link);
 	DMSG("Pseudo SP %s (%pUl) loaded", sp->name, uuid);
+	start(stc, sp->invoke_command_entry_point, sp_s->s->id);
+
 	return TEE_SUCCESS;
 }
 
diff --git a/core/arch/arm/kernel/thread.c b/core/arch/arm/kernel/thread.c
index c161e84..64bb0b7 100644
--- a/core/arch/arm/kernel/thread.c
+++ b/core/arch/arm/kernel/thread.c
@@ -2,7 +2,7 @@
 /*
  * Copyright (c) 2016, Linaro Limited
  * Copyright (c) 2014, STMicroelectronics International N.V.
- * Copyright (c) 2020, Arm Limited
+ * Copyright (c) 2020-2021, Arm Limited
  */
 
 #include <platform_config.h>
@@ -18,6 +18,7 @@
 #include <kernel/lockdep.h>
 #include <kernel/misc.h>
 #include <kernel/panic.h>
+#include <kernel/pseudo_sp.h>
 #include <kernel/spinlock.h>
 #include <kernel/tee_ta_manager.h>
 #include <kernel/thread_defs.h>
@@ -128,6 +129,9 @@
 DECLARE_STACK(stack_abt, CFG_TEE_CORE_NB_CORE, STACK_ABT_SIZE, static);
 #ifndef CFG_WITH_PAGER
 DECLARE_STACK(stack_thread, CFG_NUM_THREADS, STACK_THREAD_SIZE, static);
+#ifdef CFG_NUM_PSP
+DECLARE_STACK(stack_psp, CFG_NUM_PSP, STACK_THREAD_SIZE, static);
+#endif
 #endif
 
 #define GET_STACK_TOP_HARD(stack, n) \
@@ -430,71 +434,72 @@
 }
 
 #ifdef ARM32
-static void init_regs(struct thread_ctx *thread, uint32_t a0, uint32_t a1,
+static void init_regs(struct thread_ctx_regs *thread, uint32_t a0, uint32_t a1,
 		      uint32_t a2, uint32_t a3, uint32_t a4, uint32_t a5,
-		      uint32_t a6, uint32_t a7)
+		      uint32_t a6, uint32_t a7, vaddr_t stack_va_end)
 {
-	thread->regs.pc = (uint32_t)thread_std_smc_entry;
+	thread->pc = (uint32_t)thread_std_smc_entry;
 
 	/*
 	 * Stdcalls starts in SVC mode with masked foreign interrupts, masked
 	 * Asynchronous abort and unmasked native interrupts.
 	 */
-	thread->regs.cpsr = read_cpsr() & ARM32_CPSR_E;
-	thread->regs.cpsr |= CPSR_MODE_SVC | CPSR_A |
+	thread->cpsr = read_cpsr() & ARM32_CPSR_E;
+	thread->cpsr |= CPSR_MODE_SVC | CPSR_A |
 			(THREAD_EXCP_FOREIGN_INTR << ARM32_CPSR_F_SHIFT);
 	/* Enable thumb mode if it's a thumb instruction */
-	if (thread->regs.pc & 1)
-		thread->regs.cpsr |= CPSR_T;
+	if (thread->pc & 1)
+		thread->cpsr |= CPSR_T;
 	/* Reinitialize stack pointer */
-	thread->regs.svc_sp = thread->stack_va_end;
+	thread->svc_sp = stack_va_end;
 
 	/*
 	 * Copy arguments into context. This will make the
 	 * arguments appear in r0-r7 when thread is started.
 	 */
-	thread->regs.r0 = a0;
-	thread->regs.r1 = a1;
-	thread->regs.r2 = a2;
-	thread->regs.r3 = a3;
-	thread->regs.r4 = a4;
-	thread->regs.r5 = a5;
-	thread->regs.r6 = a6;
-	thread->regs.r7 = a7;
+	thread->r0 = a0;
+	thread->r1 = a1;
+	thread->r2 = a2;
+	thread->r3 = a3;
+	thread->r4 = a4;
+	thread->r5 = a5;
+	thread->r6 = a6;
+	thread->r7 = a7;
 }
 #endif /*ARM32*/
 
 #ifdef ARM64
-static void init_regs(struct thread_ctx *thread, uint32_t a0, uint32_t a1,
+static void init_regs(struct thread_ctx_regs *thread, uint32_t a0, uint32_t a1,
 		      uint32_t a2, uint32_t a3, uint32_t a4, uint32_t a5,
-		      uint32_t a6, uint32_t a7)
+		      uint32_t a6, uint32_t a7, vaddr_t stack_va_end)
 {
-	thread->regs.pc = (uint64_t)thread_std_smc_entry;
+	thread->pc = (uint64_t)thread_std_smc_entry;
 
 	/*
 	 * Stdcalls starts in SVC mode with masked foreign interrupts, masked
 	 * Asynchronous abort and unmasked native interrupts.
 	 */
-	thread->regs.cpsr = SPSR_64(SPSR_64_MODE_EL1, SPSR_64_MODE_SP_EL0,
-				THREAD_EXCP_FOREIGN_INTR | DAIFBIT_ABT);
+	thread->cpsr = SPSR_64(SPSR_64_MODE_EL1, SPSR_64_MODE_SP_EL0,
+			       THREAD_EXCP_FOREIGN_INTR | DAIFBIT_ABT);
+
 	/* Reinitialize stack pointer */
-	thread->regs.sp = thread->stack_va_end;
+	thread->sp = stack_va_end;
 
 	/*
 	 * Copy arguments into context. This will make the
 	 * arguments appear in x0-x7 when thread is started.
 	 */
-	thread->regs.x[0] = a0;
-	thread->regs.x[1] = a1;
-	thread->regs.x[2] = a2;
-	thread->regs.x[3] = a3;
-	thread->regs.x[4] = a4;
-	thread->regs.x[5] = a5;
-	thread->regs.x[6] = a6;
-	thread->regs.x[7] = a7;
+	thread->x[0] = a0;
+	thread->x[1] = a1;
+	thread->x[2] = a2;
+	thread->x[3] = a3;
+	thread->x[4] = a4;
+	thread->x[5] = a5;
+	thread->x[6] = a6;
+	thread->x[7] = a7;
 
 	/* Set up frame pointer as per the Aarch64 AAPCS */
-	thread->regs.x[29] = 0;
+	thread->x[29] = 0;
 }
 #endif /*ARM64*/
 
@@ -547,7 +552,9 @@
 	l->curr_thread = n;
 
 	threads[n].flags = 0;
-	init_regs(threads + n, a0, a1, a2, a3, a4, a5, a6, a7);
+	init_regs(&threads[n].regs, a0, a1, a2, a3, a4, a5, a6, a7,
+		  threads[n].stack_va_end);
+
 	threads[n].regs.pc = function_id;
 
 	thread_lazy_save_ns_vfp();
@@ -571,6 +578,27 @@
 			       args->a5, args->a6, args->a7,
 			       (uint64_t)thread_sp_smc_entry);
 }
+
+void thread_sel1_sp_alloc(struct thread_ctx_regs *thread, uint64_t function_id)
+{
+#ifdef CFG_NUM_PSP
+	vaddr_t stack_va_end = 0;
+#ifdef CFG_WITH_PAGER
+	/* init_thread_pager_stack() already returns the top of the stack */
+	stack_va_end = init_thread_pager_stack();
+#else
+	static size_t stack_id;
+
+	/* Each call consumes one stack; never index past the last one */
+	if (stack_id >= CFG_NUM_PSP)
+		panic("Out of S-EL1 SP stacks");
+	stack_va_end = (vaddr_t)&stack_psp[stack_id++] + STACK_THREAD_SIZE;
+#endif
+	init_regs(thread, 0, 0, 0, 0, 0, 0, 0, 0, stack_va_end);
+	thread->pc = function_id;
+#endif
+}
+
 #else
 void __noreturn
 thread_sp_alloc_and_run(struct thread_smc_args *args __maybe_unused)
@@ -879,7 +907,6 @@
 {
 }
 #endif
-
 int thread_state_suspend(uint32_t flags, uint32_t cpsr, vaddr_t pc)
 {
 	struct thread_core_local *l = thread_get_core_local();
@@ -993,6 +1020,36 @@
 }
 
 #ifdef CFG_WITH_PAGER
+static vaddr_t init_thread_pager_stack(void)
+{
+	tee_mm_entry_t *mm = NULL;
+	vaddr_t sp = 0;
+	size_t num_pages = 0;
+	struct fobj *fobj = NULL;
+
+	/* Reserve vmem for one thread stack and its one-page protection gap */
+	mm = tee_mm_alloc(&tee_mm_vcore,
+			  SMALL_PAGE_SIZE + STACK_THREAD_SIZE);
+	assert(mm);
+
+	/* Claim eventual physical page */
+	tee_pager_add_pages(tee_mm_get_smem(mm), tee_mm_get_size(mm),
+			    true);
+
+	num_pages = tee_mm_get_bytes(mm) / SMALL_PAGE_SIZE - 1;
+	fobj = fobj_locked_paged_alloc(num_pages);
+
+	/* Add the stack pages (the gap page is skipped) to the pager */
+	tee_pager_add_core_area(tee_mm_get_smem(mm) + SMALL_PAGE_SIZE,
+				PAGER_AREA_TYPE_LOCK, fobj);
+	fobj_put(fobj);
+
+	/* The stack top returned to the caller is the end of the area */
+	sp = tee_mm_get_smem(mm) + tee_mm_get_bytes(mm);
+	asan_tag_access((void *)tee_mm_get_smem(mm), (void *)sp);
+
+	return sp;
+}
 static void init_thread_stacks(void)
 {
 	size_t n = 0;
@@ -1001,31 +1058,7 @@
 	 * Allocate virtual memory for thread stacks.
 	 */
 	for (n = 0; n < CFG_NUM_THREADS; n++) {
-		tee_mm_entry_t *mm = NULL;
-		vaddr_t sp = 0;
-		size_t num_pages = 0;
-		struct fobj *fobj = NULL;
-
-		/* Find vmem for thread stack and its protection gap */
-		mm = tee_mm_alloc(&tee_mm_vcore,
-				  SMALL_PAGE_SIZE + STACK_THREAD_SIZE);
-		assert(mm);
-
-		/* Claim eventual physical page */
-		tee_pager_add_pages(tee_mm_get_smem(mm), tee_mm_get_size(mm),
-				    true);
-
-		num_pages = tee_mm_get_bytes(mm) / SMALL_PAGE_SIZE - 1;
-		fobj = fobj_locked_paged_alloc(num_pages);
-
-		/* Add the area to the pager */
-		tee_pager_add_core_area(tee_mm_get_smem(mm) + SMALL_PAGE_SIZE,
-					PAGER_AREA_TYPE_LOCK, fobj);
-		fobj_put(fobj);
-
-		/* init effective stack */
-		sp = tee_mm_get_smem(mm) + tee_mm_get_bytes(mm);
-		asan_tag_access((void *)tee_mm_get_smem(mm), (void *)sp);
+		vaddr_t sp = init_thread_pager_stack();
 		if (!thread_init_stack(n, sp))
 			panic("init stack failed");
 	}
diff --git a/core/arch/arm/kernel/thread_private.h b/core/arch/arm/kernel/thread_private.h
index 416abd7..0932120 100644
--- a/core/arch/arm/kernel/thread_private.h
+++ b/core/arch/arm/kernel/thread_private.h
@@ -191,6 +191,7 @@
 void thread_unlock_global(void);
 
 void thread_sp_alloc_and_run(struct thread_smc_args *args);
+void thread_sel1_sp_alloc(struct thread_ctx_regs *thread, uint64_t fn);
 /*
  * Suspends current thread and temorarily exits to non-secure world.
  * This function returns later when non-secure world returns.
diff --git a/core/arch/arm/kernel/thread_spmc_a64.S b/core/arch/arm/kernel/thread_spmc_a64.S
index edb290c..88a820a 100644
--- a/core/arch/arm/kernel/thread_spmc_a64.S
+++ b/core/arch/arm/kernel/thread_spmc_a64.S
@@ -115,6 +115,46 @@
 
 END_FUNC thread_sp_smc_entry
 
+/*
+ * void sel1_ffa(struct thread_ctx_regs *dst_thread,
+ *               struct thread_ctx_regs *src_thread);
+ */
+
+/*
+ * sel1_ffa will jump between the src_thread and the dst_thread. We keep
+ * running on the same thread and only switch the registers.
+ */
+FUNC sel1_ffa , :
+	/* Store dst_thread (x0) and the link register on the stack */
+	push x0, lr
+
+	/* Store x19-x30 in src_thread */
+	store_xregs x1, THREAD_CTX_REGS_X19, 19, 30
+
+	/* Save DAIF, with the mode set to S-EL1, as the cpsr of src_thread */
+	mrs	x2, daif
+	orr	x2, x2, #(SPSR_64_MODE_EL1 << SPSR_64_MODE_EL_SHIFT)
+	str	x2, [x1, THREAD_CTX_REGS_CPSR]
+
+	/* Store the SP in src_thread */
+	mov	x2, sp
+	str	x2, [x1, #THREAD_CTX_REGS_SP]
+
+	/*
+	 * Set the PC in src_thread to .sel1_ffa_return. This address will be
+	 * used to resume.
+	 */
+	adr	x2, .sel1_ffa_return
+	str	x2, [x1, #THREAD_CTX_REGS_PC]
+
+	/* dst_thread is passed to thread_resume to resume the dst thread */
+	bl thread_resume
+.sel1_ffa_return:
+	/* Reached when src_thread is resumed: unwind the push from entry */
+	pop x1, lr
+
+	ret
+END_FUNC sel1_ffa
 
 /* void thread_rpc(struct thread_rpc_arg *rpc_arg) */
 FUNC thread_rpc , :
diff --git a/core/psp/sub.mk b/core/psp/sub.mk
index e69de29..bd250a7 100644
--- a/core/psp/sub.mk
+++ b/core/psp/sub.mk
@@ -0,0 +1 @@
+CFG_NUM_PSP ?= 0