ftrace: Add support for syscall function tracer

This patch adds support for syscall tracing in the TEE core. It
complements the existing ftrace support for user TAs by tracing the
syscalls that user TAs invoke into the TEE core.

After this patch, ftrace covers both TA and TEE core code, so rename the
config option from CFG_TA_FTRACE_SUPPORT to CFG_FTRACE_SUPPORT.

Syscall tracing is optional: enable it with the CFG_SYSCALL_FTRACE=y
config option in addition to the CFG_FTRACE_SUPPORT=y config option.

Signed-off-by: Sumit Garg <sumit.garg@linaro.org>
Reviewed-by: Jerome Forissier <jerome@forissier.org>
diff --git a/core/arch/arm/arm.mk b/core/arch/arm/arm.mk
index 5defb32..ac7d9df 100644
--- a/core/arch/arm/arm.mk
+++ b/core/arch/arm/arm.mk
@@ -149,7 +149,11 @@
 ifeq ($(CFG_UNWIND),y)
 core-platform-cflags += -funwind-tables
 endif
+ifeq ($(CFG_SYSCALL_FTRACE),y)
+core-platform-cflags += $(arm32-platform-cflags-generic-arm)
+else
 core-platform-cflags += $(arm32-platform-cflags-generic-thumb)
+endif
 core-platform-aflags += $(core_arm32-platform-aflags)
 core-platform-aflags += $(arm32-platform-aflags)
 endif
@@ -181,7 +185,7 @@
 # Thumb mode doesn't support function graph tracing due to missing
 # frame pointer support required to trace function call chain. So
 # rather compile in ARM mode if function tracing is enabled.
-ifeq ($(CFG_TA_FTRACE_SUPPORT),y)
+ifeq ($(CFG_FTRACE_SUPPORT),y)
 ta_arm32-platform-cflags += $(arm32-platform-cflags-generic-arm)
 else
 ta_arm32-platform-cflags += $(arm32-platform-cflags-generic-thumb)
diff --git a/core/arch/arm/include/kernel/misc.h b/core/arch/arm/include/kernel/misc.h
index 700e350..943cc57 100644
--- a/core/arch/arm/include/kernel/misc.h
+++ b/core/arch/arm/include/kernel/misc.h
@@ -13,7 +13,7 @@
 
 size_t __get_core_pos(void);
 
-static inline size_t get_core_pos(void)
+static inline size_t __noprof get_core_pos(void)
 {
 	/*
 	 * Foreign interrupts must be disabled before playing with current
diff --git a/core/arch/arm/include/kernel/user_ta.h b/core/arch/arm/include/kernel/user_ta.h
index 72d3ac8..83367e8 100644
--- a/core/arch/arm/include/kernel/user_ta.h
+++ b/core/arch/arm/include/kernel/user_ta.h
@@ -45,7 +45,7 @@
 struct user_ta_ctx {
 	uaddr_t entry_func;
 	uaddr_t dump_entry_func;
-#ifdef CFG_TA_FTRACE_SUPPORT
+#ifdef CFG_FTRACE_SUPPORT
 	uaddr_t ftrace_entry_func;
 #endif
 	uaddr_t dl_entry_func;
diff --git a/core/arch/arm/include/mm/core_mmu.h b/core/arch/arm/include/mm/core_mmu.h
index 74bf87f..95ab74f 100644
--- a/core/arch/arm/include/mm/core_mmu.h
+++ b/core/arch/arm/include/mm/core_mmu.h
@@ -303,7 +303,7 @@
 #ifdef CFG_WITH_LPAE
 bool core_mmu_user_va_range_is_defined(void);
 #else
-static inline bool core_mmu_user_va_range_is_defined(void)
+static inline bool __noprof core_mmu_user_va_range_is_defined(void)
 {
 	return true;
 }
diff --git a/core/arch/arm/include/sm/sm.h b/core/arch/arm/include/sm/sm.h
index 0925e0e..a03c920 100644
--- a/core/arch/arm/include/sm/sm.h
+++ b/core/arch/arm/include/sm/sm.h
@@ -37,7 +37,7 @@
 #ifdef CFG_SM_NO_CYCLE_COUNTING
 	uint32_t pmcr;
 #endif
-#ifdef CFG_TA_FTRACE_SUPPORT
+#ifdef CFG_FTRACE_SUPPORT
 	uint32_t cntkctl;
 	uint32_t pad;
 #endif
diff --git a/core/arch/arm/kernel/abort.c b/core/arch/arm/kernel/abort.c
index 33db5f4..f851c68 100644
--- a/core/arch/arm/kernel/abort.c
+++ b/core/arch/arm/kernel/abort.c
@@ -257,8 +257,12 @@
 
 	s->ctx->ops->dump_state(s->ctx);
 
-	if (s->ctx->ops->dump_ftrace)
+#if defined(CFG_FTRACE_SUPPORT)
+	if (s->ctx->ops->dump_ftrace) {
+		s->fbuf = NULL;
 		s->ctx->ops->dump_ftrace(s->ctx);
+	}
+#endif
 }
 
 static void save_abort_info_in_tsd(struct abort_info *ai)
diff --git a/core/arch/arm/kernel/sub.mk b/core/arch/arm/kernel/sub.mk
index 07be2f6..7caa056 100644
--- a/core/arch/arm/kernel/sub.mk
+++ b/core/arch/arm/kernel/sub.mk
@@ -61,3 +61,18 @@
 srcs-y += link_dummies.c
 
 asm-defines-y += asm-defines.c
+
+ifeq ($(CFG_SYSCALL_FTRACE),y)
+# Do not profile thread.c, as it provides common APIs that the ftrace
+# framework needs in order to trace syscalls. Profiling this file could
+# create incorrect cyclic behaviour.
+cflags-remove-thread.c-y += -pg
+cflags-remove-spin_lock_debug.c-$(CFG_TEE_CORE_DEBUG) += -pg
+# Tracing abort dump files corrupts the stack trace. So exclude them
+# from profiling.
+cflags-remove-abort.c-y += -pg
+ifeq ($(CFG_UNWIND),y)
+cflags-remove-unwind_arm32.c-y += -pg
+cflags-remove-unwind_arm64.c-$(CFG_ARM64_core) += -pg
+endif
+endif
diff --git a/core/arch/arm/kernel/thread.c b/core/arch/arm/kernel/thread.c
index 9de12d3..edda003 100644
--- a/core/arch/arm/kernel/thread.c
+++ b/core/arch/arm/kernel/thread.c
@@ -483,6 +483,38 @@
 }
 #endif
 
+#ifdef CFG_SYSCALL_FTRACE /* Suspend/resume syscall tracing of top session */
+static void __noprof ftrace_suspend(void)
+{
+	struct tee_ta_session *s = TAILQ_FIRST(&thread_get_tsd()->sess_stack);
+
+	if (!s)
+		return;	/* Session stack is empty */
+
+	if (s->fbuf)
+		s->fbuf->syscall_trace_suspended = true;
+}
+
+static void __noprof ftrace_resume(void)
+{
+	struct tee_ta_session *s = TAILQ_FIRST(&thread_get_tsd()->sess_stack);
+
+	if (!s)
+		return;	/* Session stack is empty */
+
+	if (s->fbuf)
+		s->fbuf->syscall_trace_suspended = false;
+}
+#else /* !CFG_SYSCALL_FTRACE: both hooks are no-ops */
+static void __noprof ftrace_suspend(void)
+{
+}
+
+static void __noprof ftrace_resume(void)
+{
+}
+#endif /* CFG_SYSCALL_FTRACE */
+
 static bool is_user_mode(struct thread_ctx_regs *regs)
 {
 	return is_from_user((uint32_t)regs->cpsr);
@@ -531,6 +563,9 @@
 
 	thread_lazy_save_ns_vfp();
 
+	if (threads[n].have_user_map)
+		ftrace_resume();
+
 	thread_resume(&threads[n].regs);
 	/*NOTREACHED*/
 	panic();
@@ -659,6 +694,9 @@
 
 	assert(ct != -1);
 
+	if (core_mmu_user_mapping_is_active())
+		ftrace_suspend();
+
 	thread_check_canaries();
 
 	release_unused_kernel_stack(threads + ct, cpsr);
@@ -972,7 +1010,7 @@
 
 	thread_init_vbar(get_excp_vect());
 
-#ifdef CFG_TA_FTRACE_SUPPORT
+#ifdef CFG_FTRACE_SUPPORT
 	/*
 	 * Enable accesses to frequency register and physical counter
 	 * register in EL0/PL0 required for timestamping during
diff --git a/core/arch/arm/kernel/unwind_arm64.c b/core/arch/arm/kernel/unwind_arm64.c
index 6398a3e..19cfe0f 100644
--- a/core/arch/arm/kernel/unwind_arm64.c
+++ b/core/arch/arm/kernel/unwind_arm64.c
@@ -33,9 +33,11 @@
 #include <kernel/thread.h>
 #include <kernel/unwind.h>
 #include <kernel/tee_misc.h>
+#include <kernel/tee_ta_manager.h>
 #include <string.h>
 #include <tee/tee_svc.h>
 #include <trace.h>
+#include <user_ta_header.h>
 #include <util.h>
 
 #include "unwind_private.h"
@@ -45,6 +47,41 @@
 	memcpy(reg, (void *)addr, sizeof(*reg));
 }
 
+#ifdef CFG_SYSCALL_FTRACE
+static void ftrace_core_map_lr(uint64_t *lr)
+{
+	struct ftrace_buf *fbuf = NULL;
+	struct tee_ta_session *s = NULL;
+
+	if (tee_ta_get_current_session(&s) != TEE_SUCCESS)
+		return;	/* Could not get the current session */
+
+	if (!s->fbuf)
+		return;	/* Session has no ftrace buffer */
+
+	fbuf = s->fbuf;
+
+	/*
+	 * The function tracer installs its return hook (&__ftrace_return)
+	 * by overwriting lr values saved in the stack frames. A stack
+	 * trace taken during an abort therefore sees the hooked lr values,
+	 * which must be replaced with the original ones kept in the ftrace
+	 * return stack. Only an lr that really equals the hook is mapped,
+	 * since a partially profiled TA leaves other frames untouched;
+	 * lr_idx counts the hooked lr values already mapped here.
+	 */
+	if ((*lr == (uint64_t)&__ftrace_return) &&
+	    fbuf->lr_idx < fbuf->ret_idx) {
+		fbuf->lr_idx++;
+		*lr = fbuf->ret_stack[fbuf->ret_idx - fbuf->lr_idx];
+	}
+}
+#else /* !CFG_SYSCALL_FTRACE: lr is left unchanged */
+static void ftrace_core_map_lr(uint64_t *lr __unused)
+{
+}
+#endif /* CFG_SYSCALL_FTRACE */
+
 bool unwind_stack_arm64(struct unwind_state_arm64 *frame,
 			vaddr_t stack, size_t stack_size)
 {
@@ -59,6 +96,9 @@
 	copy_in_reg(&frame->fp, fp);
 	/* LR (X30) */
 	copy_in_reg(&frame->pc, fp + 8);
+
+	ftrace_core_map_lr(&frame->pc);
+
 	frame->pc -= 4;
 
 	return true;
diff --git a/core/arch/arm/kernel/user_ta.c b/core/arch/arm/kernel/user_ta.c
index 9fff2c0..dd5724e 100644
--- a/core/arch/arm/kernel/user_ta.c
+++ b/core/arch/arm/kernel/user_ta.c
@@ -250,7 +250,7 @@
 	utc->stack_ptr = arg->stack_ptr;
 	utc->ctx.flags = arg->flags;
 	utc->dump_entry_func = arg->dump_entry;
-#ifdef CFG_TA_FTRACE_SUPPORT
+#ifdef CFG_FTRACE_SUPPORT
 	utc->ftrace_entry_func = arg->ftrace_entry;
 	sess->fbuf = arg->fbuf;
 #endif
@@ -455,7 +455,7 @@
 	dump_state_no_ldelf_dbg(utc);
 }
 
-#ifdef CFG_TA_FTRACE_SUPPORT
+#ifdef CFG_FTRACE_SUPPORT
 static TEE_Result dump_ftrace(struct user_ta_ctx *utc, void *buf, size_t *blen)
 {
 	uaddr_t usr_stack = utc->ldelf_stack_ptr;
@@ -557,7 +557,7 @@
 out_free_pl:
 	thread_rpc_free_payload(mobj);
 }
-#endif /*CFG_TA_FTRACE_SUPPORT*/
+#endif /*CFG_FTRACE_SUPPORT*/
 
 static void free_utc(struct user_ta_ctx *utc)
 {
@@ -599,7 +599,7 @@
 	.enter_invoke_cmd = user_ta_enter_invoke_cmd,
 	.enter_close_session = user_ta_enter_close_session,
 	.dump_state = user_ta_dump_state,
-#ifdef CFG_TA_FTRACE_SUPPORT
+#ifdef CFG_FTRACE_SUPPORT
 	.dump_ftrace = user_ta_dump_ftrace,
 #endif
 	.destroy = user_ta_ctx_destroy,
diff --git a/core/arch/arm/mm/sub.mk b/core/arch/arm/mm/sub.mk
index c09fe59..39d9c1f 100644
--- a/core/arch/arm/mm/sub.mk
+++ b/core/arch/arm/mm/sub.mk
@@ -10,3 +10,15 @@
 srcs-y += pgt_cache.c
 srcs-y += mobj.c
 srcs-$(CFG_CORE_DYN_SHM) += mobj_dyn_shm.c
+
+ifeq ($(CFG_SYSCALL_FTRACE),y)
+# Do not profile the MMU APIs: they are used to switch TA context, and
+# tracing them may cause undesired behaviour since ftrace requires the TA
+# context to be active. Moreover, the profiling code itself uses some MMU
+# APIs to check whether the TA context is active.
+ifeq ($(CFG_WITH_LPAE),y)
+cflags-remove-core_mmu_lpae.c-y += -pg
+else
+cflags-remove-core_mmu_v7.c-y += -pg
+endif
+endif
diff --git a/core/arch/arm/sm/sm_a32.S b/core/arch/arm/sm/sm_a32.S
index 24dd8ce..5b8b093 100644
--- a/core/arch/arm/sm/sm_a32.S
+++ b/core/arch/arm/sm/sm_a32.S
@@ -45,7 +45,7 @@
 	stm	r0!, {r2}
 #endif
 
-#ifdef CFG_TA_FTRACE_SUPPORT
+#ifdef CFG_FTRACE_SUPPORT
 	read_cntkctl r2
 	stm	r0!, {r2}
 #endif
@@ -82,7 +82,7 @@
 	write_pmcr r2
 #endif
 
-#ifdef CFG_TA_FTRACE_SUPPORT
+#ifdef CFG_FTRACE_SUPPORT
 	ldm	r0!, {r2}
 	write_cntkctl r2
 #endif
diff --git a/core/arch/arm/tee/arch_svc.c b/core/arch/arm/tee/arch_svc.c
index 951497e..79ef6c3 100644
--- a/core/arch/arm/tee/arch_svc.c
+++ b/core/arch/arm/tee/arch_svc.c
@@ -132,6 +132,49 @@
 }
 #endif
 
+#ifdef CFG_SYSCALL_FTRACE
+static void __noprof ftrace_syscall_enter(size_t num)
+{
+	struct tee_ta_session *s = NULL;
+
+	/*
+	 * Syscalls for inter-TA communication can't be traced in the caller
+	 * TA's ftrace buffer because they switch to the callee TA's context.
+	 * If needed, ftrace can be enabled for the callee TA so that its
+	 * function trace is dumped in the callee's own ftrace buffer.
+	 */
+	if (num == TEE_SCN_OPEN_TA_SESSION || num == TEE_SCN_CLOSE_TA_SESSION ||
+	    num == TEE_SCN_INVOKE_TA_COMMAND)
+		return;
+
+	s = TAILQ_FIRST(&thread_get_tsd()->sess_stack);
+	if (!s)
+		return;	/* Session stack is empty */
+
+	if (s->fbuf)
+		s->fbuf->syscall_trace_enabled = true;
+}
+
+static void __noprof ftrace_syscall_leave(void)
+{
+	struct tee_ta_session *s = TAILQ_FIRST(&thread_get_tsd()->sess_stack);
+
+	if (!s)
+		return;	/* Session stack is empty */
+
+	if (s->fbuf)
+		s->fbuf->syscall_trace_enabled = false;
+}
+#else /* !CFG_SYSCALL_FTRACE: both hooks are no-ops */
+static void __noprof ftrace_syscall_enter(size_t num __unused)
+{
+}
+
+static void __noprof ftrace_syscall_leave(void)
+{
+}
+#endif /* CFG_SYSCALL_FTRACE */
+
 #ifdef ARM32
 static void get_scn_max_args(struct thread_svc_regs *regs, size_t *scn,
 		size_t *max_args)
@@ -211,8 +254,12 @@
 	else
 		scf = tee_svc_syscall_table[scn].fn;
 
+	ftrace_syscall_enter(scn);
+
 	set_svc_retval(regs, tee_svc_do_call(regs, scf));
 
+	ftrace_syscall_leave();
+
 	if (scn != TEE_SCN_RETURN) {
 		/* We're about to switch back to user mode */
 		tee_ta_update_session_utime_resume();
diff --git a/core/core.mk b/core/core.mk
index a51cf84..1532b55 100644
--- a/core/core.mk
+++ b/core/core.mk
@@ -43,6 +43,9 @@
 		   --param asan-instrumentation-with-call-threshold=0
 cflags$(sm)	+= $(cflags_kasan)
 endif
+ifeq ($(CFG_SYSCALL_FTRACE),y)
+cflags$(sm)	+= -pg
+endif
 aflags$(sm)	+= $(core-platform-aflags)
 
 cppflags$(sm) += -DTRACE_LEVEL=$(CFG_TEE_CORE_LOG_LEVEL)
diff --git a/core/include/kernel/tee_ta_manager.h b/core/include/kernel/tee_ta_manager.h
index 3abb582..08f9ca3 100644
--- a/core/include/kernel/tee_ta_manager.h
+++ b/core/include/kernel/tee_ta_manager.h
@@ -107,7 +107,7 @@
 #if defined(CFG_TA_GPROF_SUPPORT)
 	struct sample_buf *sbuf; /* Profiling data (PC sampling) */
 #endif
-#if defined(CFG_TA_FTRACE_SUPPORT)
+#if defined(CFG_FTRACE_SUPPORT)
 	struct ftrace_buf *fbuf; /* ftrace buffer */
 #endif
 };
@@ -172,7 +172,7 @@
 static inline void tee_ta_update_session_utime_resume(void) {}
 static inline void tee_ta_gprof_sample_pc(vaddr_t pc __unused) {}
 #endif
-#if defined(CFG_TA_FTRACE_SUPPORT)
+#if defined(CFG_FTRACE_SUPPORT)
 void tee_ta_ftrace_update_times_suspend(void);
 void tee_ta_ftrace_update_times_resume(void);
 #else
diff --git a/core/kernel/tee_ta_manager.c b/core/kernel/tee_ta_manager.c
index 3740dff..ea0f602 100644
--- a/core/kernel/tee_ta_manager.c
+++ b/core/kernel/tee_ta_manager.c
@@ -277,9 +277,10 @@
 static void destroy_session(struct tee_ta_session *s,
 			    struct tee_ta_session_head *open_sessions)
 {
-#if defined(CFG_TA_FTRACE_SUPPORT)
+#if defined(CFG_FTRACE_SUPPORT)
 	if (s->ctx && s->ctx->ops->dump_ftrace) {
 		tee_ta_push_current_session(s);
+		s->fbuf = NULL;
 		s->ctx->ops->dump_ftrace(s->ctx);
 		tee_ta_pop_current_session();
 	}
@@ -947,7 +948,7 @@
 }
 #endif
 
-#if defined(CFG_TA_FTRACE_SUPPORT)
+#if defined(CFG_FTRACE_SUPPORT)
 static void ftrace_update_times(bool suspend)
 {
 	struct tee_ta_session *s = NULL;
diff --git a/ldelf/ftrace.c b/ldelf/ftrace.c
index 2da68ef..4aa1116 100644
--- a/ldelf/ftrace.c
+++ b/ldelf/ftrace.c
@@ -57,6 +57,8 @@
 	fbuf->buf_off = fbuf->head_off + count;
 	fbuf->curr_size = 0;
 	fbuf->max_size = fbuf_size - sizeof(struct ftrace_buf) - count;
+	fbuf->syscall_trace_enabled = false;
+	fbuf->syscall_trace_suspended = false;
 
 	*fbuf_ptr = fbuf;
 
diff --git a/ldelf/ftrace.h b/ldelf/ftrace.h
index f7569e6..83dcf4a 100644
--- a/ldelf/ftrace.h
+++ b/ldelf/ftrace.h
@@ -9,7 +9,7 @@
 #include <types_ext.h>
 #include <user_ta_header.h>
 
-#ifdef CFG_TA_FTRACE_SUPPORT
+#ifdef CFG_FTRACE_SUPPORT
 bool ftrace_init(struct ftrace_buf **fbuf_ptr);
 void ftrace_copy_buf(void *pctx, void (*copy_func)(void *pctx, void *b,
 						   size_t bl));
diff --git a/ldelf/main.c b/ldelf/main.c
index ee4da5c..086917c 100644
--- a/ldelf/main.c
+++ b/ldelf/main.c
@@ -49,7 +49,7 @@
 	sys_return_cleanup();
 }
 
-#ifdef CFG_TA_FTRACE_SUPPORT
+#ifdef CFG_FTRACE_SUPPORT
 struct print_buf_ctx {
 	char *buf;
 	size_t blen;
@@ -159,7 +159,7 @@
 	ta_elf_finalize_load_main(&arg->entry_func);
 
 	arg->ftrace_entry = 0;
-#ifdef CFG_TA_FTRACE_SUPPORT
+#ifdef CFG_FTRACE_SUPPORT
 	if (ftrace_init(&arg->fbuf))
 		arg->ftrace_entry = (vaddr_t)(void *)ftrace_dump;
 #endif
diff --git a/ldelf/sub.mk b/ldelf/sub.mk
index d1f9ae9..57f444c 100644
--- a/ldelf/sub.mk
+++ b/ldelf/sub.mk
@@ -8,4 +8,4 @@
 srcs-y += ta_elf_rel.c
 srcs-$(CFG_UNWIND) += unwind_arm32.c
 srcs-$(CFG_UNWIND) += unwind_arm64.c
-srcs-$(CFG_TA_FTRACE_SUPPORT) += ftrace.c
+srcs-$(CFG_FTRACE_SUPPORT) += ftrace.c
diff --git a/lib/libutee/include/user_ta_header.h b/lib/libutee/include/user_ta_header.h
index 7ae0f40..f9e7c07 100644
--- a/lib/libutee/include/user_ta_header.h
+++ b/lib/libutee/include/user_ta_header.h
@@ -34,7 +34,7 @@
 	uint64_t depr_entry;
 };
 
-#if defined(CFG_TA_FTRACE_SUPPORT)
+#if defined(CFG_FTRACE_SUPPORT)
 #define FTRACE_RETFUNC_DEPTH		50
 union compat_ptr {
 	uint64_t ptr64;
@@ -61,6 +61,8 @@
 	uint32_t max_size;	/* Max allowed size of ftrace buffer */
 	uint32_t head_off;	/* Ftrace buffer header offset */
 	uint32_t buf_off;	/* Ftrace buffer offset */
+	bool syscall_trace_enabled; /* Some syscalls are never traced */
+	bool syscall_trace_suspended; /* By foreign interrupt or RPC */
 };
 
 /* Defined by the linker script */
diff --git a/lib/libutils/ext/arch/arm/mcount_a32.S b/lib/libutils/ext/arch/arm/mcount_a32.S
index 2daed91..b49421a 100644
--- a/lib/libutils/ext/arch/arm/mcount_a32.S
+++ b/lib/libutils/ext/arch/arm/mcount_a32.S
@@ -5,7 +5,7 @@
 
 #include <asm.S>
 
-#if defined(CFG_TA_GPROF_SUPPORT) || defined(CFG_TA_FTRACE_SUPPORT)
+#if defined(CFG_TA_GPROF_SUPPORT) || defined(CFG_FTRACE_SUPPORT)
 
 /*
  * Convert return address to call site address by subtracting the size of the
@@ -32,7 +32,7 @@
 	mcount_adj_pc	r1, lr			/* instrumented func */
 	bl		__mcount_internal
 #endif
-#ifdef CFG_TA_FTRACE_SUPPORT
+#ifdef CFG_FTRACE_SUPPORT
 	/* Get instrumented function's pc value */
 	ldr		r0, [sp, #16]
 	mcount_adj_pc	r0, r0
@@ -44,7 +44,7 @@
 	bx		ip
 END_FUNC __gnu_mcount_nc
 
-#ifdef CFG_TA_FTRACE_SUPPORT
+#ifdef CFG_FTRACE_SUPPORT
 FUNC __ftrace_return, :
 	/* save return value regs */
 	stmdb		sp!, {r0-r3}
@@ -59,4 +59,4 @@
 END_FUNC __ftrace_return
 #endif
 
-#endif /* CFG_TA_GPROF_SUPPORT || CFG_TA_FTRACE_SUPPORT */
+#endif /* CFG_TA_GPROF_SUPPORT || CFG_FTRACE_SUPPORT */
diff --git a/lib/libutils/ext/arch/arm/mcount_a64.S b/lib/libutils/ext/arch/arm/mcount_a64.S
index 4bdead1..147e6bd 100644
--- a/lib/libutils/ext/arch/arm/mcount_a64.S
+++ b/lib/libutils/ext/arch/arm/mcount_a64.S
@@ -5,7 +5,7 @@
 
 #include <asm.S>
 
-#if defined(CFG_TA_GPROF_SUPPORT) || defined(CFG_TA_FTRACE_SUPPORT)
+#if defined(CFG_TA_GPROF_SUPPORT) || defined(CFG_FTRACE_SUPPORT)
 
 /*
  * Convert return address to call site address by subtracting the size of one
@@ -47,7 +47,7 @@
 	adjust_pc	x1, x30
 	bl		__mcount_internal
 #endif
-#ifdef CFG_TA_FTRACE_SUPPORT
+#ifdef CFG_FTRACE_SUPPORT
 	get_pc		x0
 	get_lr_addr	x1
 	bl		ftrace_enter
@@ -56,7 +56,7 @@
 	ret
 END_FUNC _mcount
 
-#ifdef CFG_TA_FTRACE_SUPPORT
+#ifdef CFG_FTRACE_SUPPORT
 FUNC __ftrace_return, :
 	/* Save return value regs */
 	sub		sp, sp, #64
@@ -80,4 +80,4 @@
 END_FUNC __ftrace_return
 #endif
 
-#endif /* CFG_TA_GPROF_SUPPORT || CFG_TA_FTRACE_SUPPORT */
+#endif /* CFG_TA_GPROF_SUPPORT || CFG_FTRACE_SUPPORT */
diff --git a/lib/libutils/ext/ftrace/ftrace.c b/lib/libutils/ext/ftrace/ftrace.c
index 60fcf64..8393fb3 100644
--- a/lib/libutils/ext/ftrace/ftrace.c
+++ b/lib/libutils/ext/ftrace/ftrace.c
@@ -15,6 +15,9 @@
 #if defined(__KERNEL__)
 #include <arm.h>
 #include <kernel/panic.h>
+#include <kernel/tee_ta_manager.h>
+#include <kernel/thread.h>
+#include <mm/core_mmu.h>
 #else
 #include <arm_user_sysreg.h>
 #include <setjmp.h>
@@ -29,7 +32,28 @@
 static __noprof struct ftrace_buf *get_fbuf(void)
 {
 #if defined(__KERNEL__)
-	return NULL;
+	int ct = thread_get_id_may_fail();
+	struct tee_ta_session *s = NULL;
+	struct thread_specific_data *tsd = NULL;
+
+	if (ct == -1)
+		return NULL;
+
+	if (!(core_mmu_user_va_range_is_defined() &&
+	      core_mmu_user_mapping_is_active()))
+		return NULL;
+
+	tsd = thread_get_tsd();
+	s = TAILQ_FIRST(&tsd->sess_stack);
+
+	if (!s || tsd->ctx != s->ctx)
+		return NULL;
+
+	if (s->fbuf && s->fbuf->syscall_trace_enabled &&
+	    !s->fbuf->syscall_trace_suspended)
+		return s->fbuf;
+	else
+		return NULL;
 #else
 	return &__ftrace_buf_start;
 #endif
diff --git a/lib/libutils/ext/ftrace/sub.mk b/lib/libutils/ext/ftrace/sub.mk
index eb59091..a813617 100644
--- a/lib/libutils/ext/ftrace/sub.mk
+++ b/lib/libutils/ext/ftrace/sub.mk
@@ -1,5 +1,5 @@
 cppflags-y += -I$(sub-dir)/../../..
 
 ifneq ($(sm),ldelf) # TA, core
-srcs-$(CFG_TA_FTRACE_SUPPORT) += ftrace.c
+srcs-$(CFG_FTRACE_SUPPORT) += ftrace.c
 endif
diff --git a/lib/libutils/isoc/arch/arm/setjmp_a32.S b/lib/libutils/isoc/arch/arm/setjmp_a32.S
index 460a321..43ea593 100644
--- a/lib/libutils/isoc/arch/arm/setjmp_a32.S
+++ b/lib/libutils/isoc/arch/arm/setjmp_a32.S
@@ -216,7 +216,7 @@
 #endif
 #endif		
 
-#ifdef CFG_TA_FTRACE_SUPPORT
+#ifdef CFG_FTRACE_SUPPORT
 	stmdb		sp!, { lr }
 	/*
 	 * As ftrace is supported in ARM mode only, so hardcode jmp_buf
@@ -240,7 +240,7 @@
 
 	/* If we have stack extension code it ought to be handled here.  */
 	
-#ifdef CFG_TA_FTRACE_SUPPORT
+#ifdef CFG_FTRACE_SUPPORT
 	stmdb		sp!, { a1, a2, lr }
 	/*
 	 * As ftrace is supported in ARM mode only, so hardcode jmp_buf
diff --git a/lib/libutils/isoc/arch/arm/setjmp_a64.S b/lib/libutils/isoc/arch/arm/setjmp_a64.S
index 7f79825..0859618 100644
--- a/lib/libutils/isoc/arch/arm/setjmp_a64.S
+++ b/lib/libutils/isoc/arch/arm/setjmp_a64.S
@@ -51,7 +51,7 @@
 #define REG_ONE(REG1, OFFS)		str REG1, [x0, OFFS]
 	GPR_LAYOUT
 	FPR_LAYOUT
-#ifdef CFG_TA_FTRACE_SUPPORT
+#ifdef CFG_FTRACE_SUPPORT
 	stp	x29, x30, [sp, #-16]!
 	mov	x29, sp
 	add	x0, x0, #104
@@ -70,7 +70,7 @@
 longjmp:
 #define REG_PAIR(REG1, REG2, OFFS)	ldp REG1, REG2, [x0, OFFS]
 #define REG_ONE(REG1, OFFS)		ldr REG1, [x0, OFFS]
-#ifdef CFG_TA_FTRACE_SUPPORT
+#ifdef CFG_FTRACE_SUPPORT
 	stp	x0, x1, [sp, #-16]!
 	stp	x29, x30, [sp, #-16]!
 	mov	x29, sp
diff --git a/lib/libutils/isoc/include/setjmp.h b/lib/libutils/isoc/include/setjmp.h
index d9814fb..4971daf 100644
--- a/lib/libutils/isoc/include/setjmp.h
+++ b/lib/libutils/isoc/include/setjmp.h
@@ -57,7 +57,7 @@
 void longjmp(jmp_buf env, int val);
 int setjmp(jmp_buf env);
 
-#ifdef CFG_TA_FTRACE_SUPPORT
+#ifdef CFG_FTRACE_SUPPORT
 void ftrace_longjmp(unsigned int *ret_idx);
 void ftrace_setjmp(unsigned int *ret_idx);
 #endif
diff --git a/mk/config.mk b/mk/config.mk
index bae185f..7485800 100644
--- a/mk/config.mk
+++ b/mk/config.mk
@@ -355,7 +355,7 @@
 # instrumented with GCC's -pg flag and will output function tracing
 # information in ftrace.out format to /tmp/ftrace-<ta_uuid>.out (path is
 # defined in tee-supplicant)
-CFG_TA_FTRACE_SUPPORT ?= n
+CFG_FTRACE_SUPPORT ?= n
 
 # Function tracing: unit to be used when displaying durations
 #  0: always display durations in microseconds
@@ -363,14 +363,21 @@
 #     display it in milliseconds
 CFG_FTRACE_US_MS ?= 10000
 
+# Core syscall function tracing.
+# When this option is enabled, OP-TEE core is instrumented with GCC's
+# -pg flag and will output syscall function graph in user TA ftrace
+# buffer
+CFG_SYSCALL_FTRACE ?= n
+$(call cfg-depends-all,CFG_SYSCALL_FTRACE,CFG_FTRACE_SUPPORT)
+
 # Enable to compile user TA libraries with profiling (-pg).
-# Depends on CFG_TA_GPROF_SUPPORT or CFG_TA_FTRACE_SUPPORT.
+# Depends on CFG_TA_GPROF_SUPPORT or CFG_FTRACE_SUPPORT.
 CFG_ULIBS_MCOUNT ?= n
 # Profiling/tracing of syscall wrapper (utee_*)
 CFG_SYSCALL_WRAPPERS_MCOUNT ?= $(CFG_ULIBS_MCOUNT)
 
 ifeq (y,$(filter y,$(CFG_ULIBS_MCOUNT) $(CFG_SYSCALL_WRAPPERS_MCOUNT)))
-ifeq (,$(filter y,$(CFG_TA_GPROF_SUPPORT) $(CFG_TA_FTRACE_SUPPORT)))
+ifeq (,$(filter y,$(CFG_TA_GPROF_SUPPORT) $(CFG_FTRACE_SUPPORT)))
 $(error Cannot instrument user libraries if user mode profiling is disabled)
 endif
 endif
diff --git a/scripts/arm32_sysreg.py b/scripts/arm32_sysreg.py
index bcdd942..4b6f9cf 100755
--- a/scripts/arm32_sysreg.py
+++ b/scripts/arm32_sysreg.py
@@ -125,7 +125,7 @@
     print('')
     if len(descr):
         print('/* ' + descr + ' */')
-    print('static inline void write_' + reg_name.lower() + '(void)')
+    print('static inline __noprof void write_' + reg_name.lower() + '(void)')
     print('{')
     print('\t/* Register ignored */')
     print('\tasm volatile ("mcr p15, ' + opc1 + ', r0, ' + crn + ', ' +
diff --git a/scripts/symbolize.py b/scripts/symbolize.py
index 5928932..631ae6e 100755
--- a/scripts/symbolize.py
+++ b/scripts/symbolize.py
@@ -177,7 +177,11 @@
                     elf_idx = r[2]
                     if elf_idx is not None:
                         return self._elfs[int(elf_idx)][1]
-            return None
+            # If the address is not found in a TA ELF file, fall back to
+            # tee.elf. This is needed to symbolize mixed (user-space and
+            # kernel) addresses, which occur when syscall ftrace is enabled
+            # along with TA ftrace.
+            return '0x0'
         else:
             # tee.elf
             return '0x0'
diff --git a/ta/arch/arm/link.mk b/ta/arch/arm/link.mk
index feeecd8..247d45b 100644
--- a/ta/arch/arm/link.mk
+++ b/ta/arch/arm/link.mk
@@ -23,7 +23,7 @@
 link-ldflags += --as-needed # Do not add dependency on unused shlib
 link-ldflags += $(link-ldflags$(sm))
 
-ifeq ($(CFG_TA_FTRACE_SUPPORT),y)
+ifeq ($(CFG_FTRACE_SUPPORT),y)
 $(link-out-dir$(sm))/dyn_list:
 	@$(cmd-echo-silent) '  GEN     $@'
 	$(q)mkdir -p $(dir $@)
diff --git a/ta/arch/arm/user_ta_header.c b/ta/arch/arm/user_ta_header.c
index 37a0b2c..ad73b46 100644
--- a/ta/arch/arm/user_ta_header.c
+++ b/ta/arch/arm/user_ta_header.c
@@ -47,7 +47,7 @@
 {
 	TEE_Result res = __utee_entry(func, session_id, up, cmd_id);
 
-#if defined(CFG_TA_FTRACE_SUPPORT)
+#if defined(CFG_FTRACE_SUPPORT)
 	/*
 	 * __ta_entry is the first TA API called from TEE core. As it being
 	 * __noreturn API, we need to call ftrace_return in this API just
@@ -126,7 +126,7 @@
 
 const size_t ta_num_props = sizeof(ta_props) / sizeof(ta_props[0]);
 
-#ifdef CFG_TA_FTRACE_SUPPORT
+#ifdef CFG_FTRACE_SUPPORT
 struct __ftrace_info __ftrace_info = {
 #ifdef __ILP32__
 	.buf_start.ptr32 = { .lo = (uint32_t)&__ftrace_buf_start },
diff --git a/ta/ta.mk b/ta/ta.mk
index dde7be6..1278670 100644
--- a/ta/ta.mk
+++ b/ta/ta.mk
@@ -35,7 +35,7 @@
 ta-mk-file-export-vars-$(sm) += CFG_SYSTEM_PTA
 ta-mk-file-export-vars-$(sm) += CFG_TA_DYNLINK
 ta-mk-file-export-vars-$(sm) += CFG_TEE_TA_LOG_LEVEL
-ta-mk-file-export-vars-$(sm) += CFG_TA_FTRACE_SUPPORT
+ta-mk-file-export-vars-$(sm) += CFG_FTRACE_SUPPORT
 ta-mk-file-export-vars-$(sm) += CFG_UNWIND
 ta-mk-file-export-vars-$(sm) += CFG_TA_MCOUNT