Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 5e3de28..e08527f 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
#
# Architectures that offer an FUNCTION_TRACER implementation should
# select HAVE_FUNCTION_TRACER:
@@ -145,7 +146,7 @@
select GENERIC_TRACER
select CONTEXT_SWITCH_TRACER
select GLOB
- select TASKS_RCU if PREEMPT
+ select TASKS_RCU if PREEMPTION
help
Enable the kernel to trace every kernel function. This is done
by using a compiler feature to insert a small, 5-byte No-Operation
@@ -178,7 +179,7 @@
config PREEMPTIRQ_EVENTS
bool "Enable trace events for preempt and irq disable/enable"
select TRACE_IRQFLAGS
- select TRACE_PREEMPT_TOGGLE if PREEMPT
+ select TRACE_PREEMPT_TOGGLE if PREEMPTION
select GENERIC_TRACER
default n
help
@@ -213,7 +214,7 @@
bool "Preemption-off Latency Tracer"
default n
depends on !ARCH_USES_GETTIMEOFFSET
- depends on PREEMPT
+ depends on PREEMPTION
select GENERIC_TRACER
select TRACER_MAX_TRACE
select RING_BUFFER_ALLOW_SWAP
@@ -370,6 +371,7 @@
config PROFILE_ALL_BRANCHES
bool "Profile all if conditionals" if !FORTIFY_SOURCE
select TRACE_BRANCH_PROFILING
+ imply CC_DISABLE_WARN_MAYBE_UNINITIALIZED # avoid false positives
help
This tracer profiles all branch conditions. Every if ()
taken in the kernel is recorded whether it hit or miss.
@@ -461,6 +463,7 @@
bool "Enable kprobes-based dynamic events"
select TRACING
select PROBE_EVENTS
+ select DYNAMIC_EVENTS
default y
help
This allows the user to add tracing events (similar to tracepoints)
@@ -500,6 +503,7 @@
depends on PERF_EVENTS
select UPROBES
select PROBE_EVENTS
+ select DYNAMIC_EVENTS
select TRACING
default y
help
@@ -516,7 +520,11 @@
bool
default y
help
- This allows the user to attach BPF programs to kprobe events.
+ This allows the user to attach BPF programs to kprobe, uprobe, and
+ tracepoint events.
+
+config DYNAMIC_EVENTS
+ def_bool n
config PROBE_EVENTS
def_bool n
@@ -590,9 +598,19 @@
functioning properly. It will do tests on all the configured
tracers of ftrace.
+config EVENT_TRACE_STARTUP_TEST
+ bool "Run selftest on trace events"
+ depends on FTRACE_STARTUP_TEST
+ default y
+ help
+ This option performs a test on all trace events in the system.
+ It basically just enables each event and runs some code that
+ will trigger events (not necessarily the event it enables)
+	  This may take some time to run as there are a lot of events.
+
config EVENT_TRACE_TEST_SYSCALLS
bool "Run selftest on syscall events"
- depends on FTRACE_STARTUP_TEST
+ depends on EVENT_TRACE_STARTUP_TEST
help
This option will also enable testing every syscall event.
It only enables the event and disables it and runs various loads
@@ -630,6 +648,7 @@
depends on ARCH_HAVE_NMI_SAFE_CMPXCHG
select TRACING_MAP
select TRACING
+ select DYNAMIC_EVENTS
default n
help
Hist triggers allow one or more arbitrary trace event fields
@@ -767,13 +786,6 @@
If unsure, say N
-config TRACING_EVENTS_GPIO
- bool "Trace gpio events"
- depends on GPIOLIB
- default y
- help
- Enable tracing events for gpio subsystem
-
config GCOV_PROFILE_FTRACE
bool "Enable GCOV profiling on ftrace subsystem"
depends on GCOV_KERNEL
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index f81dadb..c2b2148 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -57,6 +57,7 @@
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
+obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += fgraph.o
ifeq ($(CONFIG_BLOCK),y)
obj-$(CONFIG_EVENT_TRACING) += blktrace.o
endif
@@ -78,6 +79,7 @@
ifeq ($(CONFIG_TRACING),y)
obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
endif
+obj-$(CONFIG_DYNAMIC_EVENTS) += trace_dynevent.o
obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 2868d85..2d6e93a 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -512,8 +512,6 @@
dir = debugfs_lookup(buts->name, blk_debugfs_root);
if (!dir)
bt->dir = dir = debugfs_create_dir(buts->name, blk_debugfs_root);
- if (!dir)
- goto err;
bt->dev = dev;
atomic_set(&bt->dropped, 0);
@@ -522,12 +520,8 @@
ret = -EIO;
bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt,
&blk_dropped_fops);
- if (!bt->dropped_file)
- goto err;
bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops);
- if (!bt->msg_file)
- goto err;
bt->rchan = relay_open("trace", dir, buts->buf_size,
buts->buf_nr, &blk_relay_callbacks, bt);
@@ -723,6 +717,7 @@
#endif
case BLKTRACESTART:
start = 1;
+ /* fall through */
case BLKTRACESTOP:
ret = __blk_trace_startstop(q, start);
break;
@@ -764,9 +759,9 @@
if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
return NULL;
- if (!bio->bi_css)
+ if (!bio->bi_blkg)
return NULL;
- return cgroup_get_kernfs_id(bio->bi_css->cgroup);
+ return cgroup_get_kernfs_id(bio_blkcg(bio)->css.cgroup);
}
#else
static union kernfs_node_id *
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 9864a35..44bd08f 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -14,9 +14,51 @@
#include <linux/syscalls.h>
#include <linux/error-injection.h>
+#include <asm/tlb.h>
+
#include "trace_probe.h"
#include "trace.h"
+#define bpf_event_rcu_dereference(p) \
+ rcu_dereference_protected(p, lockdep_is_held(&bpf_event_mutex))
+
+#ifdef CONFIG_MODULES
+struct bpf_trace_module {
+ struct module *module;
+ struct list_head list;
+};
+
+static LIST_HEAD(bpf_trace_modules);
+static DEFINE_MUTEX(bpf_module_mutex);
+
+static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
+{
+ struct bpf_raw_event_map *btp, *ret = NULL;
+ struct bpf_trace_module *btm;
+ unsigned int i;
+
+ mutex_lock(&bpf_module_mutex);
+ list_for_each_entry(btm, &bpf_trace_modules, list) {
+ for (i = 0; i < btm->module->num_bpf_raw_events; ++i) {
+ btp = &btm->module->bpf_raw_events[i];
+ if (!strcmp(btp->tp->name, name)) {
+ if (try_module_get(btm->module))
+ ret = btp;
+ goto out;
+ }
+ }
+ }
+out:
+ mutex_unlock(&bpf_module_mutex);
+ return ret;
+}
+#else
+static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
+{
+ return NULL;
+}
+#endif /* CONFIG_MODULES */
+
u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
@@ -100,8 +142,13 @@
{
int ret;
+ ret = security_locked_down(LOCKDOWN_BPF_READ);
+ if (ret < 0)
+ goto out;
+
ret = probe_kernel_read(dst, unsafe_ptr, size);
if (unlikely(ret < 0))
+out:
memset(dst, 0, size);
return ret;
@@ -126,6 +173,10 @@
* access_ok() should prevent writing to non-user memory, but in
* some situations (nommu, temporary switch, etc) access_ok() does
* not provide enough validation, hence the check on KERNEL_DS.
+ *
+ * nmi_uaccess_okay() ensures the probe is not run in an interim
+ * state, when the task or mm are switched. This is specifically
+ * required to prevent the use of temporary mm.
*/
if (unlikely(in_interrupt() ||
@@ -133,7 +184,9 @@
return -EPERM;
if (unlikely(uaccess_kernel()))
return -EPERM;
- if (!access_ok(VERIFY_WRITE, unsafe_ptr, size))
+ if (unlikely(!nmi_uaccess_okay()))
+ return -EPERM;
+ if (!access_ok(unsafe_ptr, size))
return -EPERM;
return probe_kernel_write(unsafe_ptr, src, size);
@@ -365,8 +418,6 @@
.arg4_type = ARG_CONST_SIZE,
};
-static DEFINE_PER_CPU(struct perf_sample_data, bpf_trace_sd);
-
static __always_inline u64
__bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
u64 flags, struct perf_sample_data *sd)
@@ -394,28 +445,53 @@
if (unlikely(event->oncpu != cpu))
return -EOPNOTSUPP;
- perf_event_output(event, sd, regs);
- return 0;
+ return perf_event_output(event, sd, regs);
}
+/*
+ * Support executing tracepoints in normal, irq, and nmi context that each call
+ * bpf_perf_event_output
+ */
+struct bpf_trace_sample_data {
+ struct perf_sample_data sds[3];
+};
+
+static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_trace_sds);
+static DEFINE_PER_CPU(int, bpf_trace_nest_level);
BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
u64, flags, void *, data, u64, size)
{
- struct perf_sample_data *sd = this_cpu_ptr(&bpf_trace_sd);
+ struct bpf_trace_sample_data *sds = this_cpu_ptr(&bpf_trace_sds);
+ int nest_level = this_cpu_inc_return(bpf_trace_nest_level);
struct perf_raw_record raw = {
.frag = {
.size = size,
.data = data,
},
};
+ struct perf_sample_data *sd;
+ int err;
- if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
- return -EINVAL;
+ if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(sds->sds))) {
+ err = -EBUSY;
+ goto out;
+ }
+
+ sd = &sds->sds[nest_level - 1];
+
+ if (unlikely(flags & ~(BPF_F_INDEX_MASK))) {
+ err = -EINVAL;
+ goto out;
+ }
perf_sample_data_init(sd, 0, 0);
sd->raw = &raw;
- return __bpf_perf_event_output(regs, map, flags, sd);
+ err = __bpf_perf_event_output(regs, map, flags, sd);
+
+out:
+ this_cpu_dec(bpf_trace_nest_level);
+ return err;
}
static const struct bpf_func_proto bpf_perf_event_output_proto = {
@@ -429,14 +505,17 @@
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
-static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs);
-static DEFINE_PER_CPU(struct perf_sample_data, bpf_misc_sd);
+static DEFINE_PER_CPU(int, bpf_event_output_nest_level);
+struct bpf_nested_pt_regs {
+ struct pt_regs regs[3];
+};
+static DEFINE_PER_CPU(struct bpf_nested_pt_regs, bpf_pt_regs);
+static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_misc_sds);
u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
{
- struct perf_sample_data *sd = this_cpu_ptr(&bpf_misc_sd);
- struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs);
+ int nest_level = this_cpu_inc_return(bpf_event_output_nest_level);
struct perf_raw_frag frag = {
.copy = ctx_copy,
.size = ctx_size,
@@ -451,12 +530,25 @@
.data = meta,
},
};
+ struct perf_sample_data *sd;
+ struct pt_regs *regs;
+ u64 ret;
+
+ if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bpf_misc_sds.sds))) {
+ ret = -EBUSY;
+ goto out;
+ }
+ sd = this_cpu_ptr(&bpf_misc_sds.sds[nest_level - 1]);
+ regs = this_cpu_ptr(&bpf_pt_regs.regs[nest_level - 1]);
perf_fetch_caller_regs(regs);
perf_sample_data_init(sd, 0, 0);
sd->raw = &raw;
- return __bpf_perf_event_output(regs, map, flags, sd);
+ ret = __bpf_perf_event_output(regs, map, flags, sd);
+out:
+ this_cpu_dec(bpf_event_output_nest_level);
+ return ret;
}
BPF_CALL_0(bpf_get_current_task)
@@ -498,6 +590,10 @@
{
int ret;
+ ret = security_locked_down(LOCKDOWN_BPF_READ);
+ if (ret < 0)
+ goto out;
+
/*
* The strncpy_from_unsafe() call will likely not fill the entire
* buffer, but that's okay in this circumstance as we're probing
@@ -509,6 +605,7 @@
*/
ret = strncpy_from_unsafe(dst, unsafe_ptr, size);
if (unlikely(ret < 0))
+out:
memset(dst, 0, size);
return ret;
@@ -523,6 +620,69 @@
.arg3_type = ARG_ANYTHING,
};
+struct send_signal_irq_work {
+ struct irq_work irq_work;
+ struct task_struct *task;
+ u32 sig;
+};
+
+static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work);
+
+static void do_bpf_send_signal(struct irq_work *entry)
+{
+ struct send_signal_irq_work *work;
+
+ work = container_of(entry, struct send_signal_irq_work, irq_work);
+ group_send_sig_info(work->sig, SEND_SIG_PRIV, work->task, PIDTYPE_TGID);
+}
+
+BPF_CALL_1(bpf_send_signal, u32, sig)
+{
+ struct send_signal_irq_work *work = NULL;
+
+ /* Similar to bpf_probe_write_user, task needs to be
+ * in a sound condition and kernel memory access be
+ * permitted in order to send signal to the current
+ * task.
+ */
+ if (unlikely(current->flags & (PF_KTHREAD | PF_EXITING)))
+ return -EPERM;
+ if (unlikely(uaccess_kernel()))
+ return -EPERM;
+ if (unlikely(!nmi_uaccess_okay()))
+ return -EPERM;
+
+ if (in_nmi()) {
+ /* Do an early check on signal validity. Otherwise,
+ * the error is lost in deferred irq_work.
+ */
+ if (unlikely(!valid_signal(sig)))
+ return -EINVAL;
+
+ work = this_cpu_ptr(&send_signal_work);
+ if (work->irq_work.flags & IRQ_WORK_BUSY)
+ return -EBUSY;
+
+ /* Add the current task, which is the target of sending signal,
+ * to the irq_work. The current task may change when queued
+ * irq works get executed.
+ */
+ work->task = current;
+ work->sig = sig;
+ irq_work_queue(&work->irq_work);
+ return 0;
+ }
+
+ return group_send_sig_info(sig, SEND_SIG_PRIV, current, PIDTYPE_TGID);
+}
+
+static const struct bpf_func_proto bpf_send_signal_proto = {
+ .func = bpf_send_signal,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_ANYTHING,
+};
+
static const struct bpf_func_proto *
tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
@@ -533,6 +693,12 @@
return &bpf_map_update_elem_proto;
case BPF_FUNC_map_delete_elem:
return &bpf_map_delete_elem_proto;
+ case BPF_FUNC_map_push_elem:
+ return &bpf_map_push_elem_proto;
+ case BPF_FUNC_map_pop_elem:
+ return &bpf_map_pop_elem_proto;
+ case BPF_FUNC_map_peek_elem:
+ return &bpf_map_peek_elem_proto;
case BPF_FUNC_probe_read:
return &bpf_probe_read_proto;
case BPF_FUNC_ktime_get_ns:
@@ -567,6 +733,8 @@
case BPF_FUNC_get_current_cgroup_id:
return &bpf_get_current_cgroup_id_proto;
#endif
+ case BPF_FUNC_send_signal:
+ return &bpf_send_signal_proto;
default:
return NULL;
}
@@ -772,16 +940,48 @@
/*
* bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp
* to avoid potential recursive reuse issue when/if tracepoints are added
- * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack
+ * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack.
+ *
+ * Since raw tracepoints run despite bpf_prog_active, support concurrent usage
+ * in normal, irq, and nmi context.
*/
-static DEFINE_PER_CPU(struct pt_regs, bpf_raw_tp_regs);
+struct bpf_raw_tp_regs {
+ struct pt_regs regs[3];
+};
+static DEFINE_PER_CPU(struct bpf_raw_tp_regs, bpf_raw_tp_regs);
+static DEFINE_PER_CPU(int, bpf_raw_tp_nest_level);
+static struct pt_regs *get_bpf_raw_tp_regs(void)
+{
+ struct bpf_raw_tp_regs *tp_regs = this_cpu_ptr(&bpf_raw_tp_regs);
+ int nest_level = this_cpu_inc_return(bpf_raw_tp_nest_level);
+
+ if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(tp_regs->regs))) {
+ this_cpu_dec(bpf_raw_tp_nest_level);
+ return ERR_PTR(-EBUSY);
+ }
+
+ return &tp_regs->regs[nest_level - 1];
+}
+
+static void put_bpf_raw_tp_regs(void)
+{
+ this_cpu_dec(bpf_raw_tp_nest_level);
+}
+
BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args,
struct bpf_map *, map, u64, flags, void *, data, u64, size)
{
- struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
+ struct pt_regs *regs = get_bpf_raw_tp_regs();
+ int ret;
+
+ if (IS_ERR(regs))
+ return PTR_ERR(regs);
perf_fetch_caller_regs(regs);
- return ____bpf_perf_event_output(regs, map, flags, data, size);
+ ret = ____bpf_perf_event_output(regs, map, flags, data, size);
+
+ put_bpf_raw_tp_regs();
+ return ret;
}
static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
@@ -798,12 +998,18 @@
BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
struct bpf_map *, map, u64, flags)
{
- struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
+ struct pt_regs *regs = get_bpf_raw_tp_regs();
+ int ret;
+
+ if (IS_ERR(regs))
+ return PTR_ERR(regs);
perf_fetch_caller_regs(regs);
/* similar to bpf_perf_event_output_tp, but pt_regs fetched differently */
- return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
- flags, 0, 0);
+ ret = bpf_get_stackid((unsigned long) regs, (unsigned long) map,
+ flags, 0, 0);
+ put_bpf_raw_tp_regs();
+ return ret;
}
static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
@@ -818,11 +1024,17 @@
BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args,
void *, buf, u32, size, u64, flags)
{
- struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
+ struct pt_regs *regs = get_bpf_raw_tp_regs();
+ int ret;
+
+ if (IS_ERR(regs))
+ return PTR_ERR(regs);
perf_fetch_caller_regs(regs);
- return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
- (unsigned long) size, flags, 0);
+ ret = bpf_get_stack((unsigned long) regs, (unsigned long) buf,
+ (unsigned long) size, flags, 0);
+ put_bpf_raw_tp_regs();
+ return ret;
}
static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
@@ -873,6 +1085,27 @@
const struct bpf_prog_ops raw_tracepoint_prog_ops = {
};
+static bool raw_tp_writable_prog_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+{
+ if (off == 0) {
+ if (size != sizeof(u64) || type != BPF_READ)
+ return false;
+ info->reg_type = PTR_TO_TP_BUFFER;
+ }
+ return raw_tp_prog_is_valid_access(off, size, type, prog, info);
+}
+
+const struct bpf_verifier_ops raw_tracepoint_writable_verifier_ops = {
+ .get_func_proto = raw_tp_prog_func_proto,
+ .is_valid_access = raw_tp_writable_prog_is_valid_access,
+};
+
+const struct bpf_prog_ops raw_tracepoint_writable_prog_ops = {
+};
+
static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
@@ -963,7 +1196,7 @@
int perf_event_attach_bpf_prog(struct perf_event *event,
struct bpf_prog *prog)
{
- struct bpf_prog_array __rcu *old_array;
+ struct bpf_prog_array *old_array;
struct bpf_prog_array *new_array;
int ret = -EEXIST;
@@ -981,7 +1214,7 @@
if (event->prog)
goto unlock;
- old_array = event->tp_event->prog_array;
+ old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
if (old_array &&
bpf_prog_array_length(old_array) >= BPF_TRACE_MAX_PROGS) {
ret = -E2BIG;
@@ -1004,7 +1237,7 @@
void perf_event_detach_bpf_prog(struct perf_event *event)
{
- struct bpf_prog_array __rcu *old_array;
+ struct bpf_prog_array *old_array;
struct bpf_prog_array *new_array;
int ret;
@@ -1013,7 +1246,7 @@
if (!event->prog)
goto unlock;
- old_array = event->tp_event->prog_array;
+ old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
ret = bpf_prog_array_copy(old_array, event->prog, NULL, &new_array);
if (ret == -ENOENT)
goto unlock;
@@ -1035,6 +1268,7 @@
{
struct perf_event_query_bpf __user *uquery = info;
struct perf_event_query_bpf query = {};
+ struct bpf_prog_array *progs;
u32 *ids, prog_cnt, ids_len;
int ret;
@@ -1059,10 +1293,8 @@
*/
mutex_lock(&bpf_event_mutex);
- ret = bpf_prog_array_copy_info(event->tp_event->prog_array,
- ids,
- ids_len,
- &prog_cnt);
+ progs = bpf_event_rcu_dereference(event->tp_event->prog_array);
+ ret = bpf_prog_array_copy_info(progs, ids, ids_len, &prog_cnt);
mutex_unlock(&bpf_event_mutex);
if (copy_to_user(&uquery->prog_cnt, &prog_cnt, sizeof(prog_cnt)) ||
@@ -1076,7 +1308,7 @@
extern struct bpf_raw_event_map __start__bpf_raw_tp[];
extern struct bpf_raw_event_map __stop__bpf_raw_tp[];
-struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name)
+struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name)
{
struct bpf_raw_event_map *btp = __start__bpf_raw_tp;
@@ -1084,7 +1316,16 @@
if (!strcmp(btp->tp->name, name))
return btp;
}
- return NULL;
+
+ return bpf_get_raw_tracepoint_module(name);
+}
+
+void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
+{
+ struct module *mod = __module_address((unsigned long)btp);
+
+ if (mod)
+ module_put(mod);
}
static __always_inline
@@ -1153,27 +1394,20 @@
if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64))
return -EINVAL;
+ if (prog->aux->max_tp_access > btp->writable_size)
+ return -EINVAL;
+
return tracepoint_probe_register(tp, (void *)btp->bpf_func, prog);
}
int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
{
- int err;
-
- mutex_lock(&bpf_event_mutex);
- err = __bpf_probe_register(btp, prog);
- mutex_unlock(&bpf_event_mutex);
- return err;
+ return __bpf_probe_register(btp, prog);
}
int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
{
- int err;
-
- mutex_lock(&bpf_event_mutex);
- err = tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog);
- mutex_unlock(&bpf_event_mutex);
- return err;
+ return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog);
}
int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
@@ -1222,3 +1456,67 @@
return err;
}
+
+static int __init send_signal_irq_work_init(void)
+{
+ int cpu;
+ struct send_signal_irq_work *work;
+
+ for_each_possible_cpu(cpu) {
+ work = per_cpu_ptr(&send_signal_work, cpu);
+ init_irq_work(&work->irq_work, do_bpf_send_signal);
+ }
+ return 0;
+}
+
+subsys_initcall(send_signal_irq_work_init);
+
+#ifdef CONFIG_MODULES
+static int bpf_event_notify(struct notifier_block *nb, unsigned long op,
+ void *module)
+{
+ struct bpf_trace_module *btm, *tmp;
+ struct module *mod = module;
+
+ if (mod->num_bpf_raw_events == 0 ||
+ (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING))
+ return 0;
+
+ mutex_lock(&bpf_module_mutex);
+
+ switch (op) {
+ case MODULE_STATE_COMING:
+ btm = kzalloc(sizeof(*btm), GFP_KERNEL);
+ if (btm) {
+ btm->module = module;
+ list_add(&btm->list, &bpf_trace_modules);
+ }
+ break;
+ case MODULE_STATE_GOING:
+ list_for_each_entry_safe(btm, tmp, &bpf_trace_modules, list) {
+ if (btm->module == module) {
+ list_del(&btm->list);
+ kfree(btm);
+ break;
+ }
+ }
+ break;
+ }
+
+ mutex_unlock(&bpf_module_mutex);
+
+ return 0;
+}
+
+static struct notifier_block bpf_module_nb = {
+ .notifier_call = bpf_event_notify,
+};
+
+static int __init bpf_event_init(void)
+{
+ register_module_notifier(&bpf_module_nb);
+ return 0;
+}
+
+fs_initcall(bpf_event_init);
+#endif /* CONFIG_MODULES */
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
new file mode 100644
index 0000000..7950a03
--- /dev/null
+++ b/kernel/trace/fgraph.c
@@ -0,0 +1,626 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Infrastructure to hook into function calls and returns.
+ * Copyright (c) 2008-2009 Frederic Weisbecker <fweisbec@gmail.com>
+ * Mostly borrowed from function tracer which
+ * is Copyright (c) Steven Rostedt <srostedt@redhat.com>
+ *
+ * Highly modified by Steven Rostedt (VMware).
+ */
+#include <linux/suspend.h>
+#include <linux/ftrace.h>
+#include <linux/slab.h>
+
+#include <trace/events/sched.h>
+
+#include "ftrace_internal.h"
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+#define ASSIGN_OPS_HASH(opsname, val) \
+ .func_hash = val, \
+ .local_hash.regex_lock = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock),
+#else
+#define ASSIGN_OPS_HASH(opsname, val)
+#endif
+
+static bool kill_ftrace_graph;
+int ftrace_graph_active;
+
+/* Both enabled by default (can be cleared by function_graph tracer flags) */
+static bool fgraph_sleep_time = true;
+
+/**
+ * ftrace_graph_is_dead - returns true if ftrace_graph_stop() was called
+ *
+ * ftrace_graph_stop() is called when a severe error is detected in
+ * the function graph tracing. This function is called by the critical
+ * paths of function graph to keep those paths from doing any more harm.
+ */
+bool ftrace_graph_is_dead(void)
+{
+ return kill_ftrace_graph;
+}
+
+/**
+ * ftrace_graph_stop - set to permanently disable function graph tracing
+ *
+ * In case of an error in function graph tracing, this is called
+ * to try to keep function graph tracing from causing any more harm.
+ * Usually this is pretty severe and this is called to try to at least
+ * get a warning out to the user.
+ */
+void ftrace_graph_stop(void)
+{
+ kill_ftrace_graph = true;
+}
+
+/* Add a function return address to the trace stack on thread info.*/
+static int
+ftrace_push_return_trace(unsigned long ret, unsigned long func,
+ unsigned long frame_pointer, unsigned long *retp)
+{
+ unsigned long long calltime;
+ int index;
+
+ if (unlikely(ftrace_graph_is_dead()))
+ return -EBUSY;
+
+ if (!current->ret_stack)
+ return -EBUSY;
+
+ /*
+ * We must make sure the ret_stack is tested before we read
+ * anything else.
+ */
+ smp_rmb();
+
+ /* The return trace stack is full */
+ if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
+		atomic_inc(&current->trace_overrun);
+ return -EBUSY;
+ }
+
+ calltime = trace_clock_local();
+
+ index = ++current->curr_ret_stack;
+ barrier();
+ current->ret_stack[index].ret = ret;
+ current->ret_stack[index].func = func;
+ current->ret_stack[index].calltime = calltime;
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+ current->ret_stack[index].fp = frame_pointer;
+#endif
+#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+ current->ret_stack[index].retp = retp;
+#endif
+ return 0;
+}
+
+int function_graph_enter(unsigned long ret, unsigned long func,
+ unsigned long frame_pointer, unsigned long *retp)
+{
+ struct ftrace_graph_ent trace;
+
+ trace.func = func;
+ trace.depth = ++current->curr_ret_depth;
+
+ if (ftrace_push_return_trace(ret, func, frame_pointer, retp))
+ goto out;
+
+ /* Only trace if the calling function expects to */
+ if (!ftrace_graph_entry(&trace))
+ goto out_ret;
+
+ return 0;
+ out_ret:
+ current->curr_ret_stack--;
+ out:
+ current->curr_ret_depth--;
+ return -EBUSY;
+}
+
+/* Retrieve a function return address to the trace stack on thread info.*/
+static void
+ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
+ unsigned long frame_pointer)
+{
+ int index;
+
+ index = current->curr_ret_stack;
+
+ if (unlikely(index < 0 || index >= FTRACE_RETFUNC_DEPTH)) {
+ ftrace_graph_stop();
+ WARN_ON(1);
+		/* Might as well panic, otherwise we have nowhere to go */
+ *ret = (unsigned long)panic;
+ return;
+ }
+
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+ /*
+ * The arch may choose to record the frame pointer used
+ * and check it here to make sure that it is what we expect it
+ * to be. If gcc does not set the place holder of the return
+ * address in the frame pointer, and does a copy instead, then
+ * the function graph trace will fail. This test detects this
+ * case.
+ *
+ * Currently, x86_32 with optimize for size (-Os) makes the latest
+ * gcc do the above.
+ *
+ * Note, -mfentry does not use frame pointers, and this test
+ * is not needed if CC_USING_FENTRY is set.
+ */
+ if (unlikely(current->ret_stack[index].fp != frame_pointer)) {
+ ftrace_graph_stop();
+ WARN(1, "Bad frame pointer: expected %lx, received %lx\n"
+ " from func %ps return to %lx\n",
+ current->ret_stack[index].fp,
+ frame_pointer,
+ (void *)current->ret_stack[index].func,
+ current->ret_stack[index].ret);
+ *ret = (unsigned long)panic;
+ return;
+ }
+#endif
+
+ *ret = current->ret_stack[index].ret;
+ trace->func = current->ret_stack[index].func;
+ trace->calltime = current->ret_stack[index].calltime;
+	trace->overrun = atomic_read(&current->trace_overrun);
+ trace->depth = current->curr_ret_depth--;
+ /*
+ * We still want to trace interrupts coming in if
+ * max_depth is set to 1. Make sure the decrement is
+ * seen before ftrace_graph_return.
+ */
+ barrier();
+}
+
+/*
+ * Hibernation protection.
+ * The state of the current task is too unstable during
+ * suspend/restore to disk. We want to protect against that.
+ */
+static int
+ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state,
+ void *unused)
+{
+ switch (state) {
+ case PM_HIBERNATION_PREPARE:
+ pause_graph_tracing();
+ break;
+
+ case PM_POST_HIBERNATION:
+ unpause_graph_tracing();
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block ftrace_suspend_notifier = {
+ .notifier_call = ftrace_suspend_notifier_call,
+};
+
+/*
+ * Send the trace to the ring-buffer.
+ * @return the original return address.
+ */
+unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
+{
+ struct ftrace_graph_ret trace;
+ unsigned long ret;
+
+ ftrace_pop_return_trace(&trace, &ret, frame_pointer);
+ trace.rettime = trace_clock_local();
+ ftrace_graph_return(&trace);
+ /*
+ * The ftrace_graph_return() may still access the current
+ * ret_stack structure, we need to make sure the update of
+ * curr_ret_stack is after that.
+ */
+ barrier();
+ current->curr_ret_stack--;
+
+ if (unlikely(!ret)) {
+ ftrace_graph_stop();
+ WARN_ON(1);
+ /* Might as well panic. What else to do? */
+ ret = (unsigned long)panic;
+ }
+
+ return ret;
+}
+
+/**
+ * ftrace_graph_get_ret_stack - return the entry of the shadow stack
+ * @task: The task to read the shadow stack from
+ * @idx: Index down the shadow stack
+ *
+ * Return the ret_struct on the shadow stack of the @task at the
+ * call graph at @idx starting with zero. If @idx is zero, it
+ * will return the last saved ret_stack entry. If it is greater than
+ * zero, it will return the corresponding ret_stack for the depth
+ * of saved return addresses.
+ */
+struct ftrace_ret_stack *
+ftrace_graph_get_ret_stack(struct task_struct *task, int idx)
+{
+ idx = task->curr_ret_stack - idx;
+
+ if (idx >= 0 && idx <= task->curr_ret_stack)
+ return &task->ret_stack[idx];
+
+ return NULL;
+}
+
+/**
+ * ftrace_graph_ret_addr - convert a potentially modified stack return address
+ * to its original value
+ *
+ * This function can be called by stack unwinding code to convert a found stack
+ * return address ('ret') to its original value, in case the function graph
+ * tracer has modified it to be 'return_to_handler'. If the address hasn't
+ * been modified, the unchanged value of 'ret' is returned.
+ *
+ * 'idx' is a state variable which should be initialized by the caller to zero
+ * before the first call.
+ *
+ * 'retp' is a pointer to the return address on the stack. It's ignored if
+ * the arch doesn't have HAVE_FUNCTION_GRAPH_RET_ADDR_PTR defined.
+ */
+#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
+ unsigned long ret, unsigned long *retp)
+{
+ int index = task->curr_ret_stack;
+ int i;
+
+ if (ret != (unsigned long)dereference_kernel_function_descriptor(return_to_handler))
+ return ret;
+
+ if (index < 0)
+ return ret;
+
+ for (i = 0; i <= index; i++)
+ if (task->ret_stack[i].retp == retp)
+ return task->ret_stack[i].ret;
+
+ return ret;
+}
+#else /* !HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
+unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
+ unsigned long ret, unsigned long *retp)
+{
+ int task_idx;
+
+ if (ret != (unsigned long)dereference_kernel_function_descriptor(return_to_handler))
+ return ret;
+
+ task_idx = task->curr_ret_stack;
+
+ if (!task->ret_stack || task_idx < *idx)
+ return ret;
+
+ task_idx -= *idx;
+ (*idx)++;
+
+ return task->ret_stack[task_idx].ret;
+}
+#endif /* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
+
+static struct ftrace_ops graph_ops = {
+ .func = ftrace_stub,
+ .flags = FTRACE_OPS_FL_RECURSION_SAFE |
+ FTRACE_OPS_FL_INITIALIZED |
+ FTRACE_OPS_FL_PID |
+ FTRACE_OPS_FL_STUB,
+#ifdef FTRACE_GRAPH_TRAMP_ADDR
+ .trampoline = FTRACE_GRAPH_TRAMP_ADDR,
+ /* trampoline_size is only needed for dynamically allocated tramps */
+#endif
+ ASSIGN_OPS_HASH(graph_ops, &global_ops.local_hash)
+};
+
+void ftrace_graph_sleep_time_control(bool enable)
+{
+ fgraph_sleep_time = enable;
+}
+
+int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
+{
+ return 0;
+}
+
+/* The callbacks that hook a function */
+trace_func_graph_ret_t ftrace_graph_return =
+ (trace_func_graph_ret_t)ftrace_stub;
+trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub;
+static trace_func_graph_ent_t __ftrace_graph_entry = ftrace_graph_entry_stub;
+
+/* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */
+static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
+{
+ int i;
+ int ret = 0;
+ int start = 0, end = FTRACE_RETSTACK_ALLOC_SIZE;
+ struct task_struct *g, *t;
+
+ for (i = 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) {
+ ret_stack_list[i] =
+ kmalloc_array(FTRACE_RETFUNC_DEPTH,
+ sizeof(struct ftrace_ret_stack),
+ GFP_KERNEL);
+ if (!ret_stack_list[i]) {
+ start = 0;
+ end = i;
+ ret = -ENOMEM;
+ goto free;
+ }
+ }
+
+ read_lock(&tasklist_lock);
+ do_each_thread(g, t) {
+ if (start == end) {
+ ret = -EAGAIN;
+ goto unlock;
+ }
+
+ if (t->ret_stack == NULL) {
+ atomic_set(&t->tracing_graph_pause, 0);
+ atomic_set(&t->trace_overrun, 0);
+ t->curr_ret_stack = -1;
+ t->curr_ret_depth = -1;
+ /* Make sure the tasks see the -1 first: */
+ smp_wmb();
+ t->ret_stack = ret_stack_list[start++];
+ }
+ } while_each_thread(g, t);
+
+unlock:
+ read_unlock(&tasklist_lock);
+free:
+ for (i = start; i < end; i++)
+ kfree(ret_stack_list[i]);
+ return ret;
+}
+
+static void
+ftrace_graph_probe_sched_switch(void *ignore, bool preempt,
+ struct task_struct *prev, struct task_struct *next)
+{
+ unsigned long long timestamp;
+ int index;
+
+ /*
+ * Does the user want to count the time a function was asleep.
+ * If so, do not update the time stamps.
+ */
+ if (fgraph_sleep_time)
+ return;
+
+ timestamp = trace_clock_local();
+
+ prev->ftrace_timestamp = timestamp;
+
+ /* only process tasks that we timestamped */
+ if (!next->ftrace_timestamp)
+ return;
+
+ /*
+ * Update all the counters in next to make up for the
+ * time next was sleeping.
+ */
+ timestamp -= next->ftrace_timestamp;
+
+ for (index = next->curr_ret_stack; index >= 0; index--)
+ next->ret_stack[index].calltime += timestamp;
+}
+
+static int ftrace_graph_entry_test(struct ftrace_graph_ent *trace)
+{
+ if (!ftrace_ops_test(&global_ops, trace->func, NULL))
+ return 0;
+ return __ftrace_graph_entry(trace);
+}
+
+/*
+ * The function graph tracer should only trace the functions defined
+ * by set_ftrace_filter and set_ftrace_notrace. If another function
+ * tracer ops is registered, the graph tracer requires testing the
+ * function against the global ops, and not just trace any function
+ * that any ftrace_ops registered.
+ */
+void update_function_graph_func(void)
+{
+ struct ftrace_ops *op;
+ bool do_test = false;
+
+ /*
+ * The graph and global ops share the same set of functions
+ * to test. If any other ops is on the list, then
+ * the graph tracing needs to test if it's the function
+ * it should call.
+ */
+ do_for_each_ftrace_op(op, ftrace_ops_list) {
+ if (op != &global_ops && op != &graph_ops &&
+ op != &ftrace_list_end) {
+ do_test = true;
+ /* in double loop, break out with goto */
+ goto out;
+ }
+ } while_for_each_ftrace_op(op);
+ out:
+ if (do_test)
+ ftrace_graph_entry = ftrace_graph_entry_test;
+ else
+ ftrace_graph_entry = __ftrace_graph_entry;
+}
+
+static DEFINE_PER_CPU(struct ftrace_ret_stack *, idle_ret_stack);
+
+static void
+graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack)
+{
+ atomic_set(&t->tracing_graph_pause, 0);
+ atomic_set(&t->trace_overrun, 0);
+ t->ftrace_timestamp = 0;
+ /* make curr_ret_stack visible before we add the ret_stack */
+ smp_wmb();
+ t->ret_stack = ret_stack;
+}
+
+/*
+ * Allocate a return stack for the idle task. May be the first
+ * time through, or it may be done by CPU hotplug online.
+ */
+void ftrace_graph_init_idle_task(struct task_struct *t, int cpu)
+{
+ t->curr_ret_stack = -1;
+ t->curr_ret_depth = -1;
+ /*
+ * The idle task has no parent, it either has its own
+ * stack or no stack at all.
+ */
+ if (t->ret_stack)
+ WARN_ON(t->ret_stack != per_cpu(idle_ret_stack, cpu));
+
+ if (ftrace_graph_active) {
+ struct ftrace_ret_stack *ret_stack;
+
+ ret_stack = per_cpu(idle_ret_stack, cpu);
+ if (!ret_stack) {
+ ret_stack =
+ kmalloc_array(FTRACE_RETFUNC_DEPTH,
+ sizeof(struct ftrace_ret_stack),
+ GFP_KERNEL);
+ if (!ret_stack)
+ return;
+ per_cpu(idle_ret_stack, cpu) = ret_stack;
+ }
+ graph_init_task(t, ret_stack);
+ }
+}
+
+/* Allocate a return stack for newly created task */
+void ftrace_graph_init_task(struct task_struct *t)
+{
+ /* Make sure we do not use the parent ret_stack */
+ t->ret_stack = NULL;
+ t->curr_ret_stack = -1;
+ t->curr_ret_depth = -1;
+
+ if (ftrace_graph_active) {
+ struct ftrace_ret_stack *ret_stack;
+
+ ret_stack = kmalloc_array(FTRACE_RETFUNC_DEPTH,
+ sizeof(struct ftrace_ret_stack),
+ GFP_KERNEL);
+ if (!ret_stack)
+ return;
+ graph_init_task(t, ret_stack);
+ }
+}
+
+void ftrace_graph_exit_task(struct task_struct *t)
+{
+ struct ftrace_ret_stack *ret_stack = t->ret_stack;
+
+ t->ret_stack = NULL;
+ /* NULL must become visible to IRQs before we free it: */
+ barrier();
+
+ kfree(ret_stack);
+}
+
+/* Allocate a return stack for each task */
+static int start_graph_tracing(void)
+{
+ struct ftrace_ret_stack **ret_stack_list;
+ int ret, cpu;
+
+ ret_stack_list = kmalloc_array(FTRACE_RETSTACK_ALLOC_SIZE,
+ sizeof(struct ftrace_ret_stack *),
+ GFP_KERNEL);
+
+ if (!ret_stack_list)
+ return -ENOMEM;
+
+ /* The cpu_boot init_task->ret_stack will never be freed */
+ for_each_online_cpu(cpu) {
+ if (!idle_task(cpu)->ret_stack)
+ ftrace_graph_init_idle_task(idle_task(cpu), cpu);
+ }
+
+ do {
+ ret = alloc_retstack_tasklist(ret_stack_list);
+ } while (ret == -EAGAIN);
+
+ if (!ret) {
+ ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
+ if (ret)
+ pr_info("ftrace_graph: Couldn't activate tracepoint"
+ " probe to kernel_sched_switch\n");
+ }
+
+ kfree(ret_stack_list);
+ return ret;
+}
+
+int register_ftrace_graph(struct fgraph_ops *gops)
+{
+ int ret = 0;
+
+ mutex_lock(&ftrace_lock);
+
+ /* we currently allow only one tracer registered at a time */
+ if (ftrace_graph_active) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ register_pm_notifier(&ftrace_suspend_notifier);
+
+ ftrace_graph_active++;
+ ret = start_graph_tracing();
+ if (ret) {
+ ftrace_graph_active--;
+ goto out;
+ }
+
+ ftrace_graph_return = gops->retfunc;
+
+ /*
+ * Update the indirect function to the entryfunc, and the
+ * function that gets called to the entry_test first. Then
+ * call the update fgraph entry function to determine if
+ * the entryfunc should be called directly or not.
+ */
+ __ftrace_graph_entry = gops->entryfunc;
+ ftrace_graph_entry = ftrace_graph_entry_test;
+ update_function_graph_func();
+
+ ret = ftrace_startup(&graph_ops, FTRACE_START_FUNC_RET);
+out:
+ mutex_unlock(&ftrace_lock);
+ return ret;
+}
+
+void unregister_ftrace_graph(struct fgraph_ops *gops)
+{
+ mutex_lock(&ftrace_lock);
+
+ if (unlikely(!ftrace_graph_active))
+ goto out;
+
+ ftrace_graph_active--;
+ ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
+ ftrace_graph_entry = ftrace_graph_entry_stub;
+ __ftrace_graph_entry = ftrace_graph_entry_stub;
+ ftrace_shutdown(&graph_ops, FTRACE_STOP_FUNC_RET);
+ unregister_pm_notifier(&ftrace_suspend_notifier);
+ unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
+
+ out:
+ mutex_unlock(&ftrace_lock);
+}
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index e23eb9f..f296d89 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -18,8 +18,8 @@
#include <linux/clocksource.h>
#include <linux/sched/task.h>
#include <linux/kallsyms.h>
+#include <linux/security.h>
#include <linux/seq_file.h>
-#include <linux/suspend.h>
#include <linux/tracefs.h>
#include <linux/hardirq.h>
#include <linux/kthread.h>
@@ -34,12 +34,14 @@
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/rcupdate.h>
+#include <linux/kprobes.h>
#include <trace/events/sched.h>
#include <asm/sections.h>
#include <asm/setup.h>
+#include "ftrace_internal.h"
#include "trace_output.h"
#include "trace_stat.h"
@@ -69,15 +71,16 @@
#define INIT_OPS_HASH(opsname) \
.func_hash = &opsname.local_hash, \
.local_hash.regex_lock = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock),
-#define ASSIGN_OPS_HASH(opsname, val) \
- .func_hash = val, \
- .local_hash.regex_lock = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock),
#else
#define INIT_OPS_HASH(opsname)
-#define ASSIGN_OPS_HASH(opsname, val)
#endif
-static struct ftrace_ops ftrace_list_end __read_mostly = {
+enum {
+ FTRACE_MODIFY_ENABLE_FL = (1 << 0),
+ FTRACE_MODIFY_MAY_SLEEP_FL = (1 << 1),
+};
+
+struct ftrace_ops ftrace_list_end __read_mostly = {
.func = ftrace_stub,
.flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB,
INIT_OPS_HASH(ftrace_list_end)
@@ -112,11 +115,11 @@
*/
static int ftrace_disabled __read_mostly;
-static DEFINE_MUTEX(ftrace_lock);
+DEFINE_MUTEX(ftrace_lock);
-static struct ftrace_ops __rcu *ftrace_ops_list __read_mostly = &ftrace_list_end;
+struct ftrace_ops __rcu *ftrace_ops_list __read_mostly = &ftrace_list_end;
ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
-static struct ftrace_ops global_ops;
+struct ftrace_ops global_ops;
#if ARCH_SUPPORTS_FTRACE_OPS
static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
@@ -127,26 +130,6 @@
#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops)
#endif
-/*
- * Traverse the ftrace_global_list, invoking all entries. The reason that we
- * can use rcu_dereference_raw_notrace() is that elements removed from this list
- * are simply leaked, so there is no need to interact with a grace-period
- * mechanism. The rcu_dereference_raw_notrace() calls are needed to handle
- * concurrent insertions into the ftrace_global_list.
- *
- * Silly Alpha and silly pointer-speculation compiler optimizations!
- */
-#define do_for_each_ftrace_op(op, list) \
- op = rcu_dereference_raw_notrace(list); \
- do
-
-/*
- * Optimized for just a single item in the list (as that is the normal case).
- */
-#define while_for_each_ftrace_op(op) \
- while (likely(op = rcu_dereference_raw_notrace((op)->next)) && \
- unlikely((op) != &ftrace_list_end))
-
static inline void ftrace_ops_init(struct ftrace_ops *ops)
{
#ifdef CONFIG_DYNAMIC_FTRACE
@@ -173,7 +156,7 @@
{
/*
* This function is just a stub to implement a hard force
- * of synchronize_sched(). This requires synchronizing
+ * of synchronize_rcu(). This requires synchronizing
* tasks even in userspace and idle.
*
* Yes, function tracing is rude.
@@ -186,18 +169,6 @@
smp_rmb();
}
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-static void update_function_graph_func(void);
-
-/* Both enabled by default (can be cleared by function_graph tracer flags */
-static bool fgraph_sleep_time = true;
-static bool fgraph_graph_time = true;
-
-#else
-static inline void update_function_graph_func(void) { }
-#endif
-
-
static ftrace_func_t ftrace_ops_get_list_func(struct ftrace_ops *ops)
{
/*
@@ -334,7 +305,7 @@
static void ftrace_update_trampoline(struct ftrace_ops *ops);
-static int __register_ftrace_function(struct ftrace_ops *ops)
+int __register_ftrace_function(struct ftrace_ops *ops)
{
if (ops->flags & FTRACE_OPS_FL_DELETED)
return -EINVAL;
@@ -375,7 +346,7 @@
return 0;
}
-static int __unregister_ftrace_function(struct ftrace_ops *ops)
+int __unregister_ftrace_function(struct ftrace_ops *ops)
{
int ret;
@@ -815,9 +786,16 @@
}
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+static bool fgraph_graph_time = true;
+
+void ftrace_graph_graph_time_control(bool enable)
+{
+ fgraph_graph_time = enable;
+}
+
static int profile_graph_entry(struct ftrace_graph_ent *trace)
{
- int index = current->curr_ret_stack;
+ struct ftrace_ret_stack *ret_stack;
function_profile_call(trace->func, 0, NULL, NULL);
@@ -825,14 +803,16 @@
if (!current->ret_stack)
return 0;
- if (index >= 0 && index < FTRACE_RETFUNC_DEPTH)
- current->ret_stack[index].subtime = 0;
+ ret_stack = ftrace_graph_get_ret_stack(current, 0);
+ if (ret_stack)
+ ret_stack->subtime = 0;
return 1;
}
static void profile_graph_return(struct ftrace_graph_ret *trace)
{
+ struct ftrace_ret_stack *ret_stack;
struct ftrace_profile_stat *stat;
unsigned long long calltime;
struct ftrace_profile *rec;
@@ -850,16 +830,15 @@
calltime = trace->rettime - trace->calltime;
if (!fgraph_graph_time) {
- int index;
-
- index = current->curr_ret_stack;
/* Append this call time to the parent time to subtract */
- if (index)
- current->ret_stack[index - 1].subtime += calltime;
+ ret_stack = ftrace_graph_get_ret_stack(current, 1);
+ if (ret_stack)
+ ret_stack->subtime += calltime;
- if (current->ret_stack[index].subtime < calltime)
- calltime -= current->ret_stack[index].subtime;
+ ret_stack = ftrace_graph_get_ret_stack(current, 0);
+ if (ret_stack && ret_stack->subtime < calltime)
+ calltime -= ret_stack->subtime;
else
calltime = 0;
}
@@ -874,15 +853,19 @@
local_irq_restore(flags);
}
+static struct fgraph_ops fprofiler_ops = {
+ .entryfunc = &profile_graph_entry,
+ .retfunc = &profile_graph_return,
+};
+
static int register_ftrace_profiler(void)
{
- return register_ftrace_graph(&profile_graph_return,
- &profile_graph_entry);
+ return register_ftrace_graph(&fprofiler_ops);
}
static void unregister_ftrace_profiler(void)
{
- unregister_ftrace_graph();
+ unregister_ftrace_graph(&fprofiler_ops);
}
#else
static struct ftrace_ops ftrace_profile_ops __read_mostly = {
@@ -934,7 +917,7 @@
ftrace_profile_enabled = 0;
/*
* unregister_ftrace_profiler calls stop_machine
- * so this acts like an synchronize_sched.
+	 * so this acts like a synchronize_rcu.
*/
unregister_ftrace_profiler();
}
@@ -1021,12 +1004,6 @@
}
#endif /* CONFIG_FUNCTION_PROFILER */
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-static int ftrace_graph_active;
-#else
-# define ftrace_graph_active 0
-#endif
-
#ifdef CONFIG_DYNAMIC_FTRACE
static struct ftrace_ops *removed_ops;
@@ -1067,7 +1044,7 @@
};
#define EMPTY_HASH ((struct ftrace_hash *)&empty_hash)
-static struct ftrace_ops global_ops = {
+struct ftrace_ops global_ops = {
.func = ftrace_stub,
.local_hash.notrace_hash = EMPTY_HASH,
.local_hash.filter_hash = EMPTY_HASH,
@@ -1086,7 +1063,7 @@
/*
* Some of the ops may be dynamically allocated,
- * they are freed after a synchronize_sched().
+ * they are freed after a synchronize_rcu().
*/
preempt_disable_notrace();
@@ -1286,7 +1263,7 @@
{
if (!hash || hash == EMPTY_HASH)
return;
- call_rcu_sched(&hash->rcu, __free_ftrace_hash_rcu);
+ call_rcu(&hash->rcu, __free_ftrace_hash_rcu);
}
void ftrace_free_filter(struct ftrace_ops *ops)
@@ -1501,9 +1478,9 @@
* the ip is not in the ops->notrace_hash.
*
* This needs to be called with preemption disabled as
- * the hashes are freed with call_rcu_sched().
+ * the hashes are freed with call_rcu().
*/
-static int
+int
ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs)
{
struct ftrace_ops_hash hash;
@@ -1646,6 +1623,11 @@
return keep_regs;
}
+static struct ftrace_ops *
+ftrace_find_tramp_ops_any(struct dyn_ftrace *rec);
+static struct ftrace_ops *
+ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *ops);
+
static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
int filter_hash,
bool inc)
@@ -1774,15 +1756,17 @@
}
/*
- * If the rec had TRAMP enabled, then it needs to
- * be cleared. As TRAMP can only be enabled iff
- * there is only a single ops attached to it.
- * In otherwords, always disable it on decrementing.
- * In the future, we may set it if rec count is
- * decremented to one, and the ops that is left
- * has a trampoline.
+ * The TRAMP needs to be set only if rec count
+ * is decremented to one, and the ops that is
+ * left has a trampoline. As TRAMP can only be
+ * enabled if there is only a single ops attached
+ * to it.
*/
- rec->flags &= ~FTRACE_FL_TRAMP;
+ if (ftrace_rec_count(rec) == 1 &&
+ ftrace_find_tramp_ops_any(rec))
+ rec->flags |= FTRACE_FL_TRAMP;
+ else
+ rec->flags &= ~FTRACE_FL_TRAMP;
/*
* flags will be cleared in ftrace_check_record()
@@ -1792,7 +1776,7 @@
count++;
/* Must match FTRACE_UPDATE_CALLS in ftrace_modify_all_code() */
- update |= ftrace_test_record(rec, 1) != FTRACE_UPDATE_IGNORE;
+ update |= ftrace_test_record(rec, true) != FTRACE_UPDATE_IGNORE;
/* Shortcut, if we handled all records, we are done. */
if (!all && count == hash->count)
@@ -1975,11 +1959,6 @@
printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
}
-static struct ftrace_ops *
-ftrace_find_tramp_ops_any(struct dyn_ftrace *rec);
-static struct ftrace_ops *
-ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *ops);
-
enum ftrace_bug_type ftrace_bug_type;
const void *ftrace_expected;
@@ -2013,7 +1992,7 @@
* modifying the code. @failed should be one of either:
* EFAULT - if the problem happens on reading the @ip address
* EINVAL - if what is read at @ip is not what was expected
- * EPERM - if the problem happens on writting to the @ip address
+ * EPERM - if the problem happens on writing to the @ip address
*/
void ftrace_bug(int failed, struct dyn_ftrace *rec)
{
@@ -2071,7 +2050,7 @@
}
}
-static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update)
+static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update)
{
unsigned long flag = 0UL;
@@ -2170,28 +2149,28 @@
/**
* ftrace_update_record, set a record that now is tracing or not
* @rec: the record to update
- * @enable: set to 1 if the record is tracing, zero to force disable
+ * @enable: set to true if the record is tracing, false to force disable
*
* The records that represent all functions that can be traced need
* to be updated when tracing has been enabled.
*/
-int ftrace_update_record(struct dyn_ftrace *rec, int enable)
+int ftrace_update_record(struct dyn_ftrace *rec, bool enable)
{
- return ftrace_check_record(rec, enable, 1);
+ return ftrace_check_record(rec, enable, true);
}
/**
* ftrace_test_record, check if the record has been enabled or not
* @rec: the record to test
- * @enable: set to 1 to check if enabled, 0 if it is disabled
+ * @enable: set to true to check if enabled, false if it is disabled
*
* The arch code may need to test if a record is already set to
* tracing to determine how to modify the function code that it
* represents.
*/
-int ftrace_test_record(struct dyn_ftrace *rec, int enable)
+int ftrace_test_record(struct dyn_ftrace *rec, bool enable)
{
- return ftrace_check_record(rec, enable, 0);
+ return ftrace_check_record(rec, enable, false);
}
static struct ftrace_ops *
@@ -2380,7 +2359,7 @@
}
static int
-__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
+__ftrace_replace_code(struct dyn_ftrace *rec, bool enable)
{
unsigned long ftrace_old_addr;
unsigned long ftrace_addr;
@@ -2412,13 +2391,15 @@
return ftrace_modify_call(rec, ftrace_old_addr, ftrace_addr);
}
- return -1; /* unknow ftrace bug */
+ return -1; /* unknown ftrace bug */
}
-void __weak ftrace_replace_code(int enable)
+void __weak ftrace_replace_code(int mod_flags)
{
struct dyn_ftrace *rec;
struct ftrace_page *pg;
+ bool enable = mod_flags & FTRACE_MODIFY_ENABLE_FL;
+ int schedulable = mod_flags & FTRACE_MODIFY_MAY_SLEEP_FL;
int failed;
if (unlikely(ftrace_disabled))
@@ -2435,6 +2416,8 @@
/* Stop processing */
return;
}
+ if (schedulable)
+ cond_resched();
} while_for_each_ftrace_rec();
}
@@ -2548,8 +2531,12 @@
void ftrace_modify_all_code(int command)
{
int update = command & FTRACE_UPDATE_TRACE_FUNC;
+ int mod_flags = 0;
int err = 0;
+ if (command & FTRACE_MAY_SLEEP)
+ mod_flags = FTRACE_MODIFY_MAY_SLEEP_FL;
+
/*
* If the ftrace_caller calls a ftrace_ops func directly,
* we need to make sure that it only traces functions it
@@ -2567,9 +2554,9 @@
}
if (command & FTRACE_UPDATE_CALLS)
- ftrace_replace_code(1);
+ ftrace_replace_code(mod_flags | FTRACE_MODIFY_ENABLE_FL);
else if (command & FTRACE_DISABLE_CALLS)
- ftrace_replace_code(0);
+ ftrace_replace_code(mod_flags);
if (update && ftrace_trace_function != ftrace_ops_list_func) {
function_trace_op = set_function_trace_op;
@@ -2682,7 +2669,7 @@
update_all_ops = false;
}
-static int ftrace_startup(struct ftrace_ops *ops, int command)
+int ftrace_startup(struct ftrace_ops *ops, int command)
{
int ret;
@@ -2724,7 +2711,7 @@
return 0;
}
-static int ftrace_shutdown(struct ftrace_ops *ops, int command)
+int ftrace_shutdown(struct ftrace_ops *ops, int command)
{
int ret;
@@ -2828,7 +2815,7 @@
* synchornize_rcu_tasks() will wait for those tasks to
* execute and either schedule voluntarily or enter user space.
*/
- if (IS_ENABLED(CONFIG_PREEMPT))
+ if (IS_ENABLED(CONFIG_PREEMPTION))
synchronize_rcu_tasks();
free_ops:
@@ -2951,14 +2938,13 @@
p = &pg->records[i];
p->flags = rec_flags;
-#ifndef CC_USING_NOP_MCOUNT
/*
* Do the initial record conversion from mcount jump
* to the NOP instructions.
*/
- if (!ftrace_code_disable(mod, p))
+ if (!__is_defined(CC_USING_NOP_MCOUNT) &&
+ !ftrace_code_disable(mod, p))
break;
-#endif
update_cnt++;
}
@@ -3017,7 +3003,7 @@
int cnt;
if (!num_to_init)
- return 0;
+ return NULL;
start_pg = pg = kzalloc(sizeof(*pg), GFP_KERNEL);
if (!pg)
@@ -3110,6 +3096,14 @@
hnd = &iter->probe_entry->hlist;
hash = iter->probe->ops.func_hash->filter_hash;
+
+ /*
+ * A probe being registered may temporarily have an empty hash
+ * and it's at the end of the func_probes list.
+ */
+ if (!hash || hash == EMPTY_HASH)
+ return NULL;
+
size = 1 << hash->size_bits;
retry:
@@ -3493,6 +3487,11 @@
ftrace_avail_open(struct inode *inode, struct file *file)
{
struct ftrace_iterator *iter;
+ int ret;
+
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
+ if (ret)
+ return ret;
if (unlikely(ftrace_disabled))
return -ENODEV;
@@ -3512,6 +3511,15 @@
{
struct ftrace_iterator *iter;
+ /*
+ * This shows us what functions are currently being
+ * traced and by what. Not sure if we want lockdown
+ * to hide such critical information for an admin.
+ * Although, perhaps it can show information we don't
+ * want people to see, but if something is tracing
+ * something, we probably want to know about it.
+ */
+
iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter));
if (!iter)
return -ENOMEM;
@@ -3547,21 +3555,22 @@
struct ftrace_hash *hash;
struct list_head *mod_head;
struct trace_array *tr = ops->private;
- int ret = 0;
+ int ret = -ENOMEM;
ftrace_ops_init(ops);
if (unlikely(ftrace_disabled))
return -ENODEV;
+ if (tracing_check_open_get_tr(tr))
+ return -ENODEV;
+
iter = kzalloc(sizeof(*iter), GFP_KERNEL);
if (!iter)
- return -ENOMEM;
+ goto out;
- if (trace_parser_get_init(&iter->parser, FTRACE_BUFF_MAX)) {
- kfree(iter);
- return -ENOMEM;
- }
+ if (trace_parser_get_init(&iter->parser, FTRACE_BUFF_MAX))
+ goto out;
iter->ops = ops;
iter->flags = flag;
@@ -3591,13 +3600,13 @@
if (!iter->hash) {
trace_parser_put(&iter->parser);
- kfree(iter);
- ret = -ENOMEM;
goto out_unlock;
}
} else
iter->hash = hash;
+ ret = 0;
+
if (file->f_mode & FMODE_READ) {
iter->pg = ftrace_pages_start;
@@ -3609,7 +3618,6 @@
/* Failed */
free_ftrace_hash(iter->hash);
trace_parser_put(&iter->parser);
- kfree(iter);
}
} else
file->private_data = iter;
@@ -3617,6 +3625,13 @@
out_unlock:
mutex_unlock(&ops->func_hash->regex_lock);
+ out:
+ if (ret) {
+ kfree(iter);
+ if (tr)
+ trace_array_put(tr);
+ }
+
return ret;
}
@@ -3625,6 +3640,7 @@
{
struct ftrace_ops *ops = inode->i_private;
+ /* Checks for tracefs lockdown */
return ftrace_regex_open(ops,
FTRACE_ITER_FILTER | FTRACE_ITER_DO_PROBES,
inode, file);
@@ -3635,6 +3651,7 @@
{
struct ftrace_ops *ops = inode->i_private;
+ /* Checks for tracefs lockdown */
return ftrace_regex_open(ops, FTRACE_ITER_NOTRACE,
inode, file);
}
@@ -3715,6 +3732,31 @@
}
static int
+add_rec_by_index(struct ftrace_hash *hash, struct ftrace_glob *func_g,
+ int clear_filter)
+{
+ long index = simple_strtoul(func_g->search, NULL, 0);
+ struct ftrace_page *pg;
+ struct dyn_ftrace *rec;
+
+ /* The index starts at 1 */
+ if (--index < 0)
+ return 0;
+
+ do_for_each_ftrace_rec(pg, rec) {
+ if (pg->index <= index) {
+ index -= pg->index;
+ /* this is a double loop, break goes to the next page */
+ break;
+ }
+ rec = &pg->records[index];
+ enter_record(hash, rec, clear_filter);
+ return 1;
+ } while_for_each_ftrace_rec();
+ return 0;
+}
+
+static int
ftrace_match_record(struct dyn_ftrace *rec, struct ftrace_glob *func_g,
struct ftrace_glob *mod_g, int exclude_mod)
{
@@ -3782,6 +3824,11 @@
if (unlikely(ftrace_disabled))
goto out_unlock;
+ if (func_g.type == MATCH_INDEX) {
+ found = add_rec_by_index(hash, &func_g, clear_filter);
+ goto out_unlock;
+ }
+
do_for_each_ftrace_rec(pg, rec) {
if (rec->flags & FTRACE_FL_DISABLED)
@@ -3862,7 +3909,7 @@
static bool module_exists(const char *module)
{
/* All modules have the symbol __this_module */
- const char this_mod[] = "__this_module";
+ static const char this_mod[] = "__this_module";
char modname[MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 2];
unsigned long val;
int n;
@@ -4207,10 +4254,13 @@
struct ftrace_func_entry *entry;
struct ftrace_func_map *map;
struct hlist_head *hhd;
- int size = 1 << mapper->hash.size_bits;
- int i;
+ int size, i;
+
+ if (!mapper)
+ return;
if (free_func && mapper->hash.count) {
+ size = 1 << mapper->hash.size_bits;
for (i = 0; i < size; i++) {
hhd = &mapper->hash.buckets[i];
hlist_for_each_entry(entry, hhd, hlist) {
@@ -4302,12 +4352,21 @@
mutex_unlock(&ftrace_lock);
+ /*
+ * Note, there's a small window here that the func_hash->filter_hash
+	 * may be NULL or empty. Need to be careful when reading the loop.
+ */
mutex_lock(&probe->ops.func_hash->regex_lock);
orig_hash = &probe->ops.func_hash->filter_hash;
old_hash = *orig_hash;
hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, old_hash);
+ if (!hash) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
ret = ftrace_match_records(hash, glob, strlen(glob));
/* Nothing found? */
@@ -4496,7 +4555,7 @@
if (ftrace_enabled && !ftrace_hash_empty(hash))
ftrace_run_modify_code(&probe->ops, FTRACE_UPDATE_CALLS,
&old_hash_ops);
- synchronize_sched();
+ synchronize_rcu();
hlist_for_each_entry_safe(entry, tmp, &hhd, hlist) {
hlist_del(&entry->hlist);
@@ -4738,7 +4797,7 @@
ftrace_set_addr(struct ftrace_ops *ops, unsigned long ip, int remove,
int reset, int enable)
{
- return ftrace_set_hash(ops, 0, 0, ip, remove, reset, enable);
+ return ftrace_set_hash(ops, NULL, 0, ip, remove, reset, enable);
}
/**
@@ -5002,6 +5061,8 @@
mutex_unlock(&iter->ops->func_hash->regex_lock);
free_ftrace_hash(iter->hash);
+ if (iter->tr)
+ trace_array_put(iter->tr);
kfree(iter);
return 0;
@@ -5159,9 +5220,13 @@
__ftrace_graph_open(struct inode *inode, struct file *file,
struct ftrace_graph_data *fgd)
{
- int ret = 0;
+ int ret;
struct ftrace_hash *new_hash = NULL;
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
+ if (ret)
+ return ret;
+
if (file->f_mode & FMODE_WRITE) {
const int size_bits = FTRACE_HASH_DEFAULT_BITS;
@@ -5314,7 +5379,7 @@
mutex_unlock(&graph_lock);
/* Wait till all users are no longer using the old hash */
- synchronize_sched();
+ synchronize_rcu();
free_ftrace_hash(old_hash);
}
@@ -5446,7 +5511,7 @@
/*
* The name "destroy_filter_files" is really a misnomer. Although
- * in the future, it may actualy delete the files, but this is
+ * in the future, it may actually delete the files, but this is
* really intended to make sure the ops passed in are disabled
* and that when this function returns, the caller is free to
* free the ops.
@@ -5708,7 +5773,7 @@
list_for_each_entry_safe(mod_map, n, &ftrace_mod_maps, list) {
if (mod_map->mod == mod) {
list_del_rcu(&mod_map->list);
- call_rcu_sched(&mod_map->rcu, ftrace_free_mod_map);
+ call_rcu(&mod_map->rcu, ftrace_free_mod_map);
break;
}
}
@@ -5769,7 +5834,7 @@
/*
* If the tracing is enabled, go ahead and enable the record.
*
- * The reason not to enable the record immediatelly is the
+ * The reason not to enable the record immediately is the
* inherent check of ftrace_make_nop/ftrace_make_call for
* correct previous instructions. Making first the NOP
* conversion puts the module to the correct state, thus
@@ -5928,7 +5993,7 @@
struct ftrace_mod_map *mod_map;
const char *ret = NULL;
- /* mod_map is freed via call_rcu_sched() */
+ /* mod_map is freed via call_rcu() */
preempt_disable();
list_for_each_entry_rcu(mod_map, &ftrace_mod_maps, list) {
ret = ftrace_func_address_lookup(mod_map, addr, size, off, sym);
@@ -6001,11 +6066,7 @@
{
struct ftrace_func_entry *entry;
- if (ftrace_hash_empty(hash))
- return;
-
- entry = __ftrace_lookup_ip(hash, func->ip);
-
+ entry = ftrace_lookup_ip(hash, func->ip);
/*
* Do not allow this rec to match again.
* Yeah, it may waste some memory, but will be removed
@@ -6178,7 +6239,7 @@
}
#else
-static struct ftrace_ops global_ops = {
+struct ftrace_ops global_ops = {
.func = ftrace_stub,
.flags = FTRACE_OPS_FL_RECURSION_SAFE |
FTRACE_OPS_FL_INITIALIZED |
@@ -6195,31 +6256,10 @@
static inline int ftrace_init_dyn_tracefs(struct dentry *d_tracer) { return 0; }
static inline void ftrace_startup_enable(int command) { }
static inline void ftrace_startup_all(int command) { }
-/* Keep as macros so we do not need to define the commands */
-# define ftrace_startup(ops, command) \
- ({ \
- int ___ret = __register_ftrace_function(ops); \
- if (!___ret) \
- (ops)->flags |= FTRACE_OPS_FL_ENABLED; \
- ___ret; \
- })
-# define ftrace_shutdown(ops, command) \
- ({ \
- int ___ret = __unregister_ftrace_function(ops); \
- if (!___ret) \
- (ops)->flags &= ~FTRACE_OPS_FL_ENABLED; \
- ___ret; \
- })
# define ftrace_startup_sysctl() do { } while (0)
# define ftrace_shutdown_sysctl() do { } while (0)
-static inline int
-ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs)
-{
- return 1;
-}
-
static void ftrace_update_trampoline(struct ftrace_ops *ops)
{
}
@@ -6250,7 +6290,7 @@
tr->ops->func = ftrace_stub;
}
-static inline void
+static nokprobe_inline void
__ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *ignored, struct pt_regs *regs)
{
@@ -6263,11 +6303,14 @@
/*
* Some of the ops may be dynamically allocated,
- * they must be freed after a synchronize_sched().
+ * they must be freed after a synchronize_rcu().
*/
preempt_disable_notrace();
do_for_each_ftrace_op(op, ftrace_ops_list) {
+ /* Stub functions don't need to be called nor tested */
+ if (op->flags & FTRACE_OPS_FL_STUB)
+ continue;
/*
* Check the following for each ops before calling their func:
* if RCU flag is set, then rcu_is_watching() must be true
@@ -6310,11 +6353,13 @@
{
__ftrace_ops_list_func(ip, parent_ip, NULL, regs);
}
+NOKPROBE_SYMBOL(ftrace_ops_list_func);
#else
static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip)
{
__ftrace_ops_list_func(ip, parent_ip, NULL, NULL);
}
+NOKPROBE_SYMBOL(ftrace_ops_no_ops);
#endif
/*
@@ -6341,6 +6386,7 @@
preempt_enable_notrace();
trace_clear_recursion(bit);
}
+NOKPROBE_SYMBOL(ftrace_ops_assist_func);
/**
* ftrace_ops_get_func - get the function a trampoline should call
@@ -6434,7 +6480,7 @@
rcu_assign_pointer(tr->function_pids, NULL);
/* Wait till all users are no longer using pid filtering */
- synchronize_sched();
+ synchronize_rcu();
trace_free_pid_list(pid_list);
}
@@ -6521,8 +6567,9 @@
struct seq_file *m;
int ret = 0;
- if (trace_array_get(tr) < 0)
- return -ENODEV;
+ ret = tracing_check_open_get_tr(tr);
+ if (ret)
+ return ret;
if ((file->f_mode & FMODE_WRITE) &&
(file->f_flags & O_TRUNC))
@@ -6581,7 +6628,7 @@
rcu_assign_pointer(tr->function_pids, pid_list);
if (filtered_pids) {
- synchronize_sched();
+ synchronize_rcu();
trace_free_pid_list(filtered_pids);
} else if (pid_list) {
/* Register a probe to set whether to ignore the tracing of a task */
@@ -6746,353 +6793,3 @@
mutex_unlock(&ftrace_lock);
return ret;
}
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-
-static struct ftrace_ops graph_ops = {
- .func = ftrace_stub,
- .flags = FTRACE_OPS_FL_RECURSION_SAFE |
- FTRACE_OPS_FL_INITIALIZED |
- FTRACE_OPS_FL_PID |
- FTRACE_OPS_FL_STUB,
-#ifdef FTRACE_GRAPH_TRAMP_ADDR
- .trampoline = FTRACE_GRAPH_TRAMP_ADDR,
- /* trampoline_size is only needed for dynamically allocated tramps */
-#endif
- ASSIGN_OPS_HASH(graph_ops, &global_ops.local_hash)
-};
-
-void ftrace_graph_sleep_time_control(bool enable)
-{
- fgraph_sleep_time = enable;
-}
-
-void ftrace_graph_graph_time_control(bool enable)
-{
- fgraph_graph_time = enable;
-}
-
-int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
-{
- return 0;
-}
-
-/* The callbacks that hook a function */
-trace_func_graph_ret_t ftrace_graph_return =
- (trace_func_graph_ret_t)ftrace_stub;
-trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub;
-static trace_func_graph_ent_t __ftrace_graph_entry = ftrace_graph_entry_stub;
-
-/* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */
-static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
-{
- int i;
- int ret = 0;
- int start = 0, end = FTRACE_RETSTACK_ALLOC_SIZE;
- struct task_struct *g, *t;
-
- for (i = 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) {
- ret_stack_list[i] =
- kmalloc_array(FTRACE_RETFUNC_DEPTH,
- sizeof(struct ftrace_ret_stack),
- GFP_KERNEL);
- if (!ret_stack_list[i]) {
- start = 0;
- end = i;
- ret = -ENOMEM;
- goto free;
- }
- }
-
- read_lock(&tasklist_lock);
- do_each_thread(g, t) {
- if (start == end) {
- ret = -EAGAIN;
- goto unlock;
- }
-
- if (t->ret_stack == NULL) {
- atomic_set(&t->tracing_graph_pause, 0);
- atomic_set(&t->trace_overrun, 0);
- t->curr_ret_stack = -1;
- t->curr_ret_depth = -1;
- /* Make sure the tasks see the -1 first: */
- smp_wmb();
- t->ret_stack = ret_stack_list[start++];
- }
- } while_each_thread(g, t);
-
-unlock:
- read_unlock(&tasklist_lock);
-free:
- for (i = start; i < end; i++)
- kfree(ret_stack_list[i]);
- return ret;
-}
-
-static void
-ftrace_graph_probe_sched_switch(void *ignore, bool preempt,
- struct task_struct *prev, struct task_struct *next)
-{
- unsigned long long timestamp;
- int index;
-
- /*
- * Does the user want to count the time a function was asleep.
- * If so, do not update the time stamps.
- */
- if (fgraph_sleep_time)
- return;
-
- timestamp = trace_clock_local();
-
- prev->ftrace_timestamp = timestamp;
-
- /* only process tasks that we timestamped */
- if (!next->ftrace_timestamp)
- return;
-
- /*
- * Update all the counters in next to make up for the
- * time next was sleeping.
- */
- timestamp -= next->ftrace_timestamp;
-
- for (index = next->curr_ret_stack; index >= 0; index--)
- next->ret_stack[index].calltime += timestamp;
-}
-
-/* Allocate a return stack for each task */
-static int start_graph_tracing(void)
-{
- struct ftrace_ret_stack **ret_stack_list;
- int ret, cpu;
-
- ret_stack_list = kmalloc_array(FTRACE_RETSTACK_ALLOC_SIZE,
- sizeof(struct ftrace_ret_stack *),
- GFP_KERNEL);
-
- if (!ret_stack_list)
- return -ENOMEM;
-
- /* The cpu_boot init_task->ret_stack will never be freed */
- for_each_online_cpu(cpu) {
- if (!idle_task(cpu)->ret_stack)
- ftrace_graph_init_idle_task(idle_task(cpu), cpu);
- }
-
- do {
- ret = alloc_retstack_tasklist(ret_stack_list);
- } while (ret == -EAGAIN);
-
- if (!ret) {
- ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
- if (ret)
- pr_info("ftrace_graph: Couldn't activate tracepoint"
- " probe to kernel_sched_switch\n");
- }
-
- kfree(ret_stack_list);
- return ret;
-}
-
-/*
- * Hibernation protection.
- * The state of the current task is too much unstable during
- * suspend/restore to disk. We want to protect against that.
- */
-static int
-ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state,
- void *unused)
-{
- switch (state) {
- case PM_HIBERNATION_PREPARE:
- pause_graph_tracing();
- break;
-
- case PM_POST_HIBERNATION:
- unpause_graph_tracing();
- break;
- }
- return NOTIFY_DONE;
-}
-
-static int ftrace_graph_entry_test(struct ftrace_graph_ent *trace)
-{
- if (!ftrace_ops_test(&global_ops, trace->func, NULL))
- return 0;
- return __ftrace_graph_entry(trace);
-}
-
-/*
- * The function graph tracer should only trace the functions defined
- * by set_ftrace_filter and set_ftrace_notrace. If another function
- * tracer ops is registered, the graph tracer requires testing the
- * function against the global ops, and not just trace any function
- * that any ftrace_ops registered.
- */
-static void update_function_graph_func(void)
-{
- struct ftrace_ops *op;
- bool do_test = false;
-
- /*
- * The graph and global ops share the same set of functions
- * to test. If any other ops is on the list, then
- * the graph tracing needs to test if its the function
- * it should call.
- */
- do_for_each_ftrace_op(op, ftrace_ops_list) {
- if (op != &global_ops && op != &graph_ops &&
- op != &ftrace_list_end) {
- do_test = true;
- /* in double loop, break out with goto */
- goto out;
- }
- } while_for_each_ftrace_op(op);
- out:
- if (do_test)
- ftrace_graph_entry = ftrace_graph_entry_test;
- else
- ftrace_graph_entry = __ftrace_graph_entry;
-}
-
-static struct notifier_block ftrace_suspend_notifier = {
- .notifier_call = ftrace_suspend_notifier_call,
-};
-
-int register_ftrace_graph(trace_func_graph_ret_t retfunc,
- trace_func_graph_ent_t entryfunc)
-{
- int ret = 0;
-
- mutex_lock(&ftrace_lock);
-
- /* we currently allow only one tracer registered at a time */
- if (ftrace_graph_active) {
- ret = -EBUSY;
- goto out;
- }
-
- register_pm_notifier(&ftrace_suspend_notifier);
-
- ftrace_graph_active++;
- ret = start_graph_tracing();
- if (ret) {
- ftrace_graph_active--;
- goto out;
- }
-
- ftrace_graph_return = retfunc;
-
- /*
- * Update the indirect function to the entryfunc, and the
- * function that gets called to the entry_test first. Then
- * call the update fgraph entry function to determine if
- * the entryfunc should be called directly or not.
- */
- __ftrace_graph_entry = entryfunc;
- ftrace_graph_entry = ftrace_graph_entry_test;
- update_function_graph_func();
-
- ret = ftrace_startup(&graph_ops, FTRACE_START_FUNC_RET);
-out:
- mutex_unlock(&ftrace_lock);
- return ret;
-}
-
-void unregister_ftrace_graph(void)
-{
- mutex_lock(&ftrace_lock);
-
- if (unlikely(!ftrace_graph_active))
- goto out;
-
- ftrace_graph_active--;
- ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
- ftrace_graph_entry = ftrace_graph_entry_stub;
- __ftrace_graph_entry = ftrace_graph_entry_stub;
- ftrace_shutdown(&graph_ops, FTRACE_STOP_FUNC_RET);
- unregister_pm_notifier(&ftrace_suspend_notifier);
- unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
-
- out:
- mutex_unlock(&ftrace_lock);
-}
-
-static DEFINE_PER_CPU(struct ftrace_ret_stack *, idle_ret_stack);
-
-static void
-graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack)
-{
- atomic_set(&t->tracing_graph_pause, 0);
- atomic_set(&t->trace_overrun, 0);
- t->ftrace_timestamp = 0;
- /* make curr_ret_stack visible before we add the ret_stack */
- smp_wmb();
- t->ret_stack = ret_stack;
-}
-
-/*
- * Allocate a return stack for the idle task. May be the first
- * time through, or it may be done by CPU hotplug online.
- */
-void ftrace_graph_init_idle_task(struct task_struct *t, int cpu)
-{
- t->curr_ret_stack = -1;
- t->curr_ret_depth = -1;
- /*
- * The idle task has no parent, it either has its own
- * stack or no stack at all.
- */
- if (t->ret_stack)
- WARN_ON(t->ret_stack != per_cpu(idle_ret_stack, cpu));
-
- if (ftrace_graph_active) {
- struct ftrace_ret_stack *ret_stack;
-
- ret_stack = per_cpu(idle_ret_stack, cpu);
- if (!ret_stack) {
- ret_stack =
- kmalloc_array(FTRACE_RETFUNC_DEPTH,
- sizeof(struct ftrace_ret_stack),
- GFP_KERNEL);
- if (!ret_stack)
- return;
- per_cpu(idle_ret_stack, cpu) = ret_stack;
- }
- graph_init_task(t, ret_stack);
- }
-}
-
-/* Allocate a return stack for newly created task */
-void ftrace_graph_init_task(struct task_struct *t)
-{
- /* Make sure we do not use the parent ret_stack */
- t->ret_stack = NULL;
- t->curr_ret_stack = -1;
- t->curr_ret_depth = -1;
-
- if (ftrace_graph_active) {
- struct ftrace_ret_stack *ret_stack;
-
- ret_stack = kmalloc_array(FTRACE_RETFUNC_DEPTH,
- sizeof(struct ftrace_ret_stack),
- GFP_KERNEL);
- if (!ret_stack)
- return;
- graph_init_task(t, ret_stack);
- }
-}
-
-void ftrace_graph_exit_task(struct task_struct *t)
-{
- struct ftrace_ret_stack *ret_stack = t->ret_stack;
-
- t->ret_stack = NULL;
- /* NULL must become visible to IRQs before we free it: */
- barrier();
-
- kfree(ret_stack);
-}
-#endif
diff --git a/kernel/trace/ftrace_internal.h b/kernel/trace/ftrace_internal.h
new file mode 100644
index 0000000..0456e0a
--- /dev/null
+++ b/kernel/trace/ftrace_internal.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_KERNEL_FTRACE_INTERNAL_H
+#define _LINUX_KERNEL_FTRACE_INTERNAL_H
+
+#ifdef CONFIG_FUNCTION_TRACER
+
+/*
+ * Traverse the ftrace_global_list, invoking all entries. The reason that we
+ * can use rcu_dereference_raw_check() is that elements removed from this list
+ * are simply leaked, so there is no need to interact with a grace-period
+ * mechanism. The rcu_dereference_raw_check() calls are needed to handle
+ * concurrent insertions into the ftrace_global_list.
+ *
+ * Silly Alpha and silly pointer-speculation compiler optimizations!
+ */
+#define do_for_each_ftrace_op(op, list) \
+ op = rcu_dereference_raw_check(list); \
+ do
+
+/*
+ * Optimized for just a single item in the list (as that is the normal case).
+ */
+#define while_for_each_ftrace_op(op) \
+ while (likely(op = rcu_dereference_raw_check((op)->next)) && \
+ unlikely((op) != &ftrace_list_end))
+
+extern struct ftrace_ops __rcu *ftrace_ops_list;
+extern struct ftrace_ops ftrace_list_end;
+extern struct mutex ftrace_lock;
+extern struct ftrace_ops global_ops;
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+int ftrace_startup(struct ftrace_ops *ops, int command);
+int ftrace_shutdown(struct ftrace_ops *ops, int command);
+int ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs);
+
+#else /* !CONFIG_DYNAMIC_FTRACE */
+
+int __register_ftrace_function(struct ftrace_ops *ops);
+int __unregister_ftrace_function(struct ftrace_ops *ops);
+/* Keep as macros so we do not need to define the commands */
+# define ftrace_startup(ops, command) \
+ ({ \
+ int ___ret = __register_ftrace_function(ops); \
+ if (!___ret) \
+ (ops)->flags |= FTRACE_OPS_FL_ENABLED; \
+ ___ret; \
+ })
+# define ftrace_shutdown(ops, command) \
+ ({ \
+ int ___ret = __unregister_ftrace_function(ops); \
+ if (!___ret) \
+ (ops)->flags &= ~FTRACE_OPS_FL_ENABLED; \
+ ___ret; \
+ })
+static inline int
+ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs)
+{
+ return 1;
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+extern int ftrace_graph_active;
+void update_function_graph_func(void);
+#else /* !CONFIG_FUNCTION_GRAPH_TRACER */
+# define ftrace_graph_active 0
+static inline void update_function_graph_func(void) { }
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+#else /* !CONFIG_FUNCTION_TRACER */
+#endif /* CONFIG_FUNCTION_TRACER */
+
+#endif
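The iteration macros moved into this header are meant to be used as a do/while pair, mirroring __ftrace_ops_list_func(). A hedged sketch of a walker, assuming a hypothetical my_dump_ops() helper and that the caller disables preemption around the walk:

	/* Sketch: walking the registered ops with the helpers above (hypothetical function). */
	static void my_dump_ops(void)
	{
		struct ftrace_ops *op;

		preempt_disable_notrace();
		do_for_each_ftrace_op(op, ftrace_ops_list) {
			/* stub entries carry no callback worth reporting */
			if (op->flags & FTRACE_OPS_FL_STUB)
				continue;
			pr_info("ftrace op %ps flags %lx\n", (void *)op->func, op->flags);
		} while_for_each_ftrace_op(op);
		preempt_enable_notrace();
	}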
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 65bd461..66358d6 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -128,16 +128,7 @@
#define RB_ALIGNMENT 4U
#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */
-
-#ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS
-# define RB_FORCE_8BYTE_ALIGNMENT 0
-# define RB_ARCH_ALIGNMENT RB_ALIGNMENT
-#else
-# define RB_FORCE_8BYTE_ALIGNMENT 1
-# define RB_ARCH_ALIGNMENT 8U
-#endif
-
-#define RB_ALIGN_DATA __aligned(RB_ARCH_ALIGNMENT)
+#define RB_ALIGN_DATA __aligned(RB_ALIGNMENT)
/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
@@ -353,20 +344,6 @@
local_set(&bpage->commit, 0);
}
-/**
- * ring_buffer_page_len - the size of data on the page.
- * @page: The page to read
- *
- * Returns the amount of data on the page, including buffer page header.
- */
-size_t ring_buffer_page_len(void *page)
-{
- struct buffer_data_page *bpage = page;
-
- return (local_read(&bpage->commit) & ~RB_MISSED_FLAGS)
- + BUF_PAGE_HDR_SIZE;
-}
-
/*
* Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
* this issue out.
@@ -487,6 +464,10 @@
local_t dropped_events;
local_t committing;
local_t commits;
+ local_t pages_touched;
+ local_t pages_read;
+ long last_pages_touch;
+ size_t shortest_full;
unsigned long read;
unsigned long read_bytes;
u64 write_stamp;
@@ -529,6 +510,41 @@
u64 read_stamp;
};
+/**
+ * ring_buffer_nr_pages - get the number of buffer pages in the ring buffer
+ * @buffer: The ring_buffer to get the number of pages from
+ * @cpu: The cpu of the ring_buffer to get the number of pages from
+ *
+ * Returns the number of pages used by a per_cpu buffer of the ring buffer.
+ */
+size_t ring_buffer_nr_pages(struct ring_buffer *buffer, int cpu)
+{
+ return buffer->buffers[cpu]->nr_pages;
+}
+
+/**
+ * ring_buffer_nr_pages_dirty - get the number of used pages in the ring buffer
+ * @buffer: The ring_buffer to get the number of pages from
+ * @cpu: The cpu of the ring_buffer to get the number of pages from
+ *
+ * Returns the number of pages that have content in the ring buffer.
+ */
+size_t ring_buffer_nr_dirty_pages(struct ring_buffer *buffer, int cpu)
+{
+ size_t read;
+ size_t cnt;
+
+ read = local_read(&buffer->buffers[cpu]->pages_read);
+ cnt = local_read(&buffer->buffers[cpu]->pages_touched);
+ /* The reader can read an empty page, but not more than that */
+ if (cnt < read) {
+ WARN_ON_ONCE(read > cnt + 1);
+ return 0;
+ }
+
+ return cnt - read;
+}
+
/*
* rb_wake_up_waiters - wake up tasks waiting for ring buffer input
*
@@ -556,7 +572,7 @@
* as data is added to any of the @buffer's cpu buffers. Otherwise
* it will wait for data to be added to a specific cpu buffer.
*/
-int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full)
+int ring_buffer_wait(struct ring_buffer *buffer, int cpu, int full)
{
struct ring_buffer_per_cpu *uninitialized_var(cpu_buffer);
DEFINE_WAIT(wait);
@@ -571,7 +587,7 @@
if (cpu == RING_BUFFER_ALL_CPUS) {
work = &buffer->irq_work;
/* Full only makes sense on per cpu reads */
- full = false;
+ full = 0;
} else {
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return -ENODEV;
@@ -623,15 +639,22 @@
!ring_buffer_empty_cpu(buffer, cpu)) {
unsigned long flags;
bool pagebusy;
+ size_t nr_pages;
+ size_t dirty;
if (!full)
break;
raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
+ nr_pages = cpu_buffer->nr_pages;
+ dirty = ring_buffer_nr_dirty_pages(buffer, cpu);
+ if (!cpu_buffer->shortest_full ||
+ cpu_buffer->shortest_full < full)
+ cpu_buffer->shortest_full = full;
raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
-
- if (!pagebusy)
+ if (!pagebusy &&
+ (!nr_pages || (dirty * 100) > full * nr_pages))
break;
}
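With this change the full argument to ring_buffer_wait() is a fill percentage (0-100) rather than a bool. A small sketch of the watermark test shared by the wait loop above and rb_wakeups() below, using a hypothetical helper name:

	/* Sketch: 'full' is a percentage of dirty pages, 0 meaning "any data at all". */
	static bool rb_watermark_hit(size_t dirty, size_t nr_pages, int full)
	{
		if (!full)
			return true;		/* old bool=false behaviour: wake on any data */
		if (!nr_pages)
			return true;		/* nothing to size the percentage against */
		return dirty * 100 > (size_t)full * nr_pages;
	}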
@@ -730,7 +753,7 @@
preempt_disable_notrace();
time = rb_time_stamp(buffer);
- preempt_enable_no_resched_notrace();
+ preempt_enable_notrace();
return time;
}
@@ -1054,6 +1077,7 @@
old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write);
old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries);
+ local_inc(&cpu_buffer->pages_touched);
/*
* Just make sure we have seen our old_write and synchronize
* with any interrupts that come in.
@@ -1834,7 +1858,7 @@
* There could have been a race between checking
* record_disable and incrementing it.
*/
- synchronize_sched();
+ synchronize_rcu();
for_each_buffer_cpu(buffer, cpu) {
cpu_buffer = buffer->buffers[cpu];
rb_check_pages(cpu_buffer);
@@ -2340,7 +2364,7 @@
event->time_delta = delta;
length -= RB_EVNT_HDR_SIZE;
- if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
+ if (length > RB_MAX_SMALL_DATA) {
event->type_len = 0;
event->array[0] = length;
} else
@@ -2355,11 +2379,11 @@
if (!length)
length++;
- if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
+ if (length > RB_MAX_SMALL_DATA)
length += sizeof(event.array[0]);
length += RB_EVNT_HDR_SIZE;
- length = ALIGN(length, RB_ARCH_ALIGNMENT);
+ length = ALIGN(length, RB_ALIGNMENT);
/*
* In case the time delta is larger than the 27 bits for it
@@ -2586,7 +2610,9 @@
static __always_inline void
rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
{
- bool pagebusy;
+ size_t nr_pages;
+ size_t dirty;
+ size_t full;
if (buffer->irq_work.waiters_pending) {
buffer->irq_work.waiters_pending = false;
@@ -2600,14 +2626,27 @@
irq_work_queue(&cpu_buffer->irq_work.work);
}
- pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
+ if (cpu_buffer->last_pages_touch == local_read(&cpu_buffer->pages_touched))
+ return;
- if (!pagebusy && cpu_buffer->irq_work.full_waiters_pending) {
- cpu_buffer->irq_work.wakeup_full = true;
- cpu_buffer->irq_work.full_waiters_pending = false;
- /* irq_work_queue() supplies it's own memory barriers */
- irq_work_queue(&cpu_buffer->irq_work.work);
- }
+ if (cpu_buffer->reader_page == cpu_buffer->commit_page)
+ return;
+
+ if (!cpu_buffer->irq_work.full_waiters_pending)
+ return;
+
+ cpu_buffer->last_pages_touch = local_read(&cpu_buffer->pages_touched);
+
+ full = cpu_buffer->shortest_full;
+ nr_pages = cpu_buffer->nr_pages;
+ dirty = ring_buffer_nr_dirty_pages(buffer, cpu_buffer->cpu);
+ if (full && nr_pages && (dirty * 100) <= full * nr_pages)
+ return;
+
+ cpu_buffer->irq_work.wakeup_full = true;
+ cpu_buffer->irq_work.full_waiters_pending = false;
+	/* irq_work_queue() supplies its own memory barriers */
+ irq_work_queue(&cpu_buffer->irq_work.work);
}
/*
@@ -3151,7 +3190,7 @@
* This prevents all writes to the buffer. Any attempt to write
* to the buffer after this will fail and return NULL.
*
- * The caller should call synchronize_sched() after this.
+ * The caller should call synchronize_rcu() after this.
*/
void ring_buffer_record_disable(struct ring_buffer *buffer)
{
@@ -3253,7 +3292,7 @@
* This prevents all writes to the buffer. Any attempt to write
* to the buffer after this will fail and return NULL.
*
- * The caller should call synchronize_sched() after this.
+ * The caller should call synchronize_rcu() after this.
*/
void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu)
{
@@ -3732,13 +3771,15 @@
goto spin;
/*
- * Yeah! We succeeded in replacing the page.
+ * Yay! We succeeded in replacing the page.
*
* Now make the new head point back to the reader page.
*/
rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list;
rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
+ local_inc(&cpu_buffer->pages_read);
+
/* Finally update the reader page to the new head */
cpu_buffer->reader_page = reader;
cpu_buffer->reader_page->read = 0;
@@ -4141,6 +4182,7 @@
* ring_buffer_read_prepare - Prepare for a non consuming read of the buffer
* @buffer: The ring buffer to read from
* @cpu: The cpu buffer to iterate over
+ * @flags: gfp flags to use for memory allocation
*
* This performs the initial preparations necessary to iterate
* through the buffer. Memory is allocated, buffer recording
@@ -4158,7 +4200,7 @@
* This overall must be paired with ring_buffer_read_finish.
*/
struct ring_buffer_iter *
-ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
+ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu, gfp_t flags)
{
struct ring_buffer_per_cpu *cpu_buffer;
struct ring_buffer_iter *iter;
@@ -4166,7 +4208,7 @@
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return NULL;
- iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+ iter = kmalloc(sizeof(*iter), flags);
if (!iter)
return NULL;
@@ -4191,7 +4233,7 @@
void
ring_buffer_read_prepare_sync(void)
{
- synchronize_sched();
+ synchronize_rcu();
}
EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync);
@@ -4334,6 +4376,10 @@
local_set(&cpu_buffer->entries, 0);
local_set(&cpu_buffer->committing, 0);
local_set(&cpu_buffer->commits, 0);
+ local_set(&cpu_buffer->pages_touched, 0);
+ local_set(&cpu_buffer->pages_read, 0);
+ cpu_buffer->last_pages_touch = 0;
+ cpu_buffer->shortest_full = 0;
cpu_buffer->read = 0;
cpu_buffer->read_bytes = 0;
@@ -4363,7 +4409,7 @@
atomic_inc(&cpu_buffer->record_disabled);
/* Make sure all commits have finished */
- synchronize_sched();
+ synchronize_rcu();
raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
@@ -4496,7 +4542,7 @@
goto out;
/*
- * We can't do a synchronize_sched here because this
+ * We can't do a synchronize_rcu here because this
* function can be called in atomic context.
* Normally this will be called from the same CPU as cpu.
* If not it's up to the caller to protect this.
@@ -4924,7 +4970,7 @@
cnt = data->cnt + (nested ? 27 : 0);
/* Multiply cnt by ~e, to make some unique increment */
- size = (data->cnt * 68 / 25) % (sizeof(rb_string) - 1);
+ size = (cnt * 68 / 25) % (sizeof(rb_string) - 1);
len = size + sizeof(struct rb_item);
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index ffba678..09b0b49 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -267,7 +267,7 @@
if (consumer && !(cnt % wakeup_interval))
wake_up_process(consumer);
-#ifndef CONFIG_PREEMPT
+#ifndef CONFIG_PREEMPTION
/*
* If we are a non preempt kernel, the 10 second run will
* stop everything while it runs. Instead, we will call
@@ -362,7 +362,7 @@
hit--; /* make it non zero */
}
- /* Caculate the average time in nanosecs */
+ /* Calculate the average time in nanosecs */
avg = NSEC_PER_MSEC / (hit + missed);
trace_printk("%ld ns per entry\n", avg);
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index bf6f1d7..6a0ee91 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -17,6 +17,7 @@
#include <linux/stacktrace.h>
#include <linux/writeback.h>
#include <linux/kallsyms.h>
+#include <linux/security.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/irqflags.h>
@@ -159,6 +160,8 @@
#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
static int tracing_set_tracer(struct trace_array *tr, const char *buf);
+static void ftrace_trace_userstack(struct ring_buffer *buffer,
+ unsigned long flags, int pc);
#define MAX_TRACER_SIZE 100
static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
@@ -302,6 +305,23 @@
mutex_unlock(&trace_types_lock);
}
+int tracing_check_open_get_tr(struct trace_array *tr)
+{
+ int ret;
+
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
+ if (ret)
+ return ret;
+
+ if (tracing_disabled)
+ return -ENODEV;
+
+ if (tr && trace_array_get(tr) < 0)
+ return -ENODEV;
+
+ return 0;
+}
+
int call_filter_check_discard(struct trace_event_call *call, void *rec,
struct ring_buffer *buffer,
struct ring_buffer_event *event)
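Most ->open() handlers later in this file are converted to call this helper instead of open-coding the tracing_disabled and trace_array_get() checks. A sketch of the resulting pattern, with a hypothetical handler name:

	/* Sketch: typical converted open handler (hypothetical name). */
	static int my_open(struct inode *inode, struct file *filp)
	{
		struct trace_array *tr = inode->i_private;
		int ret;

		/* covers the lockdown check, tracing_disabled and the tr refcount in one call */
		ret = tracing_check_open_get_tr(tr);
		if (ret)
			return ret;

		filp->private_data = inode->i_private;
		return 0;
	}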
@@ -364,7 +384,7 @@
}
/**
- * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list
+ * trace_filter_add_remove_task - Add or remove a task from a pid_list
* @pid_list: The list to modify
* @self: The current task for fork or NULL for exit
* @task: The task to add or remove
@@ -496,8 +516,10 @@
* not modified.
*/
pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
- if (!pid_list)
+ if (!pid_list) {
+ trace_parser_put(&parser);
return -ENOMEM;
+ }
pid_list->pid_max = READ_ONCE(pid_max);
@@ -507,6 +529,7 @@
pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
if (!pid_list->pids) {
+ trace_parser_put(&parser);
kfree(pid_list);
return -ENOMEM;
}
@@ -738,8 +761,7 @@
{
struct trace_entry *ent = ring_buffer_event_data(event);
- tracing_generic_entry_update(ent, flags, pc);
- ent->type = type;
+ tracing_generic_entry_update(ent, type, flags, pc);
}
static __always_inline struct ring_buffer_event *
@@ -894,7 +916,7 @@
EXPORT_SYMBOL_GPL(__trace_bputs);
#ifdef CONFIG_TRACER_SNAPSHOT
-void tracing_snapshot_instance(struct trace_array *tr)
+void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
{
struct tracer *tracer = tr->current_trace;
unsigned long flags;
@@ -920,10 +942,15 @@
}
local_irq_save(flags);
- update_max_tr(tr, current, smp_processor_id());
+ update_max_tr(tr, current, smp_processor_id(), cond_data);
local_irq_restore(flags);
}
+void tracing_snapshot_instance(struct trace_array *tr)
+{
+ tracing_snapshot_instance_cond(tr, NULL);
+}
+
/**
* tracing_snapshot - take a snapshot of the current buffer.
*
@@ -946,6 +973,54 @@
}
EXPORT_SYMBOL_GPL(tracing_snapshot);
+/**
+ * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
+ * @tr: The tracing instance to snapshot
+ * @cond_data: The data to be tested conditionally, and possibly saved
+ *
+ * This is the same as tracing_snapshot() except that the snapshot is
+ * conditional - the snapshot will only happen if the
+ * cond_snapshot.update() implementation receiving the cond_data
+ * returns true, which means that the trace array's cond_snapshot
+ * update() operation used the cond_data to determine whether the
+ * snapshot should be taken, and if it was, presumably saved it along
+ * with the snapshot.
+ */
+void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
+{
+ tracing_snapshot_instance_cond(tr, cond_data);
+}
+EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
+
+/**
+ * tracing_snapshot_cond_data - get the user data associated with a snapshot
+ * @tr: The tracing instance
+ *
+ * When the user enables a conditional snapshot using
+ * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
+ * with the snapshot. This accessor is used to retrieve it.
+ *
+ * Should not be called from cond_snapshot.update(), since it takes
+ * the tr->max_lock lock, which the code calling
+ * cond_snapshot.update() has already done.
+ *
+ * Returns the cond_data associated with the trace array's snapshot.
+ */
+void *tracing_cond_snapshot_data(struct trace_array *tr)
+{
+ void *cond_data = NULL;
+
+ arch_spin_lock(&tr->max_lock);
+
+ if (tr->cond_snapshot)
+ cond_data = tr->cond_snapshot->cond_data;
+
+ arch_spin_unlock(&tr->max_lock);
+
+ return cond_data;
+}
+EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
+
static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
struct trace_buffer *size_buf, int cpu_id);
static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
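A hedged usage sketch of the conditional-snapshot API introduced above: the caller registers an update() callback (type cond_update_fn_t) that inspects cond_data and decides whether update_max_tr() actually swaps buffers. The structure, threshold, and function names are hypothetical:

	/* Sketch: fire a snapshot only when a measured latency exceeds a threshold. */
	struct my_cond {			/* hypothetical cond_data */
		u64 threshold_ns;
		u64 last_seen_ns;
	};

	static bool my_update(struct trace_array *tr, void *cond_data)
	{
		struct my_cond *c = cond_data;

		/* called under tr->max_lock from update_max_tr() */
		return c->last_seen_ns > c->threshold_ns;
	}

	static struct my_cond my_data = { .threshold_ns = 1000000 };

	static int my_enable(struct trace_array *tr)
	{
		return tracing_snapshot_cond_enable(tr, &my_data, my_update);
	}

	static void my_latency_hit(struct trace_array *tr, u64 latency_ns)
	{
		my_data.last_seen_ns = latency_ns;
		tracing_snapshot_cond(tr, &my_data);	/* snapshots only if my_update() says so */
	}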
@@ -1025,12 +1100,111 @@
tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
+
+/**
+ * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
+ * @tr: The tracing instance
+ * @cond_data: User data to associate with the snapshot
+ * @update: Implementation of the cond_snapshot update function
+ *
+ * Check whether the conditional snapshot for the given instance has
+ * already been enabled, or if the current tracer is already using a
+ * snapshot; if so, return -EBUSY, else create a cond_snapshot and
+ * save the cond_data and update function inside.
+ *
+ * Returns 0 if successful, error otherwise.
+ */
+int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
+ cond_update_fn_t update)
+{
+ struct cond_snapshot *cond_snapshot;
+ int ret = 0;
+
+ cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
+ if (!cond_snapshot)
+ return -ENOMEM;
+
+ cond_snapshot->cond_data = cond_data;
+ cond_snapshot->update = update;
+
+ mutex_lock(&trace_types_lock);
+
+ ret = tracing_alloc_snapshot_instance(tr);
+ if (ret)
+ goto fail_unlock;
+
+ if (tr->current_trace->use_max_tr) {
+ ret = -EBUSY;
+ goto fail_unlock;
+ }
+
+ /*
+ * The cond_snapshot can only change to NULL without the
+ * trace_types_lock. We don't care if we race with it going
+ * to NULL, but we want to make sure that it's not set to
+ * something other than NULL when we get here, which we can
+ * do safely with only holding the trace_types_lock and not
+ * having to take the max_lock.
+ */
+ if (tr->cond_snapshot) {
+ ret = -EBUSY;
+ goto fail_unlock;
+ }
+
+ arch_spin_lock(&tr->max_lock);
+ tr->cond_snapshot = cond_snapshot;
+ arch_spin_unlock(&tr->max_lock);
+
+ mutex_unlock(&trace_types_lock);
+
+ return ret;
+
+ fail_unlock:
+ mutex_unlock(&trace_types_lock);
+ kfree(cond_snapshot);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
+
+/**
+ * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
+ * @tr: The tracing instance
+ *
+ * Check whether the conditional snapshot for the given instance is
+ * enabled; if so, free the cond_snapshot associated with it,
+ * otherwise return -EINVAL.
+ *
+ * Returns 0 if successful, error otherwise.
+ */
+int tracing_snapshot_cond_disable(struct trace_array *tr)
+{
+ int ret = 0;
+
+ arch_spin_lock(&tr->max_lock);
+
+ if (!tr->cond_snapshot)
+ ret = -EINVAL;
+ else {
+ kfree(tr->cond_snapshot);
+ tr->cond_snapshot = NULL;
+ }
+
+ arch_spin_unlock(&tr->max_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
#else
void tracing_snapshot(void)
{
WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot);
+void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
+{
+ WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
+}
+EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
int tracing_alloc_snapshot(void)
{
WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
@@ -1043,6 +1217,21 @@
tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
+void *tracing_cond_snapshot_data(struct trace_array *tr)
+{
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
+int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
+{
+ return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
+int tracing_snapshot_cond_disable(struct trace_array *tr)
+{
+ return false;
+}
+EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
#endif /* CONFIG_TRACER_SNAPSHOT */
void tracer_tracing_off(struct trace_array *tr)
@@ -1330,7 +1519,7 @@
max_data->critical_start = data->critical_start;
max_data->critical_end = data->critical_end;
- memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
+ strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
max_data->pid = tsk->pid;
/*
* If tsk == current, then use current_uid(), as that does not use
@@ -1354,12 +1543,14 @@
* @tr: tracer
* @tsk: the task with the latency
* @cpu: The cpu that initiated the trace.
+ * @cond_data: User data associated with a conditional snapshot
*
* Flip the buffers between the @tr and the max_tr and record information
* about which task was the cause of this latency.
*/
void
-update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
+update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
+ void *cond_data)
{
if (tr->stop_count)
return;
@@ -1380,17 +1571,23 @@
else
ring_buffer_record_off(tr->max_buffer.buffer);
+#ifdef CONFIG_TRACER_SNAPSHOT
+ if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
+ goto out_unlock;
+#endif
swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
__update_max_tr(tr, tsk, cpu);
+
+ out_unlock:
arch_spin_unlock(&tr->max_lock);
}
/**
* update_max_tr_single - only copy one trace over, and reset the rest
- * @tr - tracer
- * @tsk - task with the latency
- * @cpu - the cpu of the buffer to copy.
+ * @tr: tracer
+ * @tsk: task with the latency
+ * @cpu: the cpu of the buffer to copy.
*
* Flip the trace of a single CPU buffer between the @tr and the max_tr.
*/
@@ -1431,7 +1628,7 @@
}
#endif /* CONFIG_TRACER_MAX_TRACE */
-static int wait_on_pipe(struct trace_iterator *iter, bool full)
+static int wait_on_pipe(struct trace_iterator *iter, int full)
{
/* Iterators are static, they should be filled or empty */
if (trace_buffer_iter(iter, iter->cpu_file))
@@ -1547,6 +1744,10 @@
pr_info("Running postponed tracer tests:\n");
list_for_each_entry_safe(p, n, &postponed_selftests, list) {
+ /* This loop can take minutes when sanitizers are enabled, so
+		 * let's make sure we allow RCU processing.
+ */
+ cond_resched();
ret = run_tracer_selftest(p->type);
/* If the test fails, then warn and remove from available_tracers */
if (ret < 0) {
@@ -1584,7 +1785,7 @@
/**
* register_tracer - register a tracer with the ftrace system.
- * @type - the plugin for the tracer
+ * @type: the plugin for the tracer
*
* Register a new plugin tracer.
*/
@@ -1671,7 +1872,7 @@
return ret;
}
-void tracing_reset(struct trace_buffer *buf, int cpu)
+static void tracing_reset_cpu(struct trace_buffer *buf, int cpu)
{
struct ring_buffer *buffer = buf->buffer;
@@ -1681,7 +1882,7 @@
ring_buffer_record_disable(buffer);
/* Make sure all commits have finished */
- synchronize_sched();
+ synchronize_rcu();
ring_buffer_reset_cpu(buffer, cpu);
ring_buffer_record_enable(buffer);
@@ -1698,7 +1899,7 @@
ring_buffer_record_disable(buffer);
/* Make sure all commits have finished */
- synchronize_sched();
+ synchronize_rcu();
buf->time_start = buffer_ftrace_now(buf, buf->cpu);
@@ -1748,7 +1949,7 @@
static inline void set_cmdline(int idx, const char *cmdline)
{
- memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
+ strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
}
static int allocate_cmdlines_buffer(unsigned int val,
@@ -2047,9 +2248,9 @@
/**
* tracing_record_taskinfo - record the task info of a task
*
- * @task - task to record
- * @flags - TRACE_RECORD_CMDLINE for recording comm
- * - TRACE_RECORD_TGID for recording tgid
+ * @task: task to record
+ * @flags: TRACE_RECORD_CMDLINE for recording comm
+ * TRACE_RECORD_TGID for recording tgid
*/
void tracing_record_taskinfo(struct task_struct *task, int flags)
{
@@ -2075,10 +2276,10 @@
/**
* tracing_record_taskinfo_sched_switch - record task info for sched_switch
*
- * @prev - previous task during sched_switch
- * @next - next task during sched_switch
- * @flags - TRACE_RECORD_CMDLINE for recording comm
- * TRACE_RECORD_TGID for recording tgid
+ * @prev: previous task during sched_switch
+ * @next: next task during sched_switch
+ * @flags: TRACE_RECORD_CMDLINE for recording comm
+ * TRACE_RECORD_TGID for recording tgid
*/
void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
struct task_struct *next, int flags)
@@ -2128,13 +2329,14 @@
EXPORT_SYMBOL_GPL(trace_handle_return);
void
-tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
- int pc)
+tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
+ unsigned long flags, int pc)
{
struct task_struct *tsk = current;
entry->preempt_count = pc & 0xff;
entry->pid = (tsk) ? tsk->pid : 0;
+ entry->type = type;
entry->flags =
#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
@@ -2250,7 +2452,7 @@
preempt_enable();
/* Wait for all current users to finish */
- synchronize_sched();
+ synchronize_rcu();
for_each_tracing_cpu(cpu) {
free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
@@ -2452,16 +2654,16 @@
static_branch_disable(&ftrace_exports_enabled);
}
-void ftrace_exports(struct ring_buffer_event *event)
+static void ftrace_exports(struct ring_buffer_event *event)
{
struct trace_export *export;
preempt_disable_notrace();
- export = rcu_dereference_raw_notrace(ftrace_exports_list);
+ export = rcu_dereference_raw_check(ftrace_exports_list);
while (export) {
trace_process_export(export, event);
- export = rcu_dereference_raw_notrace(export->next);
+ export = rcu_dereference_raw_check(export->next);
}
preempt_enable_notrace();
@@ -2574,12 +2776,21 @@
#ifdef CONFIG_STACKTRACE
-#define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
+/* Allow 4 levels of nesting: normal, softirq, irq, NMI */
+#define FTRACE_KSTACK_NESTING 4
+
+#define FTRACE_KSTACK_ENTRIES (PAGE_SIZE / FTRACE_KSTACK_NESTING)
+
struct ftrace_stack {
- unsigned long calls[FTRACE_STACK_MAX_ENTRIES];
+ unsigned long calls[FTRACE_KSTACK_ENTRIES];
};
-static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
+
+struct ftrace_stacks {
+ struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
+};
+
+static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
static DEFINE_PER_CPU(int, ftrace_stack_reserve);
static void __ftrace_trace_stack(struct ring_buffer *buffer,
@@ -2588,13 +2799,10 @@
{
struct trace_event_call *call = &event_kernel_stack;
struct ring_buffer_event *event;
+ unsigned int size, nr_entries;
+ struct ftrace_stack *fstack;
struct stack_entry *entry;
- struct stack_trace trace;
- int use_stack;
- int size = FTRACE_STACK_ENTRIES;
-
- trace.nr_entries = 0;
- trace.skip = skip;
+ int stackidx;
/*
* Add one, for this function and the call to save_stack_trace()
@@ -2602,7 +2810,7 @@
*/
#ifndef CONFIG_UNWINDER_ORC
if (!regs)
- trace.skip++;
+ skip++;
#endif
/*
@@ -2613,53 +2821,40 @@
*/
preempt_disable_notrace();
- use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
+ stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
+
+ /* This should never happen. If it does, yell once and skip */
+ if (WARN_ON_ONCE(stackidx > FTRACE_KSTACK_NESTING))
+ goto out;
+
/*
- * We don't need any atomic variables, just a barrier.
- * If an interrupt comes in, we don't care, because it would
- * have exited and put the counter back to what we want.
- * We just need a barrier to keep gcc from moving things
- * around.
+ * The above __this_cpu_inc_return() is 'atomic' cpu local. An
+ * interrupt will either see the value pre increment or post
+ * increment. If the interrupt happens pre increment it will have
+ * restored the counter when it returns. We just need a barrier to
+ * keep gcc from moving things around.
*/
barrier();
- if (use_stack == 1) {
- trace.entries = this_cpu_ptr(ftrace_stack.calls);
- trace.max_entries = FTRACE_STACK_MAX_ENTRIES;
- if (regs)
- save_stack_trace_regs(regs, &trace);
- else
- save_stack_trace(&trace);
+ fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
+ size = ARRAY_SIZE(fstack->calls);
- if (trace.nr_entries > size)
- size = trace.nr_entries;
- } else
- /* From now on, use_stack is a boolean */
- use_stack = 0;
+ if (regs) {
+ nr_entries = stack_trace_save_regs(regs, fstack->calls,
+ size, skip);
+ } else {
+ nr_entries = stack_trace_save(fstack->calls, size, skip);
+ }
- size *= sizeof(unsigned long);
-
+ size = nr_entries * sizeof(unsigned long);
event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
sizeof(*entry) + size, flags, pc);
if (!event)
goto out;
entry = ring_buffer_event_data(event);
- memset(&entry->caller, 0, size);
-
- if (use_stack)
- memcpy(&entry->caller, trace.entries,
- trace.nr_entries * sizeof(unsigned long));
- else {
- trace.max_entries = FTRACE_STACK_ENTRIES;
- trace.entries = entry->caller;
- if (regs)
- save_stack_trace_regs(regs, &trace);
- else
- save_stack_trace(&trace);
- }
-
- entry->size = trace.nr_entries;
+ memcpy(&entry->caller, fstack->calls, size);
+ entry->size = nr_entries;
if (!call_filter_check_discard(call, entry, buffer, event))
__buffer_unlock_commit(buffer, event);
@@ -2727,16 +2922,17 @@
__ftrace_trace_stack(global_trace.trace_buffer.buffer,
flags, skip, preempt_count(), NULL);
}
+EXPORT_SYMBOL_GPL(trace_dump_stack);
+#ifdef CONFIG_USER_STACKTRACE_SUPPORT
static DEFINE_PER_CPU(int, user_stack_count);
-void
+static void
ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
{
struct trace_event_call *call = &event_user_stack;
struct ring_buffer_event *event;
struct userstack_entry *entry;
- struct stack_trace trace;
if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
return;
@@ -2767,12 +2963,7 @@
entry->tgid = current->tgid;
memset(&entry->caller, 0, sizeof(entry->caller));
- trace.nr_entries = 0;
- trace.max_entries = FTRACE_STACK_ENTRIES;
- trace.skip = 0;
- trace.entries = entry->caller;
-
- save_stack_trace_user(&trace);
+ stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
if (!call_filter_check_discard(call, entry, buffer, event))
__buffer_unlock_commit(buffer, event);
@@ -2781,13 +2972,12 @@
out:
preempt_enable();
}
-
-#ifdef UNUSED
-static void __trace_userstack(struct trace_array *tr, unsigned long flags)
+#else /* CONFIG_USER_STACKTRACE_SUPPORT */
+static void ftrace_trace_userstack(struct ring_buffer *buffer,
+ unsigned long flags, int pc)
{
- ftrace_trace_userstack(tr, flags, preempt_count());
}
-#endif /* UNUSED */
+#endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
#endif /* CONFIG_STACKTRACE */
@@ -2877,6 +3067,7 @@
if (global_trace.trace_buffer.buffer)
tracing_start_cmdline_record();
}
+EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
void trace_printk_start_comm(void)
{
@@ -2899,7 +3090,9 @@
/**
* trace_vbprintk - write binary msg to tracing buffer
- *
+ * @ip: The address of the caller
+ * @fmt: The string format to write to the buffer
+ * @args: Arguments for @fmt
*/
int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
{
@@ -3037,6 +3230,7 @@
va_end(ap);
return ret;
}
+EXPORT_SYMBOL_GPL(trace_array_printk);
__printf(3, 4)
int trace_array_printk_buf(struct ring_buffer *buffer,
@@ -3315,33 +3509,68 @@
}
static void
+get_total_entries_cpu(struct trace_buffer *buf, unsigned long *total,
+ unsigned long *entries, int cpu)
+{
+ unsigned long count;
+
+ count = ring_buffer_entries_cpu(buf->buffer, cpu);
+ /*
+ * If this buffer has skipped entries, then we hold all
+ * entries for the trace and we need to ignore the
+ * ones before the time stamp.
+ */
+ if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
+ count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
+ /* total is the same as the entries */
+ *total = count;
+ } else
+ *total = count +
+ ring_buffer_overrun_cpu(buf->buffer, cpu);
+ *entries = count;
+}
+
+static void
get_total_entries(struct trace_buffer *buf,
unsigned long *total, unsigned long *entries)
{
- unsigned long count;
+ unsigned long t, e;
int cpu;
*total = 0;
*entries = 0;
for_each_tracing_cpu(cpu) {
- count = ring_buffer_entries_cpu(buf->buffer, cpu);
- /*
- * If this buffer has skipped entries, then we hold all
- * entries for the trace and we need to ignore the
- * ones before the time stamp.
- */
- if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
- count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
- /* total is the same as the entries */
- *total += count;
- } else
- *total += count +
- ring_buffer_overrun_cpu(buf->buffer, cpu);
- *entries += count;
+ get_total_entries_cpu(buf, &t, &e, cpu);
+ *total += t;
+ *entries += e;
}
}
+unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
+{
+ unsigned long total, entries;
+
+ if (!tr)
+ tr = &global_trace;
+
+ get_total_entries_cpu(&tr->trace_buffer, &total, &entries, cpu);
+
+ return entries;
+}
+
+unsigned long trace_total_entries(struct trace_array *tr)
+{
+ unsigned long total, entries;
+
+ if (!tr)
+ tr = &global_trace;
+
+ get_total_entries(&tr->trace_buffer, &total, &entries);
+
+ return entries;
+}
+
static void print_lat_help_header(struct seq_file *m)
{
seq_puts(m, "# _------=> CPU# \n"
@@ -3380,23 +3609,18 @@
unsigned int flags)
{
bool tgid = flags & TRACE_ITER_RECORD_TGID;
- const char tgid_space[] = " ";
- const char space[] = " ";
+ const char *space = " ";
+ int prec = tgid ? 10 : 2;
- seq_printf(m, "# %s _-----=> irqs-off\n",
- tgid ? tgid_space : space);
- seq_printf(m, "# %s / _----=> need-resched\n",
- tgid ? tgid_space : space);
- seq_printf(m, "# %s| / _---=> hardirq/softirq\n",
- tgid ? tgid_space : space);
- seq_printf(m, "# %s|| / _--=> preempt-depth\n",
- tgid ? tgid_space : space);
- seq_printf(m, "# %s||| / delay\n",
- tgid ? tgid_space : space);
- seq_printf(m, "# TASK-PID %sCPU# |||| TIMESTAMP FUNCTION\n",
- tgid ? " TGID " : space);
- seq_printf(m, "# | | %s | |||| | |\n",
- tgid ? " | " : space);
+ print_event_info(buf, m);
+
+ seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space);
+ seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
+ seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
+ seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
+ seq_printf(m, "# %.*s||| / delay\n", prec, space);
+ seq_printf(m, "# TASK-PID %.*sCPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID ");
+ seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | ");
}
void
@@ -3901,7 +4125,8 @@
if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
for_each_tracing_cpu(cpu) {
iter->buffer_iter[cpu] =
- ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
+ ring_buffer_read_prepare(iter->trace_buffer->buffer,
+ cpu, GFP_KERNEL);
}
ring_buffer_read_prepare_sync();
for_each_tracing_cpu(cpu) {
@@ -3911,7 +4136,8 @@
} else {
cpu = iter->cpu_file;
iter->buffer_iter[cpu] =
- ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
+ ring_buffer_read_prepare(iter->trace_buffer->buffer,
+ cpu, GFP_KERNEL);
ring_buffer_read_prepare_sync();
ring_buffer_read_start(iter->buffer_iter[cpu]);
tracing_iter_reset(iter, cpu);
@@ -3932,8 +4158,11 @@
int tracing_open_generic(struct inode *inode, struct file *filp)
{
- if (tracing_disabled)
- return -ENODEV;
+ int ret;
+
+ ret = tracing_check_open_get_tr(NULL);
+ if (ret)
+ return ret;
filp->private_data = inode->i_private;
return 0;
@@ -3948,15 +4177,14 @@
* Open and update trace_array ref count.
* Must have the current trace_array passed to it.
*/
-static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
+int tracing_open_generic_tr(struct inode *inode, struct file *filp)
{
struct trace_array *tr = inode->i_private;
+ int ret;
- if (tracing_disabled)
- return -ENODEV;
-
- if (trace_array_get(tr) < 0)
- return -ENODEV;
+ ret = tracing_check_open_get_tr(tr);
+ if (ret)
+ return ret;
filp->private_data = inode->i_private;
@@ -4025,10 +4253,11 @@
{
struct trace_array *tr = inode->i_private;
struct trace_iterator *iter;
- int ret = 0;
+ int ret;
- if (trace_array_get(tr) < 0)
- return -ENODEV;
+ ret = tracing_check_open_get_tr(tr);
+ if (ret)
+ return ret;
/* If this file was open for write, then erase contents */
if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
@@ -4043,7 +4272,7 @@
if (cpu == RING_BUFFER_ALL_CPUS)
tracing_reset_online_cpus(trace_buf);
else
- tracing_reset(trace_buf, cpu);
+ tracing_reset_cpu(trace_buf, cpu);
}
if (file->f_mode & FMODE_READ) {
@@ -4144,19 +4373,30 @@
struct seq_file *m;
int ret;
- if (tracing_disabled)
- return -ENODEV;
-
- ret = seq_open(file, &show_traces_seq_ops);
+ ret = tracing_check_open_get_tr(tr);
if (ret)
return ret;
+ ret = seq_open(file, &show_traces_seq_ops);
+ if (ret) {
+ trace_array_put(tr);
+ return ret;
+ }
+
m = file->private_data;
m->private = tr;
return 0;
}
+static int show_traces_release(struct inode *inode, struct file *file)
+{
+ struct trace_array *tr = inode->i_private;
+
+ trace_array_put(tr);
+ return seq_release(inode, file);
+}
+
static ssize_t
tracing_write_stub(struct file *filp, const char __user *ubuf,
size_t count, loff_t *ppos)
@@ -4187,8 +4427,8 @@
static const struct file_operations show_traces_fops = {
.open = show_traces_open,
.read = seq_read,
- .release = seq_release,
.llseek = seq_lseek,
+ .release = show_traces_release,
};
static ssize_t
@@ -4407,13 +4647,15 @@
int neg = 0;
int ret;
size_t orig_len = strlen(option);
+ int len;
cmp = strstrip(option);
- if (strncmp(cmp, "no", 2) == 0) {
+ len = str_has_prefix(cmp, "no");
+ if (len)
neg = 1;
- cmp += 2;
- }
+
+ cmp += len;
mutex_lock(&trace_types_lock);
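str_has_prefix() returns the prefix length on a match and 0 otherwise, which is what lets the code above combine the test with the pointer advance. A minimal sketch of the same pattern with a hypothetical parser:

	/* Sketch: strip an optional "no" prefix and remember whether it was present. */
	static void my_parse_option(const char *option)
	{
		size_t len;
		int neg;

		len = str_has_prefix(option, "no");	/* 0 on mismatch, strlen("no") on match */
		neg = len ? 1 : 0;
		option += len;

		pr_info("option '%s'%s\n", option, neg ? " (negated)" : "");
	}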
@@ -4487,11 +4729,9 @@
struct trace_array *tr = inode->i_private;
int ret;
- if (tracing_disabled)
- return -ENODEV;
-
- if (trace_array_get(tr) < 0)
- return -ENODEV;
+ ret = tracing_check_open_get_tr(tr);
+ if (ret)
+ return ret;
ret = single_open(file, tracing_trace_options_show, inode->i_private);
if (ret < 0)
@@ -4518,6 +4758,7 @@
" trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
" current_tracer\t- function and latency tracers\n"
" available_tracers\t- list of configured tracers for current_tracer\n"
+ " error_log\t- error log for failed commands (that support it)\n"
" buffer_size_kb\t- view and modify size of per cpu buffer\n"
" buffer_total_size_kb - view total size of all cpu buffers\n\n"
" trace_clock\t\t-change the clock used to order events\n"
@@ -4538,7 +4779,7 @@
" instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
"\t\t\t Remove sub-buffer with rmdir\n"
" trace_options\t\t- Set format or modify how tracing happens\n"
- "\t\t\t Disable an option by adding a suffix 'no' to the\n"
+ "\t\t\t Disable an option by prefixing 'no' to the\n"
"\t\t\t option name\n"
" saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
#ifdef CONFIG_DYNAMIC_FTRACE
@@ -4603,31 +4844,49 @@
"\t\t\t traces\n"
#endif
#endif /* CONFIG_STACK_TRACER */
+#ifdef CONFIG_DYNAMIC_EVENTS
+ " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
+ "\t\t\t Write into this file to define/undefine new trace events.\n"
+#endif
#ifdef CONFIG_KPROBE_EVENTS
- " kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
+ " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
"\t\t\t Write into this file to define/undefine new trace events.\n"
#endif
#ifdef CONFIG_UPROBE_EVENTS
- " uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
+ " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
"\t\t\t Write into this file to define/undefine new trace events.\n"
#endif
#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
"\t accepts: event-definitions (one definition per line)\n"
"\t Format: p[:[<group>/]<event>] <place> [<args>]\n"
"\t r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
+#ifdef CONFIG_HIST_TRIGGERS
+ "\t s:[synthetic/]<event> <field> [<field>]\n"
+#endif
"\t -:[<group>/]<event>\n"
#ifdef CONFIG_KPROBE_EVENTS
"\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
"place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
#endif
#ifdef CONFIG_UPROBE_EVENTS
- "\t place: <path>:<offset>\n"
+ " place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
#endif
"\t args: <name>=fetcharg[:type]\n"
"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
- "\t $stack<index>, $stack, $retval, $comm\n"
- "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
- "\t b<bit-width>@<bit-offset>/<container-size>\n"
+#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
+ "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
+#else
+ "\t $stack<index>, $stack, $retval, $comm,\n"
+#endif
+ "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
+ "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
+ "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
+ "\t <type>\\[<array-size>\\]\n"
+#ifdef CONFIG_HIST_TRIGGERS
+ "\t field: <stype> <name>;\n"
+ "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
+ "\t [unsigned] char/int/long\n"
+#endif
#endif
" events/\t\t- Directory containing all trace event subsystems:\n"
" enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
@@ -4680,6 +4939,7 @@
"\t [:size=#entries]\n"
"\t [:pause][:continue][:clear]\n"
"\t [:name=histname1]\n"
+ "\t [:<handler>.<action>]\n"
"\t [if <filter>]\n\n"
"\t When a matching event is hit, an entry is added to a hash\n"
"\t table using the key(s) and value(s) named, and the value of a\n"
@@ -4720,8 +4980,21 @@
"\t unchanged.\n\n"
"\t The enable_hist and disable_hist triggers can be used to\n"
"\t have one event conditionally start and stop another event's\n"
- "\t already-attached hist trigger. The syntax is analagous to\n"
- "\t the enable_event and disable_event triggers.\n"
+ "\t already-attached hist trigger. The syntax is analogous to\n"
+ "\t the enable_event and disable_event triggers.\n\n"
+ "\t Hist trigger handlers and actions are executed whenever a\n"
+ "\t a histogram entry is added or updated. They take the form:\n\n"
+ "\t <handler>.<action>\n\n"
+ "\t The available handlers are:\n\n"
+ "\t onmatch(matching.event) - invoke on addition or update\n"
+ "\t onmax(var) - invoke if var exceeds current max\n"
+ "\t onchange(var) - invoke action if var changes\n\n"
+ "\t The available actions are:\n\n"
+ "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
+ "\t save(field,...) - save current event fields\n"
+#ifdef CONFIG_TRACER_SNAPSHOT
+ "\t snapshot() - snapshot the trace buffer\n"
+#endif
#endif
;
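
For context on the <handler>.<action> syntax documented in the README text above: a minimal user-space sketch (not part of this patch) that arms the wakeup-latency onmax().save() example from Documentation/trace/histogram.rst. The tracefs mount point and the use of the sched_waking/sched_switch events are assumptions about the running system.

/* Illustrative sketch only -- not part of this patch. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_str(const char *path, const char *s)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0 || write(fd, s, strlen(s)) < 0) {
		perror(path);
		return -1;
	}
	return close(fd);
}

int main(void)
{
	/* Remember a per-task timestamp in variable ts0 at wakeup time. */
	if (write_str("/sys/kernel/tracing/events/sched/sched_waking/trigger",
		      "hist:keys=pid:ts0=common_timestamp.usecs"))
		return 1;

	/* On sched_switch, compute the latency; the onmax($wakeup_lat)
	 * handler runs the save() action whenever a new maximum is seen. */
	return write_str("/sys/kernel/tracing/events/sched/sched_switch/trigger",
			 "hist:keys=next_pid:"
			 "wakeup_lat=common_timestamp.usecs-$ts0:"
			 "onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm)") ? 1 : 0;
}

Reading the sched_switch hist file afterwards then shows the saved fields alongside the maximum value recorded for each key.
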
@@ -4795,8 +5068,11 @@
static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
{
- if (tracing_disabled)
- return -ENODEV;
+ int ret;
+
+ ret = tracing_check_open_get_tr(NULL);
+ if (ret)
+ return ret;
return seq_open(filp, &tracing_saved_tgids_seq_ops);
}
@@ -4872,8 +5148,11 @@
static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
{
- if (tracing_disabled)
- return -ENODEV;
+ int ret;
+
+ ret = tracing_check_open_get_tr(NULL);
+ if (ret)
+ return ret;
return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
}
@@ -5037,8 +5316,11 @@
static int tracing_eval_map_open(struct inode *inode, struct file *filp)
{
- if (tracing_disabled)
- return -ENODEV;
+ int ret;
+
+ ret = tracing_check_open_get_tr(NULL);
+ if (ret)
+ return ret;
return seq_open(filp, &tracing_eval_map_seq_ops);
}
@@ -5366,6 +5648,16 @@
if (t == tr->current_trace)
goto out;
+#ifdef CONFIG_TRACER_SNAPSHOT
+ if (t->use_max_tr) {
+ arch_spin_lock(&tr->max_lock);
+ if (tr->cond_snapshot)
+ ret = -EBUSY;
+ arch_spin_unlock(&tr->max_lock);
+ if (ret)
+ goto out;
+ }
+#endif
/* Some tracers won't work on kernel command line */
if (system_state < SYSTEM_RUNNING && t->noboot) {
pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
@@ -5392,7 +5684,7 @@
if (tr->current_trace->reset)
tr->current_trace->reset(tr);
- /* Current trace needs to be nop_trace before synchronize_sched */
+ /* Current trace needs to be nop_trace before synchronize_rcu */
tr->current_trace = &nop_trace;
#ifdef CONFIG_TRACER_MAX_TRACE
@@ -5406,7 +5698,7 @@
* The update_max_tr is called from interrupts disabled
* so a synchronized_sched() is sufficient.
*/
- synchronize_sched();
+ synchronize_rcu();
free_snapshot(tr);
}
#endif
@@ -5551,13 +5843,11 @@
{
struct trace_array *tr = inode->i_private;
struct trace_iterator *iter;
- int ret = 0;
+ int ret;
- if (tracing_disabled)
- return -ENODEV;
-
- if (trace_array_get(tr) < 0)
- return -ENODEV;
+ ret = tracing_check_open_get_tr(tr);
+ if (ret)
+ return ret;
mutex_lock(&trace_types_lock);
@@ -5604,7 +5894,6 @@
return ret;
fail:
- kfree(iter->trace);
kfree(iter);
__trace_array_put(tr);
mutex_unlock(&trace_types_lock);
@@ -5687,7 +5976,7 @@
mutex_unlock(&iter->mutex);
- ret = wait_on_pipe(iter, false);
+ ret = wait_on_pipe(iter, 0);
mutex_lock(&iter->mutex);
@@ -5747,6 +6036,7 @@
sizeof(struct trace_iterator) -
offsetof(struct trace_iterator, seq));
cpumask_clear(iter->started);
+ trace_seq_init(&iter->seq);
iter->pos = -1;
trace_event_read_lock();
@@ -5803,7 +6093,6 @@
}
static const struct pipe_buf_operations tracing_pipe_buf_ops = {
- .can_merge = 0,
.confirm = generic_pipe_buf_confirm,
.release = generic_pipe_buf_release,
.steal = generic_pipe_buf_steal,
@@ -6083,13 +6372,13 @@
struct ring_buffer *buffer;
struct print_entry *entry;
unsigned long irq_flags;
- const char faulted[] = "<faulted>";
ssize_t written;
int size;
int len;
/* Used in tracing_mark_raw_write() as well */
-#define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
+#define FAULTED_STR "<faulted>"
+#define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
if (tracing_disabled)
return -EINVAL;
@@ -6121,7 +6410,7 @@
len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
if (len) {
- memcpy(&entry->buf, faulted, FAULTED_SIZE);
+ memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
cnt = FAULTED_SIZE;
written = -EFAULT;
} else
@@ -6162,7 +6451,6 @@
struct ring_buffer_event *event;
struct ring_buffer *buffer;
struct raw_data_entry *entry;
- const char faulted[] = "<faulted>";
unsigned long irq_flags;
ssize_t written;
int size;
@@ -6202,7 +6490,7 @@
len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
if (len) {
entry->id = -1;
- memcpy(&entry->buf, faulted, FAULTED_SIZE);
+ memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
written = -EFAULT;
} else
written = cnt;
@@ -6297,11 +6585,9 @@
struct trace_array *tr = inode->i_private;
int ret;
- if (tracing_disabled)
- return -ENODEV;
-
- if (trace_array_get(tr))
- return -ENODEV;
+ ret = tracing_check_open_get_tr(tr);
+ if (ret)
+ return ret;
ret = single_open(file, tracing_clock_show, inode->i_private);
if (ret < 0)
@@ -6331,11 +6617,9 @@
struct trace_array *tr = inode->i_private;
int ret;
- if (tracing_disabled)
- return -ENODEV;
-
- if (trace_array_get(tr))
- return -ENODEV;
+ ret = tracing_check_open_get_tr(tr);
+ if (ret)
+ return ret;
ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
if (ret < 0)
@@ -6388,10 +6672,11 @@
struct trace_array *tr = inode->i_private;
struct trace_iterator *iter;
struct seq_file *m;
- int ret = 0;
+ int ret;
- if (trace_array_get(tr) < 0)
- return -ENODEV;
+ ret = tracing_check_open_get_tr(tr);
+ if (ret)
+ return ret;
if (file->f_mode & FMODE_READ) {
iter = __tracing_open(inode, file, true);
@@ -6448,6 +6733,13 @@
goto out;
}
+ arch_spin_lock(&tr->max_lock);
+ if (tr->cond_snapshot)
+ ret = -EBUSY;
+ arch_spin_unlock(&tr->max_lock);
+ if (ret)
+ goto out;
+
switch (val) {
case 0:
if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
@@ -6465,15 +6757,17 @@
break;
}
#endif
- if (!tr->allocated_snapshot) {
+ if (tr->allocated_snapshot)
+ ret = resize_buffer_duplicate_size(&tr->max_buffer,
+ &tr->trace_buffer, iter->cpu_file);
+ else
ret = tracing_alloc_snapshot_instance(tr);
- if (ret < 0)
- break;
- }
+ if (ret < 0)
+ break;
local_irq_disable();
/* Now, we're going to swap */
if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
- update_max_tr(tr, current, smp_processor_id());
+ update_max_tr(tr, current, smp_processor_id(), NULL);
else
update_max_tr_single(tr, current, iter->cpu_file);
local_irq_enable();
@@ -6483,7 +6777,7 @@
if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
tracing_reset_online_cpus(&tr->max_buffer);
else
- tracing_reset(&tr->max_buffer, iter->cpu_file);
+ tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
}
break;
}
@@ -6527,6 +6821,7 @@
struct ftrace_buffer_info *info;
int ret;
+ /* The following checks for tracefs lockdown */
ret = tracing_buffers_open(inode, filp);
if (ret < 0)
return ret;
@@ -6648,17 +6943,260 @@
#endif /* CONFIG_TRACER_SNAPSHOT */
+#define TRACING_LOG_ERRS_MAX 8
+#define TRACING_LOG_LOC_MAX 128
+
+#define CMD_PREFIX " Command: "
+
+struct err_info {
+ const char **errs; /* ptr to loc-specific array of err strings */
+ u8 type; /* index into errs -> specific err string */
+ u8 pos; /* MAX_FILTER_STR_VAL = 256 */
+ u64 ts;
+};
+
+struct tracing_log_err {
+ struct list_head list;
+ struct err_info info;
+ char loc[TRACING_LOG_LOC_MAX]; /* err location */
+ char cmd[MAX_FILTER_STR_VAL]; /* what caused err */
+};
+
+static DEFINE_MUTEX(tracing_err_log_lock);
+
+static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
+{
+ struct tracing_log_err *err;
+
+ if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
+ err = kzalloc(sizeof(*err), GFP_KERNEL);
+ if (!err)
+ err = ERR_PTR(-ENOMEM);
+ tr->n_err_log_entries++;
+
+ return err;
+ }
+
+ err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
+ list_del(&err->list);
+
+ return err;
+}
+
+/**
+ * err_pos - find the position of a string within a command for error careting
+ * @cmd: The tracing command that caused the error
+ * @str: The string to position the caret at within @cmd
+ *
+ * Finds the position of the first occurrence of @str within @cmd. The
+ * return value can be passed to tracing_log_err() for caret placement
+ * within @cmd.
+ *
+ * Returns the index within @cmd of the first occurrence of @str or 0
+ * if @str was not found.
+ */
+unsigned int err_pos(char *cmd, const char *str)
+{
+ char *found;
+
+ if (WARN_ON(!strlen(cmd)))
+ return 0;
+
+ found = strstr(cmd, str);
+ if (found)
+ return found - cmd;
+
+ return 0;
+}
+
+/**
+ * tracing_log_err - write an error to the tracing error log
+ * @tr: The associated trace array for the error (NULL for top level array)
+ * @loc: A string describing where the error occurred
+ * @cmd: The tracing command that caused the error
+ * @errs: The array of loc-specific static error strings
+ * @type: The index into errs[], which produces the specific static err string
+ * @pos: The position the caret should be placed in the cmd
+ *
+ * Writes an error into tracing/error_log of the form:
+ *
+ * <loc>: error: <text>
+ * Command: <cmd>
+ * ^
+ *
+ * tracing/error_log is a small log file containing the last
+ * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
+ * unless there has been a tracing error, and the error log can be
+ * cleared and have its memory freed by writing the empty string in
+ * truncation mode to it, i.e. echo > tracing/error_log.
+ *
+ * NOTE: the @errs array along with the @type param is used to
+ * produce a static error string - this string is not copied and saved
+ * when the error is logged - only a pointer to it is saved. See
+ * existing callers for examples of how static strings are typically
+ * defined for use with tracing_log_err().
+ */
+void tracing_log_err(struct trace_array *tr,
+ const char *loc, const char *cmd,
+ const char **errs, u8 type, u8 pos)
+{
+ struct tracing_log_err *err;
+
+ if (!tr)
+ tr = &global_trace;
+
+ mutex_lock(&tracing_err_log_lock);
+ err = get_tracing_log_err(tr);
+ if (PTR_ERR(err) == -ENOMEM) {
+ mutex_unlock(&tracing_err_log_lock);
+ return;
+ }
+
+ snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
+ snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
+
+ err->info.errs = errs;
+ err->info.type = type;
+ err->info.pos = pos;
+ err->info.ts = local_clock();
+
+ list_add_tail(&err->list, &tr->err_log);
+ mutex_unlock(&tracing_err_log_lock);
+}
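
The NOTE above points to existing callers for the usual pattern; as a hedged sketch with invented names (foo_errs, FOO_ERR_*, the "nosuchfield" token), a caller keeps a static string table indexed by an error id and uses err_pos() to place the caret:

/* Hypothetical caller sketch, not part of this patch; lives next to
 * kernel/trace code and relies on the declarations in "trace.h". */
static const char *foo_errs[] = {
	"Field not found",
	"Variable already defined",
};
enum { FOO_ERR_FIELD_NOT_FOUND, FOO_ERR_DUPLICATE_VAR };

static void foo_parse(struct trace_array *tr, char *cmd)
{
	if (strstr(cmd, "nosuchfield"))
		/* error_log then shows "hist:foo: error: Field not found",
		 * followed by the command and a caret under the bad token. */
		tracing_log_err(tr, "hist:foo", cmd, foo_errs,
				FOO_ERR_FIELD_NOT_FOUND,
				err_pos(cmd, "nosuchfield"));
}
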
+
+static void clear_tracing_err_log(struct trace_array *tr)
+{
+ struct tracing_log_err *err, *next;
+
+ mutex_lock(&tracing_err_log_lock);
+ list_for_each_entry_safe(err, next, &tr->err_log, list) {
+ list_del(&err->list);
+ kfree(err);
+ }
+
+ tr->n_err_log_entries = 0;
+ mutex_unlock(&tracing_err_log_lock);
+}
+
+static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
+{
+ struct trace_array *tr = m->private;
+
+ mutex_lock(&tracing_err_log_lock);
+
+ return seq_list_start(&tr->err_log, *pos);
+}
+
+static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct trace_array *tr = m->private;
+
+ return seq_list_next(v, &tr->err_log, pos);
+}
+
+static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
+{
+ mutex_unlock(&tracing_err_log_lock);
+}
+
+static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
+{
+ u8 i;
+
+ for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
+ seq_putc(m, ' ');
+ for (i = 0; i < pos; i++)
+ seq_putc(m, ' ');
+ seq_puts(m, "^\n");
+}
+
+static int tracing_err_log_seq_show(struct seq_file *m, void *v)
+{
+ struct tracing_log_err *err = v;
+
+ if (err) {
+ const char *err_text = err->info.errs[err->info.type];
+ u64 sec = err->info.ts;
+ u32 nsec;
+
+ nsec = do_div(sec, NSEC_PER_SEC);
+ seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
+ err->loc, err_text);
+ seq_printf(m, "%s", err->cmd);
+ tracing_err_log_show_pos(m, err->info.pos);
+ }
+
+ return 0;
+}
+
+static const struct seq_operations tracing_err_log_seq_ops = {
+ .start = tracing_err_log_seq_start,
+ .next = tracing_err_log_seq_next,
+ .stop = tracing_err_log_seq_stop,
+ .show = tracing_err_log_seq_show
+};
+
+static int tracing_err_log_open(struct inode *inode, struct file *file)
+{
+ struct trace_array *tr = inode->i_private;
+ int ret = 0;
+
+ ret = tracing_check_open_get_tr(tr);
+ if (ret)
+ return ret;
+
+ /* If this file was opened for write, then erase contents */
+ if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
+ clear_tracing_err_log(tr);
+
+ if (file->f_mode & FMODE_READ) {
+ ret = seq_open(file, &tracing_err_log_seq_ops);
+ if (!ret) {
+ struct seq_file *m = file->private_data;
+ m->private = tr;
+ } else {
+ trace_array_put(tr);
+ }
+ }
+ return ret;
+}
+
+static ssize_t tracing_err_log_write(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ return count;
+}
+
+static int tracing_err_log_release(struct inode *inode, struct file *file)
+{
+ struct trace_array *tr = inode->i_private;
+
+ trace_array_put(tr);
+
+ if (file->f_mode & FMODE_READ)
+ seq_release(inode, file);
+
+ return 0;
+}
+
+static const struct file_operations tracing_err_log_fops = {
+ .open = tracing_err_log_open,
+ .write = tracing_err_log_write,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = tracing_err_log_release,
+};
+
static int tracing_buffers_open(struct inode *inode, struct file *filp)
{
struct trace_array *tr = inode->i_private;
struct ftrace_buffer_info *info;
int ret;
- if (tracing_disabled)
- return -ENODEV;
-
- if (trace_array_get(tr) < 0)
- return -ENODEV;
+ ret = tracing_check_open_get_tr(tr);
+ if (ret)
+ return ret;
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info) {
@@ -6745,7 +7283,7 @@
if ((filp->f_flags & O_NONBLOCK))
return -EAGAIN;
- ret = wait_on_pipe(iter, false);
+ ret = wait_on_pipe(iter, 0);
if (ret)
return ret;
@@ -6797,36 +7335,43 @@
struct ring_buffer *buffer;
void *page;
int cpu;
- int ref;
+ refcount_t refcount;
};
+static void buffer_ref_release(struct buffer_ref *ref)
+{
+ if (!refcount_dec_and_test(&ref->refcount))
+ return;
+ ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
+ kfree(ref);
+}
+
static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
struct buffer_ref *ref = (struct buffer_ref *)buf->private;
- if (--ref->ref)
- return;
-
- ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
- kfree(ref);
+ buffer_ref_release(ref);
buf->private = 0;
}
-static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
+static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
struct buffer_ref *ref = (struct buffer_ref *)buf->private;
- ref->ref++;
+ if (refcount_read(&ref->refcount) > INT_MAX/2)
+ return false;
+
+ refcount_inc(&ref->refcount);
+ return true;
}
/* Pipe buffer operations for a buffer. */
static const struct pipe_buf_operations buffer_pipe_buf_ops = {
- .can_merge = 0,
.confirm = generic_pipe_buf_confirm,
.release = buffer_pipe_buf_release,
- .steal = generic_pipe_buf_steal,
+ .steal = generic_pipe_buf_nosteal,
.get = buffer_pipe_buf_get,
};
@@ -6839,11 +7384,7 @@
struct buffer_ref *ref =
(struct buffer_ref *)spd->partial[i].private;
- if (--ref->ref)
- return;
-
- ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
- kfree(ref);
+ buffer_ref_release(ref);
spd->partial[i].private = 0;
}
@@ -6898,7 +7439,7 @@
break;
}
- ref->ref = 1;
+ refcount_set(&ref->refcount, 1);
ref->buffer = iter->trace_buffer->buffer;
ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
if (IS_ERR(ref->page)) {
@@ -6942,7 +7483,7 @@
if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
goto out;
- ret = wait_on_pipe(iter, true);
+ ret = wait_on_pipe(iter, iter->tr->buffer_percent);
if (ret)
goto out;
@@ -7656,7 +8197,54 @@
.llseek = default_llseek,
};
-struct dentry *trace_instance_dir;
+static ssize_t
+buffer_percent_read(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct trace_array *tr = filp->private_data;
+ char buf[64];
+ int r;
+
+ r = tr->buffer_percent;
+ r = sprintf(buf, "%d\n", r);
+
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t
+buffer_percent_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct trace_array *tr = filp->private_data;
+ unsigned long val;
+ int ret;
+
+ ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+ if (ret)
+ return ret;
+
+ if (val > 100)
+ return -EINVAL;
+
+ if (!val)
+ val = 1;
+
+ tr->buffer_percent = val;
+
+ (*ppos)++;
+
+ return cnt;
+}
+
+static const struct file_operations buffer_percent_fops = {
+ .open = tracing_open_generic_tr,
+ .read = buffer_percent_read,
+ .write = buffer_percent_write,
+ .release = tracing_release_generic_tr,
+ .llseek = default_llseek,
+};
+
+static struct dentry *trace_instance_dir;
static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
@@ -7763,7 +8351,7 @@
mutex_unlock(&trace_types_lock);
}
-static int instance_mkdir(const char *name)
+struct trace_array *trace_array_create(const char *name)
{
struct trace_array *tr;
int ret;
@@ -7802,6 +8390,7 @@
INIT_LIST_HEAD(&tr->systems);
INIT_LIST_HEAD(&tr->events);
INIT_LIST_HEAD(&tr->hist_vars);
+ INIT_LIST_HEAD(&tr->err_log);
if (allocate_trace_buffers(tr, trace_buf_size) < 0)
goto out_free_tr;
@@ -7827,7 +8416,7 @@
mutex_unlock(&trace_types_lock);
mutex_unlock(&event_mutex);
- return 0;
+ return tr;
out_free_tr:
free_trace_buffers(tr);
@@ -7839,33 +8428,21 @@
mutex_unlock(&trace_types_lock);
mutex_unlock(&event_mutex);
- return ret;
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(trace_array_create);
+static int instance_mkdir(const char *name)
+{
+ return PTR_ERR_OR_ZERO(trace_array_create(name));
}
-static int instance_rmdir(const char *name)
+static int __remove_instance(struct trace_array *tr)
{
- struct trace_array *tr;
- int found = 0;
- int ret;
int i;
- mutex_lock(&event_mutex);
- mutex_lock(&trace_types_lock);
-
- ret = -ENODEV;
- list_for_each_entry(tr, &ftrace_trace_arrays, list) {
- if (tr->name && strcmp(tr->name, name) == 0) {
- found = 1;
- break;
- }
- }
- if (!found)
- goto out_unlock;
-
- ret = -EBUSY;
if (tr->ref || (tr->current_trace && tr->current_trace->ref))
- goto out_unlock;
+ return -EBUSY;
list_del(&tr->list);
@@ -7891,10 +8468,46 @@
free_cpumask_var(tr->tracing_cpumask);
kfree(tr->name);
kfree(tr);
+ tr = NULL;
- ret = 0;
+ return 0;
+}
- out_unlock:
+int trace_array_destroy(struct trace_array *tr)
+{
+ int ret;
+
+ if (!tr)
+ return -EINVAL;
+
+ mutex_lock(&event_mutex);
+ mutex_lock(&trace_types_lock);
+
+ ret = __remove_instance(tr);
+
+ mutex_unlock(&trace_types_lock);
+ mutex_unlock(&event_mutex);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(trace_array_destroy);
+
+static int instance_rmdir(const char *name)
+{
+ struct trace_array *tr;
+ int ret;
+
+ mutex_lock(&event_mutex);
+ mutex_lock(&trace_types_lock);
+
+ ret = -ENODEV;
+ list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+ if (tr->name && strcmp(tr->name, name) == 0) {
+ ret = __remove_instance(tr);
+ break;
+ }
+ }
+
mutex_unlock(&trace_types_lock);
mutex_unlock(&event_mutex);
@@ -7964,6 +8577,11 @@
trace_create_file("timestamp_mode", 0444, d_tracer, tr,
&trace_time_stamp_mode_fops);
+ tr->buffer_percent = 50;
+
+ trace_create_file("buffer_percent", 0444, d_tracer,
+ tr, &buffer_percent_fops);
+
create_trace_options_dir(tr);
#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
@@ -7979,6 +8597,9 @@
tr, &snapshot_fops);
#endif
+ trace_create_file("error_log", 0644, d_tracer,
+ tr, &tracing_err_log_fops);
+
for_each_tracing_cpu(cpu)
tracing_init_tracefs_percpu(tr, cpu);
@@ -8035,10 +8656,6 @@
*/
tr->dir = debugfs_create_automount("tracing", NULL,
trace_automount, NULL);
- if (!tr->dir) {
- pr_warn_once("Could not create debugfs directory 'tracing'\n");
- return ERR_PTR(-ENOMEM);
- }
return NULL;
}
@@ -8341,12 +8958,8 @@
cnt++;
- /* reset all but tr, trace, and overruns */
- memset(&iter.seq, 0,
- sizeof(struct trace_iterator) -
- offsetof(struct trace_iterator, seq));
+ trace_iterator_reset(&iter);
iter.iter_flags |= TRACE_FILE_LAT_FMT;
- iter.pos = -1;
if (trace_find_next_entry_inc(&iter) != NULL) {
int ret;
@@ -8564,6 +9177,7 @@
INIT_LIST_HEAD(&global_trace.systems);
INIT_LIST_HEAD(&global_trace.events);
INIT_LIST_HEAD(&global_trace.hist_vars);
+ INIT_LIST_HEAD(&global_trace.err_log);
list_add(&global_trace.list, &ftrace_trace_arrays);
apply_trace_boot_options();
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 447bd96..d685c61 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -15,7 +15,6 @@
#include <linux/trace_seq.h>
#include <linux/trace_events.h>
#include <linux/compiler.h>
-#include <linux/trace_seq.h>
#include <linux/glob.h>
#ifdef CONFIG_FTRACE_SYSCALLS
@@ -194,6 +193,51 @@
unsigned long *pids;
};
+typedef bool (*cond_update_fn_t)(struct trace_array *tr, void *cond_data);
+
+/**
+ * struct cond_snapshot - conditional snapshot data and callback
+ *
+ * The cond_snapshot structure encapsulates a callback function and
+ * data associated with the snapshot for a given tracing instance.
+ *
+ * When a snapshot is taken conditionally, by invoking
+ * tracing_snapshot_cond(tr, cond_data), the cond_data passed in is
+ * handed in turn to the cond_snapshot.update() function. That data
+ * can be compared by the update() implementation with the cond_data
+ * contained wihin the struct cond_snapshot instance associated with
+ * the trace_array. Because the tr->max_lock is held throughout the
+ * update() call, the update() function can directly retrieve the
+ * cond_snapshot and cond_data associated with the per-instance
+ * snapshot associated with the trace_array.
+ *
+ * The cond_snapshot.update() implementation can save data to be
+ * associated with the snapshot if it decides to, and returns 'true'
+ * in that case, or it returns 'false' if the conditional snapshot
+ * shouldn't be taken.
+ *
+ * The cond_snapshot instance is created and associated with the
+ * user-defined cond_data by tracing_cond_snapshot_enable().
+ * Likewise, the cond_snapshot instance is destroyed and is no longer
+ * associated with the trace instance by
+ * tracing_cond_snapshot_disable().
+ *
+ * The method below is required.
+ *
+ * @update: When a conditional snapshot is invoked, the update()
+ * callback function is invoked with the tr->max_lock held. The
+ * update() implementation signals whether or not to actually
+ * take the snapshot, by returning 'true' if so, 'false' if no
+ * snapshot should be taken. Because the max_lock is held for
+ * the duration of update(), the implementation can safely
+ * retrieve and save any implementation data it needs in
+ * association with the snapshot.
+ */
+struct cond_snapshot {
+ void *cond_data;
+ cond_update_fn_t update;
+};
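
A minimal sketch of the update() contract described above, with an invented data structure and a placeholder latency helper (my_measure_latency() does not exist in the tree):

/* Hypothetical tracer-side sketch, not part of this patch. */
struct my_cond_data {
	u64	threshold;
	u64	worst;		/* optional data saved by update() */
};

static bool my_update(struct trace_array *tr, void *cond_data)
{
	struct my_cond_data *d = cond_data;
	u64 latency = my_measure_latency();	/* placeholder helper */

	/* Called with tr->max_lock held; return true to take the snapshot. */
	if (latency <= d->threshold)
		return false;
	d->worst = latency;
	return true;
}

/* Arm the conditional snapshot once ... */
static int my_enable(struct trace_array *tr, struct my_cond_data *d)
{
	return tracing_snapshot_cond_enable(tr, d, my_update);
}
/* ... and on the hot path, attempt it with: tracing_snapshot_cond(tr, d); */
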
+
/*
* The trace array - an array of per-CPU trace arrays. This is the
* highest level data structure that individual tracers deal with.
@@ -247,11 +291,14 @@
int clock_id;
int nr_topts;
bool clear_trace;
+ int buffer_percent;
+ unsigned int n_err_log_entries;
struct tracer *current_trace;
unsigned int trace_flags;
unsigned char trace_flags_index[TRACE_FLAGS_MAX_SIZE];
unsigned int flags;
raw_spinlock_t start_lock;
+ struct list_head err_log;
struct dentry *dir;
struct dentry *options;
struct dentry *percpu_dir;
@@ -276,6 +323,9 @@
#endif
int time_stamp_abs_ref;
struct list_head hist_vars;
+#ifdef CONFIG_TRACER_SNAPSHOT
+ struct cond_snapshot *cond_snapshot;
+#endif
};
enum {
@@ -288,6 +338,7 @@
extern int trace_array_get(struct trace_array *tr);
extern void trace_array_put(struct trace_array *tr);
+extern int tracing_check_open_get_tr(struct trace_array *tr);
extern int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs);
extern int tracing_set_clock(struct trace_array *tr, const char *clockstr);
@@ -315,11 +366,11 @@
__builtin_types_compatible_p(typeof(var), type *)
#undef IF_ASSIGN
-#define IF_ASSIGN(var, entry, etype, id) \
- if (FTRACE_CMP_TYPE(var, etype)) { \
- var = (typeof(var))(entry); \
- WARN_ON(id && (entry)->type != id); \
- break; \
+#define IF_ASSIGN(var, entry, etype, id) \
+ if (FTRACE_CMP_TYPE(var, etype)) { \
+ var = (typeof(var))(entry); \
+ WARN_ON(id != 0 && (entry)->type != id); \
+ break; \
}
/* Will cause compile errors if type is not found. */
@@ -534,6 +585,13 @@
TRACE_GRAPH_DEPTH_START_BIT,
TRACE_GRAPH_DEPTH_END_BIT,
+
+ /*
+ * To implement set_graph_notrace, if this bit is set, we ignore
+ * function graph tracing of called functions, until the return
+ * function is called to clear it.
+ */
+ TRACE_GRAPH_NOTRACE_BIT,
};
#define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0)
@@ -620,11 +678,11 @@
int tracer_init(struct tracer *t, struct trace_array *tr);
int tracing_is_enabled(void);
-void tracing_reset(struct trace_buffer *buf, int cpu);
void tracing_reset_online_cpus(struct trace_buffer *buf);
void tracing_reset_current(int cpu);
void tracing_reset_all_online_cpus(void);
int tracing_open_generic(struct inode *inode, struct file *filp);
+int tracing_open_generic_tr(struct inode *inode, struct file *filp);
bool tracing_is_disabled(void);
bool tracer_tracing_is_on(struct trace_array *tr);
void tracer_tracing_on(struct trace_array *tr);
@@ -663,6 +721,9 @@
void tracing_iter_reset(struct trace_iterator *iter, int cpu);
+unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu);
+unsigned long trace_total_entries(struct trace_array *tr);
+
void trace_function(struct trace_array *tr,
unsigned long ip,
unsigned long parent_ip,
@@ -719,23 +780,16 @@
const char __user *ubuf, size_t cnt);
#ifdef CONFIG_TRACER_MAX_TRACE
-void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
+void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
+ void *cond_data);
void update_max_tr_single(struct trace_array *tr,
struct task_struct *tsk, int cpu);
#endif /* CONFIG_TRACER_MAX_TRACE */
#ifdef CONFIG_STACKTRACE
-void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags,
- int pc);
-
void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
int pc);
#else
-static inline void ftrace_trace_userstack(struct ring_buffer *buffer,
- unsigned long flags, int pc)
-{
-}
-
static inline void __trace_stack(struct trace_array *tr, unsigned long flags,
int skip, int pc)
{
@@ -847,15 +901,21 @@
#define TRACE_GRAPH_PRINT_PROC 0x8
#define TRACE_GRAPH_PRINT_DURATION 0x10
#define TRACE_GRAPH_PRINT_ABS_TIME 0x20
-#define TRACE_GRAPH_PRINT_IRQS 0x40
-#define TRACE_GRAPH_PRINT_TAIL 0x80
-#define TRACE_GRAPH_SLEEP_TIME 0x100
-#define TRACE_GRAPH_GRAPH_TIME 0x200
+#define TRACE_GRAPH_PRINT_REL_TIME 0x40
+#define TRACE_GRAPH_PRINT_IRQS 0x80
+#define TRACE_GRAPH_PRINT_TAIL 0x100
+#define TRACE_GRAPH_SLEEP_TIME 0x200
+#define TRACE_GRAPH_GRAPH_TIME 0x400
#define TRACE_GRAPH_PRINT_FILL_SHIFT 28
#define TRACE_GRAPH_PRINT_FILL_MASK (0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT)
extern void ftrace_graph_sleep_time_control(bool enable);
+
+#ifdef CONFIG_FUNCTION_PROFILER
extern void ftrace_graph_graph_time_control(bool enable);
+#else
+static inline void ftrace_graph_graph_time_control(bool enable) { }
+#endif
extern enum print_line_t
print_graph_function_flags(struct trace_iterator *iter, u32 flags);
@@ -1445,6 +1505,7 @@
MATCH_MIDDLE_ONLY,
MATCH_END_ONLY,
MATCH_GLOB,
+ MATCH_INDEX,
};
struct regex {
@@ -1489,7 +1550,8 @@
extern void print_subsystem_event_filter(struct event_subsystem *system,
struct trace_seq *s);
extern int filter_assign_type(const char *type);
-extern int create_event_filter(struct trace_event_call *call,
+extern int create_event_filter(struct trace_array *tr,
+ struct trace_event_call *call,
char *filter_str, bool set_str,
struct event_filter **filterp);
extern void free_event_filter(struct event_filter *filter);
@@ -1795,6 +1857,11 @@
extern int trace_event_enable_disable(struct trace_event_file *file,
int enable, int soft_disable);
extern int tracing_alloc_snapshot(void);
+extern void tracing_snapshot_cond(struct trace_array *tr, void *cond_data);
+extern int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update);
+
+extern int tracing_snapshot_cond_disable(struct trace_array *tr);
+extern void *tracing_cond_snapshot_data(struct trace_array *tr);
extern const char *__start___trace_bprintk_fmt[];
extern const char *__stop___trace_bprintk_fmt[];
@@ -1815,6 +1882,11 @@
const char __user *buffer, size_t count, loff_t *ppos,
int (*createfn)(int, char**));
+extern unsigned int err_pos(char *cmd, const char *str);
+extern void tracing_log_err(struct trace_array *tr,
+ const char *loc, const char *cmd,
+ const char **errs, u8 type, u8 pos);
+
/*
* Normal trace_printk() and friends allocates special buffers
* to do the manipulation, as well as saves the print formats
@@ -1895,4 +1967,22 @@
extern struct trace_iterator *tracepoint_print_iter;
+/*
+ * Reset the state of the trace_iterator so that it can read consumed data.
+ * Normally, the trace_iterator is used for reading the data when it is not
+ * consumed, and must retain state.
+ */
+static __always_inline void trace_iterator_reset(struct trace_iterator *iter)
+{
+ const size_t offset = offsetof(struct trace_iterator, seq);
+
+ /*
+ * Keep gcc from complaining about overwriting more than just one
+ * member in the structure.
+ */
+ memset((char *)iter + offset, 0, sizeof(struct trace_iterator) - offset);
+
+ iter->pos = -1;
+}
+
#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 4ad9674..3ea65cd 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -205,6 +205,8 @@
void ftrace_likely_update(struct ftrace_likely_data *f, int val,
int expect, int is_constant)
{
+ unsigned long flags = user_access_save();
+
/* A constant is always correct */
if (is_constant) {
f->constant++;
@@ -223,6 +225,8 @@
f->data.correct++;
else
f->data.incorrect++;
+
+ user_access_restore(flags);
}
EXPORT_SYMBOL(ftrace_likely_update);
diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c
new file mode 100644
index 0000000..89779eb
--- /dev/null
+++ b/kernel/trace/trace_dynevent.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generic dynamic event control interface
+ *
+ * Copyright (C) 2018 Masami Hiramatsu <mhiramat@kernel.org>
+ */
+
+#include <linux/debugfs.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/tracefs.h>
+
+#include "trace.h"
+#include "trace_dynevent.h"
+
+static DEFINE_MUTEX(dyn_event_ops_mutex);
+static LIST_HEAD(dyn_event_ops_list);
+
+int dyn_event_register(struct dyn_event_operations *ops)
+{
+ if (!ops || !ops->create || !ops->show || !ops->is_busy ||
+ !ops->free || !ops->match)
+ return -EINVAL;
+
+ INIT_LIST_HEAD(&ops->list);
+ mutex_lock(&dyn_event_ops_mutex);
+ list_add_tail(&ops->list, &dyn_event_ops_list);
+ mutex_unlock(&dyn_event_ops_mutex);
+ return 0;
+}
+
+int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type)
+{
+ struct dyn_event *pos, *n;
+ char *system = NULL, *event, *p;
+ int ret = -ENOENT;
+
+ if (argv[0][0] == '-') {
+ if (argv[0][1] != ':')
+ return -EINVAL;
+ event = &argv[0][2];
+ } else {
+ event = strchr(argv[0], ':');
+ if (!event)
+ return -EINVAL;
+ event++;
+ }
+ argc--; argv++;
+
+ p = strchr(event, '/');
+ if (p) {
+ system = event;
+ event = p + 1;
+ *p = '\0';
+ }
+ if (event[0] == '\0')
+ return -EINVAL;
+
+ mutex_lock(&event_mutex);
+ for_each_dyn_event_safe(pos, n) {
+ if (type && type != pos->ops)
+ continue;
+ if (!pos->ops->match(system, event,
+ argc, (const char **)argv, pos))
+ continue;
+
+ ret = pos->ops->free(pos);
+ if (ret)
+ break;
+ }
+ mutex_unlock(&event_mutex);
+
+ return ret;
+}
+
+static int create_dyn_event(int argc, char **argv)
+{
+ struct dyn_event_operations *ops;
+ int ret = -ENODEV;
+
+ if (argv[0][0] == '-' || argv[0][0] == '!')
+ return dyn_event_release(argc, argv, NULL);
+
+ mutex_lock(&dyn_event_ops_mutex);
+ list_for_each_entry(ops, &dyn_event_ops_list, list) {
+ ret = ops->create(argc, (const char **)argv);
+ if (!ret || ret != -ECANCELED)
+ break;
+ }
+ mutex_unlock(&dyn_event_ops_mutex);
+ if (ret == -ECANCELED)
+ ret = -EINVAL;
+
+ return ret;
+}
+
+/* Protected by event_mutex */
+LIST_HEAD(dyn_event_list);
+
+void *dyn_event_seq_start(struct seq_file *m, loff_t *pos)
+{
+ mutex_lock(&event_mutex);
+ return seq_list_start(&dyn_event_list, *pos);
+}
+
+void *dyn_event_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ return seq_list_next(v, &dyn_event_list, pos);
+}
+
+void dyn_event_seq_stop(struct seq_file *m, void *v)
+{
+ mutex_unlock(&event_mutex);
+}
+
+static int dyn_event_seq_show(struct seq_file *m, void *v)
+{
+ struct dyn_event *ev = v;
+
+ if (ev && ev->ops)
+ return ev->ops->show(m, ev);
+
+ return 0;
+}
+
+static const struct seq_operations dyn_event_seq_op = {
+ .start = dyn_event_seq_start,
+ .next = dyn_event_seq_next,
+ .stop = dyn_event_seq_stop,
+ .show = dyn_event_seq_show
+};
+
+/*
+ * dyn_events_release_all - Release all events of a given type
+ * @type: the dyn_event_operations * which filters releasing events
+ *
+ * This releases all events whose ->ops matches @type. If @type is NULL,
+ * all events are released.
+ * Returns -EBUSY if any of them are in use, and other errors when it
+ * fails to free a given event. Except for -EBUSY, the release process
+ * is aborted at that point and some otherwise releasable events may be
+ * left on the list.
+ */
+int dyn_events_release_all(struct dyn_event_operations *type)
+{
+ struct dyn_event *ev, *tmp;
+ int ret = 0;
+
+ mutex_lock(&event_mutex);
+ for_each_dyn_event(ev) {
+ if (type && ev->ops != type)
+ continue;
+ if (ev->ops->is_busy(ev)) {
+ ret = -EBUSY;
+ goto out;
+ }
+ }
+ for_each_dyn_event_safe(ev, tmp) {
+ if (type && ev->ops != type)
+ continue;
+ ret = ev->ops->free(ev);
+ if (ret)
+ break;
+ }
+out:
+ mutex_unlock(&event_mutex);
+
+ return ret;
+}
+
+static int dyn_event_open(struct inode *inode, struct file *file)
+{
+ int ret;
+
+ ret = tracing_check_open_get_tr(NULL);
+ if (ret)
+ return ret;
+
+ if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
+ ret = dyn_events_release_all(NULL);
+ if (ret < 0)
+ return ret;
+ }
+
+ return seq_open(file, &dyn_event_seq_op);
+}
+
+static ssize_t dyn_event_write(struct file *file, const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ return trace_parse_run_command(file, buffer, count, ppos,
+ create_dyn_event);
+}
+
+static const struct file_operations dynamic_events_ops = {
+ .owner = THIS_MODULE,
+ .open = dyn_event_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+ .write = dyn_event_write,
+};
+
+/* Make a tracefs interface for controlling dynamic events */
+static __init int init_dynamic_event(void)
+{
+ struct dentry *d_tracer;
+ struct dentry *entry;
+
+ d_tracer = tracing_init_dentry();
+ if (IS_ERR(d_tracer))
+ return 0;
+
+ entry = tracefs_create_file("dynamic_events", 0644, d_tracer,
+ NULL, &dynamic_events_ops);
+
+ /* Event list interface */
+ if (!entry)
+ pr_warn("Could not create tracefs 'dynamic_events' entry\n");
+
+ return 0;
+}
+fs_initcall(init_dynamic_event);
diff --git a/kernel/trace/trace_dynevent.h b/kernel/trace/trace_dynevent.h
new file mode 100644
index 0000000..4689813
--- /dev/null
+++ b/kernel/trace/trace_dynevent.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Common header file for generic dynamic events.
+ */
+
+#ifndef _TRACE_DYNEVENT_H
+#define _TRACE_DYNEVENT_H
+
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/seq_file.h>
+
+#include "trace.h"
+
+struct dyn_event;
+
+/**
+ * struct dyn_event_operations - Methods for each type of dynamic events
+ *
+ * These methods must be set for each type, since there is no default method.
+ * Before using this for dyn_event_init(), it must be registered by
+ * dyn_event_register().
+ *
+ * @create: Parse and create event method. This is invoked when the user
+ * passes an event definition to the dynamic_events interface. It must not
+ * modify the arguments and must return -ECANCELED if the given arguments
+ * don't match its command prefix.
+ * @show: Showing method. This is invoked when the user reads the event
+ * definitions via the dynamic_events interface.
+ * @is_busy: Check whether the given event is busy so that it cannot be
+ * deleted. Return true if it is busy, otherwise false.
+ * @free: Delete the given event. Return 0 on success, otherwise an error.
+ * @match: Check whether the given event and system name match this event.
+ * The argc and argv are used for an exact match. Return true if it matches,
+ * otherwise false.
+ *
+ * Except for @create, these methods are called under holding event_mutex.
+ */
+struct dyn_event_operations {
+ struct list_head list;
+ int (*create)(int argc, const char *argv[]);
+ int (*show)(struct seq_file *m, struct dyn_event *ev);
+ bool (*is_busy)(struct dyn_event *ev);
+ int (*free)(struct dyn_event *ev);
+ bool (*match)(const char *system, const char *event,
+ int argc, const char **argv, struct dyn_event *ev);
+};
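
A rough, hypothetical provider sketch of these five methods (all names invented; trace_kprobe.c implements them for real for kprobe events):

/* Hypothetical provider sketch, not part of this patch. */
static int foo_create(int argc, const char *argv[])
{
	/* Not our command prefix: let the next registered type try it. */
	if (argv[0][0] != 'f')
		return -ECANCELED;
	/* ... parse argv, allocate the event, dyn_event_add() it ... */
	return 0;
}

static int foo_show(struct seq_file *m, struct dyn_event *ev)
{
	seq_puts(m, "f:foo/example\n");
	return 0;
}

static bool foo_is_busy(struct dyn_event *ev)
{
	return false;	/* nothing ever attaches to this toy event */
}

static int foo_free(struct dyn_event *ev)
{
	dyn_event_remove(ev);
	/* ... free the containing structure ... */
	return 0;
}

static bool foo_match(const char *system, const char *event,
		      int argc, const char **argv, struct dyn_event *ev)
{
	return strcmp(event, "example") == 0;
}

static struct dyn_event_operations foo_ops = {
	.create		= foo_create,
	.show		= foo_show,
	.is_busy	= foo_is_busy,
	.free		= foo_free,
	.match		= foo_match,
};
/* Registered early, e.g. from an __init function: dyn_event_register(&foo_ops); */
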
+
+/* Register new dyn_event type -- must be called at first */
+int dyn_event_register(struct dyn_event_operations *ops);
+
+/**
+ * struct dyn_event - Dynamic event list header
+ *
+ * The dyn_event structure encapsulates a list entry and a pointer to the
+ * operations for making a global list of dynamic events.
+ * Users must include this in each event structure, so that those events can
+ * be added/removed via the dynamic_events interface.
+ */
+struct dyn_event {
+ struct list_head list;
+ struct dyn_event_operations *ops;
+};
+
+extern struct list_head dyn_event_list;
+
+static inline
+int dyn_event_init(struct dyn_event *ev, struct dyn_event_operations *ops)
+{
+ if (!ev || !ops)
+ return -EINVAL;
+
+ INIT_LIST_HEAD(&ev->list);
+ ev->ops = ops;
+ return 0;
+}
+
+static inline int dyn_event_add(struct dyn_event *ev)
+{
+ lockdep_assert_held(&event_mutex);
+
+ if (!ev || !ev->ops)
+ return -EINVAL;
+
+ list_add_tail(&ev->list, &dyn_event_list);
+ return 0;
+}
+
+static inline void dyn_event_remove(struct dyn_event *ev)
+{
+ lockdep_assert_held(&event_mutex);
+ list_del_init(&ev->list);
+}
+
+void *dyn_event_seq_start(struct seq_file *m, loff_t *pos);
+void *dyn_event_seq_next(struct seq_file *m, void *v, loff_t *pos);
+void dyn_event_seq_stop(struct seq_file *m, void *v);
+int dyn_events_release_all(struct dyn_event_operations *type);
+int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type);
+
+/*
+ * for_each_dyn_event - iterate over the dyn_event list
+ * @pos: the struct dyn_event * to use as a loop cursor
+ *
+ * This is just the basis of the for_each macros. Wrap this for
+ * each actual event structure with ops filtering.
+ */
+#define for_each_dyn_event(pos) \
+ list_for_each_entry(pos, &dyn_event_list, list)
+
+/*
+ * for_each_dyn_event_safe - iterate over the dyn_event list safely
+ * @pos: the struct dyn_event * to use as a loop cursor
+ * @n: the struct dyn_event * to use as temporary storage
+ */
+#define for_each_dyn_event_safe(pos, n) \
+ list_for_each_entry_safe(pos, n, &dyn_event_list, list)
+
+#endif
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index 06bb2fd..fc8e973 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -65,7 +65,8 @@
__field( unsigned long, parent_ip )
),
- F_printk(" %lx <-- %lx", __entry->ip, __entry->parent_ip),
+ F_printk(" %ps <-- %ps",
+ (void *)__entry->ip, (void *)__entry->parent_ip),
FILTER_TRACE_FN,
@@ -83,7 +84,7 @@
__field_desc( int, graph_ent, depth )
),
- F_printk("--> %lx (%d)", __entry->func, __entry->depth),
+ F_printk("--> %ps (%d)", (void *)__entry->func, __entry->depth),
FILTER_OTHER
);
@@ -102,8 +103,8 @@
__field_desc( int, ret, depth )
),
- F_printk("<-- %lx (%d) (start: %llx end: %llx) over: %d",
- __entry->func, __entry->depth,
+ F_printk("<-- %ps (%d) (start: %llx end: %llx) over: %d",
+ (void *)__entry->func, __entry->depth,
__entry->calltime, __entry->rettime,
__entry->depth),
@@ -167,12 +168,6 @@
#define FTRACE_STACK_ENTRIES 8
-#ifndef CONFIG_64BIT
-# define IP_FMT "%08lx"
-#else
-# define IP_FMT "%016lx"
-#endif
-
FTRACE_ENTRY(kernel_stack, stack_entry,
TRACE_STACK,
@@ -182,12 +177,13 @@
__dynamic_array(unsigned long, caller )
),
- F_printk("\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n"
- "\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n"
- "\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n",
- __entry->caller[0], __entry->caller[1], __entry->caller[2],
- __entry->caller[3], __entry->caller[4], __entry->caller[5],
- __entry->caller[6], __entry->caller[7]),
+ F_printk("\t=> %ps\n\t=> %ps\n\t=> %ps\n"
+ "\t=> %ps\n\t=> %ps\n\t=> %ps\n"
+ "\t=> %ps\n\t=> %ps\n",
+ (void *)__entry->caller[0], (void *)__entry->caller[1],
+ (void *)__entry->caller[2], (void *)__entry->caller[3],
+ (void *)__entry->caller[4], (void *)__entry->caller[5],
+ (void *)__entry->caller[6], (void *)__entry->caller[7]),
FILTER_OTHER
);
@@ -201,12 +197,13 @@
__array( unsigned long, caller, FTRACE_STACK_ENTRIES )
),
- F_printk("\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n"
- "\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n"
- "\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n",
- __entry->caller[0], __entry->caller[1], __entry->caller[2],
- __entry->caller[3], __entry->caller[4], __entry->caller[5],
- __entry->caller[6], __entry->caller[7]),
+ F_printk("\t=> %ps\n\t=> %ps\n\t=> %ps\n"
+ "\t=> %ps\n\t=> %ps\n\t=> %ps\n"
+ "\t=> %ps\n\t=> %ps\n",
+ (void *)__entry->caller[0], (void *)__entry->caller[1],
+ (void *)__entry->caller[2], (void *)__entry->caller[3],
+ (void *)__entry->caller[4], (void *)__entry->caller[5],
+ (void *)__entry->caller[6], (void *)__entry->caller[7]),
FILTER_OTHER
);
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 69a3fe9..a9dfa04 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -272,9 +272,11 @@
goto out;
}
+ mutex_lock(&event_mutex);
ret = perf_trace_event_init(tp_event, p_event);
if (ret)
destroy_local_trace_kprobe(tp_event);
+ mutex_unlock(&event_mutex);
out:
kfree(func);
return ret;
@@ -282,15 +284,18 @@
void perf_kprobe_destroy(struct perf_event *p_event)
{
+ mutex_lock(&event_mutex);
perf_trace_event_close(p_event);
perf_trace_event_unreg(p_event);
+ mutex_unlock(&event_mutex);
destroy_local_trace_kprobe(p_event->tp_event);
}
#endif /* CONFIG_KPROBE_EVENTS */
#ifdef CONFIG_UPROBE_EVENTS
-int perf_uprobe_init(struct perf_event *p_event, bool is_retprobe)
+int perf_uprobe_init(struct perf_event *p_event,
+ unsigned long ref_ctr_offset, bool is_retprobe)
{
int ret;
char *path = NULL;
@@ -298,22 +303,20 @@
if (!p_event->attr.uprobe_path)
return -EINVAL;
- path = kzalloc(PATH_MAX, GFP_KERNEL);
- if (!path)
- return -ENOMEM;
- ret = strncpy_from_user(
- path, u64_to_user_ptr(p_event->attr.uprobe_path), PATH_MAX);
- if (ret == PATH_MAX)
- return -E2BIG;
- if (ret < 0)
- goto out;
+
+ path = strndup_user(u64_to_user_ptr(p_event->attr.uprobe_path),
+ PATH_MAX);
+ if (IS_ERR(path)) {
+ ret = PTR_ERR(path);
+ return (ret == -EINVAL) ? -E2BIG : ret;
+ }
if (path[0] == '\0') {
ret = -EINVAL;
goto out;
}
- tp_event = create_local_trace_uprobe(
- path, p_event->attr.probe_offset, is_retprobe);
+ tp_event = create_local_trace_uprobe(path, p_event->attr.probe_offset,
+ ref_ctr_offset, is_retprobe);
if (IS_ERR(tp_event)) {
ret = PTR_ERR(tp_event);
goto out;
@@ -417,8 +420,7 @@
unsigned long flags;
local_save_flags(flags);
- tracing_generic_entry_update(entry, flags, pc);
- entry->type = type;
+ tracing_generic_entry_update(entry, type, flags, pc);
}
NOKPROBE_SYMBOL(perf_trace_buf_update);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index f94be0c..fba87d1 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -12,6 +12,7 @@
#define pr_fmt(fmt) fmt
#include <linux/workqueue.h>
+#include <linux/security.h>
#include <linux/spinlock.h>
#include <linux/kthread.h>
#include <linux/tracefs.h>
@@ -70,14 +71,6 @@
#define while_for_each_event_file() \
}
-static struct list_head *
-trace_get_fields(struct trace_event_call *event_call)
-{
- if (!event_call->class->get_fields)
- return &event_call->class->fields;
- return event_call->class->get_fields(event_call);
-}
-
static struct ftrace_event_field *
__find_event_field(struct list_head *head, char *name)
{
@@ -263,12 +256,12 @@
local_save_flags(fbuffer->flags);
fbuffer->pc = preempt_count();
/*
- * If CONFIG_PREEMPT is enabled, then the tracepoint itself disables
+ * If CONFIG_PREEMPTION is enabled, then the tracepoint itself disables
* preemption (adding one to the preempt_count). Since we are
* interested in the preempt_count at the time the tracepoint was
* hit, we need to subtract one to offset the increment.
*/
- if (IS_ENABLED(CONFIG_PREEMPT))
+ if (IS_ENABLED(CONFIG_PREEMPTION))
fbuffer->pc--;
fbuffer->trace_file = trace_file;
@@ -795,7 +788,7 @@
return ret;
}
-static int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
+int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
{
char *event = NULL, *sub = NULL, *match;
int ret;
@@ -832,6 +825,7 @@
return ret;
}
+EXPORT_SYMBOL_GPL(ftrace_set_clr_event);
/**
* trace_set_clr_event - enable or disable an event
@@ -1251,7 +1245,7 @@
*/
array_descriptor = strchr(field->type, '[');
- if (!strncmp(field->type, "__data_loc", 10))
+ if (str_has_prefix(field->type, "__data_loc"))
array_descriptor = NULL;
if (!array_descriptor)
@@ -1301,6 +1295,8 @@
struct seq_file *m;
int ret;
+ /* Do we want to hide event format files on tracefs lockdown? */
+
ret = seq_open(file, &trace_format_seq_ops);
if (ret < 0)
return ret;
@@ -1318,9 +1314,6 @@
char buf[32];
int len;
- if (*ppos)
- return 0;
-
if (unlikely(!id))
return -ENODEV;
@@ -1450,28 +1443,17 @@
struct trace_array *tr = inode->i_private;
int ret;
- if (tracing_is_disabled())
- return -ENODEV;
-
- if (trace_array_get(tr) < 0)
- return -ENODEV;
-
/* Make a temporary dir that has no system but points to tr */
dir = kzalloc(sizeof(*dir), GFP_KERNEL);
- if (!dir) {
- trace_array_put(tr);
+ if (!dir)
return -ENOMEM;
- }
- dir->tr = tr;
-
- ret = tracing_open_generic(inode, filp);
+ ret = tracing_open_generic_tr(inode, filp);
if (ret < 0) {
- trace_array_put(tr);
kfree(dir);
return ret;
}
-
+ dir->tr = tr;
filp->private_data = dir;
return 0;
@@ -1781,6 +1763,10 @@
struct seq_file *m;
int ret;
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
+ if (ret)
+ return ret;
+
ret = seq_open(file, seq_ops);
if (ret < 0)
return ret;
@@ -1805,6 +1791,7 @@
{
const struct seq_operations *seq_ops = &show_event_seq_ops;
+ /* Checks for tracefs lockdown */
return ftrace_event_open(inode, file, seq_ops);
}
@@ -1815,8 +1802,9 @@
struct trace_array *tr = inode->i_private;
int ret;
- if (trace_array_get(tr) < 0)
- return -ENODEV;
+ ret = tracing_check_open_get_tr(tr);
+ if (ret)
+ return ret;
if ((file->f_mode & FMODE_WRITE) &&
(file->f_flags & O_TRUNC))
@@ -1835,8 +1823,9 @@
struct trace_array *tr = inode->i_private;
int ret;
- if (trace_array_get(tr) < 0)
- return -ENODEV;
+ ret = tracing_check_open_get_tr(tr);
+ if (ret)
+ return ret;
if ((file->f_mode & FMODE_WRITE) &&
(file->f_flags & O_TRUNC))
@@ -2309,7 +2298,8 @@
int trace_add_event_call(struct trace_event_call *call)
{
int ret;
- mutex_lock(&event_mutex);
+ lockdep_assert_held(&event_mutex);
+
mutex_lock(&trace_types_lock);
ret = __register_event(call, NULL);
@@ -2317,7 +2307,6 @@
__add_event_to_tracers(call);
mutex_unlock(&trace_types_lock);
- mutex_unlock(&event_mutex);
return ret;
}
@@ -2371,13 +2360,13 @@
{
int ret;
- mutex_lock(&event_mutex);
+ lockdep_assert_held(&event_mutex);
+
mutex_lock(&trace_types_lock);
down_write(&trace_event_sem);
ret = probe_remove_event_call(call);
up_write(&trace_event_sem);
mutex_unlock(&trace_types_lock);
- mutex_unlock(&event_mutex);
return ret;
}
@@ -3192,7 +3181,7 @@
event_trace_enable();
}
-#ifdef CONFIG_FTRACE_STARTUP_TEST
+#ifdef CONFIG_EVENT_TRACE_STARTUP_TEST
static DEFINE_SPINLOCK(test_spinlock);
static DEFINE_SPINLOCK(test_spinlock_irq);
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 5574e86..c9a74f8 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -66,7 +66,8 @@
C(INVALID_FILTER, "Meaningless filter expression"), \
C(IP_FIELD_ONLY, "Only 'ip' field is supported for function trace"), \
C(INVALID_VALUE, "Invalid value (did you forget quotes)?"), \
- C(NO_FILTER, "No filter found"),
+ C(ERRNO, "Error"), \
+ C(NO_FILTER, "No filter found")
#undef C
#define C(a, b) FILT_ERR_##a
@@ -76,7 +77,7 @@
#undef C
#define C(a, b) b
-static char *err_text[] = { ERRORS };
+static const char *err_text[] = { ERRORS };
/* Called after a '!' character but "!=" and "!~" are not "not"s */
static bool is_not(const char *str)
@@ -427,7 +428,7 @@
op_stack = kmalloc_array(nr_parens, sizeof(*op_stack), GFP_KERNEL);
if (!op_stack)
return ERR_PTR(-ENOMEM);
- prog_stack = kmalloc_array(nr_preds, sizeof(*prog_stack), GFP_KERNEL);
+ prog_stack = kcalloc(nr_preds, sizeof(*prog_stack), GFP_KERNEL);
if (!prog_stack) {
parse_error(pe, -ENOMEM, 0);
goto out_free;
@@ -451,8 +452,10 @@
switch (*next) {
case '(': /* #2 */
- if (top - op_stack > nr_parens)
- return ERR_PTR(-EINVAL);
+ if (top - op_stack > nr_parens) {
+ ret = -EINVAL;
+ goto out_free;
+ }
*(++top) = invert;
continue;
case '!': /* #3 */
@@ -491,10 +494,12 @@
break;
case '&':
case '|':
+ /* accepting only "&&" or "||" */
if (next[1] == next[0]) {
ptr++;
break;
}
+ /* fall through */
default:
parse_error(pe, FILT_ERR_TOO_MANY_PREDS,
next - str);
@@ -576,7 +581,11 @@
out_free:
kfree(op_stack);
kfree(inverts);
- kfree(prog_stack);
+ if (prog_stack) {
+ for (i = 0; prog_stack[i].pred; i++)
+ kfree(prog_stack[i].pred);
+ kfree(prog_stack);
+ }
return ERR_PTR(ret);
}
@@ -823,6 +832,9 @@
*search = buff;
+ if (isdigit(buff[0]))
+ return MATCH_INDEX;
+
for (i = 0; i < len; i++) {
if (buff[i] == '*') {
if (!i) {
@@ -860,6 +872,8 @@
}
switch (type) {
+ /* MATCH_INDEX should not happen, but if it does, match full */
+ case MATCH_INDEX:
case MATCH_FULL:
r->match = regex_match_full;
break;
@@ -912,7 +926,8 @@
filter->filter_string = NULL;
}
-static void append_filter_err(struct filter_parse_error *pe,
+static void append_filter_err(struct trace_array *tr,
+ struct filter_parse_error *pe,
struct event_filter *filter)
{
struct trace_seq *s;
@@ -940,8 +955,14 @@
if (pe->lasterr > 0) {
trace_seq_printf(s, "\n%*s", pos, "^");
trace_seq_printf(s, "\nparse_error: %s\n", err_text[pe->lasterr]);
+ tracing_log_err(tr, "event filter parse error",
+ filter->filter_string, err_text,
+ pe->lasterr, pe->lasterr_pos);
} else {
trace_seq_printf(s, "\nError: (%d)\n", pe->lasterr);
+ tracing_log_err(tr, "event filter parse error",
+ filter->filter_string, err_text,
+ FILT_ERR_ERRNO, 0);
}
trace_seq_putc(s, 0);
buf = kmemdup_nul(s->buffer, s->seq.len, GFP_KERNEL);
@@ -1065,6 +1086,9 @@
if (strchr(type, '[') && strstr(type, "char"))
return FILTER_STATIC_STRING;
+ if (strcmp(type, "char *") == 0 || strcmp(type, "const char *") == 0)
+ return FILTER_PTR_STRING;
+
return FILTER_OTHER;
}
@@ -1207,30 +1231,30 @@
* (perf doesn't use it) and grab everything.
*/
if (strcmp(field->name, "ip") != 0) {
- parse_error(pe, FILT_ERR_IP_FIELD_ONLY, pos + i);
- goto err_free;
- }
- pred->fn = filter_pred_none;
+ parse_error(pe, FILT_ERR_IP_FIELD_ONLY, pos + i);
+ goto err_free;
+ }
+ pred->fn = filter_pred_none;
- /*
- * Quotes are not required, but if they exist then we need
- * to read them till we hit a matching one.
- */
- if (str[i] == '\'' || str[i] == '"')
- q = str[i];
- else
- q = 0;
+ /*
+ * Quotes are not required, but if they exist then we need
+ * to read them till we hit a matching one.
+ */
+ if (str[i] == '\'' || str[i] == '"')
+ q = str[i];
+ else
+ q = 0;
- for (i++; str[i]; i++) {
- if (q && str[i] == q)
- break;
- if (!q && (str[i] == ')' || str[i] == '&' ||
- str[i] == '|'))
- break;
- }
- /* Skip quotes */
- if (q)
- s++;
+ for (i++; str[i]; i++) {
+ if (q && str[i] == q)
+ break;
+ if (!q && (str[i] == ')' || str[i] == '&' ||
+ str[i] == '|'))
+ break;
+ }
+ /* Skip quotes */
+ if (q)
+ s++;
len = i - s;
if (len >= MAX_FILTER_STR_VAL) {
parse_error(pe, FILT_ERR_OPERAND_TOO_LONG, pos + i);
@@ -1301,7 +1325,7 @@
/* go past the last quote */
i++;
- } else if (isdigit(str[i])) {
+ } else if (isdigit(str[i]) || str[i] == '-') {
/* Make sure the field is not a string */
if (is_string_field(field)) {
@@ -1314,6 +1338,9 @@
goto err_free;
}
+ if (str[i] == '-')
+ i++;
+
/* We allow 0xDEADBEEF */
while (isalnum(str[i]))
i++;
@@ -1590,7 +1617,7 @@
if (err) {
filter_disable(file);
parse_error(pe, FILT_ERR_BAD_SUBSYS_FILTER, 0);
- append_filter_err(pe, filter);
+ append_filter_err(tr, pe, filter);
} else
event_set_filtered_flag(file);
@@ -1616,7 +1643,7 @@
/*
* The calls can still be using the old filters.
- * Do a synchronize_sched() and to ensure all calls are
+ * Do a synchronize_rcu() and to ensure all calls are
* done with them before we free them.
*/
tracepoint_synchronize_unregister();
@@ -1702,7 +1729,8 @@
* information if @set_str is %true and the caller is responsible for
* freeing it.
*/
-static int create_filter(struct trace_event_call *call,
+static int create_filter(struct trace_array *tr,
+ struct trace_event_call *call,
char *filter_string, bool set_str,
struct event_filter **filterp)
{
@@ -1719,17 +1747,18 @@
err = process_preds(call, filter_string, *filterp, pe);
if (err && set_str)
- append_filter_err(pe, *filterp);
+ append_filter_err(tr, pe, *filterp);
create_filter_finish(pe);
return err;
}
-int create_event_filter(struct trace_event_call *call,
+int create_event_filter(struct trace_array *tr,
+ struct trace_event_call *call,
char *filter_str, bool set_str,
struct event_filter **filterp)
{
- return create_filter(call, filter_str, set_str, filterp);
+ return create_filter(tr, call, filter_str, set_str, filterp);
}
/**
@@ -1756,7 +1785,7 @@
kfree((*filterp)->filter_string);
(*filterp)->filter_string = NULL;
} else {
- append_filter_err(pe, *filterp);
+ append_filter_err(tr, pe, *filterp);
}
}
create_filter_finish(pe);
@@ -1787,7 +1816,7 @@
return 0;
}
- err = create_filter(call, filter_string, true, &filter);
+ err = create_filter(file->tr, call, filter_string, true, &filter);
/*
* Always swap the call filter with the new filter
@@ -1848,7 +1877,7 @@
if (filter) {
/*
* No event actually uses the system filter
- * we can free it without synchronize_sched().
+ * we can free it without synchronize_rcu().
*/
__free_filter(system->filter);
system->filter = filter;
@@ -2043,7 +2072,7 @@
if (event->filter)
goto out_unlock;
- err = create_filter(call, filter_str, false, &filter);
+ err = create_filter(NULL, call, filter_str, false, &filter);
if (err)
goto free_filter;
@@ -2192,8 +2221,8 @@
struct test_filter_data_t *d = &test_filter_data[i];
int err;
- err = create_filter(&event_ftrace_test_filter, d->filter,
- false, &filter);
+ err = create_filter(NULL, &event_ftrace_test_filter,
+ d->filter, false, &filter);
if (err) {
printk(KERN_INFO
"Failed to get filter for '%s', err %d\n",
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index eb908ef..7482a14 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -7,20 +7,77 @@
#include <linux/module.h>
#include <linux/kallsyms.h>
+#include <linux/security.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/stacktrace.h>
#include <linux/rculist.h>
#include <linux/tracefs.h>
+/* for gfp flag names */
+#include <linux/trace_events.h>
+#include <trace/events/mmflags.h>
+
#include "tracing_map.h"
#include "trace.h"
+#include "trace_dynevent.h"
#define SYNTH_SYSTEM "synthetic"
#define SYNTH_FIELDS_MAX 16
#define STR_VAR_LEN_MAX 32 /* must be multiple of sizeof(u64) */
+#define ERRORS \
+ C(NONE, "No error"), \
+ C(DUPLICATE_VAR, "Variable already defined"), \
+ C(VAR_NOT_UNIQUE, "Variable name not unique, need to use fully qualified name (subsys.event.var) for variable"), \
+ C(TOO_MANY_VARS, "Too many variables defined"), \
+ C(MALFORMED_ASSIGNMENT, "Malformed assignment"), \
+ C(NAMED_MISMATCH, "Named hist trigger doesn't match existing named trigger (includes variables)"), \
+ C(TRIGGER_EEXIST, "Hist trigger already exists"), \
+ C(TRIGGER_ENOENT_CLEAR, "Can't clear or continue a nonexistent hist trigger"), \
+ C(SET_CLOCK_FAIL, "Couldn't set trace_clock"), \
+ C(BAD_FIELD_MODIFIER, "Invalid field modifier"), \
+ C(TOO_MANY_SUBEXPR, "Too many subexpressions (3 max)"), \
+ C(TIMESTAMP_MISMATCH, "Timestamp units in expression don't match"), \
+ C(TOO_MANY_FIELD_VARS, "Too many field variables defined"), \
+ C(EVENT_FILE_NOT_FOUND, "Event file not found"), \
+ C(HIST_NOT_FOUND, "Matching event histogram not found"), \
+ C(HIST_CREATE_FAIL, "Couldn't create histogram for field"), \
+ C(SYNTH_VAR_NOT_FOUND, "Couldn't find synthetic variable"), \
+ C(SYNTH_EVENT_NOT_FOUND,"Couldn't find synthetic event"), \
+ C(SYNTH_TYPE_MISMATCH, "Param type doesn't match synthetic event field type"), \
+ C(SYNTH_COUNT_MISMATCH, "Param count doesn't match synthetic event field count"), \
+ C(FIELD_VAR_PARSE_FAIL, "Couldn't parse field variable"), \
+ C(VAR_CREATE_FIND_FAIL, "Couldn't create or find variable"), \
+ C(ONX_NOT_VAR, "For onmax(x) or onchange(x), x must be a variable"), \
+ C(ONX_VAR_NOT_FOUND, "Couldn't find onmax or onchange variable"), \
+ C(ONX_VAR_CREATE_FAIL, "Couldn't create onmax or onchange variable"), \
+ C(FIELD_VAR_CREATE_FAIL,"Couldn't create field variable"), \
+ C(TOO_MANY_PARAMS, "Too many action params"), \
+ C(PARAM_NOT_FOUND, "Couldn't find param"), \
+ C(INVALID_PARAM, "Invalid action param"), \
+ C(ACTION_NOT_FOUND, "No action found"), \
+ C(NO_SAVE_PARAMS, "No params found for save()"), \
+ C(TOO_MANY_SAVE_ACTIONS,"Can't have more than one save() action per hist"), \
+ C(ACTION_MISMATCH, "Handler doesn't support action"), \
+ C(NO_CLOSING_PAREN, "No closing paren found"), \
+ C(SUBSYS_NOT_FOUND, "Missing subsystem"), \
+ C(INVALID_SUBSYS_EVENT, "Invalid subsystem or event name"), \
+ C(INVALID_REF_KEY, "Using variable references in keys not supported"), \
+ C(VAR_NOT_FOUND, "Couldn't find variable"), \
+ C(FIELD_NOT_FOUND, "Couldn't find field"),
+
+#undef C
+#define C(a, b) HIST_ERR_##a
+
+enum { ERRORS };
+
+#undef C
+#define C(a, b) b
+
+static const char *err_text[] = { ERRORS };
+
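For reference, the two C() expansions above generate roughly the following (abridged):

	enum {
		HIST_ERR_NONE,
		HIST_ERR_DUPLICATE_VAR,
		HIST_ERR_VAR_NOT_UNIQUE,
		/* ... one enumerator per ERRORS entry ... */
	};

	static const char *err_text[] = {
		"No error",
		"Variable already defined",
		"Variable name not unique, need to use fully qualified name (subsys.event.var) for variable",
		/* ... one string per entry, indexed by the HIST_ERR_* enum ... */
	};

so a HIST_ERR_* value doubles as an index into err_text[] when the error is reported.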
struct hist_field;
typedef u64 (*hist_field_fn_t) (struct hist_field *field,
@@ -39,6 +96,16 @@
FIELD_OP_UNARY_MINUS,
};
+/*
+ * A hist_var (histogram variable) contains variable information for
+ * hist_fields having the HIST_FIELD_FL_VAR or HIST_FIELD_FL_VAR_REF
+ * flag set. A hist_var has a variable name e.g. ts0, and is
+ * associated with a given histogram trigger, as specified by
+ * hist_data. The hist_var idx is the unique index assigned to the
+ * variable by the hist trigger's tracing_map. The idx is what is
+ * used to set a variable's value and, by a variable reference, to
+ * retrieve it.
+ */
struct hist_var {
char *name;
struct hist_trigger_data *hist_data;
@@ -55,12 +122,29 @@
const char *type;
struct hist_field *operands[HIST_FIELD_OPERANDS_MAX];
struct hist_trigger_data *hist_data;
+
+ /*
+ * Variable fields contain variable-specific info in var.
+ */
struct hist_var var;
enum field_op_id operator;
char *system;
char *event_name;
+
+ /*
+ * The name field is used for EXPR and VAR_REF fields. VAR
+ * fields contain the variable name in var.name.
+ */
char *name;
- unsigned int var_idx;
+
+ /*
+ * When a histogram trigger is hit, if it has any references
+ * to variables, the values of those variables are collected
+ * into a var_ref_vals array by resolve_var_refs(). The
+ * current value of each variable is read from the tracing_map
+ * using the hist field's hist_var.idx and entered into the
+ * var_ref_idx entry, i.e. var_ref_vals[var_ref_idx].
+ */
unsigned int var_ref_idx;
bool read_once;
};
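To make the var.idx / var_ref_idx distinction concrete, consider the wakeup-latency pair of triggers from Documentation/trace/histogram.rst (field and variable names are illustrative):

	# echo 'hist:keys=pid:ts0=common_timestamp.usecs' \
	        >> events/sched/sched_waking/trigger
	# echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0' \
	        >> events/sched/sched_switch/trigger

The ts0 VAR field gets a slot in the sched_waking trigger's tracing_map (hist_var.idx); the $ts0 VAR_REF field on the sched_switch trigger gets a slot in that trigger's var_ref_vals[] array (var_ref_idx), which resolve_var_refs() fills on every hit.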
@@ -279,17 +363,30 @@
struct action_data *actions[HIST_ACTIONS_MAX];
unsigned int n_actions;
- struct hist_field *synth_var_refs[SYNTH_FIELDS_MAX];
- unsigned int n_synth_var_refs;
struct field_var *field_vars[SYNTH_FIELDS_MAX];
unsigned int n_field_vars;
unsigned int n_field_var_str;
struct field_var_hist *field_var_hists[SYNTH_FIELDS_MAX];
unsigned int n_field_var_hists;
- struct field_var *max_vars[SYNTH_FIELDS_MAX];
- unsigned int n_max_vars;
- unsigned int n_max_var_str;
+ struct field_var *save_vars[SYNTH_FIELDS_MAX];
+ unsigned int n_save_vars;
+ unsigned int n_save_var_str;
+};
+
+static int synth_event_create(int argc, const char **argv);
+static int synth_event_show(struct seq_file *m, struct dyn_event *ev);
+static int synth_event_release(struct dyn_event *ev);
+static bool synth_event_is_busy(struct dyn_event *ev);
+static bool synth_event_match(const char *system, const char *event,
+ int argc, const char **argv, struct dyn_event *ev);
+
+static struct dyn_event_operations synth_event_ops = {
+ .create = synth_event_create,
+ .show = synth_event_show,
+ .is_busy = synth_event_is_busy,
+ .free = synth_event_release,
+ .match = synth_event_match,
};
struct synth_field {
@@ -301,7 +398,7 @@
};
struct synth_event {
- struct list_head list;
+ struct dyn_event devent;
int ref;
char *name;
struct synth_field **fields;
@@ -312,99 +409,233 @@
struct tracepoint *tp;
};
+static bool is_synth_event(struct dyn_event *ev)
+{
+ return ev->ops == &synth_event_ops;
+}
+
+static struct synth_event *to_synth_event(struct dyn_event *ev)
+{
+ return container_of(ev, struct synth_event, devent);
+}
+
+static bool synth_event_is_busy(struct dyn_event *ev)
+{
+ struct synth_event *event = to_synth_event(ev);
+
+ return event->ref != 0;
+}
+
+static bool synth_event_match(const char *system, const char *event,
+ int argc, const char **argv, struct dyn_event *ev)
+{
+ struct synth_event *sev = to_synth_event(ev);
+
+ return strcmp(sev->name, event) == 0 &&
+ (!system || strcmp(system, SYNTH_SYSTEM) == 0);
+}
+
struct action_data;
typedef void (*action_fn_t) (struct hist_trigger_data *hist_data,
struct tracing_map_elt *elt, void *rec,
- struct ring_buffer_event *rbe,
+ struct ring_buffer_event *rbe, void *key,
struct action_data *data, u64 *var_ref_vals);
+typedef bool (*check_track_val_fn_t) (u64 track_val, u64 var_val);
+
+enum handler_id {
+ HANDLER_ONMATCH = 1,
+ HANDLER_ONMAX,
+ HANDLER_ONCHANGE,
+};
+
+enum action_id {
+ ACTION_SAVE = 1,
+ ACTION_TRACE,
+ ACTION_SNAPSHOT,
+};
+
struct action_data {
+ enum handler_id handler;
+ enum action_id action;
+ char *action_name;
action_fn_t fn;
+
unsigned int n_params;
char *params[SYNTH_FIELDS_MAX];
+ /*
+ * When a histogram trigger is hit, the values of any
+ * references to variables, including variables being passed
+ * as parameters to synthetic events, are collected into a
+ * var_ref_vals array. This var_ref_idx is the index of the
+ * first param in the array to be passed to the synthetic
+ * event invocation.
+ */
+ unsigned int var_ref_idx;
+ struct synth_event *synth_event;
+ bool use_trace_keyword;
+ char *synth_event_name;
+
union {
struct {
- unsigned int var_ref_idx;
- char *match_event;
- char *match_event_system;
- char *synth_event_name;
- struct synth_event *synth_event;
- } onmatch;
+ char *event;
+ char *event_system;
+ } match_data;
struct {
+ /*
+ * var_str contains the $-unstripped variable
+ * name referenced by var_ref, and used when
+ * printing the action. Because var_ref
+ * creation is deferred to create_actions(),
+ * we need a per-action way to save it until
+ * then, thus var_str.
+ */
char *var_str;
- char *fn_name;
- unsigned int max_var_ref_idx;
- struct hist_field *max_var;
- struct hist_field *var;
- } onmax;
+
+ /*
+ * var_ref refers to the variable being
+ * tracked e.g onmax($var).
+ */
+ struct hist_field *var_ref;
+
+ /*
+ * track_var contains the 'invisible' tracking
+ * variable created to keep the current
+ * e.g. max value.
+ */
+ struct hist_field *track_var;
+
+ check_track_val_fn_t check_val;
+ action_fn_t save_data;
+ } track_data;
};
};
+struct track_data {
+ u64 track_val;
+ bool updated;
-static char last_hist_cmd[MAX_FILTER_STR_VAL];
-static char hist_err_str[MAX_FILTER_STR_VAL];
+ unsigned int key_len;
+ void *key;
+ struct tracing_map_elt elt;
-static void last_cmd_set(char *str)
+ struct action_data *action_data;
+ struct hist_trigger_data *hist_data;
+};
+
+struct hist_elt_data {
+ char *comm;
+ u64 *var_ref_vals;
+ char *field_var_str[SYNTH_FIELDS_MAX];
+};
+
+struct snapshot_context {
+ struct tracing_map_elt *elt;
+ void *key;
+};
+
+static void track_data_free(struct track_data *track_data)
{
- if (!str)
+ struct hist_elt_data *elt_data;
+
+ if (!track_data)
return;
- strncpy(last_hist_cmd, str, MAX_FILTER_STR_VAL - 1);
+ kfree(track_data->key);
+
+ elt_data = track_data->elt.private_data;
+ if (elt_data) {
+ kfree(elt_data->comm);
+ kfree(elt_data);
+ }
+
+ kfree(track_data);
}
-static void hist_err(char *str, char *var)
+static struct track_data *track_data_alloc(unsigned int key_len,
+ struct action_data *action_data,
+ struct hist_trigger_data *hist_data)
{
- int maxlen = MAX_FILTER_STR_VAL - 1;
+ struct track_data *data = kzalloc(sizeof(*data), GFP_KERNEL);
+ struct hist_elt_data *elt_data;
+
+ if (!data)
+ return ERR_PTR(-ENOMEM);
+
+ data->key = kzalloc(key_len, GFP_KERNEL);
+ if (!data->key) {
+ track_data_free(data);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ data->key_len = key_len;
+ data->action_data = action_data;
+ data->hist_data = hist_data;
+
+ elt_data = kzalloc(sizeof(*elt_data), GFP_KERNEL);
+ if (!elt_data) {
+ track_data_free(data);
+ return ERR_PTR(-ENOMEM);
+ }
+ data->elt.private_data = elt_data;
+
+ elt_data->comm = kzalloc(TASK_COMM_LEN, GFP_KERNEL);
+ if (!elt_data->comm) {
+ track_data_free(data);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return data;
+}
+
+static char last_cmd[MAX_FILTER_STR_VAL];
+static char last_cmd_loc[MAX_FILTER_STR_VAL];
+
+static int errpos(char *str)
+{
+ return err_pos(last_cmd, str);
+}
+
+static void last_cmd_set(struct trace_event_file *file, char *str)
+{
+ const char *system = NULL, *name = NULL;
+ struct trace_event_call *call;
if (!str)
return;
- if (strlen(hist_err_str))
- return;
+ strncpy(last_cmd, str, MAX_FILTER_STR_VAL - 1);
- if (!var)
- var = "";
+ if (file) {
+ call = file->event_call;
- if (strlen(hist_err_str) + strlen(str) + strlen(var) > maxlen)
- return;
+ system = call->class->system;
+ if (system) {
+ name = trace_event_name(call);
+ if (!name)
+ system = NULL;
+ }
+ }
- strcat(hist_err_str, str);
- strcat(hist_err_str, var);
+ if (system)
+ snprintf(last_cmd_loc, MAX_FILTER_STR_VAL, "hist:%s:%s", system, name);
}
-static void hist_err_event(char *str, char *system, char *event, char *var)
+static void hist_err(struct trace_array *tr, u8 err_type, u8 err_pos)
{
- char err[MAX_FILTER_STR_VAL];
-
- if (system && var)
- snprintf(err, MAX_FILTER_STR_VAL, "%s.%s.%s", system, event, var);
- else if (system)
- snprintf(err, MAX_FILTER_STR_VAL, "%s.%s", system, event);
- else
- strscpy(err, var, MAX_FILTER_STR_VAL);
-
- hist_err(str, err);
+ tracing_log_err(tr, last_cmd_loc, last_cmd, err_text,
+ err_type, err_pos);
}
static void hist_err_clear(void)
{
- hist_err_str[0] = '\0';
+ last_cmd[0] = '\0';
+ last_cmd_loc[0] = '\0';
}
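With this change, histogram parse errors are reported through tracing_log_err() into the per-instance tracefs error_log rather than a private hist_err_str buffer; last_cmd_loc supplies the location string and errpos() the caret position. A failed command shows up roughly like this (illustrative output):

	# cat /sys/kernel/tracing/error_log
	[  123.456789] hist:sched:sched_switch: error: Couldn't find field
	  Command: keys=next_comm:bogus_field=1
	                          ^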
-static bool have_hist_err(void)
-{
- if (strlen(hist_err_str))
- return true;
-
- return false;
-}
-
-static LIST_HEAD(synth_event_list);
-static DEFINE_MUTEX(synth_event_mutex);
-
struct synth_trace_event {
struct trace_entry ent;
u64 fields[];
@@ -446,7 +677,9 @@
static bool synth_field_signed(char *type)
{
- if (strncmp(type, "u", 1) == 0)
+ if (str_has_prefix(type, "u"))
+ return false;
+ if (strcmp(type, "gfp_t") == 0)
return false;
return true;
@@ -469,7 +702,7 @@
start = strstr(type, "char[");
if (start == NULL)
return -EINVAL;
- start += strlen("char[");
+ start += sizeof("char[") - 1;
end = strchr(type, ']');
if (!end || end < start)
@@ -526,6 +759,8 @@
size = sizeof(unsigned long);
else if (strcmp(type, "pid_t") == 0)
size = sizeof(pid_t);
+ else if (strcmp(type, "gfp_t") == 0)
+ size = sizeof(gfp_t);
else if (synth_field_is_string(type))
size = synth_field_string_size(type);
@@ -566,6 +801,8 @@
fmt = "%lu";
else if (strcmp(type, "pid_t") == 0)
fmt = "%d";
+ else if (strcmp(type, "gfp_t") == 0)
+ fmt = "%x";
else if (synth_field_is_string(type))
fmt = "%s";
@@ -608,9 +845,20 @@
i == se->n_fields - 1 ? "" : " ");
n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
} else {
+ struct trace_print_flags __flags[] = {
+ __def_gfpflag_names, {-1, NULL} };
+
trace_seq_printf(s, print_fmt, se->fields[i]->name,
entry->fields[n_u64],
i == se->n_fields - 1 ? "" : " ");
+
+ if (strcmp(se->fields[i]->type, "gfp_t") == 0) {
+ trace_seq_puts(s, " (");
+ trace_print_flags_seq(s, "|",
+ entry->fields[n_u64],
+ __flags);
+ trace_seq_putc(s, ')');
+ }
n_u64++;
}
}
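For synthetic event fields declared as gfp_t, the output now appends the decoded flag names after the raw hex value, along the lines of (illustrative):

	my_synth_event: gfp_flags=cc0 (GFP_KERNEL)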
@@ -738,14 +986,12 @@
kfree(field);
}
-static struct synth_field *parse_synth_field(int argc, char **argv,
+static struct synth_field *parse_synth_field(int argc, const char **argv,
int *consumed)
{
struct synth_field *field;
- const char *prefix = NULL;
- char *field_type = argv[0], *field_name;
+ const char *prefix = NULL, *field_type = argv[0], *field_name, *array;
int len, ret = 0;
- char *array;
if (field_type[0] == ';')
field_type++;
@@ -762,20 +1008,31 @@
*consumed = 2;
}
- len = strlen(field_name);
- if (field_name[len - 1] == ';')
- field_name[len - 1] = '\0';
-
field = kzalloc(sizeof(*field), GFP_KERNEL);
if (!field)
return ERR_PTR(-ENOMEM);
- len = strlen(field_type) + 1;
+ len = strlen(field_name);
array = strchr(field_name, '[');
if (array)
+ len -= strlen(array);
+ else if (field_name[len - 1] == ';')
+ len--;
+
+ field->name = kmemdup_nul(field_name, len, GFP_KERNEL);
+ if (!field->name) {
+ ret = -ENOMEM;
+ goto free;
+ }
+
+ if (field_type[0] == ';')
+ field_type++;
+ len = strlen(field_type) + 1;
+ if (array)
len += strlen(array);
if (prefix)
len += strlen(prefix);
+
field->type = kzalloc(len, GFP_KERNEL);
if (!field->type) {
ret = -ENOMEM;
@@ -786,7 +1043,8 @@
strcat(field->type, field_type);
if (array) {
strcat(field->type, array);
- *array = '\0';
+ if (field->type[len - 1] == ';')
+ field->type[len - 1] = '\0';
}
field->size = synth_field_size(field->type);
@@ -800,11 +1058,6 @@
field->is_signed = synth_field_signed(field->type);
- field->name = kstrdup(field_name, GFP_KERNEL);
- if (!field->name) {
- ret = -ENOMEM;
- goto free;
- }
out:
return field;
free:
@@ -868,9 +1121,13 @@
static struct synth_event *find_synth_event(const char *name)
{
+ struct dyn_event *pos;
struct synth_event *event;
- list_for_each_entry(event, &synth_event_list, list) {
+ for_each_dyn_event(pos) {
+ if (!is_synth_event(pos))
+ continue;
+ event = to_synth_event(pos);
if (strcmp(event->name, name) == 0)
return event;
}
@@ -959,7 +1216,7 @@
kfree(event);
}
-static struct synth_event *alloc_synth_event(char *event_name, int n_fields,
+static struct synth_event *alloc_synth_event(const char *name, int n_fields,
struct synth_field **fields)
{
struct synth_event *event;
@@ -971,7 +1228,7 @@
goto out;
}
- event->name = kstrdup(event_name, GFP_KERNEL);
+ event->name = kstrdup(name, GFP_KERNEL);
if (!event->name) {
kfree(event);
event = ERR_PTR(-ENOMEM);
@@ -985,6 +1242,8 @@
goto out;
}
+ dyn_event_init(&event->devent, &synth_event_ops);
+
for (i = 0; i < n_fields; i++)
event->fields[i] = fields[i];
@@ -995,12 +1254,12 @@
static void action_trace(struct hist_trigger_data *hist_data,
struct tracing_map_elt *elt, void *rec,
- struct ring_buffer_event *rbe,
+ struct ring_buffer_event *rbe, void *key,
struct action_data *data, u64 *var_ref_vals)
{
- struct synth_event *event = data->onmatch.synth_event;
+ struct synth_event *event = data->synth_event;
- trace_synth(event, var_ref_vals, data->onmatch.var_ref_idx);
+ trace_synth(event, var_ref_vals, data->var_ref_idx);
}
struct hist_var_data {
@@ -1008,29 +1267,11 @@
struct hist_trigger_data *hist_data;
};
-static void add_or_delete_synth_event(struct synth_event *event, int delete)
-{
- if (delete)
- free_synth_event(event);
- else {
- mutex_lock(&synth_event_mutex);
- if (!find_synth_event(event->name))
- list_add(&event->list, &synth_event_list);
- else
- free_synth_event(event);
- mutex_unlock(&synth_event_mutex);
- }
-}
-
-static int create_synth_event(int argc, char **argv)
+static int __create_synth_event(int argc, const char *name, const char **argv)
{
struct synth_field *field, *fields[SYNTH_FIELDS_MAX];
struct synth_event *event = NULL;
- bool delete_event = false;
int i, consumed = 0, n_fields = 0, ret = 0;
- char *name;
-
- mutex_lock(&synth_event_mutex);
/*
* Argument syntax:
@@ -1038,42 +1279,19 @@
* - Remove synthetic event: !<event_name> field[;field] ...
* where 'field' = type field_name
*/
- if (argc < 1) {
- ret = -EINVAL;
- goto out;
- }
- name = argv[0];
- if (name[0] == '!') {
- delete_event = true;
- name++;
- }
+ if (name[0] == '\0' || argc < 1)
+ return -EINVAL;
+
+ mutex_lock(&event_mutex);
event = find_synth_event(name);
if (event) {
- if (delete_event) {
- if (event->ref) {
- event = NULL;
- ret = -EBUSY;
- goto out;
- }
- list_del(&event->list);
- goto out;
- }
- event = NULL;
ret = -EEXIST;
goto out;
- } else if (delete_event) {
- ret = -ENOENT;
- goto out;
}
- if (argc < 2) {
- ret = -EINVAL;
- goto out;
- }
-
- for (i = 1; i < argc - 1; i++) {
+ for (i = 0; i < argc - 1; i++) {
if (strcmp(argv[i], ";") == 0)
continue;
if (n_fields == SYNTH_FIELDS_MAX) {
@@ -1101,83 +1319,91 @@
event = NULL;
goto err;
}
+ ret = register_synth_event(event);
+ if (!ret)
+ dyn_event_add(&event->devent);
+ else
+ free_synth_event(event);
out:
- mutex_unlock(&synth_event_mutex);
-
- if (event) {
- if (delete_event) {
- ret = unregister_synth_event(event);
- add_or_delete_synth_event(event, !ret);
- } else {
- ret = register_synth_event(event);
- add_or_delete_synth_event(event, ret);
- }
- }
+ mutex_unlock(&event_mutex);
return ret;
err:
- mutex_unlock(&synth_event_mutex);
-
for (i = 0; i < n_fields; i++)
free_synth_field(fields[i]);
+
+ goto out;
+}
+
+static int create_or_delete_synth_event(int argc, char **argv)
+{
+ const char *name = argv[0];
+ struct synth_event *event = NULL;
+ int ret;
+
+ /* trace_run_command() ensures argc != 0 */
+ if (name[0] == '!') {
+ mutex_lock(&event_mutex);
+ event = find_synth_event(name + 1);
+ if (event) {
+ if (event->ref)
+ ret = -EBUSY;
+ else {
+ ret = unregister_synth_event(event);
+ if (!ret) {
+ dyn_event_remove(&event->devent);
+ free_synth_event(event);
+ }
+ }
+ } else
+ ret = -ENOENT;
+ mutex_unlock(&event_mutex);
+ return ret;
+ }
+
+ ret = __create_synth_event(argc - 1, name, (const char **)argv + 1);
+ return ret == -ECANCELED ? -EINVAL : ret;
+}
+
+static int synth_event_create(int argc, const char **argv)
+{
+ const char *name = argv[0];
+ int len;
+
+ if (name[0] != 's' || name[1] != ':')
+ return -ECANCELED;
+ name += 2;
+
+ /* This interface accepts group name prefix */
+ if (strchr(name, '/')) {
+ len = str_has_prefix(name, SYNTH_SYSTEM "/");
+ if (len == 0)
+ return -EINVAL;
+ name += len;
+ }
+ return __create_synth_event(argc - 1, name, argv + 1);
+}
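With synthetic events registered as dynamic events, the same definitions can be driven either through the existing synthetic_events file or through dynamic_events using the 's:' prefix parsed above (illustrative commands, assuming tracefs is mounted at /sys/kernel/tracing):

	# echo 'wakeup_latency u64 lat; pid_t pid' >> synthetic_events
	# echo '!wakeup_latency' >> synthetic_events

	# echo 's:wakeup_latency u64 lat; pid_t pid' >> dynamic_events
	# echo 's:synthetic/wakeup_latency u64 lat; pid_t pid' >> dynamic_events

create_or_delete_synth_event() handles the first pair; synth_event_create() strips the 's:' prefix (and optional 'synthetic/' group) before both paths meet in __create_synth_event().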
+
+static int synth_event_release(struct dyn_event *ev)
+{
+ struct synth_event *event = to_synth_event(ev);
+ int ret;
+
+ if (event->ref)
+ return -EBUSY;
+
+ ret = unregister_synth_event(event);
+ if (ret)
+ return ret;
+
+ dyn_event_remove(ev);
free_synth_event(event);
-
- return ret;
+ return 0;
}
-static int release_all_synth_events(void)
-{
- struct list_head release_events;
- struct synth_event *event, *e;
- int ret = 0;
-
- INIT_LIST_HEAD(&release_events);
-
- mutex_lock(&synth_event_mutex);
-
- list_for_each_entry(event, &synth_event_list, list) {
- if (event->ref) {
- mutex_unlock(&synth_event_mutex);
- return -EBUSY;
- }
- }
-
- list_splice_init(&event->list, &release_events);
-
- mutex_unlock(&synth_event_mutex);
-
- list_for_each_entry_safe(event, e, &release_events, list) {
- list_del(&event->list);
-
- ret = unregister_synth_event(event);
- add_or_delete_synth_event(event, !ret);
- }
-
- return ret;
-}
-
-
-static void *synth_events_seq_start(struct seq_file *m, loff_t *pos)
-{
- mutex_lock(&synth_event_mutex);
-
- return seq_list_start(&synth_event_list, *pos);
-}
-
-static void *synth_events_seq_next(struct seq_file *m, void *v, loff_t *pos)
-{
- return seq_list_next(v, &synth_event_list, pos);
-}
-
-static void synth_events_seq_stop(struct seq_file *m, void *v)
-{
- mutex_unlock(&synth_event_mutex);
-}
-
-static int synth_events_seq_show(struct seq_file *m, void *v)
+static int __synth_event_show(struct seq_file *m, struct synth_event *event)
{
struct synth_field *field;
- struct synth_event *event = v;
unsigned int i;
seq_printf(m, "%s\t", event->name);
@@ -1195,19 +1421,42 @@
return 0;
}
+static int synth_event_show(struct seq_file *m, struct dyn_event *ev)
+{
+ struct synth_event *event = to_synth_event(ev);
+
+ seq_printf(m, "s:%s/", event->class.system);
+
+ return __synth_event_show(m, event);
+}
+
+static int synth_events_seq_show(struct seq_file *m, void *v)
+{
+ struct dyn_event *ev = v;
+
+ if (!is_synth_event(ev))
+ return 0;
+
+ return __synth_event_show(m, to_synth_event(ev));
+}
+
static const struct seq_operations synth_events_seq_op = {
- .start = synth_events_seq_start,
- .next = synth_events_seq_next,
- .stop = synth_events_seq_stop,
- .show = synth_events_seq_show
+ .start = dyn_event_seq_start,
+ .next = dyn_event_seq_next,
+ .stop = dyn_event_seq_stop,
+ .show = synth_events_seq_show,
};
static int synth_events_open(struct inode *inode, struct file *file)
{
int ret;
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
+ if (ret)
+ return ret;
+
if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
- ret = release_all_synth_events();
+ ret = dyn_events_release_all(&synth_event_ops);
if (ret < 0)
return ret;
}
@@ -1220,7 +1469,7 @@
size_t count, loff_t *ppos)
{
return trace_parse_run_command(file, buffer, count, ppos,
- create_synth_event);
+ create_or_delete_synth_event);
}
static const struct file_operations synth_events_fops = {
@@ -1257,82 +1506,73 @@
return cpu;
}
+/**
+ * check_field_for_var_ref - Check if a VAR_REF field references a variable
+ * @hist_field: The VAR_REF field to check
+ * @var_data: The hist trigger that owns the variable
+ * @var_idx: The trigger variable identifier
+ *
+ * Check the given VAR_REF field to see whether or not it references
+ * the given variable associated with the given trigger.
+ *
+ * Return: The VAR_REF field if it does reference the variable, NULL if not
+ */
static struct hist_field *
check_field_for_var_ref(struct hist_field *hist_field,
struct hist_trigger_data *var_data,
unsigned int var_idx)
{
- struct hist_field *found = NULL;
+ WARN_ON(!(hist_field && hist_field->flags & HIST_FIELD_FL_VAR_REF));
- if (hist_field && hist_field->flags & HIST_FIELD_FL_VAR_REF) {
- if (hist_field->var.idx == var_idx &&
- hist_field->var.hist_data == var_data) {
- found = hist_field;
- }
- }
+ if (hist_field && hist_field->var.idx == var_idx &&
+ hist_field->var.hist_data == var_data)
+ return hist_field;
- return found;
+ return NULL;
}
-static struct hist_field *
-check_field_for_var_refs(struct hist_trigger_data *hist_data,
- struct hist_field *hist_field,
- struct hist_trigger_data *var_data,
- unsigned int var_idx,
- unsigned int level)
-{
- struct hist_field *found = NULL;
- unsigned int i;
-
- if (level > 3)
- return found;
-
- if (!hist_field)
- return found;
-
- found = check_field_for_var_ref(hist_field, var_data, var_idx);
- if (found)
- return found;
-
- for (i = 0; i < HIST_FIELD_OPERANDS_MAX; i++) {
- struct hist_field *operand;
-
- operand = hist_field->operands[i];
- found = check_field_for_var_refs(hist_data, operand, var_data,
- var_idx, level + 1);
- if (found)
- return found;
- }
-
- return found;
-}
-
+/**
+ * find_var_ref - Check if a trigger has a reference to a trigger variable
+ * @hist_data: The hist trigger that might have a reference to the variable
+ * @var_data: The hist trigger that owns the variable
+ * @var_idx: The trigger variable identifier
+ *
+ * Check the list of var_refs[] on the first hist trigger to see
+ * whether any of them are references to the variable on the second
+ * trigger.
+ *
+ * Return: The VAR_REF field referencing the variable if so, NULL if not
+ */
static struct hist_field *find_var_ref(struct hist_trigger_data *hist_data,
struct hist_trigger_data *var_data,
unsigned int var_idx)
{
- struct hist_field *hist_field, *found = NULL;
+ struct hist_field *hist_field;
unsigned int i;
- for_each_hist_field(i, hist_data) {
- hist_field = hist_data->fields[i];
- found = check_field_for_var_refs(hist_data, hist_field,
- var_data, var_idx, 0);
- if (found)
- return found;
+ for (i = 0; i < hist_data->n_var_refs; i++) {
+ hist_field = hist_data->var_refs[i];
+ if (check_field_for_var_ref(hist_field, var_data, var_idx))
+ return hist_field;
}
- for (i = 0; i < hist_data->n_synth_var_refs; i++) {
- hist_field = hist_data->synth_var_refs[i];
- found = check_field_for_var_refs(hist_data, hist_field,
- var_data, var_idx, 0);
- if (found)
- return found;
- }
-
- return found;
+ return NULL;
}
+/**
+ * find_any_var_ref - Check if there is a reference to a given trigger variable
+ * @hist_data: The hist trigger
+ * @var_idx: The trigger variable identifier
+ *
+ * Check to see whether the given variable is currently referenced by
+ * any other trigger.
+ *
+ * The trigger the variable is defined on is explicitly excluded - the
+ * assumption being that a self-reference doesn't prevent a trigger
+ * from being removed.
+ *
+ * Return: The VAR_REF field referencing the variable if so, NULL if not
+ */
static struct hist_field *find_any_var_ref(struct hist_trigger_data *hist_data,
unsigned int var_idx)
{
@@ -1351,6 +1591,19 @@
return found;
}
+/**
+ * check_var_refs - Check if there is a reference to any of a trigger's variables
+ * @hist_data: The hist trigger
+ *
+ * A trigger can define one or more variables. If any one of them is
+ * currently referenced by any other trigger, this function will
+ * determine that.
+ *
+ * Typically used to determine whether or not a trigger can be removed
+ * - if there are any references to a trigger's variables, it cannot.
+ *
+ * Return: True if there is a reference to any of the trigger's variables
+ */
static bool check_var_refs(struct hist_trigger_data *hist_data)
{
struct hist_field *field;
@@ -1434,7 +1687,7 @@
if (var_data)
return 0;
- if (trace_array_get(tr) < 0)
+ if (tracing_check_open_get_tr(tr))
return -ENODEV;
var_data = kzalloc(sizeof(*var_data), GFP_KERNEL);
@@ -1530,7 +1783,7 @@
if (find_var_field(var_hist_data, var_name)) {
if (found) {
- hist_err_event("Variable name not unique, need to use fully qualified name (subsys.event.var) for variable: ", system, event_name, var_name);
+ hist_err(tr, HIST_ERR_VAR_NOT_UNIQUE, errpos(var_name));
return NULL;
}
@@ -1571,9 +1824,9 @@
for (i = 0; i < hist_data->n_actions; i++) {
struct action_data *data = hist_data->actions[i];
- if (data->fn == action_trace) {
- char *system = data->onmatch.match_event_system;
- char *event_name = data->onmatch.match_event;
+ if (data->handler == HANDLER_ONMATCH) {
+ char *system = data->match_data.event_system;
+ char *event_name = data->match_data.event;
file = find_var_file(tr, system, event_name, var_name);
if (!file)
@@ -1581,7 +1834,8 @@
hist_field = find_file_var(file, var_name);
if (hist_field) {
if (found) {
- hist_err_event("Variable name not unique, need to use fully qualified name (subsys.event.var) for variable: ", system, event_name, var_name);
+ hist_err(tr, HIST_ERR_VAR_NOT_UNIQUE,
+ errpos(var_name));
return ERR_PTR(-EINVAL);
}
@@ -1618,12 +1872,6 @@
return hist_field;
}
-struct hist_elt_data {
- char *comm;
- u64 *var_ref_vals;
- char *field_var_str[SYNTH_FIELDS_MAX];
-};
-
static u64 hist_field_var_ref(struct hist_field *hist_field,
struct tracing_map_elt *elt,
struct ring_buffer_event *rbe,
@@ -1632,6 +1880,9 @@
struct hist_elt_data *elt_data;
u64 var_val = 0;
+ if (WARN_ON_ONCE(!elt))
+ return var_val;
+
elt_data = elt->private_data;
var_val = elt_data->var_ref_vals[hist_field->var_ref_idx];
@@ -1808,8 +2059,9 @@
if (attrs->n_actions >= HIST_ACTIONS_MAX)
return ret;
- if ((strncmp(str, "onmatch(", strlen("onmatch(")) == 0) ||
- (strncmp(str, "onmax(", strlen("onmax(")) == 0)) {
+ if ((str_has_prefix(str, "onmatch(")) ||
+ (str_has_prefix(str, "onmax(")) ||
+ (str_has_prefix(str, "onchange("))) {
attrs->action_str[attrs->n_actions] = kstrdup(str, GFP_KERNEL);
if (!attrs->action_str[attrs->n_actions]) {
ret = -ENOMEM;
@@ -1818,42 +2070,42 @@
attrs->n_actions++;
ret = 0;
}
-
return ret;
}
-static int parse_assignment(char *str, struct hist_trigger_attrs *attrs)
+static int parse_assignment(struct trace_array *tr,
+ char *str, struct hist_trigger_attrs *attrs)
{
int ret = 0;
- if ((strncmp(str, "key=", strlen("key=")) == 0) ||
- (strncmp(str, "keys=", strlen("keys=")) == 0)) {
+ if ((str_has_prefix(str, "key=")) ||
+ (str_has_prefix(str, "keys="))) {
attrs->keys_str = kstrdup(str, GFP_KERNEL);
if (!attrs->keys_str) {
ret = -ENOMEM;
goto out;
}
- } else if ((strncmp(str, "val=", strlen("val=")) == 0) ||
- (strncmp(str, "vals=", strlen("vals=")) == 0) ||
- (strncmp(str, "values=", strlen("values=")) == 0)) {
+ } else if ((str_has_prefix(str, "val=")) ||
+ (str_has_prefix(str, "vals=")) ||
+ (str_has_prefix(str, "values="))) {
attrs->vals_str = kstrdup(str, GFP_KERNEL);
if (!attrs->vals_str) {
ret = -ENOMEM;
goto out;
}
- } else if (strncmp(str, "sort=", strlen("sort=")) == 0) {
+ } else if (str_has_prefix(str, "sort=")) {
attrs->sort_key_str = kstrdup(str, GFP_KERNEL);
if (!attrs->sort_key_str) {
ret = -ENOMEM;
goto out;
}
- } else if (strncmp(str, "name=", strlen("name=")) == 0) {
+ } else if (str_has_prefix(str, "name=")) {
attrs->name = kstrdup(str, GFP_KERNEL);
if (!attrs->name) {
ret = -ENOMEM;
goto out;
}
- } else if (strncmp(str, "clock=", strlen("clock=")) == 0) {
+ } else if (str_has_prefix(str, "clock=")) {
strsep(&str, "=");
if (!str) {
ret = -EINVAL;
@@ -1866,7 +2118,7 @@
ret = -ENOMEM;
goto out;
}
- } else if (strncmp(str, "size=", strlen("size=")) == 0) {
+ } else if (str_has_prefix(str, "size=")) {
int map_bits = parse_map_size(str);
if (map_bits < 0) {
@@ -1878,7 +2130,7 @@
char *assignment;
if (attrs->n_assignments == TRACING_MAP_VARS_MAX) {
- hist_err("Too many variables defined: ", str);
+ hist_err(tr, HIST_ERR_TOO_MANY_VARS, errpos(str));
ret = -EINVAL;
goto out;
}
@@ -1895,7 +2147,8 @@
return ret;
}
-static struct hist_trigger_attrs *parse_hist_trigger_attrs(char *trigger_str)
+static struct hist_trigger_attrs *
+parse_hist_trigger_attrs(struct trace_array *tr, char *trigger_str)
{
struct hist_trigger_attrs *attrs;
int ret = 0;
@@ -1908,7 +2161,7 @@
char *str = strsep(&trigger_str, ":");
if (strchr(str, '=')) {
- ret = parse_assignment(str, attrs);
+ ret = parse_assignment(tr, str, attrs);
if (ret)
goto free;
} else if (strcmp(str, "pause") == 0)
@@ -1957,7 +2210,7 @@
return;
}
- memcpy(comm, task->comm, TASK_COMM_LEN);
+ strncpy(comm, task->comm, TASK_COMM_LEN);
}
static void hist_elt_data_free(struct hist_elt_data *elt_data)
@@ -2003,7 +2256,7 @@
}
}
- n_str = hist_data->n_field_var_str + hist_data->n_max_var_str;
+ n_str = hist_data->n_field_var_str + hist_data->n_save_var_str;
size = STR_VAR_LEN_MAX;
@@ -2151,6 +2404,15 @@
return field_op;
}
+static void __destroy_hist_field(struct hist_field *hist_field)
+{
+ kfree(hist_field->var.name);
+ kfree(hist_field->name);
+ kfree(hist_field->type);
+
+ kfree(hist_field);
+}
+
static void destroy_hist_field(struct hist_field *hist_field,
unsigned int level)
{
@@ -2162,14 +2424,13 @@
if (!hist_field)
return;
+ if (hist_field->flags & HIST_FIELD_FL_VAR_REF)
+ return; /* var refs will be destroyed separately */
+
for (i = 0; i < HIST_FIELD_OPERANDS_MAX; i++)
destroy_hist_field(hist_field->operands[i], level + 1);
- kfree(hist_field->var.name);
- kfree(hist_field->name);
- kfree(hist_field->type);
-
- kfree(hist_field);
+ __destroy_hist_field(hist_field);
}
static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
@@ -2296,6 +2557,12 @@
hist_data->fields[i] = NULL;
}
}
+
+ for (i = 0; i < hist_data->n_var_refs; i++) {
+ WARN_ON(!(hist_data->var_refs[i]->flags & HIST_FIELD_FL_VAR_REF));
+ __destroy_hist_field(hist_data->var_refs[i]);
+ hist_data->var_refs[i] = NULL;
+ }
}
static int init_var_ref(struct hist_field *ref_field,
@@ -2354,7 +2621,23 @@
goto out;
}
-static struct hist_field *create_var_ref(struct hist_field *var_field,
+/**
+ * create_var_ref - Create a variable reference and attach it to trigger
+ * @hist_data: The trigger that will be referencing the variable
+ * @var_field: The VAR field to create a reference to
+ * @system: The optional system string
+ * @event_name: The optional event_name string
+ *
+ * Given a variable hist_field, create a VAR_REF hist_field that
+ * represents a reference to it.
+ *
+ * This function also adds the reference to the trigger that
+ * now references the variable.
+ *
+ * Return: The VAR_REF field if successful, NULL if not
+ */
+static struct hist_field *create_var_ref(struct hist_trigger_data *hist_data,
+ struct hist_field *var_field,
char *system, char *event_name)
{
unsigned long flags = HIST_FIELD_FL_VAR_REF;
@@ -2366,6 +2649,9 @@
destroy_hist_field(ref_field, 0);
return NULL;
}
+
+ hist_data->var_refs[hist_data->n_var_refs] = ref_field;
+ ref_field->var_ref_idx = hist_data->n_var_refs++;
}
return ref_field;
@@ -2431,6 +2717,7 @@
char *var_name)
{
struct hist_field *var_field = NULL, *ref_field = NULL;
+ struct trace_array *tr = hist_data->event_file->tr;
if (!is_var_ref(var_name))
return NULL;
@@ -2439,11 +2726,11 @@
var_field = find_event_var(hist_data, system, event_name, var_name);
if (var_field)
- ref_field = create_var_ref(var_field, system, event_name);
+ ref_field = create_var_ref(hist_data, var_field,
+ system, event_name);
if (!ref_field)
- hist_err_event("Couldn't find variable: $",
- system, event_name, var_name);
+ hist_err(tr, HIST_ERR_VAR_NOT_FOUND, errpos(var_name));
return ref_field;
}
@@ -2454,6 +2741,7 @@
{
struct ftrace_event_field *field = NULL;
char *field_name, *modifier, *str;
+ struct trace_array *tr = file->tr;
modifier = str = kstrdup(field_str, GFP_KERNEL);
if (!modifier)
@@ -2477,7 +2765,7 @@
else if (strcmp(modifier, "usecs") == 0)
*flags |= HIST_FIELD_FL_TIMESTAMP_USECS;
else {
- hist_err("Invalid field modifier: ", modifier);
+ hist_err(tr, HIST_ERR_BAD_FIELD_MODIFIER, errpos(modifier));
field = ERR_PTR(-EINVAL);
goto out;
}
@@ -2493,7 +2781,7 @@
else {
field = trace_find_event_field(file->event_call, field_name);
if (!field || !field->size) {
- hist_err("Couldn't find field: ", field_name);
+ hist_err(tr, HIST_ERR_FIELD_NOT_FOUND, errpos(field_name));
field = ERR_PTR(-EINVAL);
goto out;
}
@@ -2523,6 +2811,8 @@
return NULL;
}
+ alias->var_ref_idx = var_ref->var_ref_idx;
+
return alias;
}
@@ -2555,10 +2845,9 @@
s = local_field_var_ref(hist_data, ref_system, ref_event, ref_var);
if (!s) {
- hist_field = parse_var_ref(hist_data, ref_system, ref_event, ref_var);
+ hist_field = parse_var_ref(hist_data, ref_system,
+ ref_event, ref_var);
if (hist_field) {
- hist_data->var_refs[hist_data->n_var_refs] = hist_field;
- hist_field->var_ref_idx = hist_data->n_var_refs++;
if (var_name) {
hist_field = create_alias(hist_data, hist_field, var_name);
if (!hist_field) {
@@ -2606,7 +2895,7 @@
/* we support only -(xxx) i.e. explicit parens required */
if (level > 3) {
- hist_err("Too many subexpressions (3 max): ", str);
+ hist_err(file->tr, HIST_ERR_TOO_MANY_SUBEXPR, errpos(str));
ret = -EINVAL;
goto free;
}
@@ -2661,7 +2950,8 @@
return ERR_PTR(ret);
}
-static int check_expr_operands(struct hist_field *operand1,
+static int check_expr_operands(struct trace_array *tr,
+ struct hist_field *operand1,
struct hist_field *operand2)
{
unsigned long operand1_flags = operand1->flags;
@@ -2689,7 +2979,7 @@
if ((operand1_flags & HIST_FIELD_FL_TIMESTAMP_USECS) !=
(operand2_flags & HIST_FIELD_FL_TIMESTAMP_USECS)) {
- hist_err("Timestamp units in expression don't match", NULL);
+ hist_err(tr, HIST_ERR_TIMESTAMP_MISMATCH, 0);
return -EINVAL;
}
@@ -2707,7 +2997,7 @@
char *sep, *operand1_str;
if (level > 3) {
- hist_err("Too many subexpressions (3 max): ", str);
+ hist_err(file->tr, HIST_ERR_TOO_MANY_SUBEXPR, errpos(str));
return ERR_PTR(-EINVAL);
}
@@ -2752,7 +3042,7 @@
goto free;
}
- ret = check_expr_operands(operand1, operand2);
+ ret = check_expr_operands(file->tr, operand1, operand2);
if (ret)
goto free;
@@ -2945,16 +3235,14 @@
int ret;
if (target_hist_data->n_field_var_hists >= SYNTH_FIELDS_MAX) {
- hist_err_event("onmatch: Too many field variables defined: ",
- subsys_name, event_name, field_name);
+ hist_err(tr, HIST_ERR_TOO_MANY_FIELD_VARS, errpos(field_name));
return ERR_PTR(-EINVAL);
}
file = event_file(tr, subsys_name, event_name);
if (IS_ERR(file)) {
- hist_err_event("onmatch: Event file not found: ",
- subsys_name, event_name, field_name);
+ hist_err(tr, HIST_ERR_EVENT_FILE_NOT_FOUND, errpos(field_name));
ret = PTR_ERR(file);
return ERR_PTR(ret);
}
@@ -2967,8 +3255,7 @@
*/
hist_data = find_compatible_hist(target_hist_data, file);
if (!hist_data) {
- hist_err_event("onmatch: Matching event histogram not found: ",
- subsys_name, event_name, field_name);
+ hist_err(tr, HIST_ERR_HIST_NOT_FOUND, errpos(field_name));
return ERR_PTR(-EINVAL);
}
@@ -3029,8 +3316,7 @@
kfree(cmd);
kfree(var_hist->cmd);
kfree(var_hist);
- hist_err_event("onmatch: Couldn't create histogram for field: ",
- subsys_name, event_name, field_name);
+ hist_err(tr, HIST_ERR_HIST_CREATE_FAIL, errpos(field_name));
return ERR_PTR(ret);
}
@@ -3042,8 +3328,7 @@
if (IS_ERR_OR_NULL(event_var)) {
kfree(var_hist->cmd);
kfree(var_hist);
- hist_err_event("onmatch: Couldn't find synthetic variable: ",
- subsys_name, event_name, field_name);
+ hist_err(tr, HIST_ERR_SYNTH_VAR_NOT_FOUND, errpos(field_name));
return ERR_PTR(-EINVAL);
}
@@ -3120,13 +3405,13 @@
hist_data->n_field_vars, 0);
}
-static void update_max_vars(struct hist_trigger_data *hist_data,
- struct tracing_map_elt *elt,
- struct ring_buffer_event *rbe,
- void *rec)
+static void save_track_data_vars(struct hist_trigger_data *hist_data,
+ struct tracing_map_elt *elt, void *rec,
+ struct ring_buffer_event *rbe, void *key,
+ struct action_data *data, u64 *var_ref_vals)
{
- __update_field_vars(elt, rbe, rec, hist_data->max_vars,
- hist_data->n_max_vars, hist_data->n_field_var_str);
+ __update_field_vars(elt, rbe, rec, hist_data->save_vars,
+ hist_data->n_save_vars, hist_data->n_field_var_str);
}
static struct hist_field *create_var(struct hist_trigger_data *hist_data,
@@ -3176,25 +3461,26 @@
{
struct hist_field *val = NULL, *var = NULL;
unsigned long flags = HIST_FIELD_FL_VAR;
+ struct trace_array *tr = file->tr;
struct field_var *field_var;
int ret = 0;
if (hist_data->n_field_vars >= SYNTH_FIELDS_MAX) {
- hist_err("Too many field variables defined: ", field_name);
+ hist_err(tr, HIST_ERR_TOO_MANY_FIELD_VARS, errpos(field_name));
ret = -EINVAL;
goto err;
}
val = parse_atom(hist_data, file, field_name, &flags, NULL);
if (IS_ERR(val)) {
- hist_err("Couldn't parse field variable: ", field_name);
+ hist_err(tr, HIST_ERR_FIELD_VAR_PARSE_FAIL, errpos(field_name));
ret = PTR_ERR(val);
goto err;
}
var = create_var(hist_data, file, field_name, val->size, val->type);
if (IS_ERR(var)) {
- hist_err("Couldn't create or find variable: ", field_name);
+ hist_err(tr, HIST_ERR_VAR_CREATE_FIND_FAIL, errpos(field_name));
kfree(val);
ret = PTR_ERR(var);
goto err;
@@ -3261,18 +3547,196 @@
return create_field_var(target_hist_data, file, var_name);
}
-static void onmax_print(struct seq_file *m,
- struct hist_trigger_data *hist_data,
- struct tracing_map_elt *elt,
- struct action_data *data)
+static bool check_track_val_max(u64 track_val, u64 var_val)
{
- unsigned int i, save_var_idx, max_idx = data->onmax.max_var->var.idx;
+ if (var_val <= track_val)
+ return false;
- seq_printf(m, "\n\tmax: %10llu", tracing_map_read_var(elt, max_idx));
+ return true;
+}
- for (i = 0; i < hist_data->n_max_vars; i++) {
- struct hist_field *save_val = hist_data->max_vars[i]->val;
- struct hist_field *save_var = hist_data->max_vars[i]->var;
+static bool check_track_val_changed(u64 track_val, u64 var_val)
+{
+ if (var_val == track_val)
+ return false;
+
+ return true;
+}
+
+static u64 get_track_val(struct hist_trigger_data *hist_data,
+ struct tracing_map_elt *elt,
+ struct action_data *data)
+{
+ unsigned int track_var_idx = data->track_data.track_var->var.idx;
+ u64 track_val;
+
+ track_val = tracing_map_read_var(elt, track_var_idx);
+
+ return track_val;
+}
+
+static void save_track_val(struct hist_trigger_data *hist_data,
+ struct tracing_map_elt *elt,
+ struct action_data *data, u64 var_val)
+{
+ unsigned int track_var_idx = data->track_data.track_var->var.idx;
+
+ tracing_map_set_var(elt, track_var_idx, var_val);
+}
+
+static void save_track_data(struct hist_trigger_data *hist_data,
+ struct tracing_map_elt *elt, void *rec,
+ struct ring_buffer_event *rbe, void *key,
+ struct action_data *data, u64 *var_ref_vals)
+{
+ if (data->track_data.save_data)
+ data->track_data.save_data(hist_data, elt, rec, rbe, key, data, var_ref_vals);
+}
+
+static bool check_track_val(struct tracing_map_elt *elt,
+ struct action_data *data,
+ u64 var_val)
+{
+ struct hist_trigger_data *hist_data;
+ u64 track_val;
+
+ hist_data = data->track_data.track_var->hist_data;
+ track_val = get_track_val(hist_data, elt, data);
+
+ return data->track_data.check_val(track_val, var_val);
+}
+
+#ifdef CONFIG_TRACER_SNAPSHOT
+static bool cond_snapshot_update(struct trace_array *tr, void *cond_data)
+{
+ /* called with tr->max_lock held */
+ struct track_data *track_data = tr->cond_snapshot->cond_data;
+ struct hist_elt_data *elt_data, *track_elt_data;
+ struct snapshot_context *context = cond_data;
+ struct action_data *action;
+ u64 track_val;
+
+ if (!track_data)
+ return false;
+
+ action = track_data->action_data;
+
+ track_val = get_track_val(track_data->hist_data, context->elt,
+ track_data->action_data);
+
+ if (!action->track_data.check_val(track_data->track_val, track_val))
+ return false;
+
+ track_data->track_val = track_val;
+ memcpy(track_data->key, context->key, track_data->key_len);
+
+ elt_data = context->elt->private_data;
+ track_elt_data = track_data->elt.private_data;
+ if (elt_data->comm)
+ strncpy(track_elt_data->comm, elt_data->comm, TASK_COMM_LEN);
+
+ track_data->updated = true;
+
+ return true;
+}
+
+static void save_track_data_snapshot(struct hist_trigger_data *hist_data,
+ struct tracing_map_elt *elt, void *rec,
+ struct ring_buffer_event *rbe, void *key,
+ struct action_data *data,
+ u64 *var_ref_vals)
+{
+ struct trace_event_file *file = hist_data->event_file;
+ struct snapshot_context context;
+
+ context.elt = elt;
+ context.key = key;
+
+ tracing_snapshot_cond(file->tr, &context);
+}
+
+static void hist_trigger_print_key(struct seq_file *m,
+ struct hist_trigger_data *hist_data,
+ void *key,
+ struct tracing_map_elt *elt);
+
+static struct action_data *snapshot_action(struct hist_trigger_data *hist_data)
+{
+ unsigned int i;
+
+ if (!hist_data->n_actions)
+ return NULL;
+
+ for (i = 0; i < hist_data->n_actions; i++) {
+ struct action_data *data = hist_data->actions[i];
+
+ if (data->action == ACTION_SNAPSHOT)
+ return data;
+ }
+
+ return NULL;
+}
+
+static void track_data_snapshot_print(struct seq_file *m,
+ struct hist_trigger_data *hist_data)
+{
+ struct trace_event_file *file = hist_data->event_file;
+ struct track_data *track_data;
+ struct action_data *action;
+
+ track_data = tracing_cond_snapshot_data(file->tr);
+ if (!track_data)
+ return;
+
+ if (!track_data->updated)
+ return;
+
+ action = snapshot_action(hist_data);
+ if (!action)
+ return;
+
+ seq_puts(m, "\nSnapshot taken (see tracing/snapshot). Details:\n");
+ seq_printf(m, "\ttriggering value { %s(%s) }: %10llu",
+ action->handler == HANDLER_ONMAX ? "onmax" : "onchange",
+ action->track_data.var_str, track_data->track_val);
+
+ seq_puts(m, "\ttriggered by event with key: ");
+ hist_trigger_print_key(m, hist_data, track_data->key, &track_data->elt);
+ seq_putc(m, '\n');
+}
+#else
+static bool cond_snapshot_update(struct trace_array *tr, void *cond_data)
+{
+ return false;
+}
+static void save_track_data_snapshot(struct hist_trigger_data *hist_data,
+ struct tracing_map_elt *elt, void *rec,
+ struct ring_buffer_event *rbe, void *key,
+ struct action_data *data,
+ u64 *var_ref_vals) {}
+static void track_data_snapshot_print(struct seq_file *m,
+ struct hist_trigger_data *hist_data) {}
+#endif /* CONFIG_TRACER_SNAPSHOT */
+
+static void track_data_print(struct seq_file *m,
+ struct hist_trigger_data *hist_data,
+ struct tracing_map_elt *elt,
+ struct action_data *data)
+{
+ u64 track_val = get_track_val(hist_data, elt, data);
+ unsigned int i, save_var_idx;
+
+ if (data->handler == HANDLER_ONMAX)
+ seq_printf(m, "\n\tmax: %10llu", track_val);
+ else if (data->handler == HANDLER_ONCHANGE)
+ seq_printf(m, "\n\tchanged: %10llu", track_val);
+
+ if (data->action == ACTION_SNAPSHOT)
+ return;
+
+ for (i = 0; i < hist_data->n_save_vars; i++) {
+ struct hist_field *save_val = hist_data->save_vars[i]->val;
+ struct hist_field *save_var = hist_data->save_vars[i]->var;
u64 val;
save_var_idx = save_var->var.idx;
@@ -3287,135 +3751,136 @@
}
}
-static void onmax_save(struct hist_trigger_data *hist_data,
- struct tracing_map_elt *elt, void *rec,
- struct ring_buffer_event *rbe,
- struct action_data *data, u64 *var_ref_vals)
+static void ontrack_action(struct hist_trigger_data *hist_data,
+ struct tracing_map_elt *elt, void *rec,
+ struct ring_buffer_event *rbe, void *key,
+ struct action_data *data, u64 *var_ref_vals)
{
- unsigned int max_idx = data->onmax.max_var->var.idx;
- unsigned int max_var_ref_idx = data->onmax.max_var_ref_idx;
+ u64 var_val = var_ref_vals[data->track_data.var_ref->var_ref_idx];
- u64 var_val, max_val;
-
- var_val = var_ref_vals[max_var_ref_idx];
- max_val = tracing_map_read_var(elt, max_idx);
-
- if (var_val <= max_val)
- return;
-
- tracing_map_set_var(elt, max_idx, var_val);
-
- update_max_vars(hist_data, elt, rbe, rec);
+ if (check_track_val(elt, data, var_val)) {
+ save_track_val(hist_data, elt, data, var_val);
+ save_track_data(hist_data, elt, rec, rbe, key, data, var_ref_vals);
+ }
}
-static void onmax_destroy(struct action_data *data)
+static void action_data_destroy(struct action_data *data)
{
unsigned int i;
- destroy_hist_field(data->onmax.max_var, 0);
- destroy_hist_field(data->onmax.var, 0);
+ lockdep_assert_held(&event_mutex);
- kfree(data->onmax.var_str);
- kfree(data->onmax.fn_name);
+ kfree(data->action_name);
for (i = 0; i < data->n_params; i++)
kfree(data->params[i]);
+ if (data->synth_event)
+ data->synth_event->ref--;
+
+ kfree(data->synth_event_name);
+
kfree(data);
}
-static int onmax_create(struct hist_trigger_data *hist_data,
- struct action_data *data)
+static void track_data_destroy(struct hist_trigger_data *hist_data,
+ struct action_data *data)
{
struct trace_event_file *file = hist_data->event_file;
- struct hist_field *var_field, *ref_field, *max_var;
- unsigned int var_ref_idx = hist_data->n_var_refs;
- struct field_var *field_var;
- char *onmax_var_str, *param;
- unsigned long flags;
- unsigned int i;
+
+ destroy_hist_field(data->track_data.track_var, 0);
+
+ if (data->action == ACTION_SNAPSHOT) {
+ struct track_data *track_data;
+
+ track_data = tracing_cond_snapshot_data(file->tr);
+ if (track_data && track_data->hist_data == hist_data) {
+ tracing_snapshot_cond_disable(file->tr);
+ track_data_free(track_data);
+ }
+ }
+
+ kfree(data->track_data.var_str);
+
+ action_data_destroy(data);
+}
+
+static int action_create(struct hist_trigger_data *hist_data,
+ struct action_data *data);
+
+static int track_data_create(struct hist_trigger_data *hist_data,
+ struct action_data *data)
+{
+ struct hist_field *var_field, *ref_field, *track_var = NULL;
+ struct trace_event_file *file = hist_data->event_file;
+ struct trace_array *tr = file->tr;
+ char *track_data_var_str;
int ret = 0;
- onmax_var_str = data->onmax.var_str;
- if (onmax_var_str[0] != '$') {
- hist_err("onmax: For onmax(x), x must be a variable: ", onmax_var_str);
+ track_data_var_str = data->track_data.var_str;
+ if (track_data_var_str[0] != '$') {
+ hist_err(tr, HIST_ERR_ONX_NOT_VAR, errpos(track_data_var_str));
return -EINVAL;
}
- onmax_var_str++;
+ track_data_var_str++;
- var_field = find_target_event_var(hist_data, NULL, NULL, onmax_var_str);
+ var_field = find_target_event_var(hist_data, NULL, NULL, track_data_var_str);
if (!var_field) {
- hist_err("onmax: Couldn't find onmax variable: ", onmax_var_str);
+ hist_err(tr, HIST_ERR_ONX_VAR_NOT_FOUND, errpos(track_data_var_str));
return -EINVAL;
}
- flags = HIST_FIELD_FL_VAR_REF;
- ref_field = create_hist_field(hist_data, NULL, flags, NULL);
+ ref_field = create_var_ref(hist_data, var_field, NULL, NULL);
if (!ref_field)
return -ENOMEM;
- if (init_var_ref(ref_field, var_field, NULL, NULL)) {
- destroy_hist_field(ref_field, 0);
- ret = -ENOMEM;
+ data->track_data.var_ref = ref_field;
+
+ if (data->handler == HANDLER_ONMAX)
+ track_var = create_var(hist_data, file, "__max", sizeof(u64), "u64");
+ if (IS_ERR(track_var)) {
+ hist_err(tr, HIST_ERR_ONX_VAR_CREATE_FAIL, 0);
+ ret = PTR_ERR(track_var);
goto out;
}
- hist_data->var_refs[hist_data->n_var_refs] = ref_field;
- ref_field->var_ref_idx = hist_data->n_var_refs++;
- data->onmax.var = ref_field;
- data->fn = onmax_save;
- data->onmax.max_var_ref_idx = var_ref_idx;
- max_var = create_var(hist_data, file, "max", sizeof(u64), "u64");
- if (IS_ERR(max_var)) {
- hist_err("onmax: Couldn't create onmax variable: ", "max");
- ret = PTR_ERR(max_var);
+ if (data->handler == HANDLER_ONCHANGE)
+ track_var = create_var(hist_data, file, "__change", sizeof(u64), "u64");
+ if (IS_ERR(track_var)) {
+ hist_err(tr, HIST_ERR_ONX_VAR_CREATE_FAIL, 0);
+ ret = PTR_ERR(track_var);
goto out;
}
- data->onmax.max_var = max_var;
+ data->track_data.track_var = track_var;
- for (i = 0; i < data->n_params; i++) {
- param = kstrdup(data->params[i], GFP_KERNEL);
- if (!param) {
- ret = -ENOMEM;
- goto out;
- }
-
- field_var = create_target_field_var(hist_data, NULL, NULL, param);
- if (IS_ERR(field_var)) {
- hist_err("onmax: Couldn't create field variable: ", param);
- ret = PTR_ERR(field_var);
- kfree(param);
- goto out;
- }
-
- hist_data->max_vars[hist_data->n_max_vars++] = field_var;
- if (field_var->val->flags & HIST_FIELD_FL_STRING)
- hist_data->n_max_var_str++;
-
- kfree(param);
- }
+ ret = action_create(hist_data, data);
out:
return ret;
}
-static int parse_action_params(char *params, struct action_data *data)
+static int parse_action_params(struct trace_array *tr, char *params,
+ struct action_data *data)
{
char *param, *saved_param;
+ bool first_param = true;
int ret = 0;
while (params) {
- if (data->n_params >= SYNTH_FIELDS_MAX)
+ if (data->n_params >= SYNTH_FIELDS_MAX) {
+ hist_err(tr, HIST_ERR_TOO_MANY_PARAMS, 0);
goto out;
+ }
param = strsep(¶ms, ",");
if (!param) {
+ hist_err(tr, HIST_ERR_PARAM_NOT_FOUND, 0);
ret = -EINVAL;
goto out;
}
param = strstrip(param);
if (strlen(param) < 2) {
- hist_err("Invalid action param: ", param);
+ hist_err(tr, HIST_ERR_INVALID_PARAM, errpos(param));
ret = -EINVAL;
goto out;
}
@@ -3426,88 +3891,164 @@
goto out;
}
+ if (first_param && data->use_trace_keyword) {
+ data->synth_event_name = saved_param;
+ first_param = false;
+ continue;
+ }
+ first_param = false;
+
data->params[data->n_params++] = saved_param;
}
out:
return ret;
}
-static struct action_data *onmax_parse(char *str)
+static int action_parse(struct trace_array *tr, char *str, struct action_data *data,
+ enum handler_id handler)
{
- char *onmax_fn_name, *onmax_var_str;
+ char *action_name;
+ int ret = 0;
+
+ strsep(&str, ".");
+ if (!str) {
+ hist_err(tr, HIST_ERR_ACTION_NOT_FOUND, 0);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ action_name = strsep(&str, "(");
+ if (!action_name || !str) {
+ hist_err(tr, HIST_ERR_ACTION_NOT_FOUND, 0);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (str_has_prefix(action_name, "save")) {
+ char *params = strsep(&str, ")");
+
+ if (!params) {
+ hist_err(tr, HIST_ERR_NO_SAVE_PARAMS, 0);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = parse_action_params(tr, params, data);
+ if (ret)
+ goto out;
+
+ if (handler == HANDLER_ONMAX)
+ data->track_data.check_val = check_track_val_max;
+ else if (handler == HANDLER_ONCHANGE)
+ data->track_data.check_val = check_track_val_changed;
+ else {
+ hist_err(tr, HIST_ERR_ACTION_MISMATCH, errpos(action_name));
+ ret = -EINVAL;
+ goto out;
+ }
+
+ data->track_data.save_data = save_track_data_vars;
+ data->fn = ontrack_action;
+ data->action = ACTION_SAVE;
+ } else if (str_has_prefix(action_name, "snapshot")) {
+ char *params = strsep(&str, ")");
+
+ if (!str) {
+ hist_err(tr, HIST_ERR_NO_CLOSING_PAREN, errpos(params));
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (handler == HANDLER_ONMAX)
+ data->track_data.check_val = check_track_val_max;
+ else if (handler == HANDLER_ONCHANGE)
+ data->track_data.check_val = check_track_val_changed;
+ else {
+ hist_err(tr, HIST_ERR_ACTION_MISMATCH, errpos(action_name));
+ ret = -EINVAL;
+ goto out;
+ }
+
+ data->track_data.save_data = save_track_data_snapshot;
+ data->fn = ontrack_action;
+ data->action = ACTION_SNAPSHOT;
+ } else {
+ char *params = strsep(&str, ")");
+
+ if (str_has_prefix(action_name, "trace"))
+ data->use_trace_keyword = true;
+
+ if (params) {
+ ret = parse_action_params(tr, params, data);
+ if (ret)
+ goto out;
+ }
+
+ if (handler == HANDLER_ONMAX)
+ data->track_data.check_val = check_track_val_max;
+ else if (handler == HANDLER_ONCHANGE)
+ data->track_data.check_val = check_track_val_changed;
+
+ if (handler != HANDLER_ONMATCH) {
+ data->track_data.save_data = action_trace;
+ data->fn = ontrack_action;
+ } else
+ data->fn = action_trace;
+
+ data->action = ACTION_TRACE;
+ }
+
+ data->action_name = kstrdup(action_name, GFP_KERNEL);
+ if (!data->action_name) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ data->handler = handler;
+ out:
+ return ret;
+}
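action_parse() now covers all of the handler/action combinations the hist trigger syntax accepts, for example (command fragments per Documentation/trace/histogram.rst; event and variable names are illustrative):

	onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio)
	onmax($wakeup_lat).snapshot()
	onchange($v1).save(field)
	onmatch(sched.sched_waking).trace(wakeup_latency,$wakeup_lat,next_pid)
	onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid)

In the 'trace' form the synthetic event name is passed as the first parameter (use_trace_keyword); in the older last form the action name itself is taken as the synthetic event name.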
+
+static struct action_data *track_data_parse(struct hist_trigger_data *hist_data,
+ char *str, enum handler_id handler)
+{
struct action_data *data;
int ret = -EINVAL;
+ char *var_str;
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return ERR_PTR(-ENOMEM);
- onmax_var_str = strsep(&str, ")");
- if (!onmax_var_str || !str) {
+ var_str = strsep(&str, ")");
+ if (!var_str || !str) {
ret = -EINVAL;
goto free;
}
- data->onmax.var_str = kstrdup(onmax_var_str, GFP_KERNEL);
- if (!data->onmax.var_str) {
+ data->track_data.var_str = kstrdup(var_str, GFP_KERNEL);
+ if (!data->track_data.var_str) {
ret = -ENOMEM;
goto free;
}
- strsep(&str, ".");
- if (!str)
+ ret = action_parse(hist_data->event_file->tr, str, data, handler);
+ if (ret)
goto free;
-
- onmax_fn_name = strsep(&str, "(");
- if (!onmax_fn_name || !str)
- goto free;
-
- if (strncmp(onmax_fn_name, "save", strlen("save")) == 0) {
- char *params = strsep(&str, ")");
-
- if (!params) {
- ret = -EINVAL;
- goto free;
- }
-
- ret = parse_action_params(params, data);
- if (ret)
- goto free;
- } else
- goto free;
-
- data->onmax.fn_name = kstrdup(onmax_fn_name, GFP_KERNEL);
- if (!data->onmax.fn_name) {
- ret = -ENOMEM;
- goto free;
- }
out:
return data;
free:
- onmax_destroy(data);
+ track_data_destroy(hist_data, data);
data = ERR_PTR(ret);
goto out;
}
static void onmatch_destroy(struct action_data *data)
{
- unsigned int i;
+ kfree(data->match_data.event);
+ kfree(data->match_data.event_system);
- mutex_lock(&synth_event_mutex);
-
- kfree(data->onmatch.match_event);
- kfree(data->onmatch.match_event_system);
- kfree(data->onmatch.synth_event_name);
-
- for (i = 0; i < data->n_params; i++)
- kfree(data->params[i]);
-
- if (data->onmatch.synth_event)
- data->onmatch.synth_event->ref--;
-
- kfree(data);
-
- mutex_unlock(&synth_event_mutex);
+ action_data_destroy(data);
}
static void destroy_field_var(struct field_var *field_var)
@@ -3539,23 +4080,6 @@
}
-static void destroy_synth_var_refs(struct hist_trigger_data *hist_data)
-{
- unsigned int i;
-
- for (i = 0; i < hist_data->n_synth_var_refs; i++)
- destroy_hist_field(hist_data->synth_var_refs[i], 0);
-}
-
-static void save_synth_var_ref(struct hist_trigger_data *hist_data,
- struct hist_field *var_ref)
-{
- hist_data->synth_var_refs[hist_data->n_synth_var_refs++] = var_ref;
-
- hist_data->var_refs[hist_data->n_var_refs] = var_ref;
- var_ref->var_ref_idx = hist_data->n_var_refs++;
-}
-
static int check_synth_field(struct synth_event *event,
struct hist_field *hist_field,
unsigned int field_pos)
@@ -3574,33 +4098,35 @@
}
static struct hist_field *
-onmatch_find_var(struct hist_trigger_data *hist_data, struct action_data *data,
- char *system, char *event, char *var)
+trace_action_find_var(struct hist_trigger_data *hist_data,
+ struct action_data *data,
+ char *system, char *event, char *var)
{
+ struct trace_array *tr = hist_data->event_file->tr;
struct hist_field *hist_field;
var++; /* skip '$' */
hist_field = find_target_event_var(hist_data, system, event, var);
if (!hist_field) {
- if (!system) {
- system = data->onmatch.match_event_system;
- event = data->onmatch.match_event;
+ if (!system && data->handler == HANDLER_ONMATCH) {
+ system = data->match_data.event_system;
+ event = data->match_data.event;
}
hist_field = find_event_var(hist_data, system, event, var);
}
if (!hist_field)
- hist_err_event("onmatch: Couldn't find onmatch param: $", system, event, var);
+ hist_err(tr, HIST_ERR_PARAM_NOT_FOUND, errpos(var));
return hist_field;
}
static struct hist_field *
-onmatch_create_field_var(struct hist_trigger_data *hist_data,
- struct action_data *data, char *system,
- char *event, char *var)
+trace_action_create_field_var(struct hist_trigger_data *hist_data,
+ struct action_data *data, char *system,
+ char *event, char *var)
{
struct hist_field *hist_field = NULL;
struct field_var *field_var;
@@ -3623,9 +4149,9 @@
* looking for fields on the onmatch(system.event.xxx)
* event.
*/
- if (!system) {
- system = data->onmatch.match_event_system;
- event = data->onmatch.match_event;
+ if (!system && data->handler == HANDLER_ONMATCH) {
+ system = data->match_data.event_system;
+ event = data->match_data.event;
}
/*
@@ -3647,26 +4173,32 @@
goto out;
}
-static int onmatch_create(struct hist_trigger_data *hist_data,
- struct trace_event_file *file,
- struct action_data *data)
+static int trace_action_create(struct hist_trigger_data *hist_data,
+ struct action_data *data)
{
+ struct trace_array *tr = hist_data->event_file->tr;
char *event_name, *param, *system = NULL;
struct hist_field *hist_field, *var_ref;
unsigned int i, var_ref_idx;
unsigned int field_pos = 0;
struct synth_event *event;
+ char *synth_event_name;
int ret = 0;
- mutex_lock(&synth_event_mutex);
- event = find_synth_event(data->onmatch.synth_event_name);
+ lockdep_assert_held(&event_mutex);
+
+ if (data->use_trace_keyword)
+ synth_event_name = data->synth_event_name;
+ else
+ synth_event_name = data->action_name;
+
+ event = find_synth_event(synth_event_name);
if (!event) {
- hist_err("onmatch: Couldn't find synthetic event: ", data->onmatch.synth_event_name);
- mutex_unlock(&synth_event_mutex);
+ hist_err(tr, HIST_ERR_SYNTH_EVENT_NOT_FOUND, errpos(synth_event_name));
return -EINVAL;
}
+
event->ref++;
- mutex_unlock(&synth_event_mutex);
var_ref_idx = hist_data->n_var_refs;
@@ -3693,13 +4225,15 @@
}
if (param[0] == '$')
- hist_field = onmatch_find_var(hist_data, data, system,
- event_name, param);
+ hist_field = trace_action_find_var(hist_data, data,
+ system, event_name,
+ param);
else
- hist_field = onmatch_create_field_var(hist_data, data,
- system,
- event_name,
- param);
+ hist_field = trace_action_create_field_var(hist_data,
+ data,
+ system,
+ event_name,
+ param);
if (!hist_field) {
kfree(p);
@@ -3708,49 +4242,112 @@
}
if (check_synth_field(event, hist_field, field_pos) == 0) {
- var_ref = create_var_ref(hist_field, system, event_name);
+ var_ref = create_var_ref(hist_data, hist_field,
+ system, event_name);
if (!var_ref) {
kfree(p);
ret = -ENOMEM;
goto err;
}
- save_synth_var_ref(hist_data, var_ref);
field_pos++;
kfree(p);
continue;
}
- hist_err_event("onmatch: Param type doesn't match synthetic event field type: ",
- system, event_name, param);
+ hist_err(tr, HIST_ERR_SYNTH_TYPE_MISMATCH, errpos(param));
kfree(p);
ret = -EINVAL;
goto err;
}
if (field_pos != event->n_fields) {
- hist_err("onmatch: Param count doesn't match synthetic event field count: ", event->name);
+ hist_err(tr, HIST_ERR_SYNTH_COUNT_MISMATCH, errpos(event->name));
ret = -EINVAL;
goto err;
}
- data->fn = action_trace;
- data->onmatch.synth_event = event;
- data->onmatch.var_ref_idx = var_ref_idx;
+ data->synth_event = event;
+ data->var_ref_idx = var_ref_idx;
out:
return ret;
err:
- mutex_lock(&synth_event_mutex);
event->ref--;
- mutex_unlock(&synth_event_mutex);
goto out;
}
+static int action_create(struct hist_trigger_data *hist_data,
+ struct action_data *data)
+{
+ struct trace_event_file *file = hist_data->event_file;
+ struct trace_array *tr = file->tr;
+ struct track_data *track_data;
+ struct field_var *field_var;
+ unsigned int i;
+ char *param;
+ int ret = 0;
+
+ if (data->action == ACTION_TRACE)
+ return trace_action_create(hist_data, data);
+
+ if (data->action == ACTION_SNAPSHOT) {
+ track_data = track_data_alloc(hist_data->key_size, data, hist_data);
+ if (IS_ERR(track_data)) {
+ ret = PTR_ERR(track_data);
+ goto out;
+ }
+
+ ret = tracing_snapshot_cond_enable(file->tr, track_data,
+ cond_snapshot_update);
+ if (ret)
+ track_data_free(track_data);
+
+ goto out;
+ }
+
+ if (data->action == ACTION_SAVE) {
+ if (hist_data->n_save_vars) {
+ ret = -EEXIST;
+ hist_err(tr, HIST_ERR_TOO_MANY_SAVE_ACTIONS, 0);
+ goto out;
+ }
+
+ for (i = 0; i < data->n_params; i++) {
+ param = kstrdup(data->params[i], GFP_KERNEL);
+ if (!param) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ field_var = create_target_field_var(hist_data, NULL, NULL, param);
+ if (IS_ERR(field_var)) {
+ hist_err(tr, HIST_ERR_FIELD_VAR_CREATE_FAIL,
+ errpos(param));
+ ret = PTR_ERR(field_var);
+ kfree(param);
+ goto out;
+ }
+
+ hist_data->save_vars[hist_data->n_save_vars++] = field_var;
+ if (field_var->val->flags & HIST_FIELD_FL_STRING)
+ hist_data->n_save_var_str++;
+ kfree(param);
+ }
+ }
+ out:
+ return ret;
+}
+
+static int onmatch_create(struct hist_trigger_data *hist_data,
+ struct action_data *data)
+{
+ return action_create(hist_data, data);
+}
+
static struct action_data *onmatch_parse(struct trace_array *tr, char *str)
{
char *match_event, *match_event_system;
- char *synth_event_name, *params;
struct action_data *data;
int ret = -EINVAL;
@@ -3760,59 +4357,34 @@
match_event = strsep(&str, ")");
if (!match_event || !str) {
- hist_err("onmatch: Missing closing paren: ", match_event);
+ hist_err(tr, HIST_ERR_NO_CLOSING_PAREN, errpos(match_event));
goto free;
}
match_event_system = strsep(&match_event, ".");
if (!match_event) {
- hist_err("onmatch: Missing subsystem for match event: ", match_event_system);
+ hist_err(tr, HIST_ERR_SUBSYS_NOT_FOUND, errpos(match_event_system));
goto free;
}
if (IS_ERR(event_file(tr, match_event_system, match_event))) {
- hist_err_event("onmatch: Invalid subsystem or event name: ",
- match_event_system, match_event, NULL);
+ hist_err(tr, HIST_ERR_INVALID_SUBSYS_EVENT, errpos(match_event));
goto free;
}
- data->onmatch.match_event = kstrdup(match_event, GFP_KERNEL);
- if (!data->onmatch.match_event) {
+ data->match_data.event = kstrdup(match_event, GFP_KERNEL);
+ if (!data->match_data.event) {
ret = -ENOMEM;
goto free;
}
- data->onmatch.match_event_system = kstrdup(match_event_system, GFP_KERNEL);
- if (!data->onmatch.match_event_system) {
+ data->match_data.event_system = kstrdup(match_event_system, GFP_KERNEL);
+ if (!data->match_data.event_system) {
ret = -ENOMEM;
goto free;
}
- strsep(&str, ".");
- if (!str) {
- hist_err("onmatch: Missing . after onmatch(): ", str);
- goto free;
- }
-
- synth_event_name = strsep(&str, "(");
- if (!synth_event_name || !str) {
- hist_err("onmatch: Missing opening paramlist paren: ", synth_event_name);
- goto free;
- }
-
- data->onmatch.synth_event_name = kstrdup(synth_event_name, GFP_KERNEL);
- if (!data->onmatch.synth_event_name) {
- ret = -ENOMEM;
- goto free;
- }
-
- params = strsep(&str, ")");
- if (!params || !str || (str && strlen(str))) {
- hist_err("onmatch: Missing closing paramlist paren: ", params);
- goto free;
- }
-
- ret = parse_action_params(params, data);
+ ret = action_parse(tr, str, data, HANDLER_ONMATCH);
if (ret)
goto free;
out:
@@ -3881,13 +4453,14 @@
struct trace_event_file *file,
char *var_name, char *expr_str)
{
+ struct trace_array *tr = hist_data->event_file->tr;
unsigned long flags = 0;
if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX + TRACING_MAP_VARS_MAX))
return -EINVAL;
if (find_var(hist_data, file, var_name) && !hist_data->remove) {
- hist_err("Variable already defined: ", var_name);
+ hist_err(tr, HIST_ERR_DUPLICATE_VAR, errpos(var_name));
return -EINVAL;
}
@@ -3944,8 +4517,8 @@
struct trace_event_file *file,
char *field_str)
{
+ struct trace_array *tr = hist_data->event_file->tr;
struct hist_field *hist_field = NULL;
-
unsigned long flags = 0;
unsigned int key_size;
int ret = 0;
@@ -3967,8 +4540,8 @@
goto out;
}
- if (hist_field->flags & HIST_FIELD_FL_VAR_REF) {
- hist_err("Using variable references as keys not supported: ", field_str);
+ if (field_has_hist_vars(hist_field, 0)) {
+ hist_err(tr, HIST_ERR_INVALID_REF_KEY, errpos(field_str));
destroy_hist_field(hist_field, 0);
ret = -EINVAL;
goto out;
@@ -4069,6 +4642,7 @@
static int parse_var_defs(struct hist_trigger_data *hist_data)
{
+ struct trace_array *tr = hist_data->event_file->tr;
char *s, *str, *var_name, *field_str;
unsigned int i, j, n_vars = 0;
int ret = 0;
@@ -4082,13 +4656,14 @@
var_name = strsep(&field_str, "=");
if (!var_name || !field_str) {
- hist_err("Malformed assignment: ", var_name);
+ hist_err(tr, HIST_ERR_MALFORMED_ASSIGNMENT,
+ errpos(var_name));
ret = -EINVAL;
goto free;
}
if (n_vars == TRACING_MAP_VARS_MAX) {
- hist_err("Too many variables defined: ", var_name);
+ hist_err(tr, HIST_ERR_TOO_MANY_VARS, errpos(var_name));
ret = -EINVAL;
goto free;
}
@@ -4252,10 +4827,11 @@
for (i = 0; i < hist_data->n_actions; i++) {
struct action_data *data = hist_data->actions[i];
- if (data->fn == action_trace)
+ if (data->handler == HANDLER_ONMATCH)
onmatch_destroy(data);
- else if (data->fn == onmax_save)
- onmax_destroy(data);
+ else if (data->handler == HANDLER_ONMAX ||
+ data->handler == HANDLER_ONCHANGE)
+ track_data_destroy(hist_data, data);
else
kfree(data);
}
@@ -4268,28 +4844,37 @@
unsigned int i;
int ret = 0;
char *str;
+ int len;
for (i = 0; i < hist_data->attrs->n_actions; i++) {
str = hist_data->attrs->action_str[i];
- if (strncmp(str, "onmatch(", strlen("onmatch(")) == 0) {
- char *action_str = str + strlen("onmatch(");
+ if ((len = str_has_prefix(str, "onmatch("))) {
+ char *action_str = str + len;
data = onmatch_parse(tr, action_str);
if (IS_ERR(data)) {
ret = PTR_ERR(data);
break;
}
- data->fn = action_trace;
- } else if (strncmp(str, "onmax(", strlen("onmax(")) == 0) {
- char *action_str = str + strlen("onmax(");
+ } else if ((len = str_has_prefix(str, "onmax("))) {
+ char *action_str = str + len;
- data = onmax_parse(action_str);
+ data = track_data_parse(hist_data, action_str,
+ HANDLER_ONMAX);
if (IS_ERR(data)) {
ret = PTR_ERR(data);
break;
}
- data->fn = onmax_save;
+ } else if ((len = str_has_prefix(str, "onchange("))) {
+ char *action_str = str + len;
+
+ data = track_data_parse(hist_data, action_str,
+ HANDLER_ONCHANGE);
+ if (IS_ERR(data)) {
+ ret = PTR_ERR(data);
+ break;
+ }
} else {
ret = -EINVAL;
break;
@@ -4301,8 +4886,7 @@
return ret;
}
-static int create_actions(struct hist_trigger_data *hist_data,
- struct trace_event_file *file)
+static int create_actions(struct hist_trigger_data *hist_data)
{
struct action_data *data;
unsigned int i;
@@ -4311,14 +4895,18 @@
for (i = 0; i < hist_data->attrs->n_actions; i++) {
data = hist_data->actions[i];
- if (data->fn == action_trace) {
- ret = onmatch_create(hist_data, file, data);
+ if (data->handler == HANDLER_ONMATCH) {
+ ret = onmatch_create(hist_data, data);
if (ret)
- return ret;
- } else if (data->fn == onmax_save) {
- ret = onmax_create(hist_data, data);
+ break;
+ } else if (data->handler == HANDLER_ONMAX ||
+ data->handler == HANDLER_ONCHANGE) {
+ ret = track_data_create(hist_data, data);
if (ret)
- return ret;
+ break;
+ } else {
+ ret = -EINVAL;
+ break;
}
}
@@ -4334,26 +4922,51 @@
for (i = 0; i < hist_data->n_actions; i++) {
struct action_data *data = hist_data->actions[i];
- if (data->fn == onmax_save)
- onmax_print(m, hist_data, elt, data);
+ if (data->action == ACTION_SNAPSHOT)
+ continue;
+
+ if (data->handler == HANDLER_ONMAX ||
+ data->handler == HANDLER_ONCHANGE)
+ track_data_print(m, hist_data, elt, data);
}
}
-static void print_onmax_spec(struct seq_file *m,
- struct hist_trigger_data *hist_data,
- struct action_data *data)
+static void print_action_spec(struct seq_file *m,
+ struct hist_trigger_data *hist_data,
+ struct action_data *data)
{
unsigned int i;
- seq_puts(m, ":onmax(");
- seq_printf(m, "%s", data->onmax.var_str);
- seq_printf(m, ").%s(", data->onmax.fn_name);
-
- for (i = 0; i < hist_data->n_max_vars; i++) {
- seq_printf(m, "%s", hist_data->max_vars[i]->var->var.name);
- if (i < hist_data->n_max_vars - 1)
- seq_puts(m, ",");
+ if (data->action == ACTION_SAVE) {
+ for (i = 0; i < hist_data->n_save_vars; i++) {
+ seq_printf(m, "%s", hist_data->save_vars[i]->var->var.name);
+ if (i < hist_data->n_save_vars - 1)
+ seq_puts(m, ",");
+ }
+ } else if (data->action == ACTION_TRACE) {
+ if (data->use_trace_keyword)
+ seq_printf(m, "%s", data->synth_event_name);
+ for (i = 0; i < data->n_params; i++) {
+ if (i || data->use_trace_keyword)
+ seq_puts(m, ",");
+ seq_printf(m, "%s", data->params[i]);
+ }
}
+}
+
+static void print_track_data_spec(struct seq_file *m,
+ struct hist_trigger_data *hist_data,
+ struct action_data *data)
+{
+ if (data->handler == HANDLER_ONMAX)
+ seq_puts(m, ":onmax(");
+ else if (data->handler == HANDLER_ONCHANGE)
+ seq_puts(m, ":onchange(");
+ seq_printf(m, "%s", data->track_data.var_str);
+ seq_printf(m, ").%s(", data->action_name);
+
+ print_action_spec(m, hist_data, data);
+
seq_puts(m, ")");
}
@@ -4361,18 +4974,12 @@
struct hist_trigger_data *hist_data,
struct action_data *data)
{
- unsigned int i;
+ seq_printf(m, ":onmatch(%s.%s).", data->match_data.event_system,
+ data->match_data.event);
- seq_printf(m, ":onmatch(%s.%s).", data->onmatch.match_event_system,
- data->onmatch.match_event);
+ seq_printf(m, "%s(", data->action_name);
- seq_printf(m, "%s(", data->onmatch.synth_event->name);
-
- for (i = 0; i < data->n_params; i++) {
- if (i)
- seq_puts(m, ",");
- seq_printf(m, "%s", data->params[i]);
- }
+ print_action_spec(m, hist_data, data);
seq_puts(m, ")");
}
@@ -4388,8 +4995,11 @@
for (i = 0; i < hist_data->n_actions; i++) {
struct action_data *data = hist_data->actions[i];
struct action_data *data_test = hist_data_test->actions[i];
+ char *action_name, *action_name_test;
- if (data->fn != data_test->fn)
+ if (data->handler != data_test->handler)
+ return false;
+ if (data->action != data_test->action)
return false;
if (data->n_params != data_test->n_params)
@@ -4400,22 +5010,30 @@
return false;
}
- if (data->fn == action_trace) {
- if (strcmp(data->onmatch.synth_event_name,
- data_test->onmatch.synth_event_name) != 0)
+ if (data->use_trace_keyword)
+ action_name = data->synth_event_name;
+ else
+ action_name = data->action_name;
+
+ if (data_test->use_trace_keyword)
+ action_name_test = data_test->synth_event_name;
+ else
+ action_name_test = data_test->action_name;
+
+ if (strcmp(action_name, action_name_test) != 0)
+ return false;
+
+ if (data->handler == HANDLER_ONMATCH) {
+ if (strcmp(data->match_data.event_system,
+ data_test->match_data.event_system) != 0)
return false;
- if (strcmp(data->onmatch.match_event_system,
- data_test->onmatch.match_event_system) != 0)
+ if (strcmp(data->match_data.event,
+ data_test->match_data.event) != 0)
return false;
- if (strcmp(data->onmatch.match_event,
- data_test->onmatch.match_event) != 0)
- return false;
- } else if (data->fn == onmax_save) {
- if (strcmp(data->onmax.var_str,
- data_test->onmax.var_str) != 0)
- return false;
- if (strcmp(data->onmax.fn_name,
- data_test->onmax.fn_name) != 0)
+ } else if (data->handler == HANDLER_ONMAX ||
+ data->handler == HANDLER_ONCHANGE) {
+ if (strcmp(data->track_data.var_str,
+ data_test->track_data.var_str) != 0)
return false;
}
}
@@ -4432,10 +5050,11 @@
for (i = 0; i < hist_data->n_actions; i++) {
struct action_data *data = hist_data->actions[i];
- if (data->fn == action_trace)
+ if (data->handler == HANDLER_ONMATCH)
print_onmatch_spec(m, hist_data, data);
- else if (data->fn == onmax_save)
- print_onmax_spec(m, hist_data, data);
+ else if (data->handler == HANDLER_ONMAX ||
+ data->handler == HANDLER_ONCHANGE)
+ print_track_data_spec(m, hist_data, data);
}
}
@@ -4461,7 +5080,6 @@
destroy_actions(hist_data);
destroy_field_vars(hist_data);
destroy_field_var_hists(hist_data);
- destroy_synth_var_refs(hist_data);
kfree(hist_data);
}
@@ -4621,22 +5239,24 @@
/* ensure NULL-termination */
if (size > key_field->size - 1)
size = key_field->size - 1;
- }
- memcpy(compound_key + key_field->offset, key, size);
+ strncpy(compound_key + key_field->offset, (char *)key, size);
+ } else
+ memcpy(compound_key + key_field->offset, key, size);
}
static void
hist_trigger_actions(struct hist_trigger_data *hist_data,
struct tracing_map_elt *elt, void *rec,
- struct ring_buffer_event *rbe, u64 *var_ref_vals)
+ struct ring_buffer_event *rbe, void *key,
+ u64 *var_ref_vals)
{
struct action_data *data;
unsigned int i;
for (i = 0; i < hist_data->n_actions; i++) {
data = hist_data->actions[i];
- data->fn(hist_data, elt, rec, rbe, data, var_ref_vals);
+ data->fn(hist_data, elt, rec, rbe, key, data, var_ref_vals);
}
}
@@ -4649,7 +5269,6 @@
u64 var_ref_vals[TRACING_MAP_VARS_MAX];
char compound_key[HIST_KEY_SIZE_MAX];
struct tracing_map_elt *elt = NULL;
- struct stack_trace stacktrace;
struct hist_field *key_field;
u64 field_contents;
void *key = NULL;
@@ -4661,14 +5280,9 @@
key_field = hist_data->fields[i];
if (key_field->flags & HIST_FIELD_FL_STACKTRACE) {
- stacktrace.max_entries = HIST_STACKTRACE_DEPTH;
- stacktrace.entries = entries;
- stacktrace.nr_entries = 0;
- stacktrace.skip = HIST_STACKTRACE_SKIP;
-
- memset(stacktrace.entries, 0, HIST_STACKTRACE_SIZE);
- save_stack_trace(&stacktrace);
-
+ memset(entries, 0, HIST_STACKTRACE_SIZE);
+ stack_trace_save(entries, HIST_STACKTRACE_DEPTH,
+ HIST_STACKTRACE_SKIP);
key = entries;
} else {
field_contents = key_field->fn(key_field, elt, rbe, rec);
@@ -4697,7 +5311,7 @@
hist_trigger_elt_update(hist_data, elt, rec, rbe, var_ref_vals);
if (resolve_var_refs(hist_data, key, var_ref_vals, true))
- hist_trigger_actions(hist_data, elt, rec, rbe, var_ref_vals);
+ hist_trigger_actions(hist_data, elt, rec, rbe, key, var_ref_vals);
}
static void hist_trigger_stacktrace_print(struct seq_file *m,
@@ -4709,7 +5323,7 @@
unsigned int i;
for (i = 0; i < max_entries; i++) {
- if (stacktrace_entries[i] == ULONG_MAX)
+ if (!stacktrace_entries[i])
return;
seq_printf(m, "%*c", 1 + spaces, ' ');
@@ -4718,10 +5332,10 @@
}
}
-static void
-hist_trigger_entry_print(struct seq_file *m,
- struct hist_trigger_data *hist_data, void *key,
- struct tracing_map_elt *elt)
+static void hist_trigger_print_key(struct seq_file *m,
+ struct hist_trigger_data *hist_data,
+ void *key,
+ struct tracing_map_elt *elt)
{
struct hist_field *key_field;
char str[KSYM_SYMBOL_LEN];
@@ -4797,6 +5411,17 @@
seq_puts(m, " ");
seq_puts(m, "}");
+}
+
+static void hist_trigger_entry_print(struct seq_file *m,
+ struct hist_trigger_data *hist_data,
+ void *key,
+ struct tracing_map_elt *elt)
+{
+ const char *field_name;
+ unsigned int i;
+
+ hist_trigger_print_key(m, hist_data, key, elt);
seq_printf(m, " hitcount: %10llu",
tracing_map_read_sum(elt, HITCOUNT_IDX));
@@ -4863,6 +5488,8 @@
if (n_entries < 0)
n_entries = 0;
+ track_data_snapshot_print(m, hist_data);
+
seq_printf(m, "\nTotals:\n Hits: %llu\n Entries: %u\n Dropped: %llu\n",
(u64)atomic64_read(&hist_data->map->hits),
n_entries, (u64)atomic64_read(&hist_data->map->drops));
@@ -4887,11 +5514,6 @@
hist_trigger_show(m, data, n++);
}
- if (have_hist_err()) {
- seq_printf(m, "\nERROR: %s\n", hist_err_str);
- seq_printf(m, " Last command: %s\n", last_hist_cmd);
- }
-
out_unlock:
mutex_unlock(&event_mutex);
@@ -4900,6 +5522,12 @@
static int event_hist_open(struct inode *inode, struct file *file)
{
+ int ret;
+
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
+ if (ret)
+ return ret;
+
return single_open(file, hist_show, file);
}
@@ -5256,6 +5884,7 @@
{
struct hist_trigger_data *hist_data = data->private_data;
struct event_trigger_data *test, *named_data = NULL;
+ struct trace_array *tr = file->tr;
int ret = 0;
if (hist_data->attrs->name) {
@@ -5263,7 +5892,7 @@
if (named_data) {
if (!hist_trigger_match(data, named_data, named_data,
true)) {
- hist_err("Named hist trigger doesn't match existing named trigger (includes variables): ", hist_data->attrs->name);
+ hist_err(tr, HIST_ERR_NAMED_MISMATCH, errpos(hist_data->attrs->name));
ret = -EINVAL;
goto out;
}
@@ -5284,7 +5913,7 @@
else if (hist_data->attrs->clear)
hist_clear(test);
else {
- hist_err("Hist trigger already exists", NULL);
+ hist_err(tr, HIST_ERR_TRIGGER_EEXIST, 0);
ret = -EEXIST;
}
goto out;
@@ -5292,7 +5921,7 @@
}
new:
if (hist_data->attrs->cont || hist_data->attrs->clear) {
- hist_err("Can't clear or continue a nonexistent hist trigger", NULL);
+ hist_err(tr, HIST_ERR_TRIGGER_ENOENT_CLEAR, 0);
ret = -ENOENT;
goto out;
}
@@ -5317,7 +5946,7 @@
ret = tracing_set_clock(file->tr, hist_data->attrs->clock);
if (ret) {
- hist_err("Couldn't set trace_clock: ", clock);
+ hist_err(tr, HIST_ERR_SET_CLOCK_FAIL, errpos(clock));
goto out;
}
@@ -5450,6 +6079,8 @@
struct synth_event *se;
const char *se_name;
+ lockdep_assert_held(&event_mutex);
+
if (hist_file_check_refs(file))
return;
@@ -5459,12 +6090,10 @@
list_del_rcu(&test->list);
trace_event_trigger_enable_disable(file, 0);
- mutex_lock(&synth_event_mutex);
se_name = trace_event_name(file->event_call);
se = find_synth_event(se_name);
if (se)
se->ref--;
- mutex_unlock(&synth_event_mutex);
update_cond_flag(file);
if (hist_data->enable_timestamps)
@@ -5490,9 +6119,11 @@
char *trigger, *p;
int ret = 0;
+ lockdep_assert_held(&event_mutex);
+
if (glob && strlen(glob)) {
- last_cmd_set(param);
hist_err_clear();
+ last_cmd_set(file, param);
}
if (!param)
@@ -5516,9 +6147,9 @@
p++;
continue;
}
- if (p >= param + strlen(param) - strlen("if") - 1)
+ if (p >= param + strlen(param) - (sizeof("if") - 1) - 1)
return -EINVAL;
- if (*(p + strlen("if")) != ' ' && *(p + strlen("if")) != '\t') {
+ if (*(p + sizeof("if") - 1) != ' ' && *(p + sizeof("if") - 1) != '\t') {
p++;
continue;
}
@@ -5533,7 +6164,7 @@
trigger = strstrip(trigger);
}
- attrs = parse_hist_trigger_attrs(trigger);
+ attrs = parse_hist_trigger_attrs(file->tr, trigger);
if (IS_ERR(attrs))
return PTR_ERR(attrs);
@@ -5580,14 +6211,10 @@
}
cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file);
-
- mutex_lock(&synth_event_mutex);
se_name = trace_event_name(file->event_call);
se = find_synth_event(se_name);
if (se)
se->ref--;
- mutex_unlock(&synth_event_mutex);
-
ret = 0;
goto out_free;
}
@@ -5611,7 +6238,7 @@
if (has_hist_vars(hist_data))
save_hist_vars(hist_data);
- ret = create_actions(hist_data, file);
+ ret = create_actions(hist_data);
if (ret)
goto out_unreg;
@@ -5623,13 +6250,10 @@
if (ret)
goto out_unreg;
- mutex_lock(&synth_event_mutex);
se_name = trace_event_name(file->event_call);
se = find_synth_event(se_name);
if (se)
se->ref++;
- mutex_unlock(&synth_event_mutex);
-
/* Just return zero, not the number of registered triggers */
ret = 0;
out:
@@ -5812,6 +6436,12 @@
struct dentry *d_tracer;
int err = 0;
+ err = dyn_event_register(&synth_event_ops);
+ if (err) {
+ pr_warn("Could not register synth_event_ops\n");
+ return err;
+ }
+
d_tracer = tracing_init_dentry();
if (IS_ERR(d_tracer)) {
err = PTR_ERR(d_tracer);
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index cd12ecb..2cd53ca 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -5,6 +5,7 @@
* Copyright (C) 2013 Tom Zanussi <tom.zanussi@linux.intel.com>
*/
+#include <linux/security.h>
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/mutex.h>
@@ -173,7 +174,11 @@
static int event_trigger_regex_open(struct inode *inode, struct file *file)
{
- int ret = 0;
+ int ret;
+
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
+ if (ret)
+ return ret;
mutex_lock(&event_mutex);
@@ -292,6 +297,7 @@
static int
event_trigger_open(struct inode *inode, struct file *filp)
{
+ /* Checks for tracefs lockdown */
return event_trigger_regex_open(inode, filp);
}
@@ -731,7 +737,8 @@
goto out;
/* The filter is for the 'trigger' event, not the triggered event */
- ret = create_event_filter(file->event_call, filter_str, false, &filter);
+ ret = create_event_filter(file->tr, file->event_call,
+ filter_str, false, &filter);
/*
* If create_event_filter() fails, filter still needs to be freed.
* Which the calling code will do with data->filter.
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 086af4f..78af971 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -16,33 +16,6 @@
#include "trace.h"
#include "trace_output.h"
-static bool kill_ftrace_graph;
-
-/**
- * ftrace_graph_is_dead - returns true if ftrace_graph_stop() was called
- *
- * ftrace_graph_stop() is called when a severe error is detected in
- * the function graph tracing. This function is called by the critical
- * paths of function graph to keep those paths from doing any more harm.
- */
-bool ftrace_graph_is_dead(void)
-{
- return kill_ftrace_graph;
-}
-
-/**
- * ftrace_graph_stop - set to permanently disable function graph tracincg
- *
- * In case of an error int function graph tracing, this is called
- * to try to keep function graph tracing from causing any more harm.
- * Usually this is pretty severe and this is called to try to at least
- * get a warning out to the user.
- */
-void ftrace_graph_stop(void)
-{
- kill_ftrace_graph = true;
-}
-
/* When set, irq functions will be ignored */
static int ftrace_graph_skip_irqs;
@@ -87,8 +60,12 @@
{ TRACER_OPT(funcgraph-tail, TRACE_GRAPH_PRINT_TAIL) },
/* Include sleep time (scheduled out) between entry and return */
{ TRACER_OPT(sleep-time, TRACE_GRAPH_SLEEP_TIME) },
+
+#ifdef CONFIG_FUNCTION_PROFILER
/* Include time within nested functions */
{ TRACER_OPT(graph-time, TRACE_GRAPH_GRAPH_TIME) },
+#endif
+
{ } /* Empty entry */
};
@@ -117,258 +94,6 @@
print_graph_duration(struct trace_array *tr, unsigned long long duration,
struct trace_seq *s, u32 flags);
-/* Add a function return address to the trace stack on thread info.*/
-static int
-ftrace_push_return_trace(unsigned long ret, unsigned long func,
- unsigned long frame_pointer, unsigned long *retp)
-{
- unsigned long long calltime;
- int index;
-
- if (unlikely(ftrace_graph_is_dead()))
- return -EBUSY;
-
- if (!current->ret_stack)
- return -EBUSY;
-
- /*
- * We must make sure the ret_stack is tested before we read
- * anything else.
- */
- smp_rmb();
-
- /* The return trace stack is full */
- if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
- atomic_inc(&current->trace_overrun);
- return -EBUSY;
- }
-
- /*
- * The curr_ret_stack is an index to ftrace return stack of
- * current task. Its value should be in [0, FTRACE_RETFUNC_
- * DEPTH) when the function graph tracer is used. To support
- * filtering out specific functions, it makes the index
- * negative by subtracting huge value (FTRACE_NOTRACE_DEPTH)
- * so when it sees a negative index the ftrace will ignore
- * the record. And the index gets recovered when returning
- * from the filtered function by adding the FTRACE_NOTRACE_
- * DEPTH and then it'll continue to record functions normally.
- *
- * The curr_ret_stack is initialized to -1 and get increased
- * in this function. So it can be less than -1 only if it was
- * filtered out via ftrace_graph_notrace_addr() which can be
- * set from set_graph_notrace file in tracefs by user.
- */
- if (current->curr_ret_stack < -1)
- return -EBUSY;
-
- calltime = trace_clock_local();
-
- index = ++current->curr_ret_stack;
- if (ftrace_graph_notrace_addr(func))
- current->curr_ret_stack -= FTRACE_NOTRACE_DEPTH;
- barrier();
- current->ret_stack[index].ret = ret;
- current->ret_stack[index].func = func;
- current->ret_stack[index].calltime = calltime;
-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
- current->ret_stack[index].fp = frame_pointer;
-#endif
-#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
- current->ret_stack[index].retp = retp;
-#endif
- return 0;
-}
-
-int function_graph_enter(unsigned long ret, unsigned long func,
- unsigned long frame_pointer, unsigned long *retp)
-{
- struct ftrace_graph_ent trace;
-
- trace.func = func;
- trace.depth = ++current->curr_ret_depth;
-
- if (ftrace_push_return_trace(ret, func,
- frame_pointer, retp))
- goto out;
-
- /* Only trace if the calling function expects to */
- if (!ftrace_graph_entry(&trace))
- goto out_ret;
-
- return 0;
- out_ret:
- current->curr_ret_stack--;
- out:
- current->curr_ret_depth--;
- return -EBUSY;
-}
-
-/* Retrieve a function return address to the trace stack on thread info.*/
-static void
-ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
- unsigned long frame_pointer)
-{
- int index;
-
- index = current->curr_ret_stack;
-
- /*
- * A negative index here means that it's just returned from a
- * notrace'd function. Recover index to get an original
- * return address. See ftrace_push_return_trace().
- *
- * TODO: Need to check whether the stack gets corrupted.
- */
- if (index < 0)
- index += FTRACE_NOTRACE_DEPTH;
-
- if (unlikely(index < 0 || index >= FTRACE_RETFUNC_DEPTH)) {
- ftrace_graph_stop();
- WARN_ON(1);
- /* Might as well panic, otherwise we have no where to go */
- *ret = (unsigned long)panic;
- return;
- }
-
-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
- /*
- * The arch may choose to record the frame pointer used
- * and check it here to make sure that it is what we expect it
- * to be. If gcc does not set the place holder of the return
- * address in the frame pointer, and does a copy instead, then
- * the function graph trace will fail. This test detects this
- * case.
- *
- * Currently, x86_32 with optimize for size (-Os) makes the latest
- * gcc do the above.
- *
- * Note, -mfentry does not use frame pointers, and this test
- * is not needed if CC_USING_FENTRY is set.
- */
- if (unlikely(current->ret_stack[index].fp != frame_pointer)) {
- ftrace_graph_stop();
- WARN(1, "Bad frame pointer: expected %lx, received %lx\n"
- " from func %ps return to %lx\n",
- current->ret_stack[index].fp,
- frame_pointer,
- (void *)current->ret_stack[index].func,
- current->ret_stack[index].ret);
- *ret = (unsigned long)panic;
- return;
- }
-#endif
-
- *ret = current->ret_stack[index].ret;
- trace->func = current->ret_stack[index].func;
- trace->calltime = current->ret_stack[index].calltime;
- trace->overrun = atomic_read(&current->trace_overrun);
- trace->depth = current->curr_ret_depth--;
- /*
- * We still want to trace interrupts coming in if
- * max_depth is set to 1. Make sure the decrement is
- * seen before ftrace_graph_return.
- */
- barrier();
-}
-
-/*
- * Send the trace to the ring-buffer.
- * @return the original return address.
- */
-unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
-{
- struct ftrace_graph_ret trace;
- unsigned long ret;
-
- ftrace_pop_return_trace(&trace, &ret, frame_pointer);
- trace.rettime = trace_clock_local();
- ftrace_graph_return(&trace);
- /*
- * The ftrace_graph_return() may still access the current
- * ret_stack structure, we need to make sure the update of
- * curr_ret_stack is after that.
- */
- barrier();
- current->curr_ret_stack--;
- /*
- * The curr_ret_stack can be less than -1 only if it was
- * filtered out and it's about to return from the function.
- * Recover the index and continue to trace normal functions.
- */
- if (current->curr_ret_stack < -1) {
- current->curr_ret_stack += FTRACE_NOTRACE_DEPTH;
- return ret;
- }
-
- if (unlikely(!ret)) {
- ftrace_graph_stop();
- WARN_ON(1);
- /* Might as well panic. What else to do? */
- ret = (unsigned long)panic;
- }
-
- return ret;
-}
-
-/**
- * ftrace_graph_ret_addr - convert a potentially modified stack return address
- * to its original value
- *
- * This function can be called by stack unwinding code to convert a found stack
- * return address ('ret') to its original value, in case the function graph
- * tracer has modified it to be 'return_to_handler'. If the address hasn't
- * been modified, the unchanged value of 'ret' is returned.
- *
- * 'idx' is a state variable which should be initialized by the caller to zero
- * before the first call.
- *
- * 'retp' is a pointer to the return address on the stack. It's ignored if
- * the arch doesn't have HAVE_FUNCTION_GRAPH_RET_ADDR_PTR defined.
- */
-#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
-unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
- unsigned long ret, unsigned long *retp)
-{
- int index = task->curr_ret_stack;
- int i;
-
- if (ret != (unsigned long)return_to_handler)
- return ret;
-
- if (index < -1)
- index += FTRACE_NOTRACE_DEPTH;
-
- if (index < 0)
- return ret;
-
- for (i = 0; i <= index; i++)
- if (task->ret_stack[i].retp == retp)
- return task->ret_stack[i].ret;
-
- return ret;
-}
-#else /* !HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
-unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
- unsigned long ret, unsigned long *retp)
-{
- int task_idx;
-
- if (ret != (unsigned long)return_to_handler)
- return ret;
-
- task_idx = task->curr_ret_stack;
-
- if (!task->ret_stack || task_idx < *idx)
- return ret;
-
- task_idx -= *idx;
- (*idx)++;
-
- return task->ret_stack[task_idx].ret;
-}
-#endif /* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
-
int __trace_graph_entry(struct trace_array *tr,
struct ftrace_graph_ent *trace,
unsigned long flags,
@@ -409,13 +134,7 @@
int cpu;
int pc;
- if (!ftrace_trace_task(tr))
- return 0;
-
- if (ftrace_graph_ignore_func(trace))
- return 0;
-
- if (ftrace_graph_ignore_irqs())
+ if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT))
return 0;
/*
@@ -425,8 +144,23 @@
* to recover the original index in order to continue tracing after
* returning from the function.
*/
- if (ftrace_graph_notrace_addr(trace->func))
+ if (ftrace_graph_notrace_addr(trace->func)) {
+ trace_recursion_set(TRACE_GRAPH_NOTRACE_BIT);
+ /*
+ * Need to return 1 to have the return called
+ * that will clear the NOTRACE bit.
+ */
return 1;
+ }
+
+ if (!ftrace_trace_task(tr))
+ return 0;
+
+ if (ftrace_graph_ignore_func(trace))
+ return 0;
+
+ if (ftrace_graph_ignore_irqs())
+ return 0;
/*
* Stop here if tracing_threshold is set. We only write function return
@@ -511,6 +245,11 @@
ftrace_graph_addr_finish(trace);
+ if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT)) {
+ trace_recursion_clear(TRACE_GRAPH_NOTRACE_BIT);
+ return;
+ }
+
local_irq_save(flags);
cpu = raw_smp_processor_id();
data = per_cpu_ptr(tr->trace_buffer.data, cpu);
@@ -536,6 +275,11 @@
{
ftrace_graph_addr_finish(trace);
+ if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT)) {
+ trace_recursion_clear(TRACE_GRAPH_NOTRACE_BIT);
+ return;
+ }
+
if (tracing_thresh &&
(trace->rettime - trace->calltime < tracing_thresh))
return;
@@ -543,17 +287,25 @@
trace_graph_return(trace);
}
+static struct fgraph_ops funcgraph_thresh_ops = {
+ .entryfunc = &trace_graph_entry,
+ .retfunc = &trace_graph_thresh_return,
+};
+
+static struct fgraph_ops funcgraph_ops = {
+ .entryfunc = &trace_graph_entry,
+ .retfunc = &trace_graph_return,
+};
+
static int graph_trace_init(struct trace_array *tr)
{
int ret;
set_graph_array(tr);
if (tracing_thresh)
- ret = register_ftrace_graph(&trace_graph_thresh_return,
- &trace_graph_entry);
+ ret = register_ftrace_graph(&funcgraph_thresh_ops);
else
- ret = register_ftrace_graph(&trace_graph_return,
- &trace_graph_entry);
+ ret = register_ftrace_graph(&funcgraph_ops);
if (ret)
return ret;
tracing_start_cmdline_record();
@@ -564,7 +316,10 @@
static void graph_trace_reset(struct trace_array *tr)
{
tracing_stop_cmdline_record();
- unregister_ftrace_graph();
+ if (tracing_thresh)
+ unregister_ftrace_graph(&funcgraph_thresh_ops);
+ else
+ unregister_ftrace_graph(&funcgraph_ops);
}
static int graph_trace_update_thresh(struct trace_array *tr)
@@ -622,6 +377,7 @@
{
trace_seq_putc(s, ' ');
trace_print_lat_fmt(s, entry);
+ trace_seq_puts(s, " | ");
}
/* If the pid changed since the last trace, output this event */
@@ -743,6 +499,17 @@
}
static void
+print_graph_rel_time(struct trace_iterator *iter, struct trace_seq *s)
+{
+ unsigned long long usecs;
+
+ usecs = iter->ts - iter->trace_buffer->time_start;
+ do_div(usecs, NSEC_PER_USEC);
+
+ trace_seq_printf(s, "%9llu us | ", usecs);
+}
+
+static void
print_graph_irq(struct trace_iterator *iter, unsigned long addr,
enum trace_type type, int cpu, pid_t pid, u32 flags)
{
@@ -759,6 +526,10 @@
if (flags & TRACE_GRAPH_PRINT_ABS_TIME)
print_graph_abs_time(iter->ts, s);
+ /* Relative time */
+ if (flags & TRACE_GRAPH_PRINT_REL_TIME)
+ print_graph_rel_time(iter, s);
+
/* Cpu */
if (flags & TRACE_GRAPH_PRINT_CPU)
print_graph_cpu(s, cpu);
@@ -874,10 +645,6 @@
cpu_data = per_cpu_ptr(data->cpu_data, cpu);
- /* If a graph tracer ignored set_graph_notrace */
- if (call->depth < -1)
- call->depth += FTRACE_NOTRACE_DEPTH;
-
/*
* Comments display at + 1 to depth. Since
* this is a leaf function, keep the comments
@@ -920,10 +687,6 @@
struct fgraph_cpu_data *cpu_data;
int cpu = iter->cpu;
- /* If a graph tracer ignored set_graph_notrace */
- if (call->depth < -1)
- call->depth += FTRACE_NOTRACE_DEPTH;
-
cpu_data = per_cpu_ptr(data->cpu_data, cpu);
cpu_data->depth = call->depth;
@@ -975,6 +738,10 @@
if (flags & TRACE_GRAPH_PRINT_ABS_TIME)
print_graph_abs_time(iter->ts, s);
+ /* Relative time */
+ if (flags & TRACE_GRAPH_PRINT_REL_TIME)
+ print_graph_rel_time(iter, s);
+
/* Cpu */
if (flags & TRACE_GRAPH_PRINT_CPU)
print_graph_cpu(s, cpu);
@@ -1351,6 +1118,8 @@
if (flags & TRACE_GRAPH_PRINT_ABS_TIME)
size += 16;
+ if (flags & TRACE_GRAPH_PRINT_REL_TIME)
+ size += 16;
if (flags & TRACE_GRAPH_PRINT_CPU)
size += 4;
if (flags & TRACE_GRAPH_PRINT_PROC)
@@ -1375,12 +1144,14 @@
seq_putc(s, '#');
if (flags & TRACE_GRAPH_PRINT_ABS_TIME)
seq_puts(s, " TIME ");
+ if (flags & TRACE_GRAPH_PRINT_REL_TIME)
+ seq_puts(s, " REL TIME ");
if (flags & TRACE_GRAPH_PRINT_CPU)
seq_puts(s, " CPU");
if (flags & TRACE_GRAPH_PRINT_PROC)
seq_puts(s, " TASK/PID ");
if (lat)
- seq_puts(s, "||||");
+ seq_puts(s, "|||| ");
if (flags & TRACE_GRAPH_PRINT_DURATION)
seq_puts(s, " DURATION ");
seq_puts(s, " FUNCTION CALLS\n");
@@ -1389,12 +1160,14 @@
seq_putc(s, '#');
if (flags & TRACE_GRAPH_PRINT_ABS_TIME)
seq_puts(s, " | ");
+ if (flags & TRACE_GRAPH_PRINT_REL_TIME)
+ seq_puts(s, " | ");
if (flags & TRACE_GRAPH_PRINT_CPU)
seq_puts(s, " | ");
if (flags & TRACE_GRAPH_PRINT_PROC)
seq_puts(s, " | | ");
if (lat)
- seq_puts(s, "||||");
+ seq_puts(s, "|||| ");
if (flags & TRACE_GRAPH_PRINT_DURATION)
seq_puts(s, " | | ");
seq_puts(s, " | | | |\n");
diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c
index 1e6db9c..862f4b0 100644
--- a/kernel/trace/trace_hwlat.c
+++ b/kernel/trace/trace_hwlat.c
@@ -150,7 +150,7 @@
if (enter)
nmi_ts_start = time_get();
else
- nmi_total_ts = time_get() - nmi_ts_start;
+ nmi_total_ts += time_get() - nmi_ts_start;
}
if (enter)
@@ -256,6 +256,8 @@
/* Keep a running maximum ever recorded hardware latency */
if (sample > tr->max_latency)
tr->max_latency = sample;
+ if (outer_sample > tr->max_latency)
+ tr->max_latency = outer_sample;
}
out:
@@ -277,7 +279,7 @@
* of this thread, than stop migrating for the duration
* of the current test.
*/
- if (!cpumask_equal(current_mask, &current->cpus_allowed))
+ if (!cpumask_equal(current_mask, current->cpus_ptr))
goto disable;
get_online_cpus();
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 98ea6d2..a745b0c 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -14,6 +14,7 @@
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/ftrace.h>
+#include <linux/kprobes.h>
#include "trace.h"
@@ -218,6 +219,11 @@
atomic_dec(&data->disabled);
}
+static struct fgraph_ops fgraph_ops = {
+ .entryfunc = &irqsoff_graph_entry,
+ .retfunc = &irqsoff_graph_return,
+};
+
static void irqsoff_trace_open(struct trace_iterator *iter)
{
if (is_graph(iter->tr))
@@ -233,7 +239,7 @@
#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_CPU | \
TRACE_GRAPH_PRINT_PROC | \
- TRACE_GRAPH_PRINT_ABS_TIME | \
+ TRACE_GRAPH_PRINT_REL_TIME | \
TRACE_GRAPH_PRINT_DURATION)
static enum print_line_t irqsoff_print_line(struct trace_iterator *iter)
@@ -272,13 +278,6 @@
#else
#define __trace_function trace_function
-#ifdef CONFIG_FUNCTION_TRACER
-static int irqsoff_graph_entry(struct ftrace_graph_ent *trace)
-{
- return -1;
-}
-#endif
-
static enum print_line_t irqsoff_print_line(struct trace_iterator *iter)
{
return TRACE_TYPE_UNHANDLED;
@@ -288,7 +287,6 @@
static void irqsoff_trace_close(struct trace_iterator *iter) { }
#ifdef CONFIG_FUNCTION_TRACER
-static void irqsoff_graph_return(struct ftrace_graph_ret *trace) { }
static void irqsoff_print_header(struct seq_file *s)
{
trace_default_header(s);
@@ -368,7 +366,7 @@
__trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
}
-static inline void
+static nokprobe_inline void
start_critical_timing(unsigned long ip, unsigned long parent_ip, int pc)
{
int cpu;
@@ -404,7 +402,7 @@
atomic_dec(&data->disabled);
}
-static inline void
+static nokprobe_inline void
stop_critical_timing(unsigned long ip, unsigned long parent_ip, int pc)
{
int cpu;
@@ -446,6 +444,7 @@
start_critical_timing(CALLER_ADDR0, CALLER_ADDR1, pc);
}
EXPORT_SYMBOL_GPL(start_critical_timings);
+NOKPROBE_SYMBOL(start_critical_timings);
void stop_critical_timings(void)
{
@@ -455,6 +454,7 @@
stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1, pc);
}
EXPORT_SYMBOL_GPL(stop_critical_timings);
+NOKPROBE_SYMBOL(stop_critical_timings);
#ifdef CONFIG_FUNCTION_TRACER
static bool function_enabled;
@@ -468,8 +468,7 @@
return 0;
if (graph)
- ret = register_ftrace_graph(&irqsoff_graph_return,
- &irqsoff_graph_entry);
+ ret = register_ftrace_graph(&fgraph_ops);
else
ret = register_ftrace_function(tr->ops);
@@ -485,7 +484,7 @@
return;
if (graph)
- unregister_ftrace_graph();
+ unregister_ftrace_graph(&fgraph_ops);
else
unregister_ftrace_function(tr->ops);
@@ -615,6 +614,7 @@
if (!preempt_trace(pc) && irq_trace())
stop_critical_timing(a0, a1, pc);
}
+NOKPROBE_SYMBOL(tracer_hardirqs_on);
void tracer_hardirqs_off(unsigned long a0, unsigned long a1)
{
@@ -623,6 +623,7 @@
if (!preempt_trace(pc) && irq_trace())
start_critical_timing(a0, a1, pc);
}
+NOKPROBE_SYMBOL(tracer_hardirqs_off);
static int irqsoff_tracer_init(struct trace_array *tr)
{
diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c
index d953c16..cca6504 100644
--- a/kernel/trace/trace_kdb.c
+++ b/kernel/trace/trace_kdb.c
@@ -17,48 +17,42 @@
#include "trace.h"
#include "trace_output.h"
-static void ftrace_dump_buf(int skip_lines, long cpu_file)
+static struct trace_iterator iter;
+static struct ring_buffer_iter *buffer_iter[CONFIG_NR_CPUS];
+
+static void ftrace_dump_buf(int skip_entries, long cpu_file)
{
- /* use static because iter can be a bit big for the stack */
- static struct trace_iterator iter;
- static struct ring_buffer_iter *buffer_iter[CONFIG_NR_CPUS];
struct trace_array *tr;
unsigned int old_userobj;
int cnt = 0, cpu;
- trace_init_global_iter(&iter);
- iter.buffer_iter = buffer_iter;
tr = iter.tr;
- for_each_tracing_cpu(cpu) {
- atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
- }
-
old_userobj = tr->trace_flags;
/* don't look at user memory in panic mode */
tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
kdb_printf("Dumping ftrace buffer:\n");
+ if (skip_entries)
+ kdb_printf("(skipping %d entries)\n", skip_entries);
- /* reset all but tr, trace, and overruns */
- memset(&iter.seq, 0,
- sizeof(struct trace_iterator) -
- offsetof(struct trace_iterator, seq));
+ trace_iterator_reset(&iter);
iter.iter_flags |= TRACE_FILE_LAT_FMT;
- iter.pos = -1;
if (cpu_file == RING_BUFFER_ALL_CPUS) {
for_each_tracing_cpu(cpu) {
iter.buffer_iter[cpu] =
- ring_buffer_read_prepare(iter.trace_buffer->buffer, cpu);
+ ring_buffer_read_prepare(iter.trace_buffer->buffer,
+ cpu, GFP_ATOMIC);
ring_buffer_read_start(iter.buffer_iter[cpu]);
tracing_iter_reset(&iter, cpu);
}
} else {
iter.cpu_file = cpu_file;
iter.buffer_iter[cpu_file] =
- ring_buffer_read_prepare(iter.trace_buffer->buffer, cpu_file);
+ ring_buffer_read_prepare(iter.trace_buffer->buffer,
+ cpu_file, GFP_ATOMIC);
ring_buffer_read_start(iter.buffer_iter[cpu_file]);
tracing_iter_reset(&iter, cpu_file);
}
@@ -68,11 +62,11 @@
kdb_printf("---------------------------------\n");
cnt++;
- if (!skip_lines) {
+ if (!skip_entries) {
print_trace_line(&iter);
trace_printk_seq(&iter.seq);
} else {
- skip_lines--;
+ skip_entries--;
}
if (KDB_FLAG(CMD_INTERRUPT))
@@ -88,10 +82,6 @@
tr->trace_flags = old_userobj;
for_each_tracing_cpu(cpu) {
- atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
- }
-
- for_each_tracing_cpu(cpu) {
if (iter.buffer_iter[cpu]) {
ring_buffer_read_finish(iter.buffer_iter[cpu]);
iter.buffer_iter[cpu] = NULL;
@@ -104,17 +94,19 @@
*/
static int kdb_ftdump(int argc, const char **argv)
{
- int skip_lines = 0;
+ int skip_entries = 0;
long cpu_file;
char *cp;
+ int cnt;
+ int cpu;
if (argc > 2)
return KDB_ARGCOUNT;
if (argc) {
- skip_lines = simple_strtol(argv[1], &cp, 0);
+ skip_entries = simple_strtol(argv[1], &cp, 0);
if (*cp)
- skip_lines = 0;
+ skip_entries = 0;
}
if (argc == 2) {
@@ -127,7 +119,29 @@
}
kdb_trap_printk++;
- ftrace_dump_buf(skip_lines, cpu_file);
+
+ trace_init_global_iter(&iter);
+ iter.buffer_iter = buffer_iter;
+
+ for_each_tracing_cpu(cpu) {
+ atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
+ }
+
+ /* A negative skip_entries means skip all but the last entries */
+ if (skip_entries < 0) {
+ if (cpu_file == RING_BUFFER_ALL_CPUS)
+ cnt = trace_total_entries(NULL);
+ else
+ cnt = trace_total_entries_cpu(NULL, cpu_file);
+ skip_entries = max(cnt + skip_entries, 0);
+ }
+
+ ftrace_dump_buf(skip_entries, cpu_file);
+
+ for_each_tracing_cpu(cpu) {
+ atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
+ }
+
kdb_trap_printk--;
return 0;
@@ -135,8 +149,9 @@
static __init int kdb_ftrace_register(void)
{
- kdb_register_flags("ftdump", kdb_ftdump, "[skip_#lines] [cpu]",
- "Dump ftrace log", 0, KDB_ENABLE_ALWAYS_SAFE);
+ kdb_register_flags("ftdump", kdb_ftdump, "[skip_#entries] [cpu]",
+ "Dump ftrace log; -skip dumps last #entries", 0,
+ KDB_ENABLE_ALWAYS_SAFE);
return 0;
}
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index f9a0cd0..1552a95 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -7,28 +7,79 @@
*/
#define pr_fmt(fmt) "trace_kprobe: " fmt
+#include <linux/security.h>
#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/rculist.h>
#include <linux/error-injection.h>
+#include <asm/setup.h> /* for COMMAND_LINE_SIZE */
+
+#include "trace_dynevent.h"
#include "trace_kprobe_selftest.h"
#include "trace_probe.h"
+#include "trace_probe_tmpl.h"
#define KPROBE_EVENT_SYSTEM "kprobes"
#define KRETPROBE_MAXACTIVE_MAX 4096
+#define MAX_KPROBE_CMDLINE_SIZE 1024
-/**
+/* Kprobe early definition from command line */
+static char kprobe_boot_events_buf[COMMAND_LINE_SIZE] __initdata;
+static bool kprobe_boot_events_enabled __initdata;
+
+static int __init set_kprobe_boot_events(char *str)
+{
+ strlcpy(kprobe_boot_events_buf, str, COMMAND_LINE_SIZE);
+ return 0;
+}
+__setup("kprobe_event=", set_kprobe_boot_events);
+
+static int trace_kprobe_create(int argc, const char **argv);
+static int trace_kprobe_show(struct seq_file *m, struct dyn_event *ev);
+static int trace_kprobe_release(struct dyn_event *ev);
+static bool trace_kprobe_is_busy(struct dyn_event *ev);
+static bool trace_kprobe_match(const char *system, const char *event,
+ int argc, const char **argv, struct dyn_event *ev);
+
+static struct dyn_event_operations trace_kprobe_ops = {
+ .create = trace_kprobe_create,
+ .show = trace_kprobe_show,
+ .is_busy = trace_kprobe_is_busy,
+ .free = trace_kprobe_release,
+ .match = trace_kprobe_match,
+};
+
+/*
* Kprobe event core functions
*/
struct trace_kprobe {
- struct list_head list;
+ struct dyn_event devent;
struct kretprobe rp; /* Use rp.kp for kprobe use */
unsigned long __percpu *nhit;
const char *symbol; /* symbol name */
struct trace_probe tp;
};
+static bool is_trace_kprobe(struct dyn_event *ev)
+{
+ return ev->ops == &trace_kprobe_ops;
+}
+
+static struct trace_kprobe *to_trace_kprobe(struct dyn_event *ev)
+{
+ return container_of(ev, struct trace_kprobe, devent);
+}
+
+/**
+ * for_each_trace_kprobe - iterate over the trace_kprobe list
+ * @pos: the struct trace_kprobe * for each entry
+ * @dpos: the struct dyn_event * to use as a loop cursor
+ */
+#define for_each_trace_kprobe(pos, dpos) \
+ for_each_dyn_event(dpos) \
+ if (is_trace_kprobe(dpos) && (pos = to_trace_kprobe(dpos)))
+
#define SIZEOF_TRACE_KPROBE(n) \
(offsetof(struct trace_kprobe, tp.args) + \
(sizeof(struct probe_arg) * (n)))
@@ -80,6 +131,45 @@
return ret;
}
+static bool trace_kprobe_is_busy(struct dyn_event *ev)
+{
+ struct trace_kprobe *tk = to_trace_kprobe(ev);
+
+ return trace_probe_is_enabled(&tk->tp);
+}
+
+static bool trace_kprobe_match_command_head(struct trace_kprobe *tk,
+ int argc, const char **argv)
+{
+ char buf[MAX_ARGSTR_LEN + 1];
+
+ if (!argc)
+ return true;
+
+ if (!tk->symbol)
+ snprintf(buf, sizeof(buf), "0x%p", tk->rp.kp.addr);
+ else if (tk->rp.kp.offset)
+ snprintf(buf, sizeof(buf), "%s+%u",
+ trace_kprobe_symbol(tk), tk->rp.kp.offset);
+ else
+ snprintf(buf, sizeof(buf), "%s", trace_kprobe_symbol(tk));
+ if (strcmp(buf, argv[0]))
+ return false;
+ argc--; argv++;
+
+ return trace_probe_match_command_args(&tk->tp, argc, argv);
+}
+
+static bool trace_kprobe_match(const char *system, const char *event,
+ int argc, const char **argv, struct dyn_event *ev)
+{
+ struct trace_kprobe *tk = to_trace_kprobe(ev);
+
+ return strcmp(trace_probe_name(&tk->tp), event) == 0 &&
+ (!system || strcmp(trace_probe_group_name(&tk->tp), system) == 0) &&
+ trace_kprobe_match_command_head(tk, argc, argv);
+}
+
static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
{
unsigned long nhit = 0;
@@ -91,6 +181,12 @@
return nhit;
}
+static nokprobe_inline bool trace_kprobe_is_registered(struct trace_kprobe *tk)
+{
+ return !(list_empty(&tk->rp.kp.list) &&
+ hlist_unhashed(&tk->rp.kp.hlist));
+}
+
/* Return 0 if it fails to find the symbol address */
static nokprobe_inline
unsigned long trace_kprobe_address(struct trace_kprobe *tk)
@@ -108,209 +204,51 @@
return addr;
}
+static nokprobe_inline struct trace_kprobe *
+trace_kprobe_primary_from_call(struct trace_event_call *call)
+{
+ struct trace_probe *tp;
+
+ tp = trace_probe_primary_from_call(call);
+ if (WARN_ON_ONCE(!tp))
+ return NULL;
+
+ return container_of(tp, struct trace_kprobe, tp);
+}
+
bool trace_kprobe_on_func_entry(struct trace_event_call *call)
{
- struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
+ struct trace_kprobe *tk = trace_kprobe_primary_from_call(call);
- return kprobe_on_func_entry(tk->rp.kp.addr,
+ return tk ? kprobe_on_func_entry(tk->rp.kp.addr,
tk->rp.kp.addr ? NULL : tk->rp.kp.symbol_name,
- tk->rp.kp.addr ? 0 : tk->rp.kp.offset);
+ tk->rp.kp.addr ? 0 : tk->rp.kp.offset) : false;
}
bool trace_kprobe_error_injectable(struct trace_event_call *call)
{
- struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
+ struct trace_kprobe *tk = trace_kprobe_primary_from_call(call);
- return within_error_injection_list(trace_kprobe_address(tk));
+ return tk ? within_error_injection_list(trace_kprobe_address(tk)) :
+ false;
}
static int register_kprobe_event(struct trace_kprobe *tk);
static int unregister_kprobe_event(struct trace_kprobe *tk);
-static DEFINE_MUTEX(probe_lock);
-static LIST_HEAD(probe_list);
-
static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
static int kretprobe_dispatcher(struct kretprobe_instance *ri,
struct pt_regs *regs);
-/* Memory fetching by symbol */
-struct symbol_cache {
- char *symbol;
- long offset;
- unsigned long addr;
-};
-
-unsigned long update_symbol_cache(struct symbol_cache *sc)
+static void free_trace_kprobe(struct trace_kprobe *tk)
{
- sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);
-
- if (sc->addr)
- sc->addr += sc->offset;
-
- return sc->addr;
-}
-
-void free_symbol_cache(struct symbol_cache *sc)
-{
- kfree(sc->symbol);
- kfree(sc);
-}
-
-struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
-{
- struct symbol_cache *sc;
-
- if (!sym || strlen(sym) == 0)
- return NULL;
-
- sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
- if (!sc)
- return NULL;
-
- sc->symbol = kstrdup(sym, GFP_KERNEL);
- if (!sc->symbol) {
- kfree(sc);
- return NULL;
- }
- sc->offset = offset;
- update_symbol_cache(sc);
-
- return sc;
-}
-
-/*
- * Kprobes-specific fetch functions
- */
-#define DEFINE_FETCH_stack(type) \
-static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs, \
- void *offset, void *dest) \
-{ \
- *(type *)dest = (type)regs_get_kernel_stack_nth(regs, \
- (unsigned int)((unsigned long)offset)); \
-} \
-NOKPROBE_SYMBOL(FETCH_FUNC_NAME(stack, type));
-
-DEFINE_BASIC_FETCH_FUNCS(stack)
-/* No string on the stack entry */
-#define fetch_stack_string NULL
-#define fetch_stack_string_size NULL
-
-#define DEFINE_FETCH_memory(type) \
-static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs, \
- void *addr, void *dest) \
-{ \
- type retval; \
- if (probe_kernel_address(addr, retval)) \
- *(type *)dest = 0; \
- else \
- *(type *)dest = retval; \
-} \
-NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, type));
-
-DEFINE_BASIC_FETCH_FUNCS(memory)
-/*
- * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
- * length and relative data location.
- */
-static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
- void *addr, void *dest)
-{
- int maxlen = get_rloc_len(*(u32 *)dest);
- u8 *dst = get_rloc_data(dest);
- long ret;
-
- if (!maxlen)
- return;
-
- /*
- * Try to get string again, since the string can be changed while
- * probing.
- */
- ret = strncpy_from_unsafe(dst, addr, maxlen);
-
- if (ret < 0) { /* Failed to fetch string */
- dst[0] = '\0';
- *(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest));
- } else {
- *(u32 *)dest = make_data_rloc(ret, get_rloc_offs(*(u32 *)dest));
+ if (tk) {
+ trace_probe_cleanup(&tk->tp);
+ kfree(tk->symbol);
+ free_percpu(tk->nhit);
+ kfree(tk);
}
}
-NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string));
-
-/* Return the length of string -- including null terminal byte */
-static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
- void *addr, void *dest)
-{
- mm_segment_t old_fs;
- int ret, len = 0;
- u8 c;
-
- old_fs = get_fs();
- set_fs(KERNEL_DS);
- pagefault_disable();
-
- do {
- ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1);
- len++;
- } while (c && ret == 0 && len < MAX_STRING_SIZE);
-
- pagefault_enable();
- set_fs(old_fs);
-
- if (ret < 0) /* Failed to check the length */
- *(u32 *)dest = 0;
- else
- *(u32 *)dest = len;
-}
-NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string_size));
-
-#define DEFINE_FETCH_symbol(type) \
-void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs, void *data, void *dest)\
-{ \
- struct symbol_cache *sc = data; \
- if (sc->addr) \
- fetch_memory_##type(regs, (void *)sc->addr, dest); \
- else \
- *(type *)dest = 0; \
-} \
-NOKPROBE_SYMBOL(FETCH_FUNC_NAME(symbol, type));
-
-DEFINE_BASIC_FETCH_FUNCS(symbol)
-DEFINE_FETCH_symbol(string)
-DEFINE_FETCH_symbol(string_size)
-
-/* kprobes don't support file_offset fetch methods */
-#define fetch_file_offset_u8 NULL
-#define fetch_file_offset_u16 NULL
-#define fetch_file_offset_u32 NULL
-#define fetch_file_offset_u64 NULL
-#define fetch_file_offset_string NULL
-#define fetch_file_offset_string_size NULL
-
-/* Fetch type information table */
-static const struct fetch_type kprobes_fetch_type_table[] = {
- /* Special types */
- [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
- sizeof(u32), 1, "__data_loc char[]"),
- [FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32,
- string_size, sizeof(u32), 0, "u32"),
- /* Basic types */
- ASSIGN_FETCH_TYPE(u8, u8, 0),
- ASSIGN_FETCH_TYPE(u16, u16, 0),
- ASSIGN_FETCH_TYPE(u32, u32, 0),
- ASSIGN_FETCH_TYPE(u64, u64, 0),
- ASSIGN_FETCH_TYPE(s8, u8, 1),
- ASSIGN_FETCH_TYPE(s16, u16, 1),
- ASSIGN_FETCH_TYPE(s32, u32, 1),
- ASSIGN_FETCH_TYPE(s64, u64, 1),
- ASSIGN_FETCH_TYPE_ALIAS(x8, u8, u8, 0),
- ASSIGN_FETCH_TYPE_ALIAS(x16, u16, u16, 0),
- ASSIGN_FETCH_TYPE_ALIAS(x32, u32, u32, 0),
- ASSIGN_FETCH_TYPE_ALIAS(x64, u64, u64, 0),
-
- ASSIGN_FETCH_TYPE_END
-};
/*
* Allocate new trace_probe and initialize it (including kprobes).
@@ -349,59 +287,29 @@
tk->rp.kp.pre_handler = kprobe_dispatcher;
tk->rp.maxactive = maxactive;
+ INIT_HLIST_NODE(&tk->rp.kp.hlist);
+ INIT_LIST_HEAD(&tk->rp.kp.list);
- if (!event || !is_good_name(event)) {
- ret = -EINVAL;
- goto error;
- }
-
- tk->tp.call.class = &tk->tp.class;
- tk->tp.call.name = kstrdup(event, GFP_KERNEL);
- if (!tk->tp.call.name)
+ ret = trace_probe_init(&tk->tp, event, group);
+ if (ret < 0)
goto error;
- if (!group || !is_good_name(group)) {
- ret = -EINVAL;
- goto error;
- }
-
- tk->tp.class.system = kstrdup(group, GFP_KERNEL);
- if (!tk->tp.class.system)
- goto error;
-
- INIT_LIST_HEAD(&tk->list);
- INIT_LIST_HEAD(&tk->tp.files);
+ dyn_event_init(&tk->devent, &trace_kprobe_ops);
return tk;
error:
- kfree(tk->tp.call.name);
- kfree(tk->symbol);
- free_percpu(tk->nhit);
- kfree(tk);
+ free_trace_kprobe(tk);
return ERR_PTR(ret);
}
-static void free_trace_kprobe(struct trace_kprobe *tk)
-{
- int i;
-
- for (i = 0; i < tk->tp.nr_args; i++)
- traceprobe_free_probe_arg(&tk->tp.args[i]);
-
- kfree(tk->tp.call.class->system);
- kfree(tk->tp.call.name);
- kfree(tk->symbol);
- free_percpu(tk->nhit);
- kfree(tk);
-}
-
static struct trace_kprobe *find_trace_kprobe(const char *event,
const char *group)
{
+ struct dyn_event *pos;
struct trace_kprobe *tk;
- list_for_each_entry(tk, &probe_list, list)
- if (strcmp(trace_event_name(&tk->tp.call), event) == 0 &&
- strcmp(tk->tp.call.class->system, group) == 0)
+ for_each_trace_kprobe(tk, pos)
+ if (strcmp(trace_probe_name(&tk->tp), event) == 0 &&
+ strcmp(trace_probe_group_name(&tk->tp), group) == 0)
return tk;
return NULL;
}
@@ -410,7 +318,7 @@
{
int ret = 0;
- if (trace_probe_is_registered(&tk->tp) && !trace_kprobe_has_gone(tk)) {
+ if (trace_kprobe_is_registered(tk) && !trace_kprobe_has_gone(tk)) {
if (trace_kprobe_is_return(tk))
ret = enable_kretprobe(&tk->rp);
else
@@ -420,41 +328,70 @@
return ret;
}
+static void __disable_trace_kprobe(struct trace_probe *tp)
+{
+ struct trace_probe *pos;
+ struct trace_kprobe *tk;
+
+ list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
+ tk = container_of(pos, struct trace_kprobe, tp);
+ if (!trace_kprobe_is_registered(tk))
+ continue;
+ if (trace_kprobe_is_return(tk))
+ disable_kretprobe(&tk->rp);
+ else
+ disable_kprobe(&tk->rp.kp);
+ }
+}
+
/*
* Enable trace_probe
* if the file is NULL, enable "perf" handler, or enable "trace" handler.
*/
-static int
-enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
+static int enable_trace_kprobe(struct trace_event_call *call,
+ struct trace_event_file *file)
{
- struct event_file_link *link;
+ struct trace_probe *pos, *tp;
+ struct trace_kprobe *tk;
+ bool enabled;
int ret = 0;
+ tp = trace_probe_primary_from_call(call);
+ if (WARN_ON_ONCE(!tp))
+ return -ENODEV;
+ enabled = trace_probe_is_enabled(tp);
+
+ /* This also changes "enabled" state */
if (file) {
- link = kmalloc(sizeof(*link), GFP_KERNEL);
- if (!link) {
- ret = -ENOMEM;
- goto out;
- }
+ ret = trace_probe_add_file(tp, file);
+ if (ret)
+ return ret;
+ } else
+ trace_probe_set_flag(tp, TP_FLAG_PROFILE);
- link->file = file;
- list_add_tail_rcu(&link->list, &tk->tp.files);
+ if (enabled)
+ return 0;
- tk->tp.flags |= TP_FLAG_TRACE;
- ret = __enable_trace_kprobe(tk);
- if (ret) {
- list_del_rcu(&link->list);
- kfree(link);
- tk->tp.flags &= ~TP_FLAG_TRACE;
- }
-
- } else {
- tk->tp.flags |= TP_FLAG_PROFILE;
+ list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
+ tk = container_of(pos, struct trace_kprobe, tp);
+ if (trace_kprobe_has_gone(tk))
+ continue;
ret = __enable_trace_kprobe(tk);
if (ret)
- tk->tp.flags &= ~TP_FLAG_PROFILE;
+ break;
+ enabled = true;
}
- out:
+
+ if (ret) {
+ /* Failed to enable one of them. Roll back all */
+ if (enabled)
+ __disable_trace_kprobe(tp);
+ if (file)
+ trace_probe_remove_file(tp, file);
+ else
+ trace_probe_clear_flag(tp, TP_FLAG_PROFILE);
+ }
+
return ret;
}
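/*
 * A rough sketch of the ownership model this refactoring assumes (field
 * names abridged from trace_probe.h; illustrative, not authoritative):
 *
 *	struct trace_probe_event {
 *		struct trace_event_call	call;	// one ftrace event ...
 *		struct list_head	probes;	// ... shared by N sibling probes
 *	};
 *
 * Each trace_kprobe embeds a trace_probe whose ->event points at such a
 * shared trace_probe_event, so enable_trace_kprobe() resolves the primary
 * trace_probe from the trace_event_call and then arms every sibling kprobe,
 * rolling all of them back if one of them fails.
 */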
@@ -462,59 +399,38 @@
* Disable trace_probe
* if the file is NULL, disable "perf" handler, or disable "trace" handler.
*/
-static int
-disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
+static int disable_trace_kprobe(struct trace_event_call *call,
+ struct trace_event_file *file)
{
- struct event_file_link *link = NULL;
- int wait = 0;
- int ret = 0;
+ struct trace_probe *tp;
+
+ tp = trace_probe_primary_from_call(call);
+ if (WARN_ON_ONCE(!tp))
+ return -ENODEV;
if (file) {
- link = find_event_file_link(&tk->tp, file);
- if (!link) {
- ret = -EINVAL;
+ if (!trace_probe_get_file_link(tp, file))
+ return -ENOENT;
+ if (!trace_probe_has_single_file(tp))
goto out;
- }
-
- list_del_rcu(&link->list);
- wait = 1;
- if (!list_empty(&tk->tp.files))
- goto out;
-
- tk->tp.flags &= ~TP_FLAG_TRACE;
+ trace_probe_clear_flag(tp, TP_FLAG_TRACE);
} else
- tk->tp.flags &= ~TP_FLAG_PROFILE;
+ trace_probe_clear_flag(tp, TP_FLAG_PROFILE);
- if (!trace_probe_is_enabled(&tk->tp) && trace_probe_is_registered(&tk->tp)) {
- if (trace_kprobe_is_return(tk))
- disable_kretprobe(&tk->rp);
- else
- disable_kprobe(&tk->rp.kp);
- wait = 1;
- }
+ if (!trace_probe_is_enabled(tp))
+ __disable_trace_kprobe(tp);
- /*
- * if tk is not added to any list, it must be a local trace_kprobe
- * created with perf_event_open. We don't need to wait for these
- * trace_kprobes
- */
- if (list_empty(&tk->list))
- wait = 0;
out:
- if (wait) {
+ if (file)
/*
- * Synchronize with kprobe_trace_func/kretprobe_trace_func
- * to ensure disabled (all running handlers are finished).
- * This is not only for kfree(), but also the caller,
- * trace_remove_event_call() supposes it for releasing
- * event_call related objects, which will be accessed in
- * the kprobe_trace_func/kretprobe_trace_func.
+		 * Synchronization is done in the function below. For perf
+		 * events, file == NULL and perf_trace_event_unreg() calls
+		 * tracepoint_synchronize_unregister() to synchronize the
+		 * event, so we don't need to care about it here.
*/
- synchronize_sched();
- kfree(link); /* Ignored if link == NULL */
- }
+ trace_probe_remove_file(tp, file);
- return ret;
+ return 0;
}
#if defined(CONFIG_KPROBES_ON_FTRACE) && \
@@ -545,7 +461,11 @@
{
int i, ret;
- if (trace_probe_is_registered(&tk->tp))
+ ret = security_locked_down(LOCKDOWN_KPROBES);
+ if (ret)
+ return ret;
+
+ if (trace_kprobe_is_registered(tk))
return -EINVAL;
if (within_notrace_func(tk)) {
@@ -554,8 +474,11 @@
return -EINVAL;
}
- for (i = 0; i < tk->tp.nr_args; i++)
- traceprobe_update_arg(&tk->tp.args[i]);
+ for (i = 0; i < tk->tp.nr_args; i++) {
+ ret = traceprobe_update_arg(&tk->tp.args[i]);
+ if (ret)
+ return ret;
+ }
/* Set/clear disabled flag according to tp->flag */
if (trace_probe_is_enabled(&tk->tp))
@@ -568,34 +491,32 @@
else
ret = register_kprobe(&tk->rp.kp);
- if (ret == 0) {
- tk->tp.flags |= TP_FLAG_REGISTERED;
- } else if (ret == -EILSEQ) {
- pr_warn("Probing address(0x%p) is not an instruction boundary.\n",
- tk->rp.kp.addr);
- ret = -EINVAL;
- }
return ret;
}
/* Internal unregister function - just handle k*probes and flags */
static void __unregister_trace_kprobe(struct trace_kprobe *tk)
{
- if (trace_probe_is_registered(&tk->tp)) {
+ if (trace_kprobe_is_registered(tk)) {
if (trace_kprobe_is_return(tk))
unregister_kretprobe(&tk->rp);
else
unregister_kprobe(&tk->rp.kp);
- tk->tp.flags &= ~TP_FLAG_REGISTERED;
- /* Cleanup kprobe for reuse */
+ /* Cleanup kprobe for reuse and mark it unregistered */
+ INIT_HLIST_NODE(&tk->rp.kp.hlist);
+ INIT_LIST_HEAD(&tk->rp.kp.list);
if (tk->rp.kp.symbol_name)
tk->rp.kp.addr = NULL;
}
}
-/* Unregister a trace_probe and probe_event: call with locking probe_lock */
+/* Unregister a trace_probe and probe_event */
static int unregister_trace_kprobe(struct trace_kprobe *tk)
{
+ /* If other probes are on the event, just unregister kprobe */
+ if (trace_probe_has_sibling(&tk->tp))
+ goto unreg;
+
/* Enabled event can not be unregistered */
if (trace_probe_is_enabled(&tk->tp))
return -EBUSY;
@@ -604,28 +525,101 @@
if (unregister_kprobe_event(tk))
return -EBUSY;
+unreg:
__unregister_trace_kprobe(tk);
- list_del(&tk->list);
+ dyn_event_remove(&tk->devent);
+ trace_probe_unlink(&tk->tp);
return 0;
}
+static bool trace_kprobe_has_same_kprobe(struct trace_kprobe *orig,
+ struct trace_kprobe *comp)
+{
+ struct trace_probe_event *tpe = orig->tp.event;
+ struct trace_probe *pos;
+ int i;
+
+ list_for_each_entry(pos, &tpe->probes, list) {
+ orig = container_of(pos, struct trace_kprobe, tp);
+ if (strcmp(trace_kprobe_symbol(orig),
+ trace_kprobe_symbol(comp)) ||
+ trace_kprobe_offset(orig) != trace_kprobe_offset(comp))
+ continue;
+
+ /*
+ * trace_probe_compare_arg_type() ensured that nr_args and
+ * each argument name and type are same. Let's compare comm.
+ */
+ for (i = 0; i < orig->tp.nr_args; i++) {
+ if (strcmp(orig->tp.args[i].comm,
+ comp->tp.args[i].comm))
+ break;
+ }
+
+ if (i == orig->tp.nr_args)
+ return true;
+ }
+
+ return false;
+}
+
+static int append_trace_kprobe(struct trace_kprobe *tk, struct trace_kprobe *to)
+{
+ int ret;
+
+ ret = trace_probe_compare_arg_type(&tk->tp, &to->tp);
+ if (ret) {
+ /* Note that argument starts index = 2 */
+ trace_probe_log_set_index(ret + 1);
+ trace_probe_log_err(0, DIFF_ARG_TYPE);
+ return -EEXIST;
+ }
+ if (trace_kprobe_has_same_kprobe(to, tk)) {
+ trace_probe_log_set_index(0);
+ trace_probe_log_err(0, SAME_PROBE);
+ return -EEXIST;
+ }
+
+ /* Append to existing event */
+ ret = trace_probe_append(&tk->tp, &to->tp);
+ if (ret)
+ return ret;
+
+ /* Register k*probe */
+ ret = __register_trace_kprobe(tk);
+ if (ret == -ENOENT && !trace_kprobe_module_exist(tk)) {
+ pr_warn("This probe might be able to register after target module is loaded. Continue.\n");
+ ret = 0;
+ }
+
+ if (ret)
+ trace_probe_unlink(&tk->tp);
+ else
+ dyn_event_add(&tk->devent);
+
+ return ret;
+}
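/*
 * Illustration of the new "append" behaviour (event and symbol names below
 * are hypothetical). Writing both of these to kprobe_events now attaches two
 * probes to a single event instead of replacing the first definition:
 *
 *	p:testevent vfs_read count=$arg3
 *	p:testevent vfs_write count=$arg3
 *
 * Appending is refused with DIFF_ARG_TYPE if the argument lists differ,
 * SAME_PROBE if the second definition targets the same symbol+offset, and
 * DIFF_PROBE_TYPE if a kprobe and a kretprobe share one event name.
 */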
+
/* Register a trace_probe and probe_event */
static int register_trace_kprobe(struct trace_kprobe *tk)
{
struct trace_kprobe *old_tk;
int ret;
- mutex_lock(&probe_lock);
+ mutex_lock(&event_mutex);
- /* Delete old (same name) event if exist */
- old_tk = find_trace_kprobe(trace_event_name(&tk->tp.call),
- tk->tp.call.class->system);
+ old_tk = find_trace_kprobe(trace_probe_name(&tk->tp),
+ trace_probe_group_name(&tk->tp));
if (old_tk) {
- ret = unregister_trace_kprobe(old_tk);
- if (ret < 0)
- goto end;
- free_trace_kprobe(old_tk);
+ if (trace_kprobe_is_return(tk) != trace_kprobe_is_return(old_tk)) {
+ trace_probe_log_set_index(0);
+ trace_probe_log_err(0, DIFF_PROBE_TYPE);
+ ret = -EEXIST;
+ } else {
+ ret = append_trace_kprobe(tk, old_tk);
+ }
+ goto end;
}
/* Register new event */
@@ -645,10 +639,10 @@
if (ret < 0)
unregister_kprobe_event(tk);
else
- list_add_tail(&tk->list, &probe_list);
+ dyn_event_add(&tk->devent);
end:
- mutex_unlock(&probe_lock);
+ mutex_unlock(&event_mutex);
return ret;
}
@@ -657,6 +651,7 @@
unsigned long val, void *data)
{
struct module *mod = data;
+ struct dyn_event *pos;
struct trace_kprobe *tk;
int ret;
@@ -664,19 +659,19 @@
return NOTIFY_DONE;
/* Update probes on coming module */
- mutex_lock(&probe_lock);
- list_for_each_entry(tk, &probe_list, list) {
+ mutex_lock(&event_mutex);
+ for_each_trace_kprobe(tk, pos) {
if (trace_kprobe_within_module(tk, mod)) {
/* Don't need to check busy - this should have gone. */
__unregister_trace_kprobe(tk);
ret = __register_trace_kprobe(tk);
if (ret)
pr_warn("Failed to re-register probe %s on %s: %d\n",
- trace_event_name(&tk->tp.call),
+ trace_probe_name(&tk->tp),
mod->name, ret);
}
}
- mutex_unlock(&probe_lock);
+ mutex_unlock(&event_mutex);
return NOTIFY_DONE;
}
@@ -694,7 +689,7 @@
*name = '_';
}
-static int create_trace_kprobe(int argc, char **argv)
+static int trace_kprobe_create(int argc, const char *argv[])
{
/*
* Argument syntax:
@@ -717,114 +712,99 @@
* Type of args:
* FETCHARG:TYPE : use TYPE instead of unsigned long.
*/
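	/*
	 * For illustration, two commands this parser accepts (event names are
	 * hypothetical; the full grammar is in
	 * Documentation/trace/kprobetrace.rst):
	 *
	 *	p:myprobe do_sys_open dfd=$arg1 flags=$arg3
	 *	r100:myretprobe do_sys_open $retval
	 *
	 * The leading "100" in "r100" sets maxactive for the kretprobe, and
	 * $argN/$retval are resolved by parse_probe_vars() in trace_probe.c.
	 */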
- struct trace_kprobe *tk;
- int i, ret = 0;
- bool is_return = false, is_delete = false;
- char *symbol = NULL, *event = NULL, *group = NULL;
+ struct trace_kprobe *tk = NULL;
+ int i, len, ret = 0;
+ bool is_return = false;
+ char *symbol = NULL, *tmp = NULL;
+ const char *event = NULL, *group = KPROBE_EVENT_SYSTEM;
int maxactive = 0;
- char *arg;
long offset = 0;
void *addr = NULL;
char buf[MAX_EVENT_NAME_LEN];
+ unsigned int flags = TPARG_FL_KERNEL;
- /* argc must be >= 1 */
- if (argv[0][0] == 'p')
- is_return = false;
- else if (argv[0][0] == 'r')
+ switch (argv[0][0]) {
+ case 'r':
is_return = true;
- else if (argv[0][0] == '-')
- is_delete = true;
- else {
- pr_info("Probe definition must be started with 'p', 'r' or"
- " '-'.\n");
- return -EINVAL;
+ flags |= TPARG_FL_RETURN;
+ break;
+ case 'p':
+ break;
+ default:
+ return -ECANCELED;
}
+ if (argc < 2)
+ return -ECANCELED;
+
+ trace_probe_log_init("trace_kprobe", argc, argv);
event = strchr(&argv[0][1], ':');
- if (event) {
- event[0] = '\0';
+ if (event)
event++;
- }
- if (is_return && isdigit(argv[0][1])) {
- ret = kstrtouint(&argv[0][1], 0, &maxactive);
- if (ret) {
- pr_info("Failed to parse maxactive.\n");
- return ret;
+
+ if (isdigit(argv[0][1])) {
+ if (!is_return) {
+ trace_probe_log_err(1, MAXACT_NO_KPROBE);
+ goto parse_error;
+ }
+ if (event)
+ len = event - &argv[0][1] - 1;
+ else
+ len = strlen(&argv[0][1]);
+ if (len > MAX_EVENT_NAME_LEN - 1) {
+ trace_probe_log_err(1, BAD_MAXACT);
+ goto parse_error;
+ }
+ memcpy(buf, &argv[0][1], len);
+ buf[len] = '\0';
+ ret = kstrtouint(buf, 0, &maxactive);
+ if (ret || !maxactive) {
+ trace_probe_log_err(1, BAD_MAXACT);
+ goto parse_error;
}
/* kretprobes instances are iterated over via a list. The
* maximum should stay reasonable.
*/
if (maxactive > KRETPROBE_MAXACTIVE_MAX) {
- pr_info("Maxactive is too big (%d > %d).\n",
- maxactive, KRETPROBE_MAXACTIVE_MAX);
- return -E2BIG;
+ trace_probe_log_err(1, MAXACT_TOO_BIG);
+ goto parse_error;
}
}
- if (event) {
- if (strchr(event, '/')) {
- group = event;
- event = strchr(group, '/') + 1;
- event[-1] = '\0';
- if (strlen(group) == 0) {
- pr_info("Group name is not specified\n");
- return -EINVAL;
- }
- }
- if (strlen(event) == 0) {
- pr_info("Event name is not specified\n");
- return -EINVAL;
- }
- }
- if (!group)
- group = KPROBE_EVENT_SYSTEM;
-
- if (is_delete) {
- if (!event) {
- pr_info("Delete command needs an event name.\n");
- return -EINVAL;
- }
- mutex_lock(&probe_lock);
- tk = find_trace_kprobe(event, group);
- if (!tk) {
- mutex_unlock(&probe_lock);
- pr_info("Event %s/%s doesn't exist.\n", group, event);
- return -ENOENT;
- }
- /* delete an event */
- ret = unregister_trace_kprobe(tk);
- if (ret == 0)
- free_trace_kprobe(tk);
- mutex_unlock(&probe_lock);
- return ret;
- }
-
- if (argc < 2) {
- pr_info("Probe point is not specified.\n");
- return -EINVAL;
- }
-
/* try to parse an address. if that fails, try to read the
* input as a symbol. */
if (kstrtoul(argv[1], 0, (unsigned long *)&addr)) {
+ trace_probe_log_set_index(1);
+ /* Check whether uprobe event specified */
+ if (strchr(argv[1], '/') && strchr(argv[1], ':')) {
+ ret = -ECANCELED;
+ goto error;
+ }
/* a symbol specified */
- symbol = argv[1];
+ symbol = kstrdup(argv[1], GFP_KERNEL);
+ if (!symbol)
+ return -ENOMEM;
/* TODO: support .init module functions */
ret = traceprobe_split_symbol_offset(symbol, &offset);
if (ret || offset < 0 || offset > UINT_MAX) {
- pr_info("Failed to parse either an address or a symbol.\n");
- return ret;
+ trace_probe_log_err(0, BAD_PROBE_ADDR);
+ goto parse_error;
}
- if (offset && is_return &&
- !kprobe_on_func_entry(NULL, symbol, offset)) {
- pr_info("Given offset is not valid for return probe.\n");
- return -EINVAL;
+ if (kprobe_on_func_entry(NULL, symbol, offset))
+ flags |= TPARG_FL_FENTRY;
+ if (offset && is_return && !(flags & TPARG_FL_FENTRY)) {
+ trace_probe_log_err(0, BAD_RETPROBE);
+ goto parse_error;
}
}
- argc -= 2; argv += 2;
- /* setup a probe */
- if (!event) {
+ trace_probe_log_set_index(0);
+ if (event) {
+ ret = traceprobe_parse_event_name(&event, &group, buf,
+ event - argv[0]);
+ if (ret)
+ goto parse_error;
+ } else {
/* Make a new event name */
if (symbol)
snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
@@ -835,127 +815,90 @@
sanitize_event_name(buf);
event = buf;
}
+
+ /* setup a probe */
tk = alloc_trace_kprobe(group, event, addr, symbol, offset, maxactive,
- argc, is_return);
+ argc - 2, is_return);
if (IS_ERR(tk)) {
- pr_info("Failed to allocate trace_probe.(%d)\n",
- (int)PTR_ERR(tk));
- return PTR_ERR(tk);
+ ret = PTR_ERR(tk);
+ /* This must return -ENOMEM, else there is a bug */
+ WARN_ON_ONCE(ret != -ENOMEM);
+ goto out; /* We know tk is not allocated */
}
+ argc -= 2; argv += 2;
/* parse arguments */
- ret = 0;
for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
- struct probe_arg *parg = &tk->tp.args[i];
-
- /* Increment count for freeing args in error case */
- tk->tp.nr_args++;
-
- /* Parse argument name */
- arg = strchr(argv[i], '=');
- if (arg) {
- *arg++ = '\0';
- parg->name = kstrdup(argv[i], GFP_KERNEL);
- } else {
- arg = argv[i];
- /* If argument name is omitted, set "argN" */
- snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
- parg->name = kstrdup(buf, GFP_KERNEL);
- }
-
- if (!parg->name) {
- pr_info("Failed to allocate argument[%d] name.\n", i);
+ tmp = kstrdup(argv[i], GFP_KERNEL);
+ if (!tmp) {
ret = -ENOMEM;
goto error;
}
- if (!is_good_name(parg->name)) {
- pr_info("Invalid argument[%d] name: %s\n",
- i, parg->name);
- ret = -EINVAL;
- goto error;
- }
-
- if (traceprobe_conflict_field_name(parg->name,
- tk->tp.args, i)) {
- pr_info("Argument[%d] name '%s' conflicts with "
- "another field.\n", i, argv[i]);
- ret = -EINVAL;
- goto error;
- }
-
- /* Parse fetch argument */
- ret = traceprobe_parse_probe_arg(arg, &tk->tp.size, parg,
- is_return, true,
- kprobes_fetch_type_table);
- if (ret) {
- pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
- goto error;
- }
+ trace_probe_log_set_index(i + 2);
+ ret = traceprobe_parse_probe_arg(&tk->tp, i, tmp, flags);
+ kfree(tmp);
+ if (ret)
+ goto error; /* This can be -ENOMEM */
}
+ ret = traceprobe_set_print_fmt(&tk->tp, is_return);
+ if (ret < 0)
+ goto error;
+
ret = register_trace_kprobe(tk);
- if (ret)
+ if (ret) {
+ trace_probe_log_set_index(1);
+ if (ret == -EILSEQ)
+ trace_probe_log_err(0, BAD_INSN_BNDRY);
+ else if (ret == -ENOENT)
+ trace_probe_log_err(0, BAD_PROBE_ADDR);
+ else if (ret != -ENOMEM && ret != -EEXIST)
+ trace_probe_log_err(0, FAIL_REG_PROBE);
goto error;
- return 0;
-
-error:
- free_trace_kprobe(tk);
- return ret;
-}
-
-static int release_all_trace_kprobes(void)
-{
- struct trace_kprobe *tk;
- int ret = 0;
-
- mutex_lock(&probe_lock);
- /* Ensure no probe is in use. */
- list_for_each_entry(tk, &probe_list, list)
- if (trace_probe_is_enabled(&tk->tp)) {
- ret = -EBUSY;
- goto end;
- }
- /* TODO: Use batch unregistration */
- while (!list_empty(&probe_list)) {
- tk = list_entry(probe_list.next, struct trace_kprobe, list);
- ret = unregister_trace_kprobe(tk);
- if (ret)
- goto end;
- free_trace_kprobe(tk);
}
-end:
- mutex_unlock(&probe_lock);
+out:
+ trace_probe_log_clear();
+ kfree(symbol);
+ return ret;
+parse_error:
+ ret = -EINVAL;
+error:
+ free_trace_kprobe(tk);
+ goto out;
+}
+
+static int create_or_delete_trace_kprobe(int argc, char **argv)
+{
+ int ret;
+
+ if (argv[0][0] == '-')
+ return dyn_event_release(argc, argv, &trace_kprobe_ops);
+
+ ret = trace_kprobe_create(argc, (const char **)argv);
+ return ret == -ECANCELED ? -EINVAL : ret;
+}
+
+static int trace_kprobe_release(struct dyn_event *ev)
+{
+ struct trace_kprobe *tk = to_trace_kprobe(ev);
+ int ret = unregister_trace_kprobe(tk);
+
+ if (!ret)
+ free_trace_kprobe(tk);
return ret;
}
-/* Probes listing interfaces */
-static void *probes_seq_start(struct seq_file *m, loff_t *pos)
+static int trace_kprobe_show(struct seq_file *m, struct dyn_event *ev)
{
- mutex_lock(&probe_lock);
- return seq_list_start(&probe_list, *pos);
-}
-
-static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
-{
- return seq_list_next(v, &probe_list, pos);
-}
-
-static void probes_seq_stop(struct seq_file *m, void *v)
-{
- mutex_unlock(&probe_lock);
-}
-
-static int probes_seq_show(struct seq_file *m, void *v)
-{
- struct trace_kprobe *tk = v;
+ struct trace_kprobe *tk = to_trace_kprobe(ev);
int i;
seq_putc(m, trace_kprobe_is_return(tk) ? 'r' : 'p');
- seq_printf(m, ":%s/%s", tk->tp.call.class->system,
- trace_event_name(&tk->tp.call));
+ seq_printf(m, ":%s/%s", trace_probe_group_name(&tk->tp),
+ trace_probe_name(&tk->tp));
if (!tk->symbol)
seq_printf(m, " 0x%p", tk->rp.kp.addr);
@@ -972,10 +915,20 @@
return 0;
}
+static int probes_seq_show(struct seq_file *m, void *v)
+{
+ struct dyn_event *ev = v;
+
+ if (!is_trace_kprobe(ev))
+ return 0;
+
+ return trace_kprobe_show(m, ev);
+}
+
static const struct seq_operations probes_seq_op = {
- .start = probes_seq_start,
- .next = probes_seq_next,
- .stop = probes_seq_stop,
+ .start = dyn_event_seq_start,
+ .next = dyn_event_seq_next,
+ .stop = dyn_event_seq_stop,
.show = probes_seq_show
};
@@ -983,8 +936,12 @@
{
int ret;
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
+ if (ret)
+ return ret;
+
if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
- ret = release_all_trace_kprobes();
+ ret = dyn_events_release_all(&trace_kprobe_ops);
if (ret < 0)
return ret;
}
@@ -996,7 +953,7 @@
size_t count, loff_t *ppos)
{
return trace_parse_run_command(file, buffer, count, ppos,
- create_trace_kprobe);
+ create_or_delete_trace_kprobe);
}
static const struct file_operations kprobe_events_ops = {
@@ -1011,10 +968,15 @@
/* Probes profiling interfaces */
static int probes_profile_seq_show(struct seq_file *m, void *v)
{
- struct trace_kprobe *tk = v;
+ struct dyn_event *ev = v;
+ struct trace_kprobe *tk;
+ if (!is_trace_kprobe(ev))
+ return 0;
+
+ tk = to_trace_kprobe(ev);
seq_printf(m, " %-44s %15lu %15lu\n",
- trace_event_name(&tk->tp.call),
+ trace_probe_name(&tk->tp),
trace_kprobe_nhit(tk),
tk->rp.kp.nmissed);
@@ -1022,14 +984,20 @@
}
static const struct seq_operations profile_seq_op = {
- .start = probes_seq_start,
- .next = probes_seq_next,
- .stop = probes_seq_stop,
+ .start = dyn_event_seq_start,
+ .next = dyn_event_seq_next,
+ .stop = dyn_event_seq_stop,
.show = probes_profile_seq_show
};
static int profile_open(struct inode *inode, struct file *file)
{
+ int ret;
+
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
+ if (ret)
+ return ret;
+
return seq_open(file, &profile_seq_op);
}
@@ -1041,6 +1009,145 @@
.release = seq_release,
};
+/* Kprobe specific fetch functions */
+
+/* Return the length of string -- including null terminal byte */
+static nokprobe_inline int
+fetch_store_strlen(unsigned long addr)
+{
+ int ret, len = 0;
+ u8 c;
+
+ do {
+ ret = probe_kernel_read(&c, (u8 *)addr + len, 1);
+ len++;
+ } while (c && ret == 0 && len < MAX_STRING_SIZE);
+
+ return (ret < 0) ? ret : len;
+}
+
+/* Return the length of string -- including null terminal byte */
+static nokprobe_inline int
+fetch_store_strlen_user(unsigned long addr)
+{
+ const void __user *uaddr = (__force const void __user *)addr;
+
+ return strnlen_unsafe_user(uaddr, MAX_STRING_SIZE);
+}
+
+/*
+ * Fetch a null-terminated string. Caller MUST set *(u32 *)buf with max
+ * length and relative data location.
+ */
+static nokprobe_inline int
+fetch_store_string(unsigned long addr, void *dest, void *base)
+{
+ int maxlen = get_loc_len(*(u32 *)dest);
+ void *__dest;
+ long ret;
+
+ if (unlikely(!maxlen))
+ return -ENOMEM;
+
+ __dest = get_loc_data(dest, base);
+
+ /*
+ * Try to get string again, since the string can be changed while
+ * probing.
+ */
+ ret = strncpy_from_unsafe(__dest, (void *)addr, maxlen);
+ if (ret >= 0)
+ *(u32 *)dest = make_data_loc(ret, __dest - base);
+
+ return ret;
+}
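/*
 * A minimal sketch of the data_loc word these helpers manipulate, assuming
 * the make_data_loc()/get_loc_len() definitions in trace_probe.h: length in
 * the upper 16 bits, offset of the string within the event record in the
 * lower 16 bits (consistent with "len = *(u32 *)data >> 16" in the string
 * print code):
 *
 *	data_loc = ((u32)len << 16) | (offs & 0xffff);
 *	len      = data_loc >> 16;
 *	offs     = data_loc & 0xffff;
 */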
+
+/*
+ * Fetch a null-terminated string from user. Caller MUST set *(u32 *)buf
+ * with max length and relative data location.
+ */
+static nokprobe_inline int
+fetch_store_string_user(unsigned long addr, void *dest, void *base)
+{
+ const void __user *uaddr = (__force const void __user *)addr;
+ int maxlen = get_loc_len(*(u32 *)dest);
+ void *__dest;
+ long ret;
+
+ if (unlikely(!maxlen))
+ return -ENOMEM;
+
+ __dest = get_loc_data(dest, base);
+
+ ret = strncpy_from_unsafe_user(__dest, uaddr, maxlen);
+ if (ret >= 0)
+ *(u32 *)dest = make_data_loc(ret, __dest - base);
+
+ return ret;
+}
+
+static nokprobe_inline int
+probe_mem_read(void *dest, void *src, size_t size)
+{
+ return probe_kernel_read(dest, src, size);
+}
+
+static nokprobe_inline int
+probe_mem_read_user(void *dest, void *src, size_t size)
+{
+ const void __user *uaddr = (__force const void __user *)src;
+
+ return probe_user_read(dest, uaddr, size);
+}
+
+/* Note that we don't verify it, since the code does not come from user space */
+static int
+process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest,
+ void *base)
+{
+ unsigned long val;
+
+retry:
+ /* 1st stage: get value from context */
+ switch (code->op) {
+ case FETCH_OP_REG:
+ val = regs_get_register(regs, code->param);
+ break;
+ case FETCH_OP_STACK:
+ val = regs_get_kernel_stack_nth(regs, code->param);
+ break;
+ case FETCH_OP_STACKP:
+ val = kernel_stack_pointer(regs);
+ break;
+ case FETCH_OP_RETVAL:
+ val = regs_return_value(regs);
+ break;
+ case FETCH_OP_IMM:
+ val = code->immediate;
+ break;
+ case FETCH_OP_COMM:
+ val = (unsigned long)current->comm;
+ break;
+ case FETCH_OP_DATA:
+ val = (unsigned long)code->data;
+ break;
+#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
+ case FETCH_OP_ARG:
+ val = regs_get_kernel_argument(regs, code->param);
+ break;
+#endif
+ case FETCH_NOP_SYMBOL: /* Ignore a place holder */
+ code++;
+ goto retry;
+ default:
+ return -EILSEQ;
+ }
+ code++;
+
+ return process_fetch_insn_bottom(code, val, dest, base);
+}
+NOKPROBE_SYMBOL(process_fetch_insn)
+
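/*
 * Sketch of the fetch "bytecode" interpreted above for an argument spec
 * such as "+8(%di):u32" (opcode names beyond those visible here are
 * assumptions based on trace_probe.h):
 *
 *	{ .op = FETCH_OP_REG,    .param  = <offset of %di> }  // stage 1: val
 *	{ .op = FETCH_OP_DEREF,  .offset = 8 }                // val = *(val + 8)
 *	{ .op = FETCH_OP_ST_MEM, .size   = 4 }                // store into record
 *	{ .op = FETCH_OP_END }
 *
 * Stage 1 runs in process_fetch_insn(); the dereference and store stages are
 * handled by process_fetch_insn_bottom() in trace_probe_tmpl.h.
 */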
/* Kprobe handler */
static nokprobe_inline void
__kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
@@ -1051,7 +1158,7 @@
struct ring_buffer *buffer;
int size, dsize, pc;
unsigned long irq_flags;
- struct trace_event_call *call = &tk->tp.call;
+ struct trace_event_call *call = trace_probe_event_call(&tk->tp);
WARN_ON(call != trace_file->event_call);
@@ -1072,7 +1179,7 @@
entry = ring_buffer_event_data(event);
entry->ip = (unsigned long)tk->rp.kp.addr;
- store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
+ store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize);
event_trigger_unlock_commit_regs(trace_file, buffer, event,
entry, irq_flags, pc, regs);
@@ -1083,7 +1190,7 @@
{
struct event_file_link *link;
- list_for_each_entry_rcu(link, &tk->tp.files, list)
+ trace_probe_for_each_link_rcu(link, &tk->tp)
__kprobe_trace_func(tk, regs, link->file);
}
NOKPROBE_SYMBOL(kprobe_trace_func);
@@ -1099,7 +1206,7 @@
struct ring_buffer *buffer;
int size, pc, dsize;
unsigned long irq_flags;
- struct trace_event_call *call = &tk->tp.call;
+ struct trace_event_call *call = trace_probe_event_call(&tk->tp);
WARN_ON(call != trace_file->event_call);
@@ -1121,7 +1228,7 @@
entry = ring_buffer_event_data(event);
entry->func = (unsigned long)tk->rp.kp.addr;
entry->ret_ip = (unsigned long)ri->ret_addr;
- store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
+ store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize);
event_trigger_unlock_commit_regs(trace_file, buffer, event,
entry, irq_flags, pc, regs);
@@ -1133,7 +1240,7 @@
{
struct event_file_link *link;
- list_for_each_entry_rcu(link, &tk->tp.files, list)
+ trace_probe_for_each_link_rcu(link, &tk->tp)
__kretprobe_trace_func(tk, ri, regs, link->file);
}
NOKPROBE_SYMBOL(kretprobe_trace_func);
@@ -1146,24 +1253,23 @@
struct kprobe_trace_entry_head *field;
struct trace_seq *s = &iter->seq;
struct trace_probe *tp;
- u8 *data;
- int i;
field = (struct kprobe_trace_entry_head *)iter->ent;
- tp = container_of(event, struct trace_probe, call.event);
+ tp = trace_probe_primary_from_call(
+ container_of(event, struct trace_event_call, event));
+ if (WARN_ON_ONCE(!tp))
+ goto out;
- trace_seq_printf(s, "%s: (", trace_event_name(&tp->call));
+ trace_seq_printf(s, "%s: (", trace_probe_name(tp));
if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
goto out;
trace_seq_putc(s, ')');
- data = (u8 *)&field[1];
- for (i = 0; i < tp->nr_args; i++)
- if (!tp->args[i].type->print(s, tp->args[i].name,
- data + tp->args[i].offset, field))
- goto out;
+ if (print_probe_args(s, tp->args, tp->nr_args,
+ (u8 *)&field[1], field) < 0)
+ goto out;
trace_seq_putc(s, '\n');
out:
@@ -1177,13 +1283,14 @@
struct kretprobe_trace_entry_head *field;
struct trace_seq *s = &iter->seq;
struct trace_probe *tp;
- u8 *data;
- int i;
field = (struct kretprobe_trace_entry_head *)iter->ent;
- tp = container_of(event, struct trace_probe, call.event);
+ tp = trace_probe_primary_from_call(
+ container_of(event, struct trace_event_call, event));
+ if (WARN_ON_ONCE(!tp))
+ goto out;
- trace_seq_printf(s, "%s: (", trace_event_name(&tp->call));
+ trace_seq_printf(s, "%s: (", trace_probe_name(tp));
if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
goto out;
@@ -1195,11 +1302,9 @@
trace_seq_putc(s, ')');
- data = (u8 *)&field[1];
- for (i = 0; i < tp->nr_args; i++)
- if (!tp->args[i].type->print(s, tp->args[i].name,
- data + tp->args[i].offset, field))
- goto out;
+ if (print_probe_args(s, tp->args, tp->nr_args,
+ (u8 *)&field[1], field) < 0)
+ goto out;
trace_seq_putc(s, '\n');
@@ -1210,49 +1315,33 @@
static int kprobe_event_define_fields(struct trace_event_call *event_call)
{
- int ret, i;
+ int ret;
struct kprobe_trace_entry_head field;
- struct trace_kprobe *tk = (struct trace_kprobe *)event_call->data;
+ struct trace_probe *tp;
+
+ tp = trace_probe_primary_from_call(event_call);
+ if (WARN_ON_ONCE(!tp))
+ return -ENOENT;
DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
- /* Set argument names as fields */
- for (i = 0; i < tk->tp.nr_args; i++) {
- struct probe_arg *parg = &tk->tp.args[i];
- ret = trace_define_field(event_call, parg->type->fmttype,
- parg->name,
- sizeof(field) + parg->offset,
- parg->type->size,
- parg->type->is_signed,
- FILTER_OTHER);
- if (ret)
- return ret;
- }
- return 0;
+ return traceprobe_define_arg_fields(event_call, sizeof(field), tp);
}
static int kretprobe_event_define_fields(struct trace_event_call *event_call)
{
- int ret, i;
+ int ret;
struct kretprobe_trace_entry_head field;
- struct trace_kprobe *tk = (struct trace_kprobe *)event_call->data;
+ struct trace_probe *tp;
+
+ tp = trace_probe_primary_from_call(event_call);
+ if (WARN_ON_ONCE(!tp))
+ return -ENOENT;
DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
- /* Set argument names as fields */
- for (i = 0; i < tk->tp.nr_args; i++) {
- struct probe_arg *parg = &tk->tp.args[i];
- ret = trace_define_field(event_call, parg->type->fmttype,
- parg->name,
- sizeof(field) + parg->offset,
- parg->type->size,
- parg->type->is_signed,
- FILTER_OTHER);
- if (ret)
- return ret;
- }
- return 0;
+ return traceprobe_define_arg_fields(event_call, sizeof(field), tp);
}
#ifdef CONFIG_PERF_EVENTS
@@ -1261,7 +1350,7 @@
static int
kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
{
- struct trace_event_call *call = &tk->tp.call;
+ struct trace_event_call *call = trace_probe_event_call(&tk->tp);
struct kprobe_trace_entry_head *entry;
struct hlist_head *head;
int size, __size, dsize;
@@ -1299,7 +1388,7 @@
entry->ip = (unsigned long)tk->rp.kp.addr;
memset(&entry[1], 0, dsize);
- store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
+ store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize);
perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
head, NULL);
return 0;
@@ -1311,7 +1400,7 @@
kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
struct pt_regs *regs)
{
- struct trace_event_call *call = &tk->tp.call;
+ struct trace_event_call *call = trace_probe_event_call(&tk->tp);
struct kretprobe_trace_entry_head *entry;
struct hlist_head *head;
int size, __size, dsize;
@@ -1335,7 +1424,7 @@
entry->func = (unsigned long)tk->rp.kp.addr;
entry->ret_ip = (unsigned long)ri->ret_addr;
- store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
+ store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize);
perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
head, NULL);
}
@@ -1380,20 +1469,19 @@
static int kprobe_register(struct trace_event_call *event,
enum trace_reg type, void *data)
{
- struct trace_kprobe *tk = (struct trace_kprobe *)event->data;
struct trace_event_file *file = data;
switch (type) {
case TRACE_REG_REGISTER:
- return enable_trace_kprobe(tk, file);
+ return enable_trace_kprobe(event, file);
case TRACE_REG_UNREGISTER:
- return disable_trace_kprobe(tk, file);
+ return disable_trace_kprobe(event, file);
#ifdef CONFIG_PERF_EVENTS
case TRACE_REG_PERF_REGISTER:
- return enable_trace_kprobe(tk, NULL);
+ return enable_trace_kprobe(event, NULL);
case TRACE_REG_PERF_UNREGISTER:
- return disable_trace_kprobe(tk, NULL);
+ return disable_trace_kprobe(event, NULL);
case TRACE_REG_PERF_OPEN:
case TRACE_REG_PERF_CLOSE:
case TRACE_REG_PERF_ADD:
@@ -1411,10 +1499,10 @@
raw_cpu_inc(*tk->nhit);
- if (tk->tp.flags & TP_FLAG_TRACE)
+ if (trace_probe_test_flag(&tk->tp, TP_FLAG_TRACE))
kprobe_trace_func(tk, regs);
#ifdef CONFIG_PERF_EVENTS
- if (tk->tp.flags & TP_FLAG_PROFILE)
+ if (trace_probe_test_flag(&tk->tp, TP_FLAG_PROFILE))
ret = kprobe_perf_func(tk, regs);
#endif
return ret;
@@ -1428,10 +1516,10 @@
raw_cpu_inc(*tk->nhit);
- if (tk->tp.flags & TP_FLAG_TRACE)
+ if (trace_probe_test_flag(&tk->tp, TP_FLAG_TRACE))
kretprobe_trace_func(tk, ri, regs);
#ifdef CONFIG_PERF_EVENTS
- if (tk->tp.flags & TP_FLAG_PROFILE)
+ if (trace_probe_test_flag(&tk->tp, TP_FLAG_PROFILE))
kretprobe_perf_func(tk, ri, regs);
#endif
	return 0;	/* We don't tweak the kernel, so just return 0 */
@@ -1446,10 +1534,10 @@
.trace = print_kprobe_event
};
-static inline void init_trace_event_call(struct trace_kprobe *tk,
- struct trace_event_call *call)
+static inline void init_trace_event_call(struct trace_kprobe *tk)
{
- INIT_LIST_HEAD(&call->class->fields);
+ struct trace_event_call *call = trace_probe_event_call(&tk->tp);
+
if (trace_kprobe_is_return(tk)) {
call->event.funcs = &kretprobe_funcs;
call->class->define_fields = kretprobe_event_define_fields;
@@ -1460,42 +1548,18 @@
call->flags = TRACE_EVENT_FL_KPROBE;
call->class->reg = kprobe_register;
- call->data = tk;
}
static int register_kprobe_event(struct trace_kprobe *tk)
{
- struct trace_event_call *call = &tk->tp.call;
- int ret = 0;
+ init_trace_event_call(tk);
- init_trace_event_call(tk, call);
-
- if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0)
- return -ENOMEM;
- ret = register_trace_event(&call->event);
- if (!ret) {
- kfree(call->print_fmt);
- return -ENODEV;
- }
- ret = trace_add_event_call(call);
- if (ret) {
- pr_info("Failed to register kprobe event: %s\n",
- trace_event_name(call));
- kfree(call->print_fmt);
- unregister_trace_event(&call->event);
- }
- return ret;
+ return trace_probe_register_event_call(&tk->tp);
}
static int unregister_kprobe_event(struct trace_kprobe *tk)
{
- int ret;
-
- /* tp->event is unregistered in trace_remove_event_call() */
- ret = trace_remove_event_call(&tk->tp.call);
- if (!ret)
- kfree(tk->tp.call.print_fmt);
- return ret;
+ return trace_probe_unregister_event_call(&tk->tp);
}
#ifdef CONFIG_PERF_EVENTS
@@ -1509,7 +1573,7 @@
char *event;
/*
- * local trace_kprobes are not added to probe_list, so they are never
+ * local trace_kprobes are not added to dyn_event, so they are never
* searched in find_trace_kprobe(). Therefore, there is no concern of
* duplicated name here.
*/
@@ -1525,20 +1589,18 @@
return ERR_CAST(tk);
}
- init_trace_event_call(tk, &tk->tp.call);
+ init_trace_event_call(tk);
- if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) {
+ if (traceprobe_set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) {
ret = -ENOMEM;
goto error;
}
ret = __register_trace_kprobe(tk);
- if (ret < 0) {
- kfree(tk->tp.call.print_fmt);
+ if (ret < 0)
goto error;
- }
- return &tk->tp.call;
+ return trace_probe_event_call(&tk->tp);
error:
free_trace_kprobe(tk);
return ERR_PTR(ret);
@@ -1548,7 +1610,9 @@
{
struct trace_kprobe *tk;
- tk = container_of(event_call, struct trace_kprobe, tp.call);
+ tk = trace_kprobe_primary_from_call(event_call);
+ if (unlikely(!tk))
+ return;
if (trace_probe_is_enabled(&tk->tp)) {
WARN_ON(1);
@@ -1557,16 +1621,60 @@
__unregister_trace_kprobe(tk);
- kfree(tk->tp.call.print_fmt);
free_trace_kprobe(tk);
}
#endif /* CONFIG_PERF_EVENTS */
+static __init void enable_boot_kprobe_events(void)
+{
+ struct trace_array *tr = top_trace_array();
+ struct trace_event_file *file;
+ struct trace_kprobe *tk;
+ struct dyn_event *pos;
+
+ mutex_lock(&event_mutex);
+ for_each_trace_kprobe(tk, pos) {
+ list_for_each_entry(file, &tr->events, list)
+ if (file->event_call == trace_probe_event_call(&tk->tp))
+ trace_event_enable_disable(file, 1, 0);
+ }
+ mutex_unlock(&event_mutex);
+}
+
+static __init void setup_boot_kprobe_events(void)
+{
+ char *p, *cmd = kprobe_boot_events_buf;
+ int ret;
+
+ strreplace(kprobe_boot_events_buf, ',', ' ');
+
+ while (cmd && *cmd != '\0') {
+ p = strchr(cmd, ';');
+ if (p)
+ *p++ = '\0';
+
+ ret = trace_run_command(cmd, create_or_delete_trace_kprobe);
+ if (ret)
+ pr_warn("Failed to add event(%d): %s\n", ret, cmd);
+ else
+ kprobe_boot_events_enabled = true;
+
+ cmd = p;
+ }
+
+ enable_boot_kprobe_events();
+}
+
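/*
 * The boot-time syntax mirrors the tracefs one, with ',' standing in for
 * spaces and ';' separating definitions (see the strreplace()/strchr()
 * calls above). A hypothetical command line:
 *
 *	kprobe_event=p,vfs_read,$arg1,$arg2;r,vfs_read,$retval
 *
 * Each definition is run through create_or_delete_trace_kprobe() and the
 * resulting events are enabled before the tracefs interface is created.
 */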
/* Make a tracefs interface for controlling probe points */
static __init int init_kprobe_trace(void)
{
struct dentry *d_tracer;
struct dentry *entry;
+ int ret;
+
+ ret = dyn_event_register(&trace_kprobe_ops);
+ if (ret)
+ return ret;
if (register_module_notifier(&trace_kprobe_module_nb))
return -EINVAL;
@@ -1588,6 +1696,9 @@
if (!entry)
pr_warn("Could not create tracefs 'kprobe_profile' entry\n");
+
+ setup_boot_kprobe_events();
+
return 0;
}
fs_initcall(init_kprobe_trace);
@@ -1600,7 +1711,7 @@
struct trace_event_file *file;
list_for_each_entry(file, &tr->events, list)
- if (file->event_call == &tk->tp.call)
+ if (file->event_call == trace_probe_event_call(&tk->tp))
return file;
return NULL;
@@ -1620,13 +1731,17 @@
if (tracing_is_disabled())
return -ENODEV;
+ if (kprobe_boot_events_enabled) {
+ pr_info("Skipping kprobe tests due to kprobe_event on cmdline\n");
+ return 0;
+ }
+
target = kprobe_trace_selftest_target;
pr_info("Testing kprobe tracing: ");
- ret = trace_run_command("p:testprobe kprobe_trace_selftest_target "
- "$stack $stack0 +0($stack)",
- create_trace_kprobe);
+ ret = trace_run_command("p:testprobe kprobe_trace_selftest_target $stack $stack0 +0($stack)",
+ create_or_delete_trace_kprobe);
if (WARN_ON_ONCE(ret)) {
pr_warn("error on probing function entry.\n");
warn++;
@@ -1642,12 +1757,13 @@
pr_warn("error on getting probe file.\n");
warn++;
} else
- enable_trace_kprobe(tk, file);
+ enable_trace_kprobe(
+ trace_probe_event_call(&tk->tp), file);
}
}
- ret = trace_run_command("r:testprobe2 kprobe_trace_selftest_target "
- "$retval", create_trace_kprobe);
+ ret = trace_run_command("r:testprobe2 kprobe_trace_selftest_target $retval",
+ create_or_delete_trace_kprobe);
if (WARN_ON_ONCE(ret)) {
pr_warn("error on probing function return.\n");
warn++;
@@ -1663,7 +1779,8 @@
pr_warn("error on getting probe file.\n");
warn++;
} else
- enable_trace_kprobe(tk, file);
+ enable_trace_kprobe(
+ trace_probe_event_call(&tk->tp), file);
}
}
@@ -1696,7 +1813,8 @@
pr_warn("error on getting probe file.\n");
warn++;
} else
- disable_trace_kprobe(tk, file);
+ disable_trace_kprobe(
+ trace_probe_event_call(&tk->tp), file);
}
tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM);
@@ -1714,23 +1832,28 @@
pr_warn("error on getting probe file.\n");
warn++;
} else
- disable_trace_kprobe(tk, file);
+ disable_trace_kprobe(
+ trace_probe_event_call(&tk->tp), file);
}
- ret = trace_run_command("-:testprobe", create_trace_kprobe);
+ ret = trace_run_command("-:testprobe", create_or_delete_trace_kprobe);
if (WARN_ON_ONCE(ret)) {
pr_warn("error on deleting a probe.\n");
warn++;
}
- ret = trace_run_command("-:testprobe2", create_trace_kprobe);
+ ret = trace_run_command("-:testprobe2", create_or_delete_trace_kprobe);
if (WARN_ON_ONCE(ret)) {
pr_warn("error on deleting a probe.\n");
warn++;
}
end:
- release_all_trace_kprobes();
+ ret = dyn_events_release_all(&trace_kprobe_ops);
+ if (WARN_ON_ONCE(ret)) {
+ pr_warn("error on cleaning up probes.\n");
+ warn++;
+ }
/*
* Wait for the optimizer work to finish. Otherwise it might fiddle
* with probes in already freed __init text.
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 6e6cc64..d54ce25 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -219,10 +219,10 @@
{
int i;
const char *ret = trace_seq_buffer_ptr(p);
+ const char *fmt = concatenate ? "%*phN" : "%*ph";
- for (i = 0; i < buf_len; i++)
- trace_seq_printf(p, "%s%2.2x", concatenate || i == 0 ? "" : " ",
- buf[i]);
+ for (i = 0; i < buf_len; i += 16)
+ trace_seq_printf(p, fmt, min(buf_len - i, 16), &buf[i]);
trace_seq_putc(p, 0);
return ret;
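/*
 * %*ph prints a small buffer as space-separated hex bytes and %*phN omits
 * the separators, so for a 4-byte buffer {0xde, 0xad, 0xbe, 0xef} the two
 * formats produce "de ad be ef" and "deadbeef" respectively. Chunking by 16
 * keeps each call well inside the documented 64-byte limit of %ph.
 */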
@@ -339,43 +339,24 @@
#endif /* CONFIG_KRETPROBES */
static void
-seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
+seq_print_sym(struct trace_seq *s, unsigned long address, bool offset)
{
- char str[KSYM_SYMBOL_LEN];
#ifdef CONFIG_KALLSYMS
+ char str[KSYM_SYMBOL_LEN];
const char *name;
- kallsyms_lookup(address, NULL, NULL, NULL, str);
-
+ if (offset)
+ sprint_symbol(str, address);
+ else
+ kallsyms_lookup(address, NULL, NULL, NULL, str);
name = kretprobed(str);
if (name && strlen(name)) {
- trace_seq_printf(s, fmt, name);
+ trace_seq_puts(s, name);
return;
}
#endif
- snprintf(str, KSYM_SYMBOL_LEN, "0x%08lx", address);
- trace_seq_printf(s, fmt, str);
-}
-
-static void
-seq_print_sym_offset(struct trace_seq *s, const char *fmt,
- unsigned long address)
-{
- char str[KSYM_SYMBOL_LEN];
-#ifdef CONFIG_KALLSYMS
- const char *name;
-
- sprint_symbol(str, address);
- name = kretprobed(str);
-
- if (name && strlen(name)) {
- trace_seq_printf(s, fmt, name);
- return;
- }
-#endif
- snprintf(str, KSYM_SYMBOL_LEN, "0x%08lx", address);
- trace_seq_printf(s, fmt, str);
+ trace_seq_printf(s, "0x%08lx", address);
}
#ifndef CONFIG_64BIT
@@ -424,10 +405,7 @@
goto out;
}
- if (sym_flags & TRACE_ITER_SYM_OFFSET)
- seq_print_sym_offset(s, "%s", ip);
- else
- seq_print_sym_short(s, "%s", ip);
+ seq_print_sym(s, ip, sym_flags & TRACE_ITER_SYM_OFFSET);
if (sym_flags & TRACE_ITER_SYM_ADDR)
trace_seq_printf(s, " <" IP_FMT ">", ip);
@@ -1079,7 +1057,7 @@
trace_seq_puts(s, "<stack trace>\n");
- for (p = field->caller; p && *p != ULONG_MAX && p < end; p++) {
+ for (p = field->caller; p && p < end && *p != ULONG_MAX; p++) {
if (trace_seq_has_overflowed(s))
break;
@@ -1131,17 +1109,10 @@
for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
unsigned long ip = field->caller[i];
- if (ip == ULONG_MAX || trace_seq_has_overflowed(s))
+ if (!ip || trace_seq_has_overflowed(s))
break;
trace_seq_puts(s, " => ");
-
- if (!ip) {
- trace_seq_puts(s, "??");
- trace_seq_putc(s, '\n');
- continue;
- }
-
seq_print_user_ip(s, mm, ip, flags);
trace_seq_putc(s, '\n');
}
diff --git a/kernel/trace/trace_preemptirq.c b/kernel/trace/trace_preemptirq.c
index 71f553c..4d8e99f 100644
--- a/kernel/trace/trace_preemptirq.c
+++ b/kernel/trace/trace_preemptirq.c
@@ -9,6 +9,7 @@
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/ftrace.h>
+#include <linux/kprobes.h>
#include "trace.h"
#define CREATE_TRACE_POINTS
@@ -30,6 +31,7 @@
lockdep_hardirqs_on(CALLER_ADDR0);
}
EXPORT_SYMBOL(trace_hardirqs_on);
+NOKPROBE_SYMBOL(trace_hardirqs_on);
void trace_hardirqs_off(void)
{
@@ -43,6 +45,7 @@
lockdep_hardirqs_off(CALLER_ADDR0);
}
EXPORT_SYMBOL(trace_hardirqs_off);
+NOKPROBE_SYMBOL(trace_hardirqs_off);
__visible void trace_hardirqs_on_caller(unsigned long caller_addr)
{
@@ -56,6 +59,7 @@
lockdep_hardirqs_on(CALLER_ADDR0);
}
EXPORT_SYMBOL(trace_hardirqs_on_caller);
+NOKPROBE_SYMBOL(trace_hardirqs_on_caller);
__visible void trace_hardirqs_off_caller(unsigned long caller_addr)
{
@@ -69,6 +73,7 @@
lockdep_hardirqs_off(CALLER_ADDR0);
}
EXPORT_SYMBOL(trace_hardirqs_off_caller);
+NOKPROBE_SYMBOL(trace_hardirqs_off_caller);
#endif /* CONFIG_TRACE_IRQFLAGS */
#ifdef CONFIG_TRACE_PREEMPT_TOGGLE
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index b0875b3..d4e31e9 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -6,6 +6,7 @@
*
*/
#include <linux/seq_file.h>
+#include <linux/security.h>
#include <linux/uaccess.h>
#include <linux/kernel.h>
#include <linux/ftrace.h>
@@ -115,7 +116,7 @@
* section, then we need to read the link list pointers. The trick is
* we pass the address of the string to the seq function just like
* we do for the kernel core formats. To get back the structure that
- * holds the format, we simply use containerof() and then go to the
+ * holds the format, we simply use container_of() and then go to the
* next format in the list.
*/
static const char **
@@ -348,6 +349,12 @@
static int
ftrace_formats_open(struct inode *inode, struct file *file)
{
+ int ret;
+
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
+ if (ret)
+ return ret;
+
return seq_open(file, &show_format_seq_ops);
}
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index e99c3ce..905b10a 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -13,7 +13,12 @@
#include "trace_probe.h"
-const char *reserved_field_names[] = {
+#undef C
+#define C(a, b) b
+
+static const char *trace_probe_err_text[] = { ERRORS };
+
+static const char *reserved_field_names[] = {
"common_type",
"common_flags",
"common_preempt_count",
@@ -26,14 +31,12 @@
/* Printing in basic type function template */
#define DEFINE_BASIC_PRINT_TYPE_FUNC(tname, type, fmt) \
-int PRINT_TYPE_FUNC_NAME(tname)(struct trace_seq *s, const char *name, \
- void *data, void *ent) \
+int PRINT_TYPE_FUNC_NAME(tname)(struct trace_seq *s, void *data, void *ent)\
{ \
- trace_seq_printf(s, " %s=" fmt, name, *(type *)data); \
+ trace_seq_printf(s, fmt, *(type *)data); \
return !trace_seq_has_overflowed(s); \
} \
-const char PRINT_TYPE_FMT_NAME(tname)[] = fmt; \
-NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(tname));
+const char PRINT_TYPE_FMT_NAME(tname)[] = fmt;
DEFINE_BASIC_PRINT_TYPE_FUNC(u8, u8, "%u")
DEFINE_BASIC_PRINT_TYPE_FUNC(u16, u16, "%u")
@@ -48,193 +51,54 @@
DEFINE_BASIC_PRINT_TYPE_FUNC(x32, u32, "0x%x")
DEFINE_BASIC_PRINT_TYPE_FUNC(x64, u64, "0x%Lx")
+int PRINT_TYPE_FUNC_NAME(symbol)(struct trace_seq *s, void *data, void *ent)
+{
+ trace_seq_printf(s, "%pS", (void *)*(unsigned long *)data);
+ return !trace_seq_has_overflowed(s);
+}
+const char PRINT_TYPE_FMT_NAME(symbol)[] = "%pS";
+
/* Print type function for string type */
-int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, const char *name,
- void *data, void *ent)
+int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, void *data, void *ent)
{
int len = *(u32 *)data >> 16;
if (!len)
- trace_seq_printf(s, " %s=(fault)", name);
+ trace_seq_puts(s, "(fault)");
else
- trace_seq_printf(s, " %s=\"%s\"", name,
+ trace_seq_printf(s, "\"%s\"",
(const char *)get_loc_data(data, ent));
return !trace_seq_has_overflowed(s);
}
-NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(string));
const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\"";
-#define CHECK_FETCH_FUNCS(method, fn) \
- (((FETCH_FUNC_NAME(method, u8) == fn) || \
- (FETCH_FUNC_NAME(method, u16) == fn) || \
- (FETCH_FUNC_NAME(method, u32) == fn) || \
- (FETCH_FUNC_NAME(method, u64) == fn) || \
- (FETCH_FUNC_NAME(method, string) == fn) || \
- (FETCH_FUNC_NAME(method, string_size) == fn)) \
- && (fn != NULL))
+/* Fetch type information table */
+static const struct fetch_type probe_fetch_types[] = {
+ /* Special types */
+ __ASSIGN_FETCH_TYPE("string", string, string, sizeof(u32), 1,
+ "__data_loc char[]"),
+ __ASSIGN_FETCH_TYPE("ustring", string, string, sizeof(u32), 1,
+ "__data_loc char[]"),
+ /* Basic types */
+ ASSIGN_FETCH_TYPE(u8, u8, 0),
+ ASSIGN_FETCH_TYPE(u16, u16, 0),
+ ASSIGN_FETCH_TYPE(u32, u32, 0),
+ ASSIGN_FETCH_TYPE(u64, u64, 0),
+ ASSIGN_FETCH_TYPE(s8, u8, 1),
+ ASSIGN_FETCH_TYPE(s16, u16, 1),
+ ASSIGN_FETCH_TYPE(s32, u32, 1),
+ ASSIGN_FETCH_TYPE(s64, u64, 1),
+ ASSIGN_FETCH_TYPE_ALIAS(x8, u8, u8, 0),
+ ASSIGN_FETCH_TYPE_ALIAS(x16, u16, u16, 0),
+ ASSIGN_FETCH_TYPE_ALIAS(x32, u32, u32, 0),
+ ASSIGN_FETCH_TYPE_ALIAS(x64, u64, u64, 0),
+ ASSIGN_FETCH_TYPE_ALIAS(symbol, ADDR_FETCH_TYPE, ADDR_FETCH_TYPE, 0),
-/* Data fetch function templates */
-#define DEFINE_FETCH_reg(type) \
-void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, void *offset, void *dest) \
-{ \
- *(type *)dest = (type)regs_get_register(regs, \
- (unsigned int)((unsigned long)offset)); \
-} \
-NOKPROBE_SYMBOL(FETCH_FUNC_NAME(reg, type));
-DEFINE_BASIC_FETCH_FUNCS(reg)
-/* No string on the register */
-#define fetch_reg_string NULL
-#define fetch_reg_string_size NULL
-
-#define DEFINE_FETCH_retval(type) \
-void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs, \
- void *dummy, void *dest) \
-{ \
- *(type *)dest = (type)regs_return_value(regs); \
-} \
-NOKPROBE_SYMBOL(FETCH_FUNC_NAME(retval, type));
-DEFINE_BASIC_FETCH_FUNCS(retval)
-/* No string on the retval */
-#define fetch_retval_string NULL
-#define fetch_retval_string_size NULL
-
-/* Dereference memory access function */
-struct deref_fetch_param {
- struct fetch_param orig;
- long offset;
- fetch_func_t fetch;
- fetch_func_t fetch_size;
+ ASSIGN_FETCH_TYPE_END
};
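/*
 * The new "ustring" and "symbol" entries allow probe arguments such as
 * (symbol and event names are illustrative):
 *
 *	p:open_probe do_sys_open path=+0($arg2):ustring
 *	p:work_probe process_one_work func=+0x18($arg2):symbol
 *
 * "ustring" fetches the string through the user-access helpers added to
 * trace_kprobe.c earlier in this patch (fetch_store_string_user()), and
 * "symbol" prints the fetched address with %pS.
 */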
-#define DEFINE_FETCH_deref(type) \
-void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs, \
- void *data, void *dest) \
-{ \
- struct deref_fetch_param *dprm = data; \
- unsigned long addr; \
- call_fetch(&dprm->orig, regs, &addr); \
- if (addr) { \
- addr += dprm->offset; \
- dprm->fetch(regs, (void *)addr, dest); \
- } else \
- *(type *)dest = 0; \
-} \
-NOKPROBE_SYMBOL(FETCH_FUNC_NAME(deref, type));
-DEFINE_BASIC_FETCH_FUNCS(deref)
-DEFINE_FETCH_deref(string)
-
-void FETCH_FUNC_NAME(deref, string_size)(struct pt_regs *regs,
- void *data, void *dest)
-{
- struct deref_fetch_param *dprm = data;
- unsigned long addr;
-
- call_fetch(&dprm->orig, regs, &addr);
- if (addr && dprm->fetch_size) {
- addr += dprm->offset;
- dprm->fetch_size(regs, (void *)addr, dest);
- } else
- *(string_size *)dest = 0;
-}
-NOKPROBE_SYMBOL(FETCH_FUNC_NAME(deref, string_size));
-
-static void update_deref_fetch_param(struct deref_fetch_param *data)
-{
- if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
- update_deref_fetch_param(data->orig.data);
- else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
- update_symbol_cache(data->orig.data);
-}
-NOKPROBE_SYMBOL(update_deref_fetch_param);
-
-static void free_deref_fetch_param(struct deref_fetch_param *data)
-{
- if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
- free_deref_fetch_param(data->orig.data);
- else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
- free_symbol_cache(data->orig.data);
- kfree(data);
-}
-NOKPROBE_SYMBOL(free_deref_fetch_param);
-
-/* Bitfield fetch function */
-struct bitfield_fetch_param {
- struct fetch_param orig;
- unsigned char hi_shift;
- unsigned char low_shift;
-};
-
-#define DEFINE_FETCH_bitfield(type) \
-void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs, \
- void *data, void *dest) \
-{ \
- struct bitfield_fetch_param *bprm = data; \
- type buf = 0; \
- call_fetch(&bprm->orig, regs, &buf); \
- if (buf) { \
- buf <<= bprm->hi_shift; \
- buf >>= bprm->low_shift; \
- } \
- *(type *)dest = buf; \
-} \
-NOKPROBE_SYMBOL(FETCH_FUNC_NAME(bitfield, type));
-DEFINE_BASIC_FETCH_FUNCS(bitfield)
-#define fetch_bitfield_string NULL
-#define fetch_bitfield_string_size NULL
-
-static void
-update_bitfield_fetch_param(struct bitfield_fetch_param *data)
-{
- /*
- * Don't check the bitfield itself, because this must be the
- * last fetch function.
- */
- if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
- update_deref_fetch_param(data->orig.data);
- else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
- update_symbol_cache(data->orig.data);
-}
-
-static void
-free_bitfield_fetch_param(struct bitfield_fetch_param *data)
-{
- /*
- * Don't check the bitfield itself, because this must be the
- * last fetch function.
- */
- if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
- free_deref_fetch_param(data->orig.data);
- else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
- free_symbol_cache(data->orig.data);
-
- kfree(data);
-}
-
-void FETCH_FUNC_NAME(comm, string)(struct pt_regs *regs,
- void *data, void *dest)
-{
- int maxlen = get_rloc_len(*(u32 *)dest);
- u8 *dst = get_rloc_data(dest);
- long ret;
-
- if (!maxlen)
- return;
-
- ret = strlcpy(dst, current->comm, maxlen);
- *(u32 *)dest = make_data_rloc(ret, get_rloc_offs(*(u32 *)dest));
-}
-NOKPROBE_SYMBOL(FETCH_FUNC_NAME(comm, string));
-
-void FETCH_FUNC_NAME(comm, string_size)(struct pt_regs *regs,
- void *data, void *dest)
-{
- *(u32 *)dest = strlen(current->comm) + 1;
-}
-NOKPROBE_SYMBOL(FETCH_FUNC_NAME(comm, string_size));
-
-static const struct fetch_type *find_fetch_type(const char *type,
- const struct fetch_type *ftbl)
+static const struct fetch_type *find_fetch_type(const char *type)
{
int i;
@@ -255,56 +119,89 @@
switch (bs) {
case 8:
- return find_fetch_type("u8", ftbl);
+ return find_fetch_type("u8");
case 16:
- return find_fetch_type("u16", ftbl);
+ return find_fetch_type("u16");
case 32:
- return find_fetch_type("u32", ftbl);
+ return find_fetch_type("u32");
case 64:
- return find_fetch_type("u64", ftbl);
+ return find_fetch_type("u64");
default:
goto fail;
}
}
- for (i = 0; ftbl[i].name; i++) {
- if (strcmp(type, ftbl[i].name) == 0)
- return &ftbl[i];
+ for (i = 0; probe_fetch_types[i].name; i++) {
+ if (strcmp(type, probe_fetch_types[i].name) == 0)
+ return &probe_fetch_types[i];
}
fail:
return NULL;
}
-/* Special function : only accept unsigned long */
-static void fetch_kernel_stack_address(struct pt_regs *regs, void *dummy, void *dest)
+static struct trace_probe_log trace_probe_log;
+
+void trace_probe_log_init(const char *subsystem, int argc, const char **argv)
{
- *(unsigned long *)dest = kernel_stack_pointer(regs);
+ trace_probe_log.subsystem = subsystem;
+ trace_probe_log.argc = argc;
+ trace_probe_log.argv = argv;
+ trace_probe_log.index = 0;
}
-NOKPROBE_SYMBOL(fetch_kernel_stack_address);
-static void fetch_user_stack_address(struct pt_regs *regs, void *dummy, void *dest)
+void trace_probe_log_clear(void)
{
- *(unsigned long *)dest = user_stack_pointer(regs);
+ memset(&trace_probe_log, 0, sizeof(trace_probe_log));
}
-NOKPROBE_SYMBOL(fetch_user_stack_address);
-static fetch_func_t get_fetch_size_function(const struct fetch_type *type,
- fetch_func_t orig_fn,
- const struct fetch_type *ftbl)
+void trace_probe_log_set_index(int index)
{
- int i;
+ trace_probe_log.index = index;
+}
- if (type != &ftbl[FETCH_TYPE_STRING])
- return NULL; /* Only string type needs size function */
+void __trace_probe_log_err(int offset, int err_type)
+{
+ char *command, *p;
+ int i, len = 0, pos = 0;
- for (i = 0; i < FETCH_MTD_END; i++)
- if (type->fetch[i] == orig_fn)
- return ftbl[FETCH_TYPE_STRSIZE].fetch[i];
+ if (!trace_probe_log.argv)
+ return;
- WARN_ON(1); /* This should not happen */
+	/* Recalculate the length and allocate buffer */
+ for (i = 0; i < trace_probe_log.argc; i++) {
+ if (i == trace_probe_log.index)
+ pos = len;
+ len += strlen(trace_probe_log.argv[i]) + 1;
+ }
+ command = kzalloc(len, GFP_KERNEL);
+ if (!command)
+ return;
- return NULL;
+ if (trace_probe_log.index >= trace_probe_log.argc) {
+ /**
+		/*
+		 * Set the error position just after the last arg + space.
+		 * Note that len includes the terminating null and the cursor
+		 * appears at pos + 1.
+ pos = len;
+ offset = 0;
+ }
+
+ /* And make a command string from argv array */
+ p = command;
+ for (i = 0; i < trace_probe_log.argc; i++) {
+ len = strlen(trace_probe_log.argv[i]);
+ strcpy(p, trace_probe_log.argv[i]);
+ p[len] = ' ';
+ p += len + 1;
+ }
+ *(p - 1) = '\0';
+
+ tracing_log_err(NULL, trace_probe_log.subsystem, command,
+ trace_probe_err_text, err_type, pos + offset);
+
+ kfree(command);
}
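/*
 * The reconstructed command and caret position are reported through
 * tracing_log_err() and show up in tracefs' error_log, roughly like this
 * (event name and exact error string are illustrative):
 *
 *	# echo 'p:myprobe vfs_read $argX' >> kprobe_events
 *	# cat error_log
 *	[   ...] trace_kprobe: error: <BAD_VAR message>
 *	  Command: p:myprobe vfs_read $argX
 *	                              ^
 */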
/* Split symbol and offset. */
@@ -328,155 +225,280 @@
return 0;
}
+/* @buf must have MAX_EVENT_NAME_LEN size */
+int traceprobe_parse_event_name(const char **pevent, const char **pgroup,
+ char *buf, int offset)
+{
+ const char *slash, *event = *pevent;
+ int len;
+
+ slash = strchr(event, '/');
+ if (slash) {
+ if (slash == event) {
+ trace_probe_log_err(offset, NO_GROUP_NAME);
+ return -EINVAL;
+ }
+ if (slash - event + 1 > MAX_EVENT_NAME_LEN) {
+ trace_probe_log_err(offset, GROUP_TOO_LONG);
+ return -EINVAL;
+ }
+ strlcpy(buf, event, slash - event + 1);
+ if (!is_good_name(buf)) {
+ trace_probe_log_err(offset, BAD_GROUP_NAME);
+ return -EINVAL;
+ }
+ *pgroup = buf;
+ *pevent = slash + 1;
+ offset += slash - event + 1;
+ event = *pevent;
+ }
+ len = strlen(event);
+ if (len == 0) {
+ trace_probe_log_err(offset, NO_EVENT_NAME);
+ return -EINVAL;
+ } else if (len > MAX_EVENT_NAME_LEN) {
+ trace_probe_log_err(offset, EVENT_TOO_LONG);
+ return -EINVAL;
+ }
+ if (!is_good_name(event)) {
+ trace_probe_log_err(offset, BAD_EVENT_NAME);
+ return -EINVAL;
+ }
+ return 0;
+}
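/*
 * Callers pass either a "GROUP/EVENT" or a bare "EVENT" string. For example
 * (hypothetical names), "mygroup/myopen" files the event under
 * events/mygroup/, while a bare "myopen" falls back to the caller's default
 * group ("kprobes" for kprobe events). Both parts must pass is_good_name(),
 * i.e. be valid C identifiers.
 */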
+
#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
static int parse_probe_vars(char *arg, const struct fetch_type *t,
- struct fetch_param *f, bool is_return,
- bool is_kprobe)
+ struct fetch_insn *code, unsigned int flags, int offs)
{
- int ret = 0;
unsigned long param;
+ int ret = 0;
+ int len;
if (strcmp(arg, "retval") == 0) {
- if (is_return)
- f->fn = t->fetch[FETCH_MTD_retval];
- else
+ if (flags & TPARG_FL_RETURN) {
+ code->op = FETCH_OP_RETVAL;
+ } else {
+ trace_probe_log_err(offs, RETVAL_ON_PROBE);
ret = -EINVAL;
- } else if (strncmp(arg, "stack", 5) == 0) {
- if (arg[5] == '\0') {
- if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR))
- return -EINVAL;
-
- if (is_kprobe)
- f->fn = fetch_kernel_stack_address;
- else
- f->fn = fetch_user_stack_address;
- } else if (isdigit(arg[5])) {
- ret = kstrtoul(arg + 5, 10, &param);
- if (ret || (is_kprobe && param > PARAM_MAX_STACK))
+ }
+ } else if ((len = str_has_prefix(arg, "stack"))) {
+ if (arg[len] == '\0') {
+ code->op = FETCH_OP_STACKP;
+ } else if (isdigit(arg[len])) {
+ ret = kstrtoul(arg + len, 10, &param);
+ if (ret) {
+ goto inval_var;
+ } else if ((flags & TPARG_FL_KERNEL) &&
+ param > PARAM_MAX_STACK) {
+ trace_probe_log_err(offs, BAD_STACK_NUM);
ret = -EINVAL;
- else {
- f->fn = t->fetch[FETCH_MTD_stack];
- f->data = (void *)param;
+ } else {
+ code->op = FETCH_OP_STACK;
+ code->param = (unsigned int)param;
}
} else
- ret = -EINVAL;
+ goto inval_var;
} else if (strcmp(arg, "comm") == 0) {
- if (strcmp(t->name, "string") != 0 &&
- strcmp(t->name, "string_size") != 0)
+ code->op = FETCH_OP_COMM;
+#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
+ } else if (((flags & TPARG_FL_MASK) ==
+ (TPARG_FL_KERNEL | TPARG_FL_FENTRY)) &&
+ (len = str_has_prefix(arg, "arg"))) {
+ ret = kstrtoul(arg + len, 10, &param);
+ if (ret) {
+ goto inval_var;
+ } else if (!param || param > PARAM_MAX_STACK) {
+ trace_probe_log_err(offs, BAD_ARG_NUM);
return -EINVAL;
- f->fn = t->fetch[FETCH_MTD_comm];
+ }
+ code->op = FETCH_OP_ARG;
+ code->param = (unsigned int)param - 1;
+#endif
} else
- ret = -EINVAL;
+ goto inval_var;
return ret;
+
+inval_var:
+ trace_probe_log_err(offs, BAD_VAR);
+ return -EINVAL;
+}
+
+static int str_to_immediate(char *str, unsigned long *imm)
+{
+ if (isdigit(str[0]))
+ return kstrtoul(str, 0, imm);
+ else if (str[0] == '-')
+ return kstrtol(str, 0, (long *)imm);
+ else if (str[0] == '+')
+ return kstrtol(str + 1, 0, (long *)imm);
+ return -EINVAL;
+}
+
+static int __parse_imm_string(char *str, char **pbuf, int offs)
+{
+ size_t len = strlen(str);
+
+ if (str[len - 1] != '"') {
+ trace_probe_log_err(offs + len, IMMSTR_NO_CLOSE);
+ return -EINVAL;
+ }
+ *pbuf = kstrndup(str, len - 1, GFP_KERNEL);
+ return 0;
}
/* Recursive argument parser */
-static int parse_probe_arg(char *arg, const struct fetch_type *t,
- struct fetch_param *f, bool is_return, bool is_kprobe,
- const struct fetch_type *ftbl)
+static int
+parse_probe_arg(char *arg, const struct fetch_type *type,
+ struct fetch_insn **pcode, struct fetch_insn *end,
+ unsigned int flags, int offs)
{
+ struct fetch_insn *code = *pcode;
unsigned long param;
- long offset;
+ int deref = FETCH_OP_DEREF;
+ long offset = 0;
char *tmp;
int ret = 0;
switch (arg[0]) {
case '$':
- ret = parse_probe_vars(arg + 1, t, f, is_return, is_kprobe);
+ ret = parse_probe_vars(arg + 1, type, code, flags, offs);
break;
case '%': /* named register */
ret = regs_query_register_offset(arg + 1);
if (ret >= 0) {
- f->fn = t->fetch[FETCH_MTD_reg];
- f->data = (void *)(unsigned long)ret;
+ code->op = FETCH_OP_REG;
+ code->param = (unsigned int)ret;
ret = 0;
- }
+ } else
+ trace_probe_log_err(offs, BAD_REG_NAME);
break;
case '@': /* memory, file-offset or symbol */
if (isdigit(arg[1])) {
ret = kstrtoul(arg + 1, 0, &param);
- if (ret)
+ if (ret) {
+ trace_probe_log_err(offs, BAD_MEM_ADDR);
break;
-
- f->fn = t->fetch[FETCH_MTD_memory];
- f->data = (void *)param;
+ }
+ /* load address */
+ code->op = FETCH_OP_IMM;
+ code->immediate = param;
} else if (arg[1] == '+') {
/* kprobes don't support file offsets */
- if (is_kprobe)
+ if (flags & TPARG_FL_KERNEL) {
+ trace_probe_log_err(offs, FILE_ON_KPROBE);
return -EINVAL;
-
+ }
ret = kstrtol(arg + 2, 0, &offset);
- if (ret)
+ if (ret) {
+ trace_probe_log_err(offs, BAD_FILE_OFFS);
break;
+ }
- f->fn = t->fetch[FETCH_MTD_file_offset];
- f->data = (void *)offset;
+ code->op = FETCH_OP_FOFFS;
+ code->immediate = (unsigned long)offset; // imm64?
} else {
/* uprobes don't support symbols */
- if (!is_kprobe)
+ if (!(flags & TPARG_FL_KERNEL)) {
+ trace_probe_log_err(offs, SYM_ON_UPROBE);
return -EINVAL;
-
- ret = traceprobe_split_symbol_offset(arg + 1, &offset);
- if (ret)
- break;
-
- f->data = alloc_symbol_cache(arg + 1, offset);
- if (f->data)
- f->fn = t->fetch[FETCH_MTD_symbol];
+ }
+ /* Preserve symbol for updating */
+ code->op = FETCH_NOP_SYMBOL;
+ code->data = kstrdup(arg + 1, GFP_KERNEL);
+ if (!code->data)
+ return -ENOMEM;
+ if (++code == end) {
+ trace_probe_log_err(offs, TOO_MANY_OPS);
+ return -EINVAL;
+ }
+ code->op = FETCH_OP_IMM;
+ code->immediate = 0;
}
+ /* These ops fetch from memory */
+ if (++code == end) {
+ trace_probe_log_err(offs, TOO_MANY_OPS);
+ return -EINVAL;
+ }
+ *pcode = code;
+ code->op = FETCH_OP_DEREF;
+ code->offset = offset;
break;
case '+': /* deref memory */
- arg++; /* Skip '+', because kstrtol() rejects it. */
case '-':
+ if (arg[1] == 'u') {
+ deref = FETCH_OP_UDEREF;
+ arg[1] = arg[0];
+ arg++;
+ }
+ if (arg[0] == '+')
+ arg++; /* Skip '+', because kstrtol() rejects it. */
tmp = strchr(arg, '(');
- if (!tmp)
- break;
-
+ if (!tmp) {
+ trace_probe_log_err(offs, DEREF_NEED_BRACE);
+ return -EINVAL;
+ }
*tmp = '\0';
ret = kstrtol(arg, 0, &offset);
-
- if (ret)
+ if (ret) {
+ trace_probe_log_err(offs, BAD_DEREF_OFFS);
break;
-
+ }
+ offs += (tmp + 1 - arg) + (arg[0] != '-' ? 1 : 0);
arg = tmp + 1;
tmp = strrchr(arg, ')');
+ if (!tmp) {
+ trace_probe_log_err(offs + strlen(arg),
+ DEREF_OPEN_BRACE);
+ return -EINVAL;
+ } else {
+ const struct fetch_type *t2 = find_fetch_type(NULL);
- if (tmp) {
- struct deref_fetch_param *dprm;
- const struct fetch_type *t2;
-
- t2 = find_fetch_type(NULL, ftbl);
*tmp = '\0';
- dprm = kzalloc(sizeof(struct deref_fetch_param), GFP_KERNEL);
-
- if (!dprm)
- return -ENOMEM;
-
- dprm->offset = offset;
- dprm->fetch = t->fetch[FETCH_MTD_memory];
- dprm->fetch_size = get_fetch_size_function(t,
- dprm->fetch, ftbl);
- ret = parse_probe_arg(arg, t2, &dprm->orig, is_return,
- is_kprobe, ftbl);
+ ret = parse_probe_arg(arg, t2, &code, end, flags, offs);
if (ret)
- kfree(dprm);
- else {
- f->fn = t->fetch[FETCH_MTD_deref];
- f->data = (void *)dprm;
+ break;
+ if (code->op == FETCH_OP_COMM ||
+ code->op == FETCH_OP_DATA) {
+ trace_probe_log_err(offs, COMM_CANT_DEREF);
+ return -EINVAL;
}
+ if (++code == end) {
+ trace_probe_log_err(offs, TOO_MANY_OPS);
+ return -EINVAL;
+ }
+ *pcode = code;
+
+ code->op = deref;
+ code->offset = offset;
+ }
+ break;
+ case '\\': /* Immediate value */
+ if (arg[1] == '"') { /* Immediate string */
+ ret = __parse_imm_string(arg + 2, &tmp, offs + 2);
+ if (ret)
+ break;
+ code->op = FETCH_OP_DATA;
+ code->data = tmp;
+ } else {
+ ret = str_to_immediate(arg + 1, &code->immediate);
+ if (ret)
+ trace_probe_log_err(offs + 1, BAD_IMM);
+ else
+ code->op = FETCH_OP_IMM;
}
break;
}
- if (!ret && !f->fn) { /* Parsed, but do not find fetch method */
- pr_info("%s type has no corresponding fetch method.\n", t->name);
+ if (!ret && code->op == FETCH_OP_NOP) {
+ /* Parsed, but do not find fetch method */
+ trace_probe_log_err(offs, BAD_FETCH_ARG);
ret = -EINVAL;
}
-
return ret;
}
@@ -485,22 +507,15 @@
/* Bitfield type needs to be parsed into a fetch function */
static int __parse_bitfield_probe_arg(const char *bf,
const struct fetch_type *t,
- struct fetch_param *f)
+ struct fetch_insn **pcode)
{
- struct bitfield_fetch_param *bprm;
+ struct fetch_insn *code = *pcode;
unsigned long bw, bo;
char *tail;
if (*bf != 'b')
return 0;
- bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
- if (!bprm)
- return -ENOMEM;
-
- bprm->orig = *f;
- f->fn = t->fetch[FETCH_MTD_bitfield];
- f->data = (void *)bprm;
bw = simple_strtoul(bf + 1, &tail, 0); /* Use simple one */
if (bw == 0 || *tail != '@')
@@ -511,67 +526,211 @@
if (tail == bf || *tail != '/')
return -EINVAL;
+ code++;
+ if (code->op != FETCH_OP_NOP)
+ return -EINVAL;
+ *pcode = code;
- bprm->hi_shift = BYTES_TO_BITS(t->size) - (bw + bo);
- bprm->low_shift = bprm->hi_shift + bo;
+ code->op = FETCH_OP_MOD_BF;
+ code->lshift = BYTES_TO_BITS(t->size) - (bw + bo);
+ code->rshift = BYTES_TO_BITS(t->size) - bw;
+ code->basesize = t->size;
return (BYTES_TO_BITS(t->size) < (bw + bo)) ? -EINVAL : 0;
}
/* String length checking wrapper */
-int traceprobe_parse_probe_arg(char *arg, ssize_t *size,
- struct probe_arg *parg, bool is_return, bool is_kprobe,
- const struct fetch_type *ftbl)
+static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size,
+ struct probe_arg *parg, unsigned int flags, int offset)
{
- const char *t;
- int ret;
+ struct fetch_insn *code, *scode, *tmp = NULL;
+ char *t, *t2, *t3;
+ int ret, len;
- if (strlen(arg) > MAX_ARGSTR_LEN) {
- pr_info("Argument is too long.: %s\n", arg);
- return -ENOSPC;
+ len = strlen(arg);
+ if (len > MAX_ARGSTR_LEN) {
+ trace_probe_log_err(offset, ARG_TOO_LONG);
+ return -EINVAL;
+ } else if (len == 0) {
+ trace_probe_log_err(offset, NO_ARG_BODY);
+ return -EINVAL;
}
+
parg->comm = kstrdup(arg, GFP_KERNEL);
- if (!parg->comm) {
- pr_info("Failed to allocate memory for command '%s'.\n", arg);
+ if (!parg->comm)
return -ENOMEM;
- }
- t = strchr(parg->comm, ':');
+
+ t = strchr(arg, ':');
if (t) {
- arg[t - parg->comm] = '\0';
- t++;
+ *t = '\0';
+ t2 = strchr(++t, '[');
+ if (t2) {
+ *t2++ = '\0';
+ t3 = strchr(t2, ']');
+ if (!t3) {
+ offset += t2 + strlen(t2) - arg;
+ trace_probe_log_err(offset,
+ ARRAY_NO_CLOSE);
+ return -EINVAL;
+ } else if (t3[1] != '\0') {
+ trace_probe_log_err(offset + t3 + 1 - arg,
+ BAD_ARRAY_SUFFIX);
+ return -EINVAL;
+ }
+ *t3 = '\0';
+ if (kstrtouint(t2, 0, &parg->count) || !parg->count) {
+ trace_probe_log_err(offset + t2 - arg,
+ BAD_ARRAY_NUM);
+ return -EINVAL;
+ }
+ if (parg->count > MAX_ARRAY_LEN) {
+ trace_probe_log_err(offset + t2 - arg,
+ ARRAY_TOO_BIG);
+ return -EINVAL;
+ }
+ }
}
+
/*
- * The default type of $comm should be "string", and it can't be
- * dereferenced.
+ * Since $comm and immediate strings cannot be dereferenced,
+ * we can find them with strcmp.
*/
- if (!t && strcmp(arg, "$comm") == 0)
- t = "string";
- parg->type = find_fetch_type(t, ftbl);
+ if (strcmp(arg, "$comm") == 0 || strncmp(arg, "\\\"", 2) == 0) {
+ /* The type of $comm must be "string", and not an array. */
+ if (parg->count || (t && strcmp(t, "string")))
+ return -EINVAL;
+ parg->type = find_fetch_type("string");
+ } else
+ parg->type = find_fetch_type(t);
if (!parg->type) {
- pr_info("Unsupported type: %s\n", t);
+ trace_probe_log_err(offset + (t ? (t - arg) : 0), BAD_TYPE);
return -EINVAL;
}
parg->offset = *size;
- *size += parg->type->size;
- ret = parse_probe_arg(arg, parg->type, &parg->fetch, is_return,
- is_kprobe, ftbl);
+ *size += parg->type->size * (parg->count ?: 1);
- if (ret >= 0 && t != NULL)
- ret = __parse_bitfield_probe_arg(t, parg->type, &parg->fetch);
-
- if (ret >= 0) {
- parg->fetch_size.fn = get_fetch_size_function(parg->type,
- parg->fetch.fn,
- ftbl);
- parg->fetch_size.data = parg->fetch.data;
+ if (parg->count) {
+ len = strlen(parg->type->fmttype) + 6;
+ parg->fmt = kmalloc(len, GFP_KERNEL);
+ if (!parg->fmt)
+ return -ENOMEM;
+ snprintf(parg->fmt, len, "%s[%d]", parg->type->fmttype,
+ parg->count);
}
+ code = tmp = kcalloc(FETCH_INSN_MAX, sizeof(*code), GFP_KERNEL);
+ if (!code)
+ return -ENOMEM;
+ code[FETCH_INSN_MAX - 1].op = FETCH_OP_END;
+
+ ret = parse_probe_arg(arg, parg->type, &code, &code[FETCH_INSN_MAX - 1],
+ flags, offset);
+ if (ret)
+ goto fail;
+
+ /* Store operation */
+ if (!strcmp(parg->type->name, "string") ||
+ !strcmp(parg->type->name, "ustring")) {
+ if (code->op != FETCH_OP_DEREF && code->op != FETCH_OP_UDEREF &&
+ code->op != FETCH_OP_IMM && code->op != FETCH_OP_COMM &&
+ code->op != FETCH_OP_DATA) {
+ trace_probe_log_err(offset + (t ? (t - arg) : 0),
+ BAD_STRING);
+ ret = -EINVAL;
+ goto fail;
+ }
+ if ((code->op == FETCH_OP_IMM || code->op == FETCH_OP_COMM) ||
+ parg->count) {
+ /*
+ * IMM, DATA and COMM point to an actual address; those
+ * must be kept, and if parg->count != 0, this is an
+ * array of string pointers instead of the string address
+ * itself.
+ */
+ code++;
+ if (code->op != FETCH_OP_NOP) {
+ trace_probe_log_err(offset, TOO_MANY_OPS);
+ ret = -EINVAL;
+ goto fail;
+ }
+ }
+ /* If op == DEREF, replace it with STRING */
+ if (!strcmp(parg->type->name, "ustring") ||
+ code->op == FETCH_OP_UDEREF)
+ code->op = FETCH_OP_ST_USTRING;
+ else
+ code->op = FETCH_OP_ST_STRING;
+ code->size = parg->type->size;
+ parg->dynamic = true;
+ } else if (code->op == FETCH_OP_DEREF) {
+ code->op = FETCH_OP_ST_MEM;
+ code->size = parg->type->size;
+ } else if (code->op == FETCH_OP_UDEREF) {
+ code->op = FETCH_OP_ST_UMEM;
+ code->size = parg->type->size;
+ } else {
+ code++;
+ if (code->op != FETCH_OP_NOP) {
+ trace_probe_log_err(offset, TOO_MANY_OPS);
+ ret = -EINVAL;
+ goto fail;
+ }
+ code->op = FETCH_OP_ST_RAW;
+ code->size = parg->type->size;
+ }
+ scode = code;
+ /* Modify operation */
+ if (t != NULL) {
+ ret = __parse_bitfield_probe_arg(t, parg->type, &code);
+ if (ret) {
+ trace_probe_log_err(offset + t - arg, BAD_BITFIELD);
+ goto fail;
+ }
+ }
+ /* Loop(Array) operation */
+ if (parg->count) {
+ if (scode->op != FETCH_OP_ST_MEM &&
+ scode->op != FETCH_OP_ST_STRING &&
+ scode->op != FETCH_OP_ST_USTRING) {
+ trace_probe_log_err(offset + (t ? (t - arg) : 0),
+ BAD_STRING);
+ ret = -EINVAL;
+ goto fail;
+ }
+ code++;
+ if (code->op != FETCH_OP_NOP) {
+ trace_probe_log_err(offset, TOO_MANY_OPS);
+ ret = -EINVAL;
+ goto fail;
+ }
+ code->op = FETCH_OP_LP_ARRAY;
+ code->param = parg->count;
+ }
+ code++;
+ code->op = FETCH_OP_END;
+
+ /* Shrink down the code buffer */
+ parg->code = kcalloc(code - tmp + 1, sizeof(*code), GFP_KERNEL);
+ if (!parg->code)
+ ret = -ENOMEM;
+ else
+ memcpy(parg->code, tmp, sizeof(*code) * (code - tmp + 1));
+
+fail:
+ if (ret) {
+ for (code = tmp; code < tmp + FETCH_INSN_MAX; code++)
+ if (code->op == FETCH_NOP_SYMBOL ||
+ code->op == FETCH_OP_DATA)
+ kfree(code->data);
+ }
+ kfree(tmp);
+
return ret;
}
/* Return 1 if name is reserved or already used by another argument */
-int traceprobe_conflict_field_name(const char *name,
- struct probe_arg *args, int narg)
+static int traceprobe_conflict_field_name(const char *name,
+ struct probe_arg *args, int narg)
{
int i;
@@ -586,35 +745,105 @@
return 0;
}
-void traceprobe_update_arg(struct probe_arg *arg)
+int traceprobe_parse_probe_arg(struct trace_probe *tp, int i, char *arg,
+ unsigned int flags)
{
- if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn))
- update_bitfield_fetch_param(arg->fetch.data);
- else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
- update_deref_fetch_param(arg->fetch.data);
- else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
- update_symbol_cache(arg->fetch.data);
+ struct probe_arg *parg = &tp->args[i];
+ char *body;
+
+ /* Increment count for freeing args in error case */
+ tp->nr_args++;
+
+ body = strchr(arg, '=');
+ if (body) {
+ if (body - arg > MAX_ARG_NAME_LEN) {
+ trace_probe_log_err(0, ARG_NAME_TOO_LONG);
+ return -EINVAL;
+ } else if (body == arg) {
+ trace_probe_log_err(0, NO_ARG_NAME);
+ return -EINVAL;
+ }
+ parg->name = kmemdup_nul(arg, body - arg, GFP_KERNEL);
+ body++;
+ } else {
+ /* If argument name is omitted, set "argN" */
+ parg->name = kasprintf(GFP_KERNEL, "arg%d", i + 1);
+ body = arg;
+ }
+ if (!parg->name)
+ return -ENOMEM;
+
+ if (!is_good_name(parg->name)) {
+ trace_probe_log_err(0, BAD_ARG_NAME);
+ return -EINVAL;
+ }
+ if (traceprobe_conflict_field_name(parg->name, tp->args, i)) {
+ trace_probe_log_err(0, USED_ARG_NAME);
+ return -EINVAL;
+ }
+ /* Parse fetch argument */
+ return traceprobe_parse_probe_arg_body(body, &tp->size, parg, flags,
+ body - arg);
}
void traceprobe_free_probe_arg(struct probe_arg *arg)
{
- if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn))
- free_bitfield_fetch_param(arg->fetch.data);
- else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
- free_deref_fetch_param(arg->fetch.data);
- else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
- free_symbol_cache(arg->fetch.data);
+ struct fetch_insn *code = arg->code;
+ while (code && code->op != FETCH_OP_END) {
+ if (code->op == FETCH_NOP_SYMBOL ||
+ code->op == FETCH_OP_DATA)
+ kfree(code->data);
+ code++;
+ }
+ kfree(arg->code);
kfree(arg->name);
kfree(arg->comm);
+ kfree(arg->fmt);
}
+int traceprobe_update_arg(struct probe_arg *arg)
+{
+ struct fetch_insn *code = arg->code;
+ long offset;
+ char *tmp;
+ char c;
+ int ret = 0;
+
+ while (code && code->op != FETCH_OP_END) {
+ if (code->op == FETCH_NOP_SYMBOL) {
+ if (code[1].op != FETCH_OP_IMM)
+ return -EINVAL;
+
+ tmp = strpbrk(code->data, "+-");
+ if (tmp)
+ c = *tmp;
+ ret = traceprobe_split_symbol_offset(code->data,
+ &offset);
+ if (ret)
+ return ret;
+
+ code[1].immediate =
+ (unsigned long)kallsyms_lookup_name(code->data);
+ if (tmp)
+ *tmp = c;
+ if (!code[1].immediate)
+ return -ENOENT;
+ code[1].immediate += offset;
+ }
+ code++;
+ }
+ return 0;
+}
+
+/* When len=0, we just calculate the needed length */
+#define LEN_OR_ZERO (len ? len - pos : 0)
static int __set_print_fmt(struct trace_probe *tp, char *buf, int len,
bool is_return)
{
- int i;
+ struct probe_arg *parg;
+ int i, j;
int pos = 0;
-
const char *fmt, *arg;
if (!is_return) {
@@ -625,36 +854,53 @@
arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
}
- /* When len=0, we just calculate the needed length */
-#define LEN_OR_ZERO (len ? len - pos : 0)
-
pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);
for (i = 0; i < tp->nr_args; i++) {
- pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s",
- tp->args[i].name, tp->args[i].type->fmt);
+ parg = tp->args + i;
+ pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=", parg->name);
+ if (parg->count) {
+ pos += snprintf(buf + pos, LEN_OR_ZERO, "{%s",
+ parg->type->fmt);
+ for (j = 1; j < parg->count; j++)
+ pos += snprintf(buf + pos, LEN_OR_ZERO, ",%s",
+ parg->type->fmt);
+ pos += snprintf(buf + pos, LEN_OR_ZERO, "}");
+ } else
+ pos += snprintf(buf + pos, LEN_OR_ZERO, "%s",
+ parg->type->fmt);
}
pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
for (i = 0; i < tp->nr_args; i++) {
- if (strcmp(tp->args[i].type->name, "string") == 0)
+ parg = tp->args + i;
+ if (parg->count) {
+ if (strcmp(parg->type->name, "string") == 0)
+ fmt = ", __get_str(%s[%d])";
+ else
+ fmt = ", REC->%s[%d]";
+ for (j = 0; j < parg->count; j++)
+ pos += snprintf(buf + pos, LEN_OR_ZERO,
+ fmt, parg->name, j);
+ } else {
+ if (strcmp(parg->type->name, "string") == 0)
+ fmt = ", __get_str(%s)";
+ else
+ fmt = ", REC->%s";
pos += snprintf(buf + pos, LEN_OR_ZERO,
- ", __get_str(%s)",
- tp->args[i].name);
- else
- pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
- tp->args[i].name);
+ fmt, parg->name);
+ }
}
-#undef LEN_OR_ZERO
-
/* return the length of print_fmt */
return pos;
}
+#undef LEN_OR_ZERO
-int set_print_fmt(struct trace_probe *tp, bool is_return)
+int traceprobe_set_print_fmt(struct trace_probe *tp, bool is_return)
{
+ struct trace_event_call *call = trace_probe_event_call(tp);
int len;
char *print_fmt;
@@ -666,7 +912,219 @@
/* Second: actually write the @print_fmt */
__set_print_fmt(tp, print_fmt, len + 1, is_return);
- tp->call.print_fmt = print_fmt;
+ call->print_fmt = print_fmt;
return 0;
}
+
+int traceprobe_define_arg_fields(struct trace_event_call *event_call,
+ size_t offset, struct trace_probe *tp)
+{
+ int ret, i;
+
+ /* Set argument names as fields */
+ for (i = 0; i < tp->nr_args; i++) {
+ struct probe_arg *parg = &tp->args[i];
+ const char *fmt = parg->type->fmttype;
+ int size = parg->type->size;
+
+ if (parg->fmt)
+ fmt = parg->fmt;
+ if (parg->count)
+ size *= parg->count;
+ ret = trace_define_field(event_call, fmt, parg->name,
+ offset + parg->offset, size,
+ parg->type->is_signed,
+ FILTER_OTHER);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+static void trace_probe_event_free(struct trace_probe_event *tpe)
+{
+ kfree(tpe->class.system);
+ kfree(tpe->call.name);
+ kfree(tpe->call.print_fmt);
+ kfree(tpe);
+}
+
+int trace_probe_append(struct trace_probe *tp, struct trace_probe *to)
+{
+ if (trace_probe_has_sibling(tp))
+ return -EBUSY;
+
+ list_del_init(&tp->list);
+ trace_probe_event_free(tp->event);
+
+ tp->event = to->event;
+ list_add_tail(&tp->list, trace_probe_probe_list(to));
+
+ return 0;
+}
+
+void trace_probe_unlink(struct trace_probe *tp)
+{
+ list_del_init(&tp->list);
+ if (list_empty(trace_probe_probe_list(tp)))
+ trace_probe_event_free(tp->event);
+ tp->event = NULL;
+}
+
+void trace_probe_cleanup(struct trace_probe *tp)
+{
+ int i;
+
+ for (i = 0; i < tp->nr_args; i++)
+ traceprobe_free_probe_arg(&tp->args[i]);
+
+ if (tp->event)
+ trace_probe_unlink(tp);
+}
+
+int trace_probe_init(struct trace_probe *tp, const char *event,
+ const char *group)
+{
+ struct trace_event_call *call;
+ int ret = 0;
+
+ if (!event || !group)
+ return -EINVAL;
+
+ tp->event = kzalloc(sizeof(struct trace_probe_event), GFP_KERNEL);
+ if (!tp->event)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&tp->event->files);
+ INIT_LIST_HEAD(&tp->event->class.fields);
+ INIT_LIST_HEAD(&tp->event->probes);
+ INIT_LIST_HEAD(&tp->list);
+ list_add(&tp->event->probes, &tp->list);
+
+ call = trace_probe_event_call(tp);
+ call->class = &tp->event->class;
+ call->name = kstrdup(event, GFP_KERNEL);
+ if (!call->name) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ tp->event->class.system = kstrdup(group, GFP_KERNEL);
+ if (!tp->event->class.system) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ return 0;
+
+error:
+ trace_probe_cleanup(tp);
+ return ret;
+}
+
+int trace_probe_register_event_call(struct trace_probe *tp)
+{
+ struct trace_event_call *call = trace_probe_event_call(tp);
+ int ret;
+
+ ret = register_trace_event(&call->event);
+ if (!ret)
+ return -ENODEV;
+
+ ret = trace_add_event_call(call);
+ if (ret)
+ unregister_trace_event(&call->event);
+
+ return ret;
+}
+
+int trace_probe_add_file(struct trace_probe *tp, struct trace_event_file *file)
+{
+ struct event_file_link *link;
+
+ link = kmalloc(sizeof(*link), GFP_KERNEL);
+ if (!link)
+ return -ENOMEM;
+
+ link->file = file;
+ INIT_LIST_HEAD(&link->list);
+ list_add_tail_rcu(&link->list, &tp->event->files);
+ trace_probe_set_flag(tp, TP_FLAG_TRACE);
+ return 0;
+}
+
+struct event_file_link *trace_probe_get_file_link(struct trace_probe *tp,
+ struct trace_event_file *file)
+{
+ struct event_file_link *link;
+
+ trace_probe_for_each_link(link, tp) {
+ if (link->file == file)
+ return link;
+ }
+
+ return NULL;
+}
+
+int trace_probe_remove_file(struct trace_probe *tp,
+ struct trace_event_file *file)
+{
+ struct event_file_link *link;
+
+ link = trace_probe_get_file_link(tp, file);
+ if (!link)
+ return -ENOENT;
+
+ list_del_rcu(&link->list);
+ synchronize_rcu();
+ kfree(link);
+
+ if (list_empty(&tp->event->files))
+ trace_probe_clear_flag(tp, TP_FLAG_TRACE);
+
+ return 0;
+}
+
+/*
+ * Return the smallest index (starting from 1) of the argument whose
+ * type, count or name differs. If all arguments are the same, return 0.
+ */
+int trace_probe_compare_arg_type(struct trace_probe *a, struct trace_probe *b)
+{
+ int i;
+
+ /* In case of more arguments */
+ if (a->nr_args < b->nr_args)
+ return a->nr_args + 1;
+ if (a->nr_args > b->nr_args)
+ return b->nr_args + 1;
+
+ for (i = 0; i < a->nr_args; i++) {
+ if ((b->nr_args <= i) ||
+ ((a->args[i].type != b->args[i].type) ||
+ (a->args[i].count != b->args[i].count) ||
+ strcmp(a->args[i].name, b->args[i].name)))
+ return i + 1;
+ }
+
+ return 0;
+}
+
+bool trace_probe_match_command_args(struct trace_probe *tp,
+ int argc, const char **argv)
+{
+ char buf[MAX_ARGSTR_LEN + 1];
+ int i;
+
+ if (tp->nr_args < argc)
+ return false;
+
+ for (i = 0; i < argc; i++) {
+ snprintf(buf, sizeof(buf), "%s=%s",
+ tp->args[i].name, tp->args[i].comm);
+ if (strcmp(buf, argv[i]))
+ return false;
+ }
+ return true;
+}
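For review context, a minimal userspace sketch (not part of the patch) of how __trace_probe_log_err() above derives the error caret position: the lengths of the preceding argv entries plus their joining spaces are summed, then the in-argument offset is added. caret_pos() and the sample probe command are illustrative, not kernel API.

#include <stdio.h>
#include <string.h>

/* Sum of the lengths of argv[0..index-1] plus one joining space each,
 * then the offset inside argv[index] -- same arithmetic as the kernel code. */
static int caret_pos(const char **argv, int argc, int index, int offset)
{
	int i, pos = 0;

	for (i = 0; i < index && i < argc; i++)
		pos += strlen(argv[i]) + 1;
	return pos + offset;
}

int main(void)
{
	const char *argv[] = { "p:myprobe", "do_sys_open", "path=+0($arg2):string" };

	/* Caret points 5 characters into the third token */
	printf("caret at column %d\n", caret_pos(argv, 3, 2, 5));
	return 0;
}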
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 5f52668..4ee7037 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -23,6 +23,7 @@
#include <linux/stringify.h>
#include <linux/limits.h>
#include <linux/uaccess.h>
+#include <linux/bitops.h>
#include <asm/bitsperlong.h>
#include "trace.h"
@@ -30,6 +31,8 @@
#define MAX_TRACE_ARGS 128
#define MAX_ARGSTR_LEN 63
+#define MAX_ARRAY_LEN 64
+#define MAX_ARG_NAME_LEN 32
#define MAX_STRING_SIZE PATH_MAX
/* Reserved field names */
@@ -52,52 +55,80 @@
/* Flags for trace_probe */
#define TP_FLAG_TRACE 1
#define TP_FLAG_PROFILE 2
-#define TP_FLAG_REGISTERED 4
+/* data_loc: data location, compatible with u32 */
+#define make_data_loc(len, offs) \
+ (((u32)(len) << 16) | ((u32)(offs) & 0xffff))
+#define get_loc_len(dl) ((u32)(dl) >> 16)
+#define get_loc_offs(dl) ((u32)(dl) & 0xffff)
-/* data_rloc: data relative location, compatible with u32 */
-#define make_data_rloc(len, roffs) \
- (((u32)(len) << 16) | ((u32)(roffs) & 0xffff))
-#define get_rloc_len(dl) ((u32)(dl) >> 16)
-#define get_rloc_offs(dl) ((u32)(dl) & 0xffff)
-
-/*
- * Convert data_rloc to data_loc:
- * data_rloc stores the offset from data_rloc itself, but data_loc
- * stores the offset from event entry.
- */
-#define convert_rloc_to_loc(dl, offs) ((u32)(dl) + (offs))
-
-static nokprobe_inline void *get_rloc_data(u32 *dl)
-{
- return (u8 *)dl + get_rloc_offs(*dl);
-}
-
-/* For data_loc conversion */
static nokprobe_inline void *get_loc_data(u32 *dl, void *ent)
{
- return (u8 *)ent + get_rloc_offs(*dl);
+ return (u8 *)ent + get_loc_offs(*dl);
}
-/* Data fetch function type */
-typedef void (*fetch_func_t)(struct pt_regs *, void *, void *);
-/* Printing function type */
-typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *, void *);
+static nokprobe_inline u32 update_data_loc(u32 loc, int consumed)
+{
+ u32 maxlen = get_loc_len(loc);
+ u32 offset = get_loc_offs(loc);
-/* Fetch types */
-enum {
- FETCH_MTD_reg = 0,
- FETCH_MTD_stack,
- FETCH_MTD_retval,
- FETCH_MTD_comm,
- FETCH_MTD_memory,
- FETCH_MTD_symbol,
- FETCH_MTD_deref,
- FETCH_MTD_bitfield,
- FETCH_MTD_file_offset,
- FETCH_MTD_END,
+ return make_data_loc(maxlen - consumed, offset + consumed);
+}
+
+/* Printing function type */
+typedef int (*print_type_func_t)(struct trace_seq *, void *, void *);
+
+enum fetch_op {
+ FETCH_OP_NOP = 0,
+ // Stage 1 (load) ops
+ FETCH_OP_REG, /* Register : .param = offset */
+ FETCH_OP_STACK, /* Stack : .param = index */
+ FETCH_OP_STACKP, /* Stack pointer */
+ FETCH_OP_RETVAL, /* Return value */
+ FETCH_OP_IMM, /* Immediate : .immediate */
+ FETCH_OP_COMM, /* Current comm */
+ FETCH_OP_ARG, /* Function argument : .param */
+ FETCH_OP_FOFFS, /* File offset: .immediate */
+ FETCH_OP_DATA, /* Allocated data: .data */
+ // Stage 2 (dereference) op
+ FETCH_OP_DEREF, /* Dereference: .offset */
+ FETCH_OP_UDEREF, /* User-space Dereference: .offset */
+ // Stage 3 (store) ops
+ FETCH_OP_ST_RAW, /* Raw: .size */
+ FETCH_OP_ST_MEM, /* Mem: .offset, .size */
+ FETCH_OP_ST_UMEM, /* Mem: .offset, .size */
+ FETCH_OP_ST_STRING, /* String: .offset, .size */
+ FETCH_OP_ST_USTRING, /* User String: .offset, .size */
+ // Stage 4 (modify) op
+ FETCH_OP_MOD_BF, /* Bitfield: .basesize, .lshift, .rshift */
+ // Stage 5 (loop) op
+ FETCH_OP_LP_ARRAY, /* Array: .param = loop count */
+ FETCH_OP_END,
+ FETCH_NOP_SYMBOL, /* Unresolved Symbol holder */
};
+struct fetch_insn {
+ enum fetch_op op;
+ union {
+ unsigned int param;
+ struct {
+ unsigned int size;
+ int offset;
+ };
+ struct {
+ unsigned char basesize;
+ unsigned char lshift;
+ unsigned char rshift;
+ };
+ unsigned long immediate;
+ void *data;
+ };
+};
+
+/* fetch + deref*N + store + mod + end <= 16, this allows N=12, enough */
+#define FETCH_INSN_MAX 16
+#define FETCH_TOKEN_COMM (-ECOMM)
+
/* Fetch type information table */
struct fetch_type {
const char *name; /* Name of type */
@@ -106,13 +137,6 @@
print_type_func_t print; /* Print functions */
const char *fmt; /* Format string */
const char *fmttype; /* Name in format file */
- /* Fetch functions */
- fetch_func_t fetch[FETCH_MTD_END];
-};
-
-struct fetch_param {
- fetch_func_t fn;
- void *data;
};
/* For defining macros, define string/string_size types */
@@ -124,8 +148,7 @@
/* Printing in basic type function template */
#define DECLARE_BASIC_PRINT_TYPE_FUNC(type) \
-int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, const char *name, \
- void *data, void *ent); \
+int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, void *data, void *ent);\
extern const char PRINT_TYPE_FMT_NAME(type)[]
DECLARE_BASIC_PRINT_TYPE_FUNC(u8);
@@ -142,57 +165,7 @@
DECLARE_BASIC_PRINT_TYPE_FUNC(x64);
DECLARE_BASIC_PRINT_TYPE_FUNC(string);
-
-#define FETCH_FUNC_NAME(method, type) fetch_##method##_##type
-
-/* Declare macro for basic types */
-#define DECLARE_FETCH_FUNC(method, type) \
-extern void FETCH_FUNC_NAME(method, type)(struct pt_regs *regs, \
- void *data, void *dest)
-
-#define DECLARE_BASIC_FETCH_FUNCS(method) \
-DECLARE_FETCH_FUNC(method, u8); \
-DECLARE_FETCH_FUNC(method, u16); \
-DECLARE_FETCH_FUNC(method, u32); \
-DECLARE_FETCH_FUNC(method, u64)
-
-DECLARE_BASIC_FETCH_FUNCS(reg);
-#define fetch_reg_string NULL
-#define fetch_reg_string_size NULL
-
-DECLARE_BASIC_FETCH_FUNCS(retval);
-#define fetch_retval_string NULL
-#define fetch_retval_string_size NULL
-
-DECLARE_BASIC_FETCH_FUNCS(symbol);
-DECLARE_FETCH_FUNC(symbol, string);
-DECLARE_FETCH_FUNC(symbol, string_size);
-
-DECLARE_BASIC_FETCH_FUNCS(deref);
-DECLARE_FETCH_FUNC(deref, string);
-DECLARE_FETCH_FUNC(deref, string_size);
-
-DECLARE_BASIC_FETCH_FUNCS(bitfield);
-#define fetch_bitfield_string NULL
-#define fetch_bitfield_string_size NULL
-
-/* comm only makes sense as a string */
-#define fetch_comm_u8 NULL
-#define fetch_comm_u16 NULL
-#define fetch_comm_u32 NULL
-#define fetch_comm_u64 NULL
-DECLARE_FETCH_FUNC(comm, string);
-DECLARE_FETCH_FUNC(comm, string_size);
-
-/*
- * Define macro for basic types - we don't need to define s* types, because
- * we have to care only about bitwidth at recording time.
- */
-#define DEFINE_BASIC_FETCH_FUNCS(method) \
-DEFINE_FETCH_##method(u8) \
-DEFINE_FETCH_##method(u16) \
-DEFINE_FETCH_##method(u32) \
-DEFINE_FETCH_##method(u64)
+DECLARE_BASIC_PRINT_TYPE_FUNC(symbol);
/* Default (unsigned long) fetch type */
#define __DEFAULT_FETCH_TYPE(t) x##t
@@ -200,8 +173,9 @@
#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG)
#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE)
-#define ASSIGN_FETCH_FUNC(method, type) \
- [FETCH_MTD_##method] = FETCH_FUNC_NAME(method, type)
+#define __ADDR_FETCH_TYPE(t) u##t
+#define _ADDR_FETCH_TYPE(t) __ADDR_FETCH_TYPE(t)
+#define ADDR_FETCH_TYPE _ADDR_FETCH_TYPE(BITS_PER_LONG)
#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype) \
{.name = _name, \
@@ -210,64 +184,23 @@
.print = PRINT_TYPE_FUNC_NAME(ptype), \
.fmt = PRINT_TYPE_FMT_NAME(ptype), \
.fmttype = _fmttype, \
- .fetch = { \
-ASSIGN_FETCH_FUNC(reg, ftype), \
-ASSIGN_FETCH_FUNC(stack, ftype), \
-ASSIGN_FETCH_FUNC(retval, ftype), \
-ASSIGN_FETCH_FUNC(comm, ftype), \
-ASSIGN_FETCH_FUNC(memory, ftype), \
-ASSIGN_FETCH_FUNC(symbol, ftype), \
-ASSIGN_FETCH_FUNC(deref, ftype), \
-ASSIGN_FETCH_FUNC(bitfield, ftype), \
-ASSIGN_FETCH_FUNC(file_offset, ftype), \
- } \
}
-
+#define _ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype) \
+ __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, #_fmttype)
#define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \
- __ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype)
+ _ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, ptype)
/* If ptype is an alias of atype, use this macro (show atype in format) */
#define ASSIGN_FETCH_TYPE_ALIAS(ptype, atype, ftype, sign) \
- __ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #atype)
+ _ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, atype)
#define ASSIGN_FETCH_TYPE_END {}
-
-#define FETCH_TYPE_STRING 0
-#define FETCH_TYPE_STRSIZE 1
+#define MAX_ARRAY_LEN 64
#ifdef CONFIG_KPROBE_EVENTS
-struct symbol_cache;
-unsigned long update_symbol_cache(struct symbol_cache *sc);
-void free_symbol_cache(struct symbol_cache *sc);
-struct symbol_cache *alloc_symbol_cache(const char *sym, long offset);
bool trace_kprobe_on_func_entry(struct trace_event_call *call);
bool trace_kprobe_error_injectable(struct trace_event_call *call);
#else
-/* uprobes do not support symbol fetch methods */
-#define fetch_symbol_u8 NULL
-#define fetch_symbol_u16 NULL
-#define fetch_symbol_u32 NULL
-#define fetch_symbol_u64 NULL
-#define fetch_symbol_string NULL
-#define fetch_symbol_string_size NULL
-
-struct symbol_cache {
-};
-static inline unsigned long __used update_symbol_cache(struct symbol_cache *sc)
-{
- return 0;
-}
-
-static inline void __used free_symbol_cache(struct symbol_cache *sc)
-{
-}
-
-static inline struct symbol_cache * __used
-alloc_symbol_cache(const char *sym, long offset)
-{
- return NULL;
-}
-
static inline bool trace_kprobe_on_func_entry(struct trace_event_call *call)
{
return false;
@@ -280,19 +213,28 @@
#endif /* CONFIG_KPROBE_EVENTS */
struct probe_arg {
- struct fetch_param fetch;
- struct fetch_param fetch_size;
+ struct fetch_insn *code;
+ bool dynamic;/* Dynamic array (string) is used */
unsigned int offset; /* Offset from argument entry */
+ unsigned int count; /* Array count */
const char *name; /* Name of this argument */
const char *comm; /* Command of this argument */
+ char *fmt; /* Format string if needed */
const struct fetch_type *type; /* Type of this argument */
};
-struct trace_probe {
+/* Event call and class holder */
+struct trace_probe_event {
unsigned int flags; /* For TP_FLAG_* */
struct trace_event_class class;
struct trace_event_call call;
struct list_head files;
+ struct list_head probes;
+};
+
+struct trace_probe {
+ struct list_head list;
+ struct trace_probe_event *event;
ssize_t size; /* trace entry size */
unsigned int nr_args;
struct probe_arg args[];
@@ -303,22 +245,102 @@
struct list_head list;
};
+static inline bool trace_probe_test_flag(struct trace_probe *tp,
+ unsigned int flag)
+{
+ return !!(tp->event->flags & flag);
+}
+
+static inline void trace_probe_set_flag(struct trace_probe *tp,
+ unsigned int flag)
+{
+ tp->event->flags |= flag;
+}
+
+static inline void trace_probe_clear_flag(struct trace_probe *tp,
+ unsigned int flag)
+{
+ tp->event->flags &= ~flag;
+}
+
static inline bool trace_probe_is_enabled(struct trace_probe *tp)
{
- return !!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE));
+ return trace_probe_test_flag(tp, TP_FLAG_TRACE | TP_FLAG_PROFILE);
}
-static inline bool trace_probe_is_registered(struct trace_probe *tp)
+static inline const char *trace_probe_name(struct trace_probe *tp)
{
- return !!(tp->flags & TP_FLAG_REGISTERED);
+ return trace_event_name(&tp->event->call);
}
-static nokprobe_inline void call_fetch(struct fetch_param *fprm,
- struct pt_regs *regs, void *dest)
+static inline const char *trace_probe_group_name(struct trace_probe *tp)
{
- return fprm->fn(regs, fprm->data, dest);
+ return tp->event->call.class->system;
}
+static inline struct trace_event_call *
+ trace_probe_event_call(struct trace_probe *tp)
+{
+ return &tp->event->call;
+}
+
+static inline struct trace_probe_event *
+trace_probe_event_from_call(struct trace_event_call *event_call)
+{
+ return container_of(event_call, struct trace_probe_event, call);
+}
+
+static inline struct trace_probe *
+trace_probe_primary_from_call(struct trace_event_call *call)
+{
+ struct trace_probe_event *tpe = trace_probe_event_from_call(call);
+
+ return list_first_entry(&tpe->probes, struct trace_probe, list);
+}
+
+static inline struct list_head *trace_probe_probe_list(struct trace_probe *tp)
+{
+ return &tp->event->probes;
+}
+
+static inline bool trace_probe_has_sibling(struct trace_probe *tp)
+{
+ struct list_head *list = trace_probe_probe_list(tp);
+
+ return !list_empty(list) && !list_is_singular(list);
+}
+
+static inline int trace_probe_unregister_event_call(struct trace_probe *tp)
+{
+ /* tp->event is unregistered in trace_remove_event_call() */
+ return trace_remove_event_call(&tp->event->call);
+}
+
+static inline bool trace_probe_has_single_file(struct trace_probe *tp)
+{
+ return !!list_is_singular(&tp->event->files);
+}
+
+int trace_probe_init(struct trace_probe *tp, const char *event,
+ const char *group);
+void trace_probe_cleanup(struct trace_probe *tp);
+int trace_probe_append(struct trace_probe *tp, struct trace_probe *to);
+void trace_probe_unlink(struct trace_probe *tp);
+int trace_probe_register_event_call(struct trace_probe *tp);
+int trace_probe_add_file(struct trace_probe *tp, struct trace_event_file *file);
+int trace_probe_remove_file(struct trace_probe *tp,
+ struct trace_event_file *file);
+struct event_file_link *trace_probe_get_file_link(struct trace_probe *tp,
+ struct trace_event_file *file);
+int trace_probe_compare_arg_type(struct trace_probe *a, struct trace_probe *b);
+bool trace_probe_match_command_args(struct trace_probe *tp,
+ int argc, const char **argv);
+
+#define trace_probe_for_each_link(pos, tp) \
+ list_for_each_entry(pos, &(tp)->event->files, list)
+#define trace_probe_for_each_link_rcu(pos, tp) \
+ list_for_each_entry_rcu(pos, &(tp)->event->files, list)
+
/* Check the name is good for event/group/fields */
static inline bool is_good_name(const char *name)
{
@@ -331,79 +353,22 @@
return true;
}
-static inline struct event_file_link *
-find_event_file_link(struct trace_probe *tp, struct trace_event_file *file)
-{
- struct event_file_link *link;
+#define TPARG_FL_RETURN BIT(0)
+#define TPARG_FL_KERNEL BIT(1)
+#define TPARG_FL_FENTRY BIT(2)
+#define TPARG_FL_MASK GENMASK(2, 0)
- list_for_each_entry(link, &tp->files, list)
- if (link->file == file)
- return link;
+extern int traceprobe_parse_probe_arg(struct trace_probe *tp, int i,
+ char *arg, unsigned int flags);
- return NULL;
-}
-
-extern int traceprobe_parse_probe_arg(char *arg, ssize_t *size,
- struct probe_arg *parg, bool is_return, bool is_kprobe,
- const struct fetch_type *ftbl);
-
-extern int traceprobe_conflict_field_name(const char *name,
- struct probe_arg *args, int narg);
-
-extern void traceprobe_update_arg(struct probe_arg *arg);
+extern int traceprobe_update_arg(struct probe_arg *arg);
extern void traceprobe_free_probe_arg(struct probe_arg *arg);
extern int traceprobe_split_symbol_offset(char *symbol, long *offset);
+int traceprobe_parse_event_name(const char **pevent, const char **pgroup,
+ char *buf, int offset);
-/* Sum up total data length for dynamic arraies (strings) */
-static nokprobe_inline int
-__get_data_size(struct trace_probe *tp, struct pt_regs *regs)
-{
- int i, ret = 0;
- u32 len;
-
- for (i = 0; i < tp->nr_args; i++)
- if (unlikely(tp->args[i].fetch_size.fn)) {
- call_fetch(&tp->args[i].fetch_size, regs, &len);
- ret += len;
- }
-
- return ret;
-}
-
-/* Store the value of each argument */
-static nokprobe_inline void
-store_trace_args(int ent_size, struct trace_probe *tp, struct pt_regs *regs,
- u8 *data, int maxlen)
-{
- int i;
- u32 end = tp->size;
- u32 *dl; /* Data (relative) location */
-
- for (i = 0; i < tp->nr_args; i++) {
- if (unlikely(tp->args[i].fetch_size.fn)) {
- /*
- * First, we set the relative location and
- * maximum data length to *dl
- */
- dl = (u32 *)(data + tp->args[i].offset);
- *dl = make_data_rloc(maxlen, end - tp->args[i].offset);
- /* Then try to fetch string or dynamic array data */
- call_fetch(&tp->args[i].fetch, regs, dl);
- /* Reduce maximum length */
- end += get_rloc_len(*dl);
- maxlen -= get_rloc_len(*dl);
- /* Trick here, convert data_rloc to data_loc */
- *dl = convert_rloc_to_loc(*dl,
- ent_size + tp->args[i].offset);
- } else
- /* Just fetching data normally */
- call_fetch(&tp->args[i].fetch, regs,
- data + tp->args[i].offset);
- }
-}
-
-extern int set_print_fmt(struct trace_probe *tp, bool is_return);
+extern int traceprobe_set_print_fmt(struct trace_probe *tp, bool is_return);
#ifdef CONFIG_PERF_EVENTS
extern struct trace_event_call *
@@ -412,6 +377,87 @@
extern void destroy_local_trace_kprobe(struct trace_event_call *event_call);
extern struct trace_event_call *
-create_local_trace_uprobe(char *name, unsigned long offs, bool is_return);
+create_local_trace_uprobe(char *name, unsigned long offs,
+ unsigned long ref_ctr_offset, bool is_return);
extern void destroy_local_trace_uprobe(struct trace_event_call *event_call);
#endif
+extern int traceprobe_define_arg_fields(struct trace_event_call *event_call,
+ size_t offset, struct trace_probe *tp);
+
+#undef ERRORS
+#define ERRORS \
+ C(FILE_NOT_FOUND, "Failed to find the given file"), \
+ C(NO_REGULAR_FILE, "Not a regular file"), \
+ C(BAD_REFCNT, "Invalid reference counter offset"), \
+ C(REFCNT_OPEN_BRACE, "Reference counter brace is not closed"), \
+ C(BAD_REFCNT_SUFFIX, "Reference counter has wrong suffix"), \
+ C(BAD_UPROBE_OFFS, "Invalid uprobe offset"), \
+ C(MAXACT_NO_KPROBE, "Maxactive is not for kprobe"), \
+ C(BAD_MAXACT, "Invalid maxactive number"), \
+ C(MAXACT_TOO_BIG, "Maxactive is too big"), \
+ C(BAD_PROBE_ADDR, "Invalid probed address or symbol"), \
+ C(BAD_RETPROBE, "Retprobe address must be a function entry"), \
+ C(NO_GROUP_NAME, "Group name is not specified"), \
+ C(GROUP_TOO_LONG, "Group name is too long"), \
+ C(BAD_GROUP_NAME, "Group name must follow the same rules as C identifiers"), \
+ C(NO_EVENT_NAME, "Event name is not specified"), \
+ C(EVENT_TOO_LONG, "Event name is too long"), \
+ C(BAD_EVENT_NAME, "Event name must follow the same rules as C identifiers"), \
+ C(RETVAL_ON_PROBE, "$retval is not available on probe"), \
+ C(BAD_STACK_NUM, "Invalid stack number"), \
+ C(BAD_ARG_NUM, "Invalid argument number"), \
+ C(BAD_VAR, "Invalid $-variable specified"), \
+ C(BAD_REG_NAME, "Invalid register name"), \
+ C(BAD_MEM_ADDR, "Invalid memory address"), \
+ C(BAD_IMM, "Invalid immediate value"), \
+ C(IMMSTR_NO_CLOSE, "String is not closed with '\"'"), \
+ C(FILE_ON_KPROBE, "File offset is not available with kprobe"), \
+ C(BAD_FILE_OFFS, "Invalid file offset value"), \
+ C(SYM_ON_UPROBE, "Symbol is not available with uprobe"), \
+ C(TOO_MANY_OPS, "Dereference is too deeply nested"), \
+ C(DEREF_NEED_BRACE, "Dereference needs a brace"), \
+ C(BAD_DEREF_OFFS, "Invalid dereference offset"), \
+ C(DEREF_OPEN_BRACE, "Dereference brace is not closed"), \
+ C(COMM_CANT_DEREF, "$comm can not be dereferenced"), \
+ C(BAD_FETCH_ARG, "Invalid fetch argument"), \
+ C(ARRAY_NO_CLOSE, "Array is not closed"), \
+ C(BAD_ARRAY_SUFFIX, "Array has wrong suffix"), \
+ C(BAD_ARRAY_NUM, "Invalid array size"), \
+ C(ARRAY_TOO_BIG, "Array number is too big"), \
+ C(BAD_TYPE, "Unknown type is specified"), \
+ C(BAD_STRING, "String accepts only memory argument"), \
+ C(BAD_BITFIELD, "Invalid bitfield"), \
+ C(ARG_NAME_TOO_LONG, "Argument name is too long"), \
+ C(NO_ARG_NAME, "Argument name is not specified"), \
+ C(BAD_ARG_NAME, "Argument name must follow the same rules as C identifiers"), \
+ C(USED_ARG_NAME, "This argument name is already used"), \
+ C(ARG_TOO_LONG, "Argument expression is too long"), \
+ C(NO_ARG_BODY, "No argument expression"), \
+ C(BAD_INSN_BNDRY, "Probe point is not an instruction boundary"),\
+ C(FAIL_REG_PROBE, "Failed to register probe event"),\
+ C(DIFF_PROBE_TYPE, "Probe type is different from existing probe"),\
+ C(DIFF_ARG_TYPE, "Argument type or name is different from existing probe"),\
+ C(SAME_PROBE, "There is already the exact same probe event"),
+
+#undef C
+#define C(a, b) TP_ERR_##a
+
+/* Define TP_ERR_ */
+enum { ERRORS };
+
+/* Error text is defined in trace_probe.c */
+
+struct trace_probe_log {
+ const char *subsystem;
+ const char **argv;
+ int argc;
+ int index;
+};
+
+void trace_probe_log_init(const char *subsystem, int argc, const char **argv);
+void trace_probe_log_set_index(int index);
+void trace_probe_log_clear(void);
+void __trace_probe_log_err(int offset, int err);
+
+#define trace_probe_log_err(offs, err) \
+ __trace_probe_log_err(offs, TP_ERR_##err)
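A standalone sketch (not part of the patch) of the data_loc packing this header introduces: the dynamic-data length sits in the upper 16 bits of a u32 and the offset from the event entry in the lower 16 bits. The macro bodies are copied from above; uint32_t and main() are only for the illustration.

#include <stdio.h>
#include <stdint.h>

#define make_data_loc(len, offs) \
	(((uint32_t)(len) << 16) | ((uint32_t)(offs) & 0xffff))
#define get_loc_len(dl)  ((uint32_t)(dl) >> 16)
#define get_loc_offs(dl) ((uint32_t)(dl) & 0xffff)

int main(void)
{
	/* A 32-byte string stored 0x40 bytes after the event entry */
	uint32_t dl = make_data_loc(32, 0x40);

	printf("len=%u offs=0x%x\n", get_loc_len(dl), get_loc_offs(dl));

	/* Consuming 12 bytes mirrors what update_data_loc(dl, 12) does */
	dl = make_data_loc(get_loc_len(dl) - 12, get_loc_offs(dl) + 12);
	printf("len=%u offs=0x%x\n", get_loc_len(dl), get_loc_offs(dl));
	return 0;
}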
diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h
new file mode 100644
index 0000000..e528282
--- /dev/null
+++ b/kernel/trace/trace_probe_tmpl.h
@@ -0,0 +1,242 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Traceprobe fetch helper inlines
+ */
+
+static nokprobe_inline void
+fetch_store_raw(unsigned long val, struct fetch_insn *code, void *buf)
+{
+ switch (code->size) {
+ case 1:
+ *(u8 *)buf = (u8)val;
+ break;
+ case 2:
+ *(u16 *)buf = (u16)val;
+ break;
+ case 4:
+ *(u32 *)buf = (u32)val;
+ break;
+ case 8:
+ //TBD: 32bit signed
+ *(u64 *)buf = (u64)val;
+ break;
+ default:
+ *(unsigned long *)buf = val;
+ }
+}
+
+static nokprobe_inline void
+fetch_apply_bitfield(struct fetch_insn *code, void *buf)
+{
+ switch (code->basesize) {
+ case 1:
+ *(u8 *)buf <<= code->lshift;
+ *(u8 *)buf >>= code->rshift;
+ break;
+ case 2:
+ *(u16 *)buf <<= code->lshift;
+ *(u16 *)buf >>= code->rshift;
+ break;
+ case 4:
+ *(u32 *)buf <<= code->lshift;
+ *(u32 *)buf >>= code->rshift;
+ break;
+ case 8:
+ *(u64 *)buf <<= code->lshift;
+ *(u64 *)buf >>= code->rshift;
+ break;
+ }
+}
+
+/*
+ * These functions must be defined for each callsite.
+ * Return consumed dynamic data size (>= 0), or error (< 0).
+ * If dest is NULL, don't store result and return required dynamic data size.
+ */
+static int
+process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs,
+ void *dest, void *base);
+static nokprobe_inline int fetch_store_strlen(unsigned long addr);
+static nokprobe_inline int
+fetch_store_string(unsigned long addr, void *dest, void *base);
+static nokprobe_inline int fetch_store_strlen_user(unsigned long addr);
+static nokprobe_inline int
+fetch_store_string_user(unsigned long addr, void *dest, void *base);
+static nokprobe_inline int
+probe_mem_read(void *dest, void *src, size_t size);
+static nokprobe_inline int
+probe_mem_read_user(void *dest, void *src, size_t size);
+
+/* From the 2nd stage on, the routine is the same */
+static nokprobe_inline int
+process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val,
+ void *dest, void *base)
+{
+ struct fetch_insn *s3 = NULL;
+ int total = 0, ret = 0, i = 0;
+ u32 loc = 0;
+ unsigned long lval = val;
+
+stage2:
+ /* 2nd stage: dereference memory if needed */
+ do {
+ if (code->op == FETCH_OP_DEREF) {
+ lval = val;
+ ret = probe_mem_read(&val, (void *)val + code->offset,
+ sizeof(val));
+ } else if (code->op == FETCH_OP_UDEREF) {
+ lval = val;
+ ret = probe_mem_read_user(&val,
+ (void *)val + code->offset, sizeof(val));
+ } else
+ break;
+ if (ret)
+ return ret;
+ code++;
+ } while (1);
+
+ s3 = code;
+stage3:
+ /* 3rd stage: store value to buffer */
+ if (unlikely(!dest)) {
+ if (code->op == FETCH_OP_ST_STRING) {
+ ret = fetch_store_strlen(val + code->offset);
+ code++;
+ goto array;
+ } else if (code->op == FETCH_OP_ST_USTRING) {
+ ret += fetch_store_strlen_user(val + code->offset);
+ code++;
+ goto array;
+ } else
+ return -EILSEQ;
+ }
+
+ switch (code->op) {
+ case FETCH_OP_ST_RAW:
+ fetch_store_raw(val, code, dest);
+ break;
+ case FETCH_OP_ST_MEM:
+ probe_mem_read(dest, (void *)val + code->offset, code->size);
+ break;
+ case FETCH_OP_ST_UMEM:
+ probe_mem_read_user(dest, (void *)val + code->offset, code->size);
+ break;
+ case FETCH_OP_ST_STRING:
+ loc = *(u32 *)dest;
+ ret = fetch_store_string(val + code->offset, dest, base);
+ break;
+ case FETCH_OP_ST_USTRING:
+ loc = *(u32 *)dest;
+ ret = fetch_store_string_user(val + code->offset, dest, base);
+ break;
+ default:
+ return -EILSEQ;
+ }
+ code++;
+
+ /* 4th stage: modify stored value if needed */
+ if (code->op == FETCH_OP_MOD_BF) {
+ fetch_apply_bitfield(code, dest);
+ code++;
+ }
+
+array:
+ /* the last stage: Loop on array */
+ if (code->op == FETCH_OP_LP_ARRAY) {
+ total += ret;
+ if (++i < code->param) {
+ code = s3;
+ if (s3->op != FETCH_OP_ST_STRING &&
+ s3->op != FETCH_OP_ST_USTRING) {
+ dest += s3->size;
+ val += s3->size;
+ goto stage3;
+ }
+ code--;
+ val = lval + sizeof(char *);
+ if (dest) {
+ dest += sizeof(u32);
+ *(u32 *)dest = update_data_loc(loc, ret);
+ }
+ goto stage2;
+ }
+ code++;
+ ret = total;
+ }
+
+ return code->op == FETCH_OP_END ? ret : -EILSEQ;
+}
+
+/* Sum up total data length for dynamic arrays (strings) */
+static nokprobe_inline int
+__get_data_size(struct trace_probe *tp, struct pt_regs *regs)
+{
+ struct probe_arg *arg;
+ int i, len, ret = 0;
+
+ for (i = 0; i < tp->nr_args; i++) {
+ arg = tp->args + i;
+ if (unlikely(arg->dynamic)) {
+ len = process_fetch_insn(arg->code, regs, NULL, NULL);
+ if (len > 0)
+ ret += len;
+ }
+ }
+
+ return ret;
+}
+
+/* Store the value of each argument */
+static nokprobe_inline void
+store_trace_args(void *data, struct trace_probe *tp, struct pt_regs *regs,
+ int header_size, int maxlen)
+{
+ struct probe_arg *arg;
+ void *base = data - header_size;
+ void *dyndata = data + tp->size;
+ u32 *dl; /* Data location */
+ int ret, i;
+
+ for (i = 0; i < tp->nr_args; i++) {
+ arg = tp->args + i;
+ dl = data + arg->offset;
+ /* Point to the dynamic data area if needed */
+ if (unlikely(arg->dynamic))
+ *dl = make_data_loc(maxlen, dyndata - base);
+ ret = process_fetch_insn(arg->code, regs, dl, base);
+ if (unlikely(ret < 0 && arg->dynamic)) {
+ *dl = make_data_loc(0, dyndata - base);
+ } else {
+ dyndata += ret;
+ maxlen -= ret;
+ }
+ }
+}
+
+static inline int
+print_probe_args(struct trace_seq *s, struct probe_arg *args, int nr_args,
+ u8 *data, void *field)
+{
+ void *p;
+ int i, j;
+
+ for (i = 0; i < nr_args; i++) {
+ struct probe_arg *a = args + i;
+
+ trace_seq_printf(s, " %s=", a->name);
+ if (likely(!a->count)) {
+ if (!a->type->print(s, data + a->offset, field))
+ return -ENOMEM;
+ continue;
+ }
+ trace_seq_putc(s, '{');
+ p = data + a->offset;
+ for (j = 0; j < a->count; j++) {
+ if (!a->type->print(s, p, field))
+ return -ENOMEM;
+ trace_seq_putc(s, j == a->count - 1 ? '}' : ',');
+ p += a->type->size;
+ }
+ }
+ return 0;
+}
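A hedged, userspace-only model (not part of the patch) of the staged fetch_insn interpreter above: a stage-1 immediate load, a stage-2 dereference, and a stage-3 store, with plain memcpy standing in for probe_mem_read(). The enum, struct and run() here are simplified stand-ins, not the kernel definitions.

#include <stdio.h>
#include <string.h>

enum op { OP_IMM, OP_DEREF, OP_ST_MEM, OP_END };

struct insn {
	enum op op;
	long imm;	/* OP_IMM: value/address to load */
	int offset;	/* OP_DEREF/OP_ST_MEM: byte offset */
	int size;	/* OP_ST_MEM: bytes to store */
};

static void run(const struct insn *code, void *dest)
{
	long val = 0;

	for (; code->op != OP_END; code++) {
		switch (code->op) {
		case OP_IMM:	/* stage 1: load */
			val = code->imm;
			break;
		case OP_DEREF:	/* stage 2: follow the pointer */
			memcpy(&val, (char *)val + code->offset, sizeof(val));
			break;
		case OP_ST_MEM:	/* stage 3: copy the final value out */
			memcpy(dest, (char *)val + code->offset, code->size);
			break;
		default:
			break;
		}
	}
}

int main(void)
{
	long target = 42, out = 0;
	long *slot = &target;
	struct insn prog[] = {
		{ OP_IMM, (long)&slot, 0, 0 },
		{ OP_DEREF, 0, 0, 0 },
		{ OP_ST_MEM, 0, 0, (int)sizeof(long) },
		{ OP_END, 0, 0, 0 },
	};

	run(prog, &out);
	printf("%ld\n", out);	/* prints 42 */
	return 0;
}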
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 7d04b98..5e43b96 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -35,26 +35,19 @@
static void wakeup_reset(struct trace_array *tr);
static void __wakeup_reset(struct trace_array *tr);
+static int start_func_tracer(struct trace_array *tr, int graph);
+static void stop_func_tracer(struct trace_array *tr, int graph);
static int save_flags;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-static int wakeup_display_graph(struct trace_array *tr, int set);
# define is_graph(tr) ((tr)->trace_flags & TRACE_ITER_DISPLAY_GRAPH)
#else
-static inline int wakeup_display_graph(struct trace_array *tr, int set)
-{
- return 0;
-}
# define is_graph(tr) false
#endif
-
#ifdef CONFIG_FUNCTION_TRACER
-static int wakeup_graph_entry(struct ftrace_graph_ent *trace);
-static void wakeup_graph_return(struct ftrace_graph_ret *trace);
-
static bool function_enabled;
/*
@@ -104,122 +97,8 @@
return 0;
}
-/*
- * wakeup uses its own tracer function to keep the overhead down:
- */
-static void
-wakeup_tracer_call(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *op, struct pt_regs *pt_regs)
-{
- struct trace_array *tr = wakeup_trace;
- struct trace_array_cpu *data;
- unsigned long flags;
- int pc;
-
- if (!func_prolog_preempt_disable(tr, &data, &pc))
- return;
-
- local_irq_save(flags);
- trace_function(tr, ip, parent_ip, flags, pc);
- local_irq_restore(flags);
-
- atomic_dec(&data->disabled);
- preempt_enable_notrace();
-}
-
-static int register_wakeup_function(struct trace_array *tr, int graph, int set)
-{
- int ret;
-
- /* 'set' is set if TRACE_ITER_FUNCTION is about to be set */
- if (function_enabled || (!set && !(tr->trace_flags & TRACE_ITER_FUNCTION)))
- return 0;
-
- if (graph)
- ret = register_ftrace_graph(&wakeup_graph_return,
- &wakeup_graph_entry);
- else
- ret = register_ftrace_function(tr->ops);
-
- if (!ret)
- function_enabled = true;
-
- return ret;
-}
-
-static void unregister_wakeup_function(struct trace_array *tr, int graph)
-{
- if (!function_enabled)
- return;
-
- if (graph)
- unregister_ftrace_graph();
- else
- unregister_ftrace_function(tr->ops);
-
- function_enabled = false;
-}
-
-static int wakeup_function_set(struct trace_array *tr, u32 mask, int set)
-{
- if (!(mask & TRACE_ITER_FUNCTION))
- return 0;
-
- if (set)
- register_wakeup_function(tr, is_graph(tr), 1);
- else
- unregister_wakeup_function(tr, is_graph(tr));
- return 1;
-}
-#else
-static int register_wakeup_function(struct trace_array *tr, int graph, int set)
-{
- return 0;
-}
-static void unregister_wakeup_function(struct trace_array *tr, int graph) { }
-static int wakeup_function_set(struct trace_array *tr, u32 mask, int set)
-{
- return 0;
-}
-#endif /* CONFIG_FUNCTION_TRACER */
-
-static int wakeup_flag_changed(struct trace_array *tr, u32 mask, int set)
-{
- struct tracer *tracer = tr->current_trace;
-
- if (wakeup_function_set(tr, mask, set))
- return 0;
-
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- if (mask & TRACE_ITER_DISPLAY_GRAPH)
- return wakeup_display_graph(tr, set);
-#endif
- return trace_keep_overwrite(tracer, mask, set);
-}
-
-static int start_func_tracer(struct trace_array *tr, int graph)
-{
- int ret;
-
- ret = register_wakeup_function(tr, graph, 0);
-
- if (!ret && tracing_is_enabled())
- tracer_enabled = 1;
- else
- tracer_enabled = 0;
-
- return ret;
-}
-
-static void stop_func_tracer(struct trace_array *tr, int graph)
-{
- tracer_enabled = 0;
-
- unregister_wakeup_function(tr, graph);
-}
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static int wakeup_display_graph(struct trace_array *tr, int set)
{
if (!(is_graph(tr) ^ set))
@@ -283,6 +162,11 @@
return;
}
+static struct fgraph_ops fgraph_wakeup_ops = {
+ .entryfunc = &wakeup_graph_entry,
+ .retfunc = &wakeup_graph_return,
+};
+
static void wakeup_trace_open(struct trace_iterator *iter)
{
if (is_graph(iter->tr))
@@ -296,8 +180,11 @@
}
#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC | \
- TRACE_GRAPH_PRINT_ABS_TIME | \
- TRACE_GRAPH_PRINT_DURATION)
+ TRACE_GRAPH_PRINT_CPU | \
+ TRACE_GRAPH_PRINT_REL_TIME | \
+ TRACE_GRAPH_PRINT_DURATION | \
+ TRACE_GRAPH_PRINT_OVERHEAD | \
+ TRACE_GRAPH_PRINT_IRQS)
static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
{
@@ -318,6 +205,100 @@
else
trace_default_header(s);
}
+#endif /* else CONFIG_FUNCTION_GRAPH_TRACER */
+
+/*
+ * wakeup uses its own tracer function to keep the overhead down:
+ */
+static void
+wakeup_tracer_call(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct pt_regs *pt_regs)
+{
+ struct trace_array *tr = wakeup_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ int pc;
+
+ if (!func_prolog_preempt_disable(tr, &data, &pc))
+ return;
+
+ local_irq_save(flags);
+ trace_function(tr, ip, parent_ip, flags, pc);
+ local_irq_restore(flags);
+
+ atomic_dec(&data->disabled);
+ preempt_enable_notrace();
+}
+
+static int register_wakeup_function(struct trace_array *tr, int graph, int set)
+{
+ int ret;
+
+ /* 'set' is set if TRACE_ITER_FUNCTION is about to be set */
+ if (function_enabled || (!set && !(tr->trace_flags & TRACE_ITER_FUNCTION)))
+ return 0;
+
+ if (graph)
+ ret = register_ftrace_graph(&fgraph_wakeup_ops);
+ else
+ ret = register_ftrace_function(tr->ops);
+
+ if (!ret)
+ function_enabled = true;
+
+ return ret;
+}
+
+static void unregister_wakeup_function(struct trace_array *tr, int graph)
+{
+ if (!function_enabled)
+ return;
+
+ if (graph)
+ unregister_ftrace_graph(&fgraph_wakeup_ops);
+ else
+ unregister_ftrace_function(tr->ops);
+
+ function_enabled = false;
+}
+
+static int wakeup_function_set(struct trace_array *tr, u32 mask, int set)
+{
+ if (!(mask & TRACE_ITER_FUNCTION))
+ return 0;
+
+ if (set)
+ register_wakeup_function(tr, is_graph(tr), 1);
+ else
+ unregister_wakeup_function(tr, is_graph(tr));
+ return 1;
+}
+#else /* CONFIG_FUNCTION_TRACER */
+static int register_wakeup_function(struct trace_array *tr, int graph, int set)
+{
+ return 0;
+}
+static void unregister_wakeup_function(struct trace_array *tr, int graph) { }
+static int wakeup_function_set(struct trace_array *tr, u32 mask, int set)
+{
+ return 0;
+}
+#endif /* else CONFIG_FUNCTION_TRACER */
+
+#ifndef CONFIG_FUNCTION_GRAPH_TRACER
+static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
+{
+ return TRACE_TYPE_UNHANDLED;
+}
+
+static void wakeup_trace_open(struct trace_iterator *iter) { }
+static void wakeup_trace_close(struct trace_iterator *iter) { }
+
+static void wakeup_print_header(struct seq_file *s)
+{
+ trace_default_header(s);
+}
+#endif /* !CONFIG_FUNCTION_GRAPH_TRACER */
static void
__trace_function(struct trace_array *tr,
@@ -329,34 +310,42 @@
else
trace_function(tr, ip, parent_ip, flags, pc);
}
-#else
-#define __trace_function trace_function
-static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
+static int wakeup_flag_changed(struct trace_array *tr, u32 mask, int set)
{
- return TRACE_TYPE_UNHANDLED;
+ struct tracer *tracer = tr->current_trace;
+
+ if (wakeup_function_set(tr, mask, set))
+ return 0;
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ if (mask & TRACE_ITER_DISPLAY_GRAPH)
+ return wakeup_display_graph(tr, set);
+#endif
+
+ return trace_keep_overwrite(tracer, mask, set);
}
-static void wakeup_trace_open(struct trace_iterator *iter) { }
-static void wakeup_trace_close(struct trace_iterator *iter) { }
+static int start_func_tracer(struct trace_array *tr, int graph)
+{
+ int ret;
-#ifdef CONFIG_FUNCTION_TRACER
-static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
-{
- return -1;
+ ret = register_wakeup_function(tr, graph, 0);
+
+ if (!ret && tracing_is_enabled())
+ tracer_enabled = 1;
+ else
+ tracer_enabled = 0;
+
+ return ret;
}
-static void wakeup_graph_return(struct ftrace_graph_ret *trace) { }
-static void wakeup_print_header(struct seq_file *s)
+
+static void stop_func_tracer(struct trace_array *tr, int graph)
{
- trace_default_header(s);
+ tracer_enabled = 0;
+
+ unregister_wakeup_function(tr, graph);
}
-#else
-static void wakeup_print_header(struct seq_file *s)
-{
- trace_latency_header(s);
-}
-#endif /* CONFIG_FUNCTION_TRACER */
-#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
/*
* Should this new latency be reported/recorded?
@@ -486,6 +475,7 @@
__trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
+ __trace_stack(wakeup_trace, flags, 0, pc);
T0 = data->preempt_timestamp;
T1 = ftrace_now(cpu);
@@ -496,7 +486,7 @@
if (likely(!is_tracing_stopped())) {
wakeup_trace->max_latency = delta;
- update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
+ update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu, NULL);
}
out_unlock:
@@ -589,14 +579,14 @@
else
tracing_dl = 0;
- wakeup_task = p;
- get_task_struct(wakeup_task);
+ wakeup_task = get_task_struct(p);
local_save_flags(flags);
data = per_cpu_ptr(wakeup_trace->trace_buffer.data, wakeup_cpu);
data->preempt_timestamp = ftrace_now(cpu);
tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc);
+ __trace_stack(wakeup_trace, flags, 0, pc);
/*
* We must be careful in using CALLER_ADDR2. But since wake_up
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 11e9daa..69ee8ef 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -741,6 +741,11 @@
return trace_graph_entry(trace);
}
+static struct fgraph_ops fgraph_ops __initdata = {
+ .entryfunc = &trace_graph_entry_watchdog,
+ .retfunc = &trace_graph_return,
+};
+
/*
 * Pretty much the same as for the function tracer from which the selftest
* has been borrowed.
@@ -765,8 +770,7 @@
*/
tracing_reset_online_cpus(&tr->trace_buffer);
set_graph_array(tr);
- ret = register_ftrace_graph(&trace_graph_return,
- &trace_graph_entry_watchdog);
+ ret = register_ftrace_graph(&fgraph_ops);
if (ret) {
warn_failed_init_tracer(trace, ret);
goto out;
@@ -788,7 +792,10 @@
/* check the trace buffer */
ret = trace_test_buffer(&tr->trace_buffer, &count);
- trace->reset(tr);
+ /* Need to also simulate the tr->reset to remove this fgraph_ops */
+ tracing_stop_cmdline_record();
+ unregister_ftrace_graph(&fgraph_ops);
+
tracing_start();
if (!ret && !count) {
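
The selftest now follows the fgraph_ops calling convention, where the entry and
return callbacks are bundled in one structure and registered or unregistered as
a pair. A minimal kernel-C sketch of that pattern (the callback names and
bodies here are illustrative only, not part of this patch):

	#include <linux/ftrace.h>

	/* Return nonzero from the entry handler to record this function;
	 * the return handler then sees the matching exit. */
	static int example_graph_entry(struct ftrace_graph_ent *trace)
	{
		return 1;
	}

	static void example_graph_return(struct ftrace_graph_ret *trace)
	{
	}

	static struct fgraph_ops example_fgraph_ops = {
		.entryfunc	= example_graph_entry,
		.retfunc	= example_graph_return,
	};

	/* register_ftrace_graph(&example_fgraph_ops);   ... */
	/* unregister_ftrace_graph(&example_fgraph_ops);     */
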
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 4237eba..4df9a20 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -5,6 +5,7 @@
*/
#include <linux/sched/task_stack.h>
#include <linux/stacktrace.h>
+#include <linux/security.h>
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
@@ -18,44 +19,32 @@
#include "trace.h"
-static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] =
- { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX };
-unsigned stack_trace_index[STACK_TRACE_ENTRIES];
+#define STACK_TRACE_ENTRIES 500
-/*
- * Reserve one entry for the passed in ip. This will allow
- * us to remove most or all of the stack size overhead
- * added by the stack tracer itself.
- */
-struct stack_trace stack_trace_max = {
- .max_entries = STACK_TRACE_ENTRIES - 1,
- .entries = &stack_dump_trace[0],
-};
+static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES];
+static unsigned stack_trace_index[STACK_TRACE_ENTRIES];
-unsigned long stack_trace_max_size;
-arch_spinlock_t stack_trace_max_lock =
+static unsigned int stack_trace_nr_entries;
+static unsigned long stack_trace_max_size;
+static arch_spinlock_t stack_trace_max_lock =
(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
DEFINE_PER_CPU(int, disable_stack_tracer);
static DEFINE_MUTEX(stack_sysctl_mutex);
int stack_tracer_enabled;
-static int last_stack_tracer_enabled;
-void stack_trace_print(void)
+static void print_max_stack(void)
{
long i;
int size;
pr_emerg(" Depth Size Location (%d entries)\n"
" ----- ---- --------\n",
- stack_trace_max.nr_entries);
+ stack_trace_nr_entries);
- for (i = 0; i < stack_trace_max.nr_entries; i++) {
- if (stack_dump_trace[i] == ULONG_MAX)
- break;
- if (i+1 == stack_trace_max.nr_entries ||
- stack_dump_trace[i+1] == ULONG_MAX)
+ for (i = 0; i < stack_trace_nr_entries; i++) {
+ if (i + 1 == stack_trace_nr_entries)
size = stack_trace_index[i];
else
size = stack_trace_index[i] - stack_trace_index[i+1];
@@ -66,15 +55,104 @@
}
/*
- * When arch-specific code overrides this function, the following
- * data should be filled up, assuming stack_trace_max_lock is held to
- * prevent concurrent updates.
- * stack_trace_index[]
- * stack_trace_max
- * stack_trace_max_size
+ * The stack tracer looks for a maximum stack at each call from a function. It
+ * registers a callback from ftrace, and in that callback it examines the stack
+ * size. It determines the stack size from the variable passed in, which is the
+ * address of a local variable in the stack_trace_call() callback function.
+ * The stack size is calculated from the address of the local variable to the top
+ * of the current stack. If that size is smaller than the currently saved max
+ * stack size, nothing more is done.
+ *
+ * If the size of the stack is greater than the maximum recorded size, then the
+ * following algorithm takes place.
+ *
+ * For architectures (like x86) that store the function's return address before
+ * saving the function's local variables, the stack will look something like
+ * this:
+ *
+ * [ top of stack ]
+ * 0: sys call entry frame
+ * 10: return addr to entry code
+ * 11: start of sys_foo frame
+ * 20: return addr to sys_foo
+ * 21: start of kernel_func_bar frame
+ * 30: return addr to kernel_func_bar
+ * 31: [ do trace stack here ]
+ *
+ * save_stack_trace() is called, returning all the functions it finds in the
+ * current stack, which would be (from the bottom of the stack to the top):
+ *
+ * return addr to kernel_func_bar
+ * return addr to sys_foo
+ * return addr to entry code
+ *
+ * Now, to figure out the size of each of these functions' local variables,
+ * a search of the stack is made to find these values. When a match is made, it
+ * is added to the stack_dump_trace[] array. The offset into the stack is saved
+ * in the stack_trace_index[] array. The above example would show:
+ *
+ * stack_dump_trace[] | stack_trace_index[]
+ * ------------------ + -------------------
+ * return addr to kernel_func_bar | 30
+ * return addr to sys_foo | 20
+ * return addr to entry | 10
+ *
+ * The print_max_stack() function above uses these values to print the size of
+ * each function's portion of the stack.
+ *
+ * for (i = 0; i < nr_entries; i++) {
+ * size = i == nr_entries - 1 ? stack_trace_index[i] :
+ * stack_trace_index[i] - stack_trace_index[i+1]
+ * print "%d %d %d %s\n", i, stack_trace_index[i], size, stack_dump_trace[i]);
+ * }
+ *
+ * The above shows
+ *
+ * depth size location
+ * ----- ---- --------
+ * 0 30 10 kernel_func_bar
+ * 1 20 10 sys_foo
+ * 2 10 10 entry code
+ *
+ * Now for architectures that might save the return address after the function's
+ * local variables (saving the link register before calling nested functions),
+ * this will cause the stack to look a little different:
+ *
+ * [ top of stack ]
+ * 0: sys call entry frame
+ * 10: start of sys_foo frame
+ * 19: return addr to entry code << lr saved before calling kernel_func_bar
+ * 20: start of kernel_func_bar frame
+ * 29: return addr to sys_foo << lr saved before calling next function
+ * 30: [ do trace stack here ]
+ *
+ * Although the functions returned by save_stack_trace() may be the same, the
+ * placement in the stack will be different. Using the same algorithm as above
+ * would yield:
+ *
+ * stack_dump_trace[] | stack_trace_index[]
+ * ------------------ + -------------------
+ * return addr to kernel_func_bar | 30
+ * return addr to sys_foo | 29
+ * return addr to entry | 19
+ *
+ * Where the mapping is off by one:
+ *
+ * kernel_func_bar stack frame size is 29 - 19 not 30 - 29!
+ *
+ * To fix this, if the architecture sets ARCH_RET_ADDR_AFTER_LOCAL_VARS the
+ * values in stack_trace_index[] are shifted by one and the number of
+ * stack trace entries is decremented by one.
+ *
+ * stack_dump_trace[] | stack_trace_index[]
+ * ------------------ + -------------------
+ * return addr to kernel_func_bar | 29
+ * return addr to sys_foo | 19
+ *
+ * Although the entry function is not displayed, the size reported for the first
+ * function (sys_foo) will still include the entry function's stack.
*/
-void __weak
-check_stack(unsigned long ip, unsigned long *stack)
+static void check_stack(unsigned long ip, unsigned long *stack)
{
unsigned long this_size, flags; unsigned long *p, *top, *start;
static int tracer_frame;
@@ -110,13 +188,12 @@
stack_trace_max_size = this_size;
- stack_trace_max.nr_entries = 0;
- stack_trace_max.skip = 3;
-
- save_stack_trace(&stack_trace_max);
+ stack_trace_nr_entries = stack_trace_save(stack_dump_trace,
+ ARRAY_SIZE(stack_dump_trace) - 1,
+ 0);
/* Skip over the overhead of the stack tracer itself */
- for (i = 0; i < stack_trace_max.nr_entries; i++) {
+ for (i = 0; i < stack_trace_nr_entries; i++) {
if (stack_dump_trace[i] == ip)
break;
}
@@ -125,7 +202,7 @@
* Some archs may not have the passed in ip in the dump.
* If that happens, we need to show everything.
*/
- if (i == stack_trace_max.nr_entries)
+ if (i == stack_trace_nr_entries)
i = 0;
/*
@@ -143,15 +220,13 @@
* loop will only happen once. This code only takes place
* on a new max, so it is far from a fast path.
*/
- while (i < stack_trace_max.nr_entries) {
+ while (i < stack_trace_nr_entries) {
int found = 0;
stack_trace_index[x] = this_size;
p = start;
- for (; p < top && i < stack_trace_max.nr_entries; p++) {
- if (stack_dump_trace[i] == ULONG_MAX)
- break;
+ for (; p < top && i < stack_trace_nr_entries; p++) {
/*
* The READ_ONCE_NOCHECK is used to let KASAN know that
* this is not a stack-out-of-bounds error.
@@ -182,12 +257,24 @@
i++;
}
- stack_trace_max.nr_entries = x;
- for (; x < i; x++)
- stack_dump_trace[x] = ULONG_MAX;
+#ifdef ARCH_FTRACE_SHIFT_STACK_TRACER
+ /*
+ * Some archs will store the link register before calling
+ * nested functions. This means the saved return address
+ * comes after the local storage, and we need to shift
+ * for that.
+ */
+ if (x > 1) {
+ memmove(&stack_trace_index[0], &stack_trace_index[1],
+ sizeof(stack_trace_index[0]) * (x - 1));
+ x--;
+ }
+#endif
+
+ stack_trace_nr_entries = x;
if (task_stack_end_corrupted(current)) {
- stack_trace_print();
+ print_max_stack();
BUG();
}
@@ -286,7 +373,7 @@
{
long n = *pos - 1;
- if (n > stack_trace_max.nr_entries || stack_dump_trace[n] == ULONG_MAX)
+ if (n >= stack_trace_nr_entries)
return NULL;
m->private = (void *)n;
@@ -350,7 +437,7 @@
seq_printf(m, " Depth Size Location"
" (%d entries)\n"
" ----- ---- --------\n",
- stack_trace_max.nr_entries);
+ stack_trace_nr_entries);
if (!stack_tracer_enabled && !stack_trace_max_size)
print_disabled(m);
@@ -360,12 +447,10 @@
i = *(long *)v;
- if (i >= stack_trace_max.nr_entries ||
- stack_dump_trace[i] == ULONG_MAX)
+ if (i >= stack_trace_nr_entries)
return 0;
- if (i+1 == stack_trace_max.nr_entries ||
- stack_dump_trace[i+1] == ULONG_MAX)
+ if (i + 1 == stack_trace_nr_entries)
size = stack_trace_index[i];
else
size = stack_trace_index[i] - stack_trace_index[i+1];
@@ -386,6 +471,12 @@
static int stack_trace_open(struct inode *inode, struct file *file)
{
+ int ret;
+
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
+ if (ret)
+ return ret;
+
return seq_open(file, &stack_trace_seq_ops);
}
@@ -403,6 +494,7 @@
{
struct ftrace_ops *ops = inode->i_private;
+ /* Checks for tracefs lockdown */
return ftrace_regex_open(ops, FTRACE_ITER_FILTER,
inode, file);
}
@@ -422,23 +514,21 @@
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
+ int was_enabled;
int ret;
mutex_lock(&stack_sysctl_mutex);
+ was_enabled = !!stack_tracer_enabled;
ret = proc_dointvec(table, write, buffer, lenp, ppos);
- if (ret || !write ||
- (last_stack_tracer_enabled == !!stack_tracer_enabled))
+ if (ret || !write || (was_enabled == !!stack_tracer_enabled))
goto out;
- last_stack_tracer_enabled = !!stack_tracer_enabled;
-
if (stack_tracer_enabled)
register_ftrace_function(&trace_ops);
else
unregister_ftrace_function(&trace_ops);
-
out:
mutex_unlock(&stack_sysctl_mutex);
return ret;
@@ -448,11 +538,12 @@
static __init int enable_stacktrace(char *str)
{
- if (strncmp(str, "_filter=", 8) == 0)
- strncpy(stack_trace_filter_buf, str+8, COMMAND_LINE_SIZE);
+ int len;
+
+ if ((len = str_has_prefix(str, "_filter=")))
+ strncpy(stack_trace_filter_buf, str + len, COMMAND_LINE_SIZE);
stack_tracer_enabled = 1;
- last_stack_tracer_enabled = 1;
return 1;
}
__setup("stacktrace", enable_stacktrace);
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index 75bf1bc..9ab0a1a 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -9,7 +9,7 @@
*
*/
-
+#include <linux/security.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/rbtree.h>
@@ -238,6 +238,10 @@
struct seq_file *m;
struct stat_session *session = inode->i_private;
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
+ if (ret)
+ return ret;
+
ret = stat_seq_init(session);
if (ret)
return ret;
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index f93a56d..fa8fbff 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -314,6 +314,7 @@
struct ring_buffer_event *event;
struct ring_buffer *buffer;
unsigned long irq_flags;
+ unsigned long args[6];
int pc;
int syscall_nr;
int size;
@@ -347,7 +348,8 @@
entry = ring_buffer_event_data(event);
entry->nr = syscall_nr;
- syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
+ syscall_get_arguments(current, regs, args);
+ memcpy(entry->args, args, sizeof(unsigned long) * sys_data->nb_args);
event_trigger_unlock_commit(trace_file, buffer, event, entry,
irq_flags, pc);
@@ -583,6 +585,7 @@
struct syscall_metadata *sys_data;
struct syscall_trace_enter *rec;
struct hlist_head *head;
+ unsigned long args[6];
bool valid_prog_array;
int syscall_nr;
int rctx;
@@ -613,8 +616,8 @@
return;
rec->nr = syscall_nr;
- syscall_get_arguments(current, regs, 0, sys_data->nb_args,
- (unsigned long *)&rec->args);
+ syscall_get_arguments(current, regs, args);
+ memcpy(&rec->args, args, sizeof(unsigned long) * sys_data->nb_args);
if ((valid_prog_array &&
!perf_call_bpf_enter(sys_data->enter_event, regs, sys_data, rec)) ||
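
The hunks above move to the newer syscall_get_arguments() calling convention,
which always fills all six argument slots; the caller fetches into a local
array and then copies only the nb_args arguments the syscall actually defines.
A short sketch of that pattern (the helper name is made up for illustration):

	/* Fetch all six argument slots, then copy just the ones this
	 * syscall defines into the event record. */
	static void example_fill_syscall_args(struct pt_regs *regs,
					      struct syscall_metadata *sys_data,
					      unsigned long *dest)
	{
		unsigned long args[6];

		syscall_get_arguments(current, regs, args);
		memcpy(dest, args, sizeof(unsigned long) * sys_data->nb_args);
	}
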
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index e696667..352073d 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -5,8 +5,10 @@
* Copyright (C) IBM Corporation, 2010-2012
* Author: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
*/
-#define pr_fmt(fmt) "trace_kprobe: " fmt
+#define pr_fmt(fmt) "trace_uprobe: " fmt
+#include <linux/security.h>
+#include <linux/ctype.h>
#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/uprobes.h>
@@ -14,7 +16,9 @@
#include <linux/string.h>
#include <linux/rculist.h>
+#include "trace_dynevent.h"
#include "trace_probe.h"
+#include "trace_probe_tmpl.h"
#define UPROBE_EVENT_SYSTEM "uprobes"
@@ -36,21 +40,56 @@
struct list_head perf_events;
};
+static int trace_uprobe_create(int argc, const char **argv);
+static int trace_uprobe_show(struct seq_file *m, struct dyn_event *ev);
+static int trace_uprobe_release(struct dyn_event *ev);
+static bool trace_uprobe_is_busy(struct dyn_event *ev);
+static bool trace_uprobe_match(const char *system, const char *event,
+ int argc, const char **argv, struct dyn_event *ev);
+
+static struct dyn_event_operations trace_uprobe_ops = {
+ .create = trace_uprobe_create,
+ .show = trace_uprobe_show,
+ .is_busy = trace_uprobe_is_busy,
+ .free = trace_uprobe_release,
+ .match = trace_uprobe_match,
+};
+
/*
* uprobe event core functions
*/
struct trace_uprobe {
- struct list_head list;
+ struct dyn_event devent;
struct trace_uprobe_filter filter;
struct uprobe_consumer consumer;
struct path path;
struct inode *inode;
char *filename;
unsigned long offset;
+ unsigned long ref_ctr_offset;
unsigned long nhit;
struct trace_probe tp;
};
+static bool is_trace_uprobe(struct dyn_event *ev)
+{
+ return ev->ops == &trace_uprobe_ops;
+}
+
+static struct trace_uprobe *to_trace_uprobe(struct dyn_event *ev)
+{
+ return container_of(ev, struct trace_uprobe, devent);
+}
+
+/**
+ * for_each_trace_uprobe - iterate over the trace_uprobe list
+ * @pos: the struct trace_uprobe * for each entry
+ * @dpos: the struct dyn_event * to use as a loop cursor
+ */
+#define for_each_trace_uprobe(pos, dpos) \
+ for_each_dyn_event(dpos) \
+ if (is_trace_uprobe(dpos) && (pos = to_trace_uprobe(dpos)))
+
#define SIZEOF_TRACE_UPROBE(n) \
(offsetof(struct trace_uprobe, tp.args) + \
(sizeof(struct probe_arg) * (n)))
@@ -58,9 +97,6 @@
static int register_uprobe_event(struct trace_uprobe *tu);
static int unregister_uprobe_event(struct trace_uprobe *tu);
-static DEFINE_MUTEX(uprobe_lock);
-static LIST_HEAD(uprobe_list);
-
struct uprobe_dispatch_data {
struct trace_uprobe *tu;
unsigned long bp_addr;
@@ -98,74 +134,84 @@
/*
* Uprobes-specific fetch functions
*/
-#define DEFINE_FETCH_stack(type) \
-static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs, \
- void *offset, void *dest) \
-{ \
- *(type *)dest = (type)get_user_stack_nth(regs, \
- ((unsigned long)offset)); \
-}
-DEFINE_BASIC_FETCH_FUNCS(stack)
-/* No string on the stack entry */
-#define fetch_stack_string NULL
-#define fetch_stack_string_size NULL
+static nokprobe_inline int
+probe_mem_read(void *dest, void *src, size_t size)
+{
+ void __user *vaddr = (void __force __user *)src;
-#define DEFINE_FETCH_memory(type) \
-static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs, \
- void *addr, void *dest) \
-{ \
- type retval; \
- void __user *vaddr = (void __force __user *) addr; \
- \
- if (copy_from_user(&retval, vaddr, sizeof(type))) \
- *(type *)dest = 0; \
- else \
- *(type *) dest = retval; \
+ return copy_from_user(dest, vaddr, size) ? -EFAULT : 0;
}
-DEFINE_BASIC_FETCH_FUNCS(memory)
+
+static nokprobe_inline int
+probe_mem_read_user(void *dest, void *src, size_t size)
+{
+ return probe_mem_read(dest, src, size);
+}
+
/*
* Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
* length and relative data location.
*/
-static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
- void *addr, void *dest)
+static nokprobe_inline int
+fetch_store_string(unsigned long addr, void *dest, void *base)
{
long ret;
- u32 rloc = *(u32 *)dest;
- int maxlen = get_rloc_len(rloc);
- u8 *dst = get_rloc_data(dest);
+ u32 loc = *(u32 *)dest;
+ int maxlen = get_loc_len(loc);
+ u8 *dst = get_loc_data(dest, base);
void __user *src = (void __force __user *) addr;
- if (!maxlen)
- return;
+ if (unlikely(!maxlen))
+ return -ENOMEM;
- ret = strncpy_from_user(dst, src, maxlen);
- if (ret == maxlen)
- dst[--ret] = '\0';
-
- if (ret < 0) { /* Failed to fetch string */
- ((u8 *)get_rloc_data(dest))[0] = '\0';
- *(u32 *)dest = make_data_rloc(0, get_rloc_offs(rloc));
- } else {
- *(u32 *)dest = make_data_rloc(ret, get_rloc_offs(rloc));
+ if (addr == FETCH_TOKEN_COMM)
+ ret = strlcpy(dst, current->comm, maxlen);
+ else
+ ret = strncpy_from_user(dst, src, maxlen);
+ if (ret >= 0) {
+ if (ret == maxlen)
+ dst[ret - 1] = '\0';
+ else
+ /*
+ * Include the terminating null byte. In this case it
+ * was copied by strncpy_from_user but not accounted
+ * for in ret.
+ */
+ ret++;
+ *(u32 *)dest = make_data_loc(ret, (void *)dst - base);
}
+
+ return ret;
}
-static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
- void *addr, void *dest)
+static nokprobe_inline int
+fetch_store_string_user(unsigned long addr, void *dest, void *base)
+{
+ return fetch_store_string(addr, dest, base);
+}
+
+/* Return the length of the string -- including the terminating null byte */
+static nokprobe_inline int
+fetch_store_strlen(unsigned long addr)
{
int len;
void __user *vaddr = (void __force __user *) addr;
- len = strnlen_user(vaddr, MAX_STRING_SIZE);
-
- if (len == 0 || len > MAX_STRING_SIZE) /* Failed to check length */
- *(u32 *)dest = 0;
+ if (addr == FETCH_TOKEN_COMM)
+ len = strlen(current->comm) + 1;
else
- *(u32 *)dest = len;
+ len = strnlen_user(vaddr, MAX_STRING_SIZE);
+
+ return (len > MAX_STRING_SIZE) ? 0 : len;
}
-static unsigned long translate_user_vaddr(void *file_offset)
+static nokprobe_inline int
+fetch_store_strlen_user(unsigned long addr)
+{
+ return fetch_store_strlen(addr);
+}
+
+static unsigned long translate_user_vaddr(unsigned long file_offset)
{
unsigned long base_addr;
struct uprobe_dispatch_data *udd;
@@ -173,44 +219,50 @@
udd = (void *) current->utask->vaddr;
base_addr = udd->bp_addr - udd->tu->offset;
- return base_addr + (unsigned long)file_offset;
+ return base_addr + file_offset;
}
-#define DEFINE_FETCH_file_offset(type) \
-static void FETCH_FUNC_NAME(file_offset, type)(struct pt_regs *regs, \
- void *offset, void *dest)\
-{ \
- void *vaddr = (void *)translate_user_vaddr(offset); \
- \
- FETCH_FUNC_NAME(memory, type)(regs, vaddr, dest); \
+/* Note that we don't verify it, since the code does not come from user space */
+static int
+process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest,
+ void *base)
+{
+ unsigned long val;
+
+ /* 1st stage: get value from context */
+ switch (code->op) {
+ case FETCH_OP_REG:
+ val = regs_get_register(regs, code->param);
+ break;
+ case FETCH_OP_STACK:
+ val = get_user_stack_nth(regs, code->param);
+ break;
+ case FETCH_OP_STACKP:
+ val = user_stack_pointer(regs);
+ break;
+ case FETCH_OP_RETVAL:
+ val = regs_return_value(regs);
+ break;
+ case FETCH_OP_IMM:
+ val = code->immediate;
+ break;
+ case FETCH_OP_COMM:
+ val = FETCH_TOKEN_COMM;
+ break;
+ case FETCH_OP_DATA:
+ val = (unsigned long)code->data;
+ break;
+ case FETCH_OP_FOFFS:
+ val = translate_user_vaddr(code->immediate);
+ break;
+ default:
+ return -EILSEQ;
+ }
+ code++;
+
+ return process_fetch_insn_bottom(code, val, dest, base);
}
-DEFINE_BASIC_FETCH_FUNCS(file_offset)
-DEFINE_FETCH_file_offset(string)
-DEFINE_FETCH_file_offset(string_size)
-
-/* Fetch type information table */
-static const struct fetch_type uprobes_fetch_type_table[] = {
- /* Special types */
- [FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
- sizeof(u32), 1, "__data_loc char[]"),
- [FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32,
- string_size, sizeof(u32), 0, "u32"),
- /* Basic types */
- ASSIGN_FETCH_TYPE(u8, u8, 0),
- ASSIGN_FETCH_TYPE(u16, u16, 0),
- ASSIGN_FETCH_TYPE(u32, u32, 0),
- ASSIGN_FETCH_TYPE(u64, u64, 0),
- ASSIGN_FETCH_TYPE(s8, u8, 1),
- ASSIGN_FETCH_TYPE(s16, u16, 1),
- ASSIGN_FETCH_TYPE(s32, u32, 1),
- ASSIGN_FETCH_TYPE(s64, u64, 1),
- ASSIGN_FETCH_TYPE_ALIAS(x8, u8, u8, 0),
- ASSIGN_FETCH_TYPE_ALIAS(x16, u16, u16, 0),
- ASSIGN_FETCH_TYPE_ALIAS(x32, u32, u32, 0),
- ASSIGN_FETCH_TYPE_ALIAS(x64, u64, u64, 0),
-
- ASSIGN_FETCH_TYPE_END
-};
+NOKPROBE_SYMBOL(process_fetch_insn)
static inline void init_trace_uprobe_filter(struct trace_uprobe_filter *filter)
{
@@ -229,6 +281,63 @@
return tu->consumer.ret_handler != NULL;
}
+static bool trace_uprobe_is_busy(struct dyn_event *ev)
+{
+ struct trace_uprobe *tu = to_trace_uprobe(ev);
+
+ return trace_probe_is_enabled(&tu->tp);
+}
+
+static bool trace_uprobe_match_command_head(struct trace_uprobe *tu,
+ int argc, const char **argv)
+{
+ char buf[MAX_ARGSTR_LEN + 1];
+ int len;
+
+ if (!argc)
+ return true;
+
+ len = strlen(tu->filename);
+ if (strncmp(tu->filename, argv[0], len) || argv[0][len] != ':')
+ return false;
+
+ if (tu->ref_ctr_offset == 0)
+ snprintf(buf, sizeof(buf), "0x%0*lx",
+ (int)(sizeof(void *) * 2), tu->offset);
+ else
+ snprintf(buf, sizeof(buf), "0x%0*lx(0x%lx)",
+ (int)(sizeof(void *) * 2), tu->offset,
+ tu->ref_ctr_offset);
+ if (strcmp(buf, &argv[0][len + 1]))
+ return false;
+
+ argc--; argv++;
+
+ return trace_probe_match_command_args(&tu->tp, argc, argv);
+}
+
+static bool trace_uprobe_match(const char *system, const char *event,
+ int argc, const char **argv, struct dyn_event *ev)
+{
+ struct trace_uprobe *tu = to_trace_uprobe(ev);
+
+ return strcmp(trace_probe_name(&tu->tp), event) == 0 &&
+ (!system || strcmp(trace_probe_group_name(&tu->tp), system) == 0) &&
+ trace_uprobe_match_command_head(tu, argc, argv);
+}
+
+static nokprobe_inline struct trace_uprobe *
+trace_uprobe_primary_from_call(struct trace_event_call *call)
+{
+ struct trace_probe *tp;
+
+ tp = trace_probe_primary_from_call(call);
+ if (WARN_ON_ONCE(!tp))
+ return NULL;
+
+ return container_of(tp, struct trace_uprobe, tp);
+}
+
/*
* Allocate new trace_uprobe and initialize it (including uprobes).
*/
@@ -236,28 +345,17 @@
alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret)
{
struct trace_uprobe *tu;
-
- if (!event || !is_good_name(event))
- return ERR_PTR(-EINVAL);
-
- if (!group || !is_good_name(group))
- return ERR_PTR(-EINVAL);
+ int ret;
tu = kzalloc(SIZEOF_TRACE_UPROBE(nargs), GFP_KERNEL);
if (!tu)
return ERR_PTR(-ENOMEM);
- tu->tp.call.class = &tu->tp.class;
- tu->tp.call.name = kstrdup(event, GFP_KERNEL);
- if (!tu->tp.call.name)
+ ret = trace_probe_init(&tu->tp, event, group);
+ if (ret < 0)
goto error;
- tu->tp.class.system = kstrdup(group, GFP_KERNEL);
- if (!tu->tp.class.system)
- goto error;
-
- INIT_LIST_HEAD(&tu->list);
- INIT_LIST_HEAD(&tu->tp.files);
+ dyn_event_init(&tu->devent, &trace_uprobe_ops);
tu->consumer.handler = uprobe_dispatcher;
if (is_ret)
tu->consumer.ret_handler = uretprobe_dispatcher;
@@ -265,68 +363,160 @@
return tu;
error:
- kfree(tu->tp.call.name);
kfree(tu);
- return ERR_PTR(-ENOMEM);
+ return ERR_PTR(ret);
}
static void free_trace_uprobe(struct trace_uprobe *tu)
{
- int i;
-
- for (i = 0; i < tu->tp.nr_args; i++)
- traceprobe_free_probe_arg(&tu->tp.args[i]);
+ if (!tu)
+ return;
path_put(&tu->path);
- kfree(tu->tp.call.class->system);
- kfree(tu->tp.call.name);
+ trace_probe_cleanup(&tu->tp);
kfree(tu->filename);
kfree(tu);
}
static struct trace_uprobe *find_probe_event(const char *event, const char *group)
{
+ struct dyn_event *pos;
struct trace_uprobe *tu;
- list_for_each_entry(tu, &uprobe_list, list)
- if (strcmp(trace_event_name(&tu->tp.call), event) == 0 &&
- strcmp(tu->tp.call.class->system, group) == 0)
+ for_each_trace_uprobe(tu, pos)
+ if (strcmp(trace_probe_name(&tu->tp), event) == 0 &&
+ strcmp(trace_probe_group_name(&tu->tp), group) == 0)
return tu;
return NULL;
}
-/* Unregister a trace_uprobe and probe_event: call with locking uprobe_lock */
+/* Unregister a trace_uprobe and probe_event */
static int unregister_trace_uprobe(struct trace_uprobe *tu)
{
int ret;
+ if (trace_probe_has_sibling(&tu->tp))
+ goto unreg;
+
ret = unregister_uprobe_event(tu);
if (ret)
return ret;
- list_del(&tu->list);
+unreg:
+ dyn_event_remove(&tu->devent);
+ trace_probe_unlink(&tu->tp);
free_trace_uprobe(tu);
return 0;
}
+static bool trace_uprobe_has_same_uprobe(struct trace_uprobe *orig,
+ struct trace_uprobe *comp)
+{
+ struct trace_probe_event *tpe = orig->tp.event;
+ struct trace_probe *pos;
+ struct inode *comp_inode = d_real_inode(comp->path.dentry);
+ int i;
+
+ list_for_each_entry(pos, &tpe->probes, list) {
+ orig = container_of(pos, struct trace_uprobe, tp);
+ if (comp_inode != d_real_inode(orig->path.dentry) ||
+ comp->offset != orig->offset)
+ continue;
+
+ /*
+ * trace_probe_compare_arg_type() ensured that nr_args and
+ * each argument name and type are the same. Let's compare comm.
+ */
+ for (i = 0; i < orig->tp.nr_args; i++) {
+ if (strcmp(orig->tp.args[i].comm,
+ comp->tp.args[i].comm))
+ break;
+ }
+
+ if (i == orig->tp.nr_args)
+ return true;
+ }
+
+ return false;
+}
+
+static int append_trace_uprobe(struct trace_uprobe *tu, struct trace_uprobe *to)
+{
+ int ret;
+
+ ret = trace_probe_compare_arg_type(&tu->tp, &to->tp);
+ if (ret) {
+ /* Note that arguments start at index 2 */
+ trace_probe_log_set_index(ret + 1);
+ trace_probe_log_err(0, DIFF_ARG_TYPE);
+ return -EEXIST;
+ }
+ if (trace_uprobe_has_same_uprobe(to, tu)) {
+ trace_probe_log_set_index(0);
+ trace_probe_log_err(0, SAME_PROBE);
+ return -EEXIST;
+ }
+
+ /* Append to existing event */
+ ret = trace_probe_append(&tu->tp, &to->tp);
+ if (!ret)
+ dyn_event_add(&tu->devent);
+
+ return ret;
+}
+
+/*
+ * A uprobe with multiple reference counters is not allowed, i.e. if the
+ * inode and offset match, the reference counter offset *must* match as
+ * well. There is one exception, though: if the user is replacing an old
+ * trace_uprobe with a new one (same group/event), then we allow the same
+ * uprobe with a new reference counter, as long as the new one does not
+ * conflict with any other existing ones.
+ */
+static int validate_ref_ctr_offset(struct trace_uprobe *new)
+{
+ struct dyn_event *pos;
+ struct trace_uprobe *tmp;
+ struct inode *new_inode = d_real_inode(new->path.dentry);
+
+ for_each_trace_uprobe(tmp, pos) {
+ if (new_inode == d_real_inode(tmp->path.dentry) &&
+ new->offset == tmp->offset &&
+ new->ref_ctr_offset != tmp->ref_ctr_offset) {
+ pr_warn("Reference counter offset mismatch.");
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
/* Register a trace_uprobe and probe_event */
static int register_trace_uprobe(struct trace_uprobe *tu)
{
struct trace_uprobe *old_tu;
int ret;
- mutex_lock(&uprobe_lock);
+ mutex_lock(&event_mutex);
+
+ ret = validate_ref_ctr_offset(tu);
+ if (ret)
+ goto end;
/* register as an event */
- old_tu = find_probe_event(trace_event_name(&tu->tp.call),
- tu->tp.call.class->system);
+ old_tu = find_probe_event(trace_probe_name(&tu->tp),
+ trace_probe_group_name(&tu->tp));
if (old_tu) {
- /* delete old event */
- ret = unregister_trace_uprobe(old_tu);
- if (ret)
- goto end;
+ if (is_ret_probe(tu) != is_ret_probe(old_tu)) {
+ trace_probe_log_set_index(0);
+ trace_probe_log_err(0, DIFF_PROBE_TYPE);
+ ret = -EEXIST;
+ } else {
+ ret = append_trace_uprobe(tu, old_tu);
+ }
+ goto end;
}
ret = register_uprobe_event(tu);
@@ -335,10 +525,10 @@
goto end;
}
- list_add_tail(&tu->list, &uprobe_list);
+ dyn_event_add(&tu->devent);
end:
- mutex_unlock(&uprobe_lock);
+ mutex_unlock(&event_mutex);
return ret;
}
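
For context on the interface register_trace_uprobe() ultimately serves, here is
a user-space sketch of defining a uprobe event through the uprobe_events file,
following the syntax documented in the comment below (the event name, binary
path, offset and fetch argument are made up for illustration, and the tracefs
mount point may also be /sys/kernel/tracing):

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/sys/kernel/debug/tracing/uprobe_events", "a");

		if (!f)
			return 1;
		/* p[:[GRP/]EVENT] PATH:OFFSET[(REF_CTR_OFFSET)] [FETCHARGS] */
		fputs("p:uprobes/example_probe /bin/bash:0x4245c0 comm=$comm\n", f);
		/* Appending "-:uprobes/example_probe" later removes it again. */
		fclose(f);
		return 0;
	}
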
@@ -346,107 +536,109 @@
/*
* Argument syntax:
* - Add uprobe: p|r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS]
- *
- * - Remove uprobe: -:[GRP/]EVENT
*/
-static int create_trace_uprobe(int argc, char **argv)
+static int trace_uprobe_create(int argc, const char **argv)
{
struct trace_uprobe *tu;
- char *arg, *event, *group, *filename;
+ const char *event = NULL, *group = UPROBE_EVENT_SYSTEM;
+ char *arg, *filename, *rctr, *rctr_end, *tmp;
char buf[MAX_EVENT_NAME_LEN];
struct path path;
- unsigned long offset;
- bool is_delete, is_return;
+ unsigned long offset, ref_ctr_offset;
+ bool is_return = false;
int i, ret;
ret = 0;
- is_delete = false;
- is_return = false;
- event = NULL;
- group = NULL;
+ ref_ctr_offset = 0;
- /* argc must be >= 1 */
- if (argv[0][0] == '-')
- is_delete = true;
- else if (argv[0][0] == 'r')
+ switch (argv[0][0]) {
+ case 'r':
is_return = true;
- else if (argv[0][0] != 'p') {
- pr_info("Probe definition must be started with 'p', 'r' or '-'.\n");
- return -EINVAL;
+ break;
+ case 'p':
+ break;
+ default:
+ return -ECANCELED;
}
- if (argv[0][1] == ':') {
+ if (argc < 2)
+ return -ECANCELED;
+
+ if (argv[0][1] == ':')
event = &argv[0][2];
- arg = strchr(event, '/');
- if (arg) {
- group = event;
- event = arg + 1;
- event[-1] = '\0';
+ if (!strchr(argv[1], '/'))
+ return -ECANCELED;
- if (strlen(group) == 0) {
- pr_info("Group name is not specified\n");
- return -EINVAL;
- }
- }
- if (strlen(event) == 0) {
- pr_info("Event name is not specified\n");
- return -EINVAL;
- }
- }
- if (!group)
- group = UPROBE_EVENT_SYSTEM;
+ filename = kstrdup(argv[1], GFP_KERNEL);
+ if (!filename)
+ return -ENOMEM;
- if (is_delete) {
- int ret;
-
- if (!event) {
- pr_info("Delete command needs an event name.\n");
- return -EINVAL;
- }
- mutex_lock(&uprobe_lock);
- tu = find_probe_event(event, group);
-
- if (!tu) {
- mutex_unlock(&uprobe_lock);
- pr_info("Event %s/%s doesn't exist.\n", group, event);
- return -ENOENT;
- }
- /* delete an event */
- ret = unregister_trace_uprobe(tu);
- mutex_unlock(&uprobe_lock);
- return ret;
- }
-
- if (argc < 2) {
- pr_info("Probe point is not specified.\n");
- return -EINVAL;
- }
/* Find the last occurrence, in case the path contains ':' too. */
- arg = strrchr(argv[1], ':');
- if (!arg)
- return -EINVAL;
+ arg = strrchr(filename, ':');
+ if (!arg || !isdigit(arg[1])) {
+ kfree(filename);
+ return -ECANCELED;
+ }
+
+ trace_probe_log_init("trace_uprobe", argc, argv);
+ trace_probe_log_set_index(1); /* filename is the 2nd argument */
*arg++ = '\0';
- filename = argv[1];
ret = kern_path(filename, LOOKUP_FOLLOW, &path);
- if (ret)
+ if (ret) {
+ trace_probe_log_err(0, FILE_NOT_FOUND);
+ kfree(filename);
+ trace_probe_log_clear();
return ret;
-
+ }
if (!d_is_reg(path.dentry)) {
+ trace_probe_log_err(0, NO_REGULAR_FILE);
ret = -EINVAL;
goto fail_address_parse;
}
- ret = kstrtoul(arg, 0, &offset);
- if (ret)
- goto fail_address_parse;
+ /* Parse reference counter offset if specified. */
+ rctr = strchr(arg, '(');
+ if (rctr) {
+ rctr_end = strchr(rctr, ')');
+ if (!rctr_end) {
+ ret = -EINVAL;
+ rctr_end = rctr + strlen(rctr);
+ trace_probe_log_err(rctr_end - filename,
+ REFCNT_OPEN_BRACE);
+ goto fail_address_parse;
+ } else if (rctr_end[1] != '\0') {
+ ret = -EINVAL;
+ trace_probe_log_err(rctr_end + 1 - filename,
+ BAD_REFCNT_SUFFIX);
+ goto fail_address_parse;
+ }
- argc -= 2;
- argv += 2;
+ *rctr++ = '\0';
+ *rctr_end = '\0';
+ ret = kstrtoul(rctr, 0, &ref_ctr_offset);
+ if (ret) {
+ trace_probe_log_err(rctr - filename, BAD_REFCNT);
+ goto fail_address_parse;
+ }
+ }
+
+ /* Parse uprobe offset. */
+ ret = kstrtoul(arg, 0, &offset);
+ if (ret) {
+ trace_probe_log_err(arg - filename, BAD_UPROBE_OFFS);
+ goto fail_address_parse;
+ }
/* setup a probe */
- if (!event) {
+ trace_probe_log_set_index(0);
+ if (event) {
+ ret = traceprobe_parse_event_name(&event, &group, buf,
+ event - argv[0]);
+ if (ret)
+ goto fail_address_parse;
+ } else {
char *tail;
char *ptr;
@@ -465,131 +657,91 @@
kfree(tail);
}
+ argc -= 2;
+ argv += 2;
+
tu = alloc_trace_uprobe(group, event, argc, is_return);
if (IS_ERR(tu)) {
- pr_info("Failed to allocate trace_uprobe.(%d)\n", (int)PTR_ERR(tu));
ret = PTR_ERR(tu);
+ /* This must return -ENOMEM otherwise there is a bug */
+ WARN_ON_ONCE(ret != -ENOMEM);
goto fail_address_parse;
}
tu->offset = offset;
+ tu->ref_ctr_offset = ref_ctr_offset;
tu->path = path;
- tu->filename = kstrdup(filename, GFP_KERNEL);
-
- if (!tu->filename) {
- pr_info("Failed to allocate filename.\n");
- ret = -ENOMEM;
- goto error;
- }
+ tu->filename = filename;
/* parse arguments */
- ret = 0;
for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
- struct probe_arg *parg = &tu->tp.args[i];
-
- /* Increment count for freeing args in error case */
- tu->tp.nr_args++;
-
- /* Parse argument name */
- arg = strchr(argv[i], '=');
- if (arg) {
- *arg++ = '\0';
- parg->name = kstrdup(argv[i], GFP_KERNEL);
- } else {
- arg = argv[i];
- /* If argument name is omitted, set "argN" */
- snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
- parg->name = kstrdup(buf, GFP_KERNEL);
- }
-
- if (!parg->name) {
- pr_info("Failed to allocate argument[%d] name.\n", i);
+ tmp = kstrdup(argv[i], GFP_KERNEL);
+ if (!tmp) {
ret = -ENOMEM;
goto error;
}
- if (!is_good_name(parg->name)) {
- pr_info("Invalid argument[%d] name: %s\n", i, parg->name);
- ret = -EINVAL;
+ trace_probe_log_set_index(i + 2);
+ ret = traceprobe_parse_probe_arg(&tu->tp, i, tmp,
+ is_return ? TPARG_FL_RETURN : 0);
+ kfree(tmp);
+ if (ret)
goto error;
- }
-
- if (traceprobe_conflict_field_name(parg->name, tu->tp.args, i)) {
- pr_info("Argument[%d] name '%s' conflicts with "
- "another field.\n", i, argv[i]);
- ret = -EINVAL;
- goto error;
- }
-
- /* Parse fetch argument */
- ret = traceprobe_parse_probe_arg(arg, &tu->tp.size, parg,
- is_return, false,
- uprobes_fetch_type_table);
- if (ret) {
- pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
- goto error;
- }
}
- ret = register_trace_uprobe(tu);
- if (ret)
+ ret = traceprobe_set_print_fmt(&tu->tp, is_ret_probe(tu));
+ if (ret < 0)
goto error;
- return 0;
+
+ ret = register_trace_uprobe(tu);
+ if (!ret)
+ goto out;
error:
free_trace_uprobe(tu);
+out:
+ trace_probe_log_clear();
return ret;
fail_address_parse:
+ trace_probe_log_clear();
path_put(&path);
-
- pr_info("Failed to parse address or file.\n");
+ kfree(filename);
return ret;
}
-static int cleanup_all_probes(void)
+static int create_or_delete_trace_uprobe(int argc, char **argv)
{
- struct trace_uprobe *tu;
- int ret = 0;
+ int ret;
- mutex_lock(&uprobe_lock);
- while (!list_empty(&uprobe_list)) {
- tu = list_entry(uprobe_list.next, struct trace_uprobe, list);
- ret = unregister_trace_uprobe(tu);
- if (ret)
- break;
- }
- mutex_unlock(&uprobe_lock);
- return ret;
+ if (argv[0][0] == '-')
+ return dyn_event_release(argc, argv, &trace_uprobe_ops);
+
+ ret = trace_uprobe_create(argc, (const char **)argv);
+ return ret == -ECANCELED ? -EINVAL : ret;
+}
+
+static int trace_uprobe_release(struct dyn_event *ev)
+{
+ struct trace_uprobe *tu = to_trace_uprobe(ev);
+
+ return unregister_trace_uprobe(tu);
}
/* Probes listing interfaces */
-static void *probes_seq_start(struct seq_file *m, loff_t *pos)
+static int trace_uprobe_show(struct seq_file *m, struct dyn_event *ev)
{
- mutex_lock(&uprobe_lock);
- return seq_list_start(&uprobe_list, *pos);
-}
-
-static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
-{
- return seq_list_next(v, &uprobe_list, pos);
-}
-
-static void probes_seq_stop(struct seq_file *m, void *v)
-{
- mutex_unlock(&uprobe_lock);
-}
-
-static int probes_seq_show(struct seq_file *m, void *v)
-{
- struct trace_uprobe *tu = v;
+ struct trace_uprobe *tu = to_trace_uprobe(ev);
char c = is_ret_probe(tu) ? 'r' : 'p';
int i;
- seq_printf(m, "%c:%s/%s %s:0x%0*lx", c, tu->tp.call.class->system,
- trace_event_name(&tu->tp.call), tu->filename,
+ seq_printf(m, "%c:%s/%s %s:0x%0*lx", c, trace_probe_group_name(&tu->tp),
+ trace_probe_name(&tu->tp), tu->filename,
(int)(sizeof(void *) * 2), tu->offset);
+ if (tu->ref_ctr_offset)
+ seq_printf(m, "(0x%lx)", tu->ref_ctr_offset);
+
for (i = 0; i < tu->tp.nr_args; i++)
seq_printf(m, " %s=%s", tu->tp.args[i].name, tu->tp.args[i].comm);
@@ -597,19 +749,33 @@
return 0;
}
+static int probes_seq_show(struct seq_file *m, void *v)
+{
+ struct dyn_event *ev = v;
+
+ if (!is_trace_uprobe(ev))
+ return 0;
+
+ return trace_uprobe_show(m, ev);
+}
+
static const struct seq_operations probes_seq_op = {
- .start = probes_seq_start,
- .next = probes_seq_next,
- .stop = probes_seq_stop,
- .show = probes_seq_show
+ .start = dyn_event_seq_start,
+ .next = dyn_event_seq_next,
+ .stop = dyn_event_seq_stop,
+ .show = probes_seq_show
};
static int probes_open(struct inode *inode, struct file *file)
{
int ret;
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
+ if (ret)
+ return ret;
+
if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
- ret = cleanup_all_probes();
+ ret = dyn_events_release_all(&trace_uprobe_ops);
if (ret)
return ret;
}
@@ -620,7 +786,8 @@
static ssize_t probes_write(struct file *file, const char __user *buffer,
size_t count, loff_t *ppos)
{
- return trace_parse_run_command(file, buffer, count, ppos, create_trace_uprobe);
+ return trace_parse_run_command(file, buffer, count, ppos,
+ create_or_delete_trace_uprobe);
}
static const struct file_operations uprobe_events_ops = {
@@ -635,22 +802,33 @@
/* Probes profiling interfaces */
static int probes_profile_seq_show(struct seq_file *m, void *v)
{
- struct trace_uprobe *tu = v;
+ struct dyn_event *ev = v;
+ struct trace_uprobe *tu;
+ if (!is_trace_uprobe(ev))
+ return 0;
+
+ tu = to_trace_uprobe(ev);
seq_printf(m, " %s %-44s %15lu\n", tu->filename,
- trace_event_name(&tu->tp.call), tu->nhit);
+ trace_probe_name(&tu->tp), tu->nhit);
return 0;
}
static const struct seq_operations profile_seq_op = {
- .start = probes_seq_start,
- .next = probes_seq_next,
- .stop = probes_seq_stop,
+ .start = dyn_event_seq_start,
+ .next = dyn_event_seq_next,
+ .stop = dyn_event_seq_stop,
.show = probes_profile_seq_show
};
static int profile_open(struct inode *inode, struct file *file)
{
+ int ret;
+
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
+ if (ret)
+ return ret;
+
return seq_open(file, &profile_seq_op);
}
@@ -764,7 +942,7 @@
struct ring_buffer *buffer;
void *data;
int size, esize;
- struct trace_event_call *call = &tu->tp.call;
+ struct trace_event_call *call = trace_probe_event_call(&tu->tp);
WARN_ON(call != trace_file->event_call);
@@ -806,7 +984,7 @@
return 0;
rcu_read_lock();
- list_for_each_entry_rcu(link, &tu->tp.files, list)
+ trace_probe_for_each_link_rcu(link, &tu->tp)
__uprobe_trace_func(tu, 0, regs, ucb, dsize, link->file);
rcu_read_unlock();
@@ -820,7 +998,7 @@
struct event_file_link *link;
rcu_read_lock();
- list_for_each_entry_rcu(link, &tu->tp.files, list)
+ trace_probe_for_each_link_rcu(link, &tu->tp)
__uprobe_trace_func(tu, func, regs, ucb, dsize, link->file);
rcu_read_unlock();
}
@@ -833,29 +1011,27 @@
struct trace_seq *s = &iter->seq;
struct trace_uprobe *tu;
u8 *data;
- int i;
entry = (struct uprobe_trace_entry_head *)iter->ent;
- tu = container_of(event, struct trace_uprobe, tp.call.event);
+ tu = trace_uprobe_primary_from_call(
+ container_of(event, struct trace_event_call, event));
+ if (unlikely(!tu))
+ goto out;
if (is_ret_probe(tu)) {
trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)",
- trace_event_name(&tu->tp.call),
+ trace_probe_name(&tu->tp),
entry->vaddr[1], entry->vaddr[0]);
data = DATAOF_TRACE_ENTRY(entry, true);
} else {
trace_seq_printf(s, "%s: (0x%lx)",
- trace_event_name(&tu->tp.call),
+ trace_probe_name(&tu->tp),
entry->vaddr[0]);
data = DATAOF_TRACE_ENTRY(entry, false);
}
- for (i = 0; i < tu->tp.nr_args; i++) {
- struct probe_arg *parg = &tu->tp.args[i];
-
- if (!parg->type->print(s, parg->name, data + parg->offset, entry))
- goto out;
- }
+ if (print_probe_args(s, tu->tp.args, tu->tp.nr_args, data, entry) < 0)
+ goto out;
trace_seq_putc(s, '\n');
@@ -867,33 +1043,71 @@
enum uprobe_filter_ctx ctx,
struct mm_struct *mm);
-static int
-probe_event_enable(struct trace_uprobe *tu, struct trace_event_file *file,
- filter_func_t filter)
+static int trace_uprobe_enable(struct trace_uprobe *tu, filter_func_t filter)
{
- bool enabled = trace_probe_is_enabled(&tu->tp);
- struct event_file_link *link = NULL;
int ret;
+ tu->consumer.filter = filter;
+ tu->inode = d_real_inode(tu->path.dentry);
+
+ if (tu->ref_ctr_offset)
+ ret = uprobe_register_refctr(tu->inode, tu->offset,
+ tu->ref_ctr_offset, &tu->consumer);
+ else
+ ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
+
+ if (ret)
+ tu->inode = NULL;
+
+ return ret;
+}
+
+static void __probe_event_disable(struct trace_probe *tp)
+{
+ struct trace_probe *pos;
+ struct trace_uprobe *tu;
+
+ list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
+ tu = container_of(pos, struct trace_uprobe, tp);
+ if (!tu->inode)
+ continue;
+
+ WARN_ON(!uprobe_filter_is_empty(&tu->filter));
+
+ uprobe_unregister(tu->inode, tu->offset, &tu->consumer);
+ tu->inode = NULL;
+ }
+}
+
+static int probe_event_enable(struct trace_event_call *call,
+ struct trace_event_file *file, filter_func_t filter)
+{
+ struct trace_probe *pos, *tp;
+ struct trace_uprobe *tu;
+ bool enabled;
+ int ret;
+
+ tp = trace_probe_primary_from_call(call);
+ if (WARN_ON_ONCE(!tp))
+ return -ENODEV;
+ enabled = trace_probe_is_enabled(tp);
+
+ /* This may also change "enabled" state */
if (file) {
- if (tu->tp.flags & TP_FLAG_PROFILE)
+ if (trace_probe_test_flag(tp, TP_FLAG_PROFILE))
return -EINTR;
- link = kmalloc(sizeof(*link), GFP_KERNEL);
- if (!link)
- return -ENOMEM;
-
- link->file = file;
- list_add_tail_rcu(&link->list, &tu->tp.files);
-
- tu->tp.flags |= TP_FLAG_TRACE;
+ ret = trace_probe_add_file(tp, file);
+ if (ret < 0)
+ return ret;
} else {
- if (tu->tp.flags & TP_FLAG_TRACE)
+ if (trace_probe_test_flag(tp, TP_FLAG_TRACE))
return -EINTR;
- tu->tp.flags |= TP_FLAG_PROFILE;
+ trace_probe_set_flag(tp, TP_FLAG_PROFILE);
}
+ tu = container_of(tp, struct trace_uprobe, tp);
WARN_ON(!uprobe_filter_is_empty(&tu->filter));
if (enabled)
@@ -903,11 +1117,14 @@
if (ret)
goto err_flags;
- tu->consumer.filter = filter;
- tu->inode = d_real_inode(tu->path.dentry);
- ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
- if (ret)
- goto err_buffer;
+ list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
+ tu = container_of(pos, struct trace_uprobe, tp);
+ ret = trace_uprobe_enable(tu, filter);
+ if (ret) {
+ __probe_event_disable(tp);
+ goto err_buffer;
+ }
+ }
return 0;
@@ -915,52 +1132,48 @@
uprobe_buffer_disable();
err_flags:
- if (file) {
- list_del(&link->list);
- kfree(link);
- tu->tp.flags &= ~TP_FLAG_TRACE;
- } else {
- tu->tp.flags &= ~TP_FLAG_PROFILE;
- }
+ if (file)
+ trace_probe_remove_file(tp, file);
+ else
+ trace_probe_clear_flag(tp, TP_FLAG_PROFILE);
+
return ret;
}
-static void
-probe_event_disable(struct trace_uprobe *tu, struct trace_event_file *file)
+static void probe_event_disable(struct trace_event_call *call,
+ struct trace_event_file *file)
{
- if (!trace_probe_is_enabled(&tu->tp))
+ struct trace_probe *tp;
+
+ tp = trace_probe_primary_from_call(call);
+ if (WARN_ON_ONCE(!tp))
+ return;
+
+ if (!trace_probe_is_enabled(tp))
return;
if (file) {
- struct event_file_link *link;
-
- link = find_event_file_link(&tu->tp, file);
- if (!link)
+ if (trace_probe_remove_file(tp, file) < 0)
return;
- list_del_rcu(&link->list);
- /* synchronize with u{,ret}probe_trace_func */
- synchronize_rcu();
- kfree(link);
-
- if (!list_empty(&tu->tp.files))
+ if (trace_probe_is_enabled(tp))
return;
- }
+ } else
+ trace_probe_clear_flag(tp, TP_FLAG_PROFILE);
- WARN_ON(!uprobe_filter_is_empty(&tu->filter));
-
- uprobe_unregister(tu->inode, tu->offset, &tu->consumer);
- tu->inode = NULL;
- tu->tp.flags &= file ? ~TP_FLAG_TRACE : ~TP_FLAG_PROFILE;
-
+ __probe_event_disable(tp);
uprobe_buffer_disable();
}
static int uprobe_event_define_fields(struct trace_event_call *event_call)
{
- int ret, i, size;
+ int ret, size;
struct uprobe_trace_entry_head field;
- struct trace_uprobe *tu = event_call->data;
+ struct trace_uprobe *tu;
+
+ tu = trace_uprobe_primary_from_call(event_call);
+ if (unlikely(!tu))
+ return -ENODEV;
if (is_ret_probe(tu)) {
DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_FUNC, 0);
@@ -970,19 +1183,8 @@
DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_IP, 0);
size = SIZEOF_TRACE_ENTRY(false);
}
- /* Set argument names as fields */
- for (i = 0; i < tu->tp.nr_args; i++) {
- struct probe_arg *parg = &tu->tp.args[i];
- ret = trace_define_field(event_call, parg->type->fmttype,
- parg->name, size + parg->offset,
- parg->type->size, parg->type->is_signed,
- FILTER_OTHER);
-
- if (ret)
- return ret;
- }
- return 0;
+ return traceprobe_define_arg_fields(event_call, size, &tu->tp);
}
#ifdef CONFIG_PERF_EVENTS
@@ -1064,6 +1266,27 @@
return err;
}
+static int uprobe_perf_multi_call(struct trace_event_call *call,
+ struct perf_event *event,
+ int (*op)(struct trace_uprobe *tu, struct perf_event *event))
+{
+ struct trace_probe *pos, *tp;
+ struct trace_uprobe *tu;
+ int ret = 0;
+
+ tp = trace_probe_primary_from_call(call);
+ if (WARN_ON_ONCE(!tp))
+ return -ENODEV;
+
+ list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
+ tu = container_of(pos, struct trace_uprobe, tp);
+ ret = op(tu, event);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
static bool uprobe_perf_filter(struct uprobe_consumer *uc,
enum uprobe_filter_ctx ctx, struct mm_struct *mm)
{
@@ -1082,7 +1305,7 @@
unsigned long func, struct pt_regs *regs,
struct uprobe_cpu_buffer *ucb, int dsize)
{
- struct trace_event_call *call = &tu->tp.call;
+ struct trace_event_call *call = trace_probe_event_call(&tu->tp);
struct uprobe_trace_entry_head *entry;
struct hlist_head *head;
void *data;
@@ -1177,30 +1400,29 @@
trace_uprobe_register(struct trace_event_call *event, enum trace_reg type,
void *data)
{
- struct trace_uprobe *tu = event->data;
struct trace_event_file *file = data;
switch (type) {
case TRACE_REG_REGISTER:
- return probe_event_enable(tu, file, NULL);
+ return probe_event_enable(event, file, NULL);
case TRACE_REG_UNREGISTER:
- probe_event_disable(tu, file);
+ probe_event_disable(event, file);
return 0;
#ifdef CONFIG_PERF_EVENTS
case TRACE_REG_PERF_REGISTER:
- return probe_event_enable(tu, NULL, uprobe_perf_filter);
+ return probe_event_enable(event, NULL, uprobe_perf_filter);
case TRACE_REG_PERF_UNREGISTER:
- probe_event_disable(tu, NULL);
+ probe_event_disable(event, NULL);
return 0;
case TRACE_REG_PERF_OPEN:
- return uprobe_perf_open(tu, data);
+ return uprobe_perf_multi_call(event, data, uprobe_perf_open);
case TRACE_REG_PERF_CLOSE:
- return uprobe_perf_close(tu, data);
+ return uprobe_perf_multi_call(event, data, uprobe_perf_close);
#endif
default:
@@ -1233,13 +1455,13 @@
esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
ucb = uprobe_buffer_get();
- store_trace_args(esize, &tu->tp, regs, ucb->buf, dsize);
+ store_trace_args(ucb->buf, &tu->tp, regs, esize, dsize);
- if (tu->tp.flags & TP_FLAG_TRACE)
+ if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE))
ret |= uprobe_trace_func(tu, regs, ucb, dsize);
#ifdef CONFIG_PERF_EVENTS
- if (tu->tp.flags & TP_FLAG_PROFILE)
+ if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE))
ret |= uprobe_perf_func(tu, regs, ucb, dsize);
#endif
uprobe_buffer_put(ucb);
@@ -1268,13 +1490,13 @@
esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
ucb = uprobe_buffer_get();
- store_trace_args(esize, &tu->tp, regs, ucb->buf, dsize);
+ store_trace_args(ucb->buf, &tu->tp, regs, esize, dsize);
- if (tu->tp.flags & TP_FLAG_TRACE)
+ if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE))
uretprobe_trace_func(tu, func, regs, ucb, dsize);
#ifdef CONFIG_PERF_EVENTS
- if (tu->tp.flags & TP_FLAG_PROFILE)
+ if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE))
uretprobe_perf_func(tu, func, regs, ucb, dsize);
#endif
uprobe_buffer_put(ucb);
@@ -1285,62 +1507,33 @@
.trace = print_uprobe_event
};
-static inline void init_trace_event_call(struct trace_uprobe *tu,
- struct trace_event_call *call)
+static inline void init_trace_event_call(struct trace_uprobe *tu)
{
- INIT_LIST_HEAD(&call->class->fields);
+ struct trace_event_call *call = trace_probe_event_call(&tu->tp);
+
call->event.funcs = &uprobe_funcs;
call->class->define_fields = uprobe_event_define_fields;
- call->flags = TRACE_EVENT_FL_UPROBE;
+ call->flags = TRACE_EVENT_FL_UPROBE | TRACE_EVENT_FL_CAP_ANY;
call->class->reg = trace_uprobe_register;
- call->data = tu;
}
static int register_uprobe_event(struct trace_uprobe *tu)
{
- struct trace_event_call *call = &tu->tp.call;
- int ret = 0;
+ init_trace_event_call(tu);
- init_trace_event_call(tu, call);
-
- if (set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0)
- return -ENOMEM;
-
- ret = register_trace_event(&call->event);
- if (!ret) {
- kfree(call->print_fmt);
- return -ENODEV;
- }
-
- ret = trace_add_event_call(call);
-
- if (ret) {
- pr_info("Failed to register uprobe event: %s\n",
- trace_event_name(call));
- kfree(call->print_fmt);
- unregister_trace_event(&call->event);
- }
-
- return ret;
+ return trace_probe_register_event_call(&tu->tp);
}
static int unregister_uprobe_event(struct trace_uprobe *tu)
{
- int ret;
-
- /* tu->event is unregistered in trace_remove_event_call() */
- ret = trace_remove_event_call(&tu->tp.call);
- if (ret)
- return ret;
- kfree(tu->tp.call.print_fmt);
- tu->tp.call.print_fmt = NULL;
- return 0;
+ return trace_probe_unregister_event_call(&tu->tp);
}
#ifdef CONFIG_PERF_EVENTS
struct trace_event_call *
-create_local_trace_uprobe(char *name, unsigned long offs, bool is_return)
+create_local_trace_uprobe(char *name, unsigned long offs,
+ unsigned long ref_ctr_offset, bool is_return)
{
struct trace_uprobe *tu;
struct path path;
@@ -1356,7 +1549,7 @@
}
/*
- * local trace_kprobes are not added to probe_list, so they are never
+ * local trace_kprobes are not added to dyn_event, so they are never
* searched in find_trace_kprobe(). Therefore, there is no concern of
* duplicated name "DUMMY_EVENT" here.
*/
@@ -1372,15 +1565,16 @@
tu->offset = offs;
tu->path = path;
+ tu->ref_ctr_offset = ref_ctr_offset;
tu->filename = kstrdup(name, GFP_KERNEL);
- init_trace_event_call(tu, &tu->tp.call);
+ init_trace_event_call(tu);
- if (set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0) {
+ if (traceprobe_set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0) {
ret = -ENOMEM;
goto error;
}
- return &tu->tp.call;
+ return trace_probe_event_call(&tu->tp);
error:
free_trace_uprobe(tu);
return ERR_PTR(ret);
@@ -1390,10 +1584,7 @@
{
struct trace_uprobe *tu;
- tu = container_of(event_call, struct trace_uprobe, tp.call);
-
- kfree(tu->tp.call.print_fmt);
- tu->tp.call.print_fmt = NULL;
+ tu = trace_uprobe_primary_from_call(event_call);
free_trace_uprobe(tu);
}
@@ -1403,6 +1594,11 @@
static __init int init_uprobe_trace(void)
{
struct dentry *d_tracer;
+ int ret;
+
+ ret = dyn_event_register(&trace_uprobe_ops);
+ if (ret)
+ return ret;
d_tracer = tracing_init_dentry();
if (IS_ERR(d_tracer))