Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001// SPDX-License-Identifier: GPL-2.0
2/*
3 * ring buffer based function tracer
4 *
5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7 *
8 * Originally taken from the RT patch by:
9 * Arnaldo Carvalho de Melo <acme@redhat.com>
10 *
11 * Based on code from the latency_tracer, that is:
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 Nadia Yvette Chambers
14 */
15#include <linux/ring_buffer.h>
16#include <generated/utsrelease.h>
17#include <linux/stacktrace.h>
18#include <linux/writeback.h>
19#include <linux/kallsyms.h>
David Brazdil0f672f62019-12-10 10:32:29 +000020#include <linux/security.h>
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000021#include <linux/seq_file.h>
22#include <linux/notifier.h>
23#include <linux/irqflags.h>
24#include <linux/debugfs.h>
25#include <linux/tracefs.h>
26#include <linux/pagemap.h>
27#include <linux/hardirq.h>
28#include <linux/linkage.h>
29#include <linux/uaccess.h>
30#include <linux/vmalloc.h>
31#include <linux/ftrace.h>
32#include <linux/module.h>
33#include <linux/percpu.h>
34#include <linux/splice.h>
35#include <linux/kdebug.h>
36#include <linux/string.h>
37#include <linux/mount.h>
38#include <linux/rwsem.h>
39#include <linux/slab.h>
40#include <linux/ctype.h>
41#include <linux/init.h>
42#include <linux/poll.h>
43#include <linux/nmi.h>
44#include <linux/fs.h>
45#include <linux/trace.h>
46#include <linux/sched/clock.h>
47#include <linux/sched/rt.h>
Olivier Deprez157378f2022-04-04 15:47:50 +020048#include <linux/fsnotify.h>
49#include <linux/irq_work.h>
50#include <linux/workqueue.h>
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000051
52#include "trace.h"
53#include "trace_output.h"
54
55/*
56 * On boot up, the ring buffer is set to the minimum size, so that
57 * we do not waste memory on systems that are not using tracing.
58 */
59bool ring_buffer_expanded;
60
61/*
62 * We need to change this state when a selftest is running.
 63 * A selftest will look into the ring-buffer to count the
 64 * entries inserted during the selftest, although concurrent
 65 * insertions into the ring-buffer, such as trace_printk, could occur
 66 * at the same time, giving false positive or negative results.
67 */
68static bool __read_mostly tracing_selftest_running;
69
70/*
Olivier Deprez157378f2022-04-04 15:47:50 +020071 * If boot-time tracing including tracers/events via kernel cmdline
72 * is running, we do not want to run SELFTEST.
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000073 */
74bool __read_mostly tracing_selftest_disabled;
75
Olivier Deprez157378f2022-04-04 15:47:50 +020076#ifdef CONFIG_FTRACE_STARTUP_TEST
77void __init disable_tracing_selftest(const char *reason)
78{
79 if (!tracing_selftest_disabled) {
80 tracing_selftest_disabled = true;
81 pr_info("Ftrace startup test is disabled due to %s\n", reason);
82 }
83}
84#endif
85
Andrew Scullb4b6d4a2019-01-02 15:54:55 +000086/* Pipe tracepoints to printk */
87struct trace_iterator *tracepoint_print_iter;
88int tracepoint_printk;
89static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
90
91/* For tracers that don't implement custom flags */
92static struct tracer_opt dummy_tracer_opt[] = {
93 { }
94};
95
96static int
97dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
98{
99 return 0;
100}
101
102/*
103 * To prevent the comm cache from being overwritten when no
104 * tracing is active, only save the comm when a trace event
105 * occurred.
106 */
107static DEFINE_PER_CPU(bool, trace_taskinfo_save);
108
109/*
110 * Kill all tracing for good (never come back).
111 * It is initialized to 1 but will turn to zero if the initialization
112 * of the tracer is successful. But that is the only place that sets
113 * this back to zero.
114 */
115static int tracing_disabled = 1;
116
117cpumask_var_t __read_mostly tracing_buffer_mask;
118
119/*
120 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
121 *
122 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123 * is set, then ftrace_dump is called. This will output the contents
124 * of the ftrace buffers to the console. This is very useful for
 125 * capturing traces that lead to crashes and outputting them to a
 126 * serial console.
 127 *
 128 * It is off by default, but you can enable it either by specifying
 129 * "ftrace_dump_on_oops" on the kernel command line, or by setting
 130 * /proc/sys/kernel/ftrace_dump_on_oops
 131 * Set 1 if you want to dump the buffers of all CPUs
 132 * Set 2 if you want to dump the buffer of the CPU that triggered the oops
133 */
134
135enum ftrace_dump_mode ftrace_dump_on_oops;
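/*
 * Example usage (illustrative, not part of the original source), based on
 * the description above and on set_ftrace_dump_on_oops() below:
 *
 *	# on the kernel command line:
 *	ftrace_dump_on_oops		(dump the buffers of all CPUs)
 *	ftrace_dump_on_oops=orig_cpu	(dump only the CPU that oopsed)
 *
 *	# or at run time:
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 *	echo 2 > /proc/sys/kernel/ftrace_dump_on_oops
 */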
136
137/* When set, tracing will stop when a WARN*() is hit */
138int __disable_trace_on_warning;
139
140#ifdef CONFIG_TRACE_EVAL_MAP_FILE
141/* Map of enums to their values, for "eval_map" file */
142struct trace_eval_map_head {
143 struct module *mod;
144 unsigned long length;
145};
146
147union trace_eval_map_item;
148
149struct trace_eval_map_tail {
150 /*
151 * "end" is first and points to NULL as it must be different
152 * than "mod" or "eval_string"
153 */
154 union trace_eval_map_item *next;
155 const char *end; /* points to NULL */
156};
157
158static DEFINE_MUTEX(trace_eval_mutex);
159
160/*
161 * The trace_eval_maps are saved in an array with two extra elements,
162 * one at the beginning, and one at the end. The beginning item contains
163 * the count of the saved maps (head.length), and the module they
164 * belong to if not built in (head.mod). The ending item contains a
165 * pointer to the next array of saved eval_map items.
166 */
167union trace_eval_map_item {
168 struct trace_eval_map map;
169 struct trace_eval_map_head head;
170 struct trace_eval_map_tail tail;
171};
172
173static union trace_eval_map_item *trace_eval_maps;
174#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
175
Olivier Deprez157378f2022-04-04 15:47:50 +0200176int tracing_set_tracer(struct trace_array *tr, const char *buf);
Olivier Deprez0e641232021-09-23 10:07:05 +0200177static void ftrace_trace_userstack(struct trace_array *tr,
Olivier Deprez157378f2022-04-04 15:47:50 +0200178 struct trace_buffer *buffer,
David Brazdil0f672f62019-12-10 10:32:29 +0000179 unsigned long flags, int pc);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000180
181#define MAX_TRACER_SIZE 100
182static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
183static char *default_bootup_tracer;
184
185static bool allocate_snapshot;
186
187static int __init set_cmdline_ftrace(char *str)
188{
189 strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
190 default_bootup_tracer = bootup_tracer_buf;
191 /* We are using ftrace early, expand it */
192 ring_buffer_expanded = true;
193 return 1;
194}
195__setup("ftrace=", set_cmdline_ftrace);
196
197static int __init set_ftrace_dump_on_oops(char *str)
198{
199 if (*str++ != '=' || !*str) {
200 ftrace_dump_on_oops = DUMP_ALL;
201 return 1;
202 }
203
204 if (!strcmp("orig_cpu", str)) {
205 ftrace_dump_on_oops = DUMP_ORIG;
206 return 1;
207 }
208
209 return 0;
210}
211__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
212
213static int __init stop_trace_on_warning(char *str)
214{
215 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
216 __disable_trace_on_warning = 1;
217 return 1;
218}
219__setup("traceoff_on_warning", stop_trace_on_warning);
220
221static int __init boot_alloc_snapshot(char *str)
222{
223 allocate_snapshot = true;
224 /* We also need the main ring buffer expanded */
225 ring_buffer_expanded = true;
226 return 1;
227}
228__setup("alloc_snapshot", boot_alloc_snapshot);
229
230
231static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
232
233static int __init set_trace_boot_options(char *str)
234{
235 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
Olivier Deprez157378f2022-04-04 15:47:50 +0200236 return 1;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000237}
238__setup("trace_options=", set_trace_boot_options);
239
240static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
241static char *trace_boot_clock __initdata;
242
243static int __init set_trace_boot_clock(char *str)
244{
245 strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
246 trace_boot_clock = trace_boot_clock_buf;
Olivier Deprez157378f2022-04-04 15:47:50 +0200247 return 1;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000248}
249__setup("trace_clock=", set_trace_boot_clock);
250
251static int __init set_tracepoint_printk(char *str)
252{
Olivier Deprez157378f2022-04-04 15:47:50 +0200253 /* Ignore the "tp_printk_stop_on_boot" param */
254 if (*str == '_')
255 return 0;
256
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000257 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
258 tracepoint_printk = 1;
259 return 1;
260}
261__setup("tp_printk", set_tracepoint_printk);
262
263unsigned long long ns2usecs(u64 nsec)
264{
265 nsec += 500;
266 do_div(nsec, 1000);
267 return nsec;
268}
269
Olivier Deprez157378f2022-04-04 15:47:50 +0200270static void
271trace_process_export(struct trace_export *export,
272 struct ring_buffer_event *event, int flag)
273{
274 struct trace_entry *entry;
275 unsigned int size = 0;
276
277 if (export->flags & flag) {
278 entry = ring_buffer_event_data(event);
279 size = ring_buffer_event_length(event);
280 export->write(export, entry, size);
281 }
282}
283
284static DEFINE_MUTEX(ftrace_export_lock);
285
286static struct trace_export __rcu *ftrace_exports_list __read_mostly;
287
288static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
289static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
290static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
291
292static inline void ftrace_exports_enable(struct trace_export *export)
293{
294 if (export->flags & TRACE_EXPORT_FUNCTION)
295 static_branch_inc(&trace_function_exports_enabled);
296
297 if (export->flags & TRACE_EXPORT_EVENT)
298 static_branch_inc(&trace_event_exports_enabled);
299
300 if (export->flags & TRACE_EXPORT_MARKER)
301 static_branch_inc(&trace_marker_exports_enabled);
302}
303
304static inline void ftrace_exports_disable(struct trace_export *export)
305{
306 if (export->flags & TRACE_EXPORT_FUNCTION)
307 static_branch_dec(&trace_function_exports_enabled);
308
309 if (export->flags & TRACE_EXPORT_EVENT)
310 static_branch_dec(&trace_event_exports_enabled);
311
312 if (export->flags & TRACE_EXPORT_MARKER)
313 static_branch_dec(&trace_marker_exports_enabled);
314}
315
316static void ftrace_exports(struct ring_buffer_event *event, int flag)
317{
318 struct trace_export *export;
319
320 preempt_disable_notrace();
321
322 export = rcu_dereference_raw_check(ftrace_exports_list);
323 while (export) {
324 trace_process_export(export, event, flag);
325 export = rcu_dereference_raw_check(export->next);
326 }
327
328 preempt_enable_notrace();
329}
330
331static inline void
332add_trace_export(struct trace_export **list, struct trace_export *export)
333{
334 rcu_assign_pointer(export->next, *list);
335 /*
336 * We are entering export into the list but another
337 * CPU might be walking that list. We need to make sure
338 * the export->next pointer is valid before another CPU sees
339 * the export pointer included into the list.
340 */
341 rcu_assign_pointer(*list, export);
342}
343
344static inline int
345rm_trace_export(struct trace_export **list, struct trace_export *export)
346{
347 struct trace_export **p;
348
349 for (p = list; *p != NULL; p = &(*p)->next)
350 if (*p == export)
351 break;
352
353 if (*p != export)
354 return -1;
355
356 rcu_assign_pointer(*p, (*p)->next);
357
358 return 0;
359}
360
361static inline void
362add_ftrace_export(struct trace_export **list, struct trace_export *export)
363{
364 ftrace_exports_enable(export);
365
366 add_trace_export(list, export);
367}
368
369static inline int
370rm_ftrace_export(struct trace_export **list, struct trace_export *export)
371{
372 int ret;
373
374 ret = rm_trace_export(list, export);
375 ftrace_exports_disable(export);
376
377 return ret;
378}
379
380int register_ftrace_export(struct trace_export *export)
381{
382 if (WARN_ON_ONCE(!export->write))
383 return -1;
384
385 mutex_lock(&ftrace_export_lock);
386
387 add_ftrace_export(&ftrace_exports_list, export);
388
389 mutex_unlock(&ftrace_export_lock);
390
391 return 0;
392}
393EXPORT_SYMBOL_GPL(register_ftrace_export);
394
395int unregister_ftrace_export(struct trace_export *export)
396{
397 int ret;
398
399 mutex_lock(&ftrace_export_lock);
400
401 ret = rm_ftrace_export(&ftrace_exports_list, export);
402
403 mutex_unlock(&ftrace_export_lock);
404
405 return ret;
406}
407EXPORT_SYMBOL_GPL(unregister_ftrace_export);
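/*
 * Illustrative sketch (not part of the original file): how a caller might
 * use the export API above. The callback name and the flags value are
 * hypothetical; the ->write() signature mirrors the call made in
 * trace_process_export() above.
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		// forward the raw trace entry to an out-of-band channel
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */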
408
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000409/* trace_flags holds trace_options default values */
410#define TRACE_DEFAULT_FLAGS \
411 (FUNCTION_DEFAULT_FLAGS | \
412 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
413 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
414 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
415 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
416
417/* trace_options that are only supported by global_trace */
418#define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
419 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
420
421/* trace_flags that are default zero for instances */
422#define ZEROED_TRACE_FLAGS \
423 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
424
425/*
426 * The global_trace is the descriptor that holds the top-level tracing
427 * buffers for the live tracing.
428 */
429static struct trace_array global_trace = {
430 .trace_flags = TRACE_DEFAULT_FLAGS,
431};
432
433LIST_HEAD(ftrace_trace_arrays);
434
435int trace_array_get(struct trace_array *this_tr)
436{
437 struct trace_array *tr;
438 int ret = -ENODEV;
439
440 mutex_lock(&trace_types_lock);
441 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
442 if (tr == this_tr) {
443 tr->ref++;
444 ret = 0;
445 break;
446 }
447 }
448 mutex_unlock(&trace_types_lock);
449
450 return ret;
451}
452
453static void __trace_array_put(struct trace_array *this_tr)
454{
455 WARN_ON(!this_tr->ref);
456 this_tr->ref--;
457}
458
Olivier Deprez157378f2022-04-04 15:47:50 +0200459/**
460 * trace_array_put - Decrement the reference counter for this trace array.
461 *
462 * NOTE: Use this when we no longer need the trace array returned by
463 * trace_array_get_by_name(). This ensures the trace array can be later
464 * destroyed.
465 *
466 */
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000467void trace_array_put(struct trace_array *this_tr)
468{
Olivier Deprez157378f2022-04-04 15:47:50 +0200469 if (!this_tr)
470 return;
471
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000472 mutex_lock(&trace_types_lock);
473 __trace_array_put(this_tr);
474 mutex_unlock(&trace_types_lock);
475}
Olivier Deprez157378f2022-04-04 15:47:50 +0200476EXPORT_SYMBOL_GPL(trace_array_put);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000477
David Brazdil0f672f62019-12-10 10:32:29 +0000478int tracing_check_open_get_tr(struct trace_array *tr)
479{
480 int ret;
481
482 ret = security_locked_down(LOCKDOWN_TRACEFS);
483 if (ret)
484 return ret;
485
486 if (tracing_disabled)
487 return -ENODEV;
488
489 if (tr && trace_array_get(tr) < 0)
490 return -ENODEV;
491
492 return 0;
493}
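/*
 * Illustrative sketch (not part of the original file): the intended pairing
 * of the helpers above in a tracefs ->open()/->release() implementation.
 * The function names are hypothetical.
 *
 *	static int my_open(struct inode *inode, struct file *filp)
 *	{
 *		struct trace_array *tr = inode->i_private;
 *		int ret;
 *
 *		ret = tracing_check_open_get_tr(tr);
 *		if (ret)
 *			return ret;
 *		...
 *		return 0;
 *	}
 *
 *	static int my_release(struct inode *inode, struct file *filp)
 *	{
 *		trace_array_put(inode->i_private);
 *		return 0;
 *	}
 */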
494
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000495int call_filter_check_discard(struct trace_event_call *call, void *rec,
Olivier Deprez157378f2022-04-04 15:47:50 +0200496 struct trace_buffer *buffer,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000497 struct ring_buffer_event *event)
498{
499 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
500 !filter_match_preds(call->filter, rec)) {
501 __trace_event_discard_commit(buffer, event);
502 return 1;
503 }
504
505 return 0;
506}
507
508void trace_free_pid_list(struct trace_pid_list *pid_list)
509{
510 vfree(pid_list->pids);
511 kfree(pid_list);
512}
513
514/**
515 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
516 * @filtered_pids: The list of pids to check
517 * @search_pid: The PID to find in @filtered_pids
518 *
 519 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
520 */
521bool
522trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
523{
524 /*
525 * If pid_max changed after filtered_pids was created, we
526 * by default ignore all pids greater than the previous pid_max.
527 */
528 if (search_pid >= filtered_pids->pid_max)
529 return false;
530
531 return test_bit(search_pid, filtered_pids->pids);
532}
533
534/**
535 * trace_ignore_this_task - should a task be ignored for tracing
 536 * @filtered_pids: The list of pids to check
 * @filtered_no_pids: The list of pids not to be traced
 537 * @task: The task that should be ignored if not filtered
538 *
539 * Checks if @task should be traced or not from @filtered_pids.
540 * Returns true if @task should *NOT* be traced.
541 * Returns false if @task should be traced.
542 */
543bool
Olivier Deprez157378f2022-04-04 15:47:50 +0200544trace_ignore_this_task(struct trace_pid_list *filtered_pids,
545 struct trace_pid_list *filtered_no_pids,
546 struct task_struct *task)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000547{
548 /*
Olivier Deprez157378f2022-04-04 15:47:50 +0200549 * If filtered_no_pids is not empty, and the task's pid is listed
550 * in filtered_no_pids, then return true.
551 * Otherwise, if filtered_pids is empty, that means we can
552 * trace all tasks. If it has content, then only trace pids
553 * within filtered_pids.
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000554 */
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000555
Olivier Deprez157378f2022-04-04 15:47:50 +0200556 return (filtered_pids &&
557 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
558 (filtered_no_pids &&
559 trace_find_filtered_pid(filtered_no_pids, task->pid));
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000560}
561
562/**
David Brazdil0f672f62019-12-10 10:32:29 +0000563 * trace_filter_add_remove_task - Add or remove a task from a pid_list
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000564 * @pid_list: The list to modify
565 * @self: The current task for fork or NULL for exit
566 * @task: The task to add or remove
567 *
568 * If adding a task, if @self is defined, the task is only added if @self
569 * is also included in @pid_list. This happens on fork and tasks should
570 * only be added when the parent is listed. If @self is NULL, then the
571 * @task pid will be removed from the list, which would happen on exit
572 * of a task.
573 */
574void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
575 struct task_struct *self,
576 struct task_struct *task)
577{
578 if (!pid_list)
579 return;
580
581 /* For forks, we only add if the forking task is listed */
582 if (self) {
583 if (!trace_find_filtered_pid(pid_list, self->pid))
584 return;
585 }
586
587 /* Sorry, but we don't support pid_max changing after setting */
588 if (task->pid >= pid_list->pid_max)
589 return;
590
591 /* "self" is set for forks, and NULL for exits */
592 if (self)
593 set_bit(task->pid, pid_list->pids);
594 else
595 clear_bit(task->pid, pid_list->pids);
596}
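/*
 * Illustrative note (not part of the original file): callers typically wire
 * this helper to the fork and exit paths so the pid filter follows task
 * lifetimes, matching the @self semantics described above:
 *
 *	// on fork:	trace_filter_add_remove_task(pid_list, parent, child);
 *	// on exit:	trace_filter_add_remove_task(pid_list, NULL, task);
 */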
597
598/**
599 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
600 * @pid_list: The pid list to show
601 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
602 * @pos: The position of the file
603 *
604 * This is used by the seq_file "next" operation to iterate the pids
605 * listed in a trace_pid_list structure.
606 *
607 * Returns the pid+1 as we want to display pid of zero, but NULL would
608 * stop the iteration.
609 */
610void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
611{
612 unsigned long pid = (unsigned long)v;
613
614 (*pos)++;
615
 616 /* pid already is +1 of the actual previous bit */
617 pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
618
619 /* Return pid + 1 to allow zero to be represented */
620 if (pid < pid_list->pid_max)
621 return (void *)(pid + 1);
622
623 return NULL;
624}
625
626/**
627 * trace_pid_start - Used for seq_file to start reading pid lists
628 * @pid_list: The pid list to show
629 * @pos: The position of the file
630 *
631 * This is used by seq_file "start" operation to start the iteration
632 * of listing pids.
633 *
634 * Returns the pid+1 as we want to display pid of zero, but NULL would
635 * stop the iteration.
636 */
637void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
638{
639 unsigned long pid;
640 loff_t l = 0;
641
642 pid = find_first_bit(pid_list->pids, pid_list->pid_max);
643 if (pid >= pid_list->pid_max)
644 return NULL;
645
646 /* Return pid + 1 so that zero can be the exit value */
647 for (pid++; pid && l < *pos;
648 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
649 ;
650 return (void *)pid;
651}
652
653/**
654 * trace_pid_show - show the current pid in seq_file processing
655 * @m: The seq_file structure to write into
656 * @v: A void pointer of the pid (+1) value to display
657 *
658 * Can be directly used by seq_file operations to display the current
659 * pid value.
660 */
661int trace_pid_show(struct seq_file *m, void *v)
662{
663 unsigned long pid = (unsigned long)v - 1;
664
665 seq_printf(m, "%lu\n", pid);
666 return 0;
667}
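/*
 * Illustrative sketch (not part of the original file): the three helpers
 * above are meant to back a seq_file interface, roughly as below. A real
 * user would wrap ->start/->stop to take whatever lock protects the
 * pid_list; the wrapper names are hypothetical.
 *
 *	static const struct seq_operations my_pid_seq_ops = {
 *		.start	= my_pid_seq_start,	// calls trace_pid_start()
 *		.next	= my_pid_seq_next,	// calls trace_pid_next()
 *		.stop	= my_pid_seq_stop,
 *		.show	= trace_pid_show,
 *	};
 */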
668
669/* 128 should be much more than enough */
670#define PID_BUF_SIZE 127
671
672int trace_pid_write(struct trace_pid_list *filtered_pids,
673 struct trace_pid_list **new_pid_list,
674 const char __user *ubuf, size_t cnt)
675{
676 struct trace_pid_list *pid_list;
677 struct trace_parser parser;
678 unsigned long val;
679 int nr_pids = 0;
680 ssize_t read = 0;
681 ssize_t ret = 0;
682 loff_t pos;
683 pid_t pid;
684
685 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
686 return -ENOMEM;
687
688 /*
689 * Always recreate a new array. The write is an all or nothing
690 * operation. Always create a new array when adding new pids by
691 * the user. If the operation fails, then the current list is
692 * not modified.
693 */
694 pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
David Brazdil0f672f62019-12-10 10:32:29 +0000695 if (!pid_list) {
696 trace_parser_put(&parser);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000697 return -ENOMEM;
David Brazdil0f672f62019-12-10 10:32:29 +0000698 }
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000699
700 pid_list->pid_max = READ_ONCE(pid_max);
701
702 /* Only truncating will shrink pid_max */
703 if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
704 pid_list->pid_max = filtered_pids->pid_max;
705
706 pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
707 if (!pid_list->pids) {
David Brazdil0f672f62019-12-10 10:32:29 +0000708 trace_parser_put(&parser);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000709 kfree(pid_list);
710 return -ENOMEM;
711 }
712
713 if (filtered_pids) {
714 /* copy the current bits to the new max */
715 for_each_set_bit(pid, filtered_pids->pids,
716 filtered_pids->pid_max) {
717 set_bit(pid, pid_list->pids);
718 nr_pids++;
719 }
720 }
721
722 while (cnt > 0) {
723
724 pos = 0;
725
726 ret = trace_get_user(&parser, ubuf, cnt, &pos);
727 if (ret < 0 || !trace_parser_loaded(&parser))
728 break;
729
730 read += ret;
731 ubuf += ret;
732 cnt -= ret;
733
734 ret = -EINVAL;
735 if (kstrtoul(parser.buffer, 0, &val))
736 break;
737 if (val >= pid_list->pid_max)
738 break;
739
740 pid = (pid_t)val;
741
742 set_bit(pid, pid_list->pids);
743 nr_pids++;
744
745 trace_parser_clear(&parser);
746 ret = 0;
747 }
748 trace_parser_put(&parser);
749
750 if (ret < 0) {
751 trace_free_pid_list(pid_list);
752 return ret;
753 }
754
755 if (!nr_pids) {
756 /* Cleared the list of pids */
757 trace_free_pid_list(pid_list);
758 read = ret;
759 pid_list = NULL;
760 }
761
762 *new_pid_list = pid_list;
763
764 return read;
765}
766
Olivier Deprez157378f2022-04-04 15:47:50 +0200767static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000768{
769 u64 ts;
770
771 /* Early boot up does not have a buffer yet */
772 if (!buf->buffer)
773 return trace_clock_local();
774
775 ts = ring_buffer_time_stamp(buf->buffer, cpu);
776 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
777
778 return ts;
779}
780
781u64 ftrace_now(int cpu)
782{
Olivier Deprez157378f2022-04-04 15:47:50 +0200783 return buffer_ftrace_now(&global_trace.array_buffer, cpu);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000784}
785
786/**
787 * tracing_is_enabled - Show if global_trace has been disabled
788 *
 789 * Shows if the global trace has been enabled or not. It uses the
 790 * mirror flag "buffer_disabled" so it can be used in fast paths such
 791 * as the irqsoff tracer. But it may be inaccurate due to races. If you
792 * need to know the accurate state, use tracing_is_on() which is a little
793 * slower, but accurate.
794 */
795int tracing_is_enabled(void)
796{
797 /*
798 * For quick access (irqsoff uses this in fast path), just
799 * return the mirror variable of the state of the ring buffer.
800 * It's a little racy, but we don't really care.
801 */
802 smp_rmb();
803 return !global_trace.buffer_disabled;
804}
805
806/*
807 * trace_buf_size is the size in bytes that is allocated
808 * for a buffer. Note, the number of bytes is always rounded
809 * to page size.
810 *
811 * This number is purposely set to a low number of 16384.
 812 * If the dump on oops happens, it is much nicer not to have to
 813 * wait for all that output. Anyway, this is configurable at both
 814 * boot time and run time.
815 */
816#define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
817
818static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
819
820/* trace_types holds a link list of available tracers. */
821static struct tracer *trace_types __read_mostly;
822
823/*
824 * trace_types_lock is used to protect the trace_types list.
825 */
826DEFINE_MUTEX(trace_types_lock);
827
828/*
829 * serialize the access of the ring buffer
830 *
831 * ring buffer serializes readers, but it is low level protection.
832 * The validity of the events (which returns by ring_buffer_peek() ..etc)
833 * are not protected by ring buffer.
834 *
 835 * The content of events may become garbage if we allow another process to
 836 * consume these events concurrently:
 837 * A) the page of the consumed events may become a normal page
 838 * (not a reader page) in the ring buffer, and this page will be rewritten
 839 * by the events producer.
840 * B) The page of the consumed events may become a page for splice_read,
841 * and this page will be returned to system.
842 *
 843 * These primitives allow multi-process access to different cpu ring buffers
 844 * concurrently.
 845 *
 846 * These primitives don't distinguish read-only and read-consume access.
 847 * Multiple read-only accesses are also serialized.
848 */
849
850#ifdef CONFIG_SMP
851static DECLARE_RWSEM(all_cpu_access_lock);
852static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
853
854static inline void trace_access_lock(int cpu)
855{
856 if (cpu == RING_BUFFER_ALL_CPUS) {
857 /* gain it for accessing the whole ring buffer. */
858 down_write(&all_cpu_access_lock);
859 } else {
860 /* gain it for accessing a cpu ring buffer. */
861
862 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
863 down_read(&all_cpu_access_lock);
864
865 /* Secondly block other access to this @cpu ring buffer. */
866 mutex_lock(&per_cpu(cpu_access_lock, cpu));
867 }
868}
869
870static inline void trace_access_unlock(int cpu)
871{
872 if (cpu == RING_BUFFER_ALL_CPUS) {
873 up_write(&all_cpu_access_lock);
874 } else {
875 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
876 up_read(&all_cpu_access_lock);
877 }
878}
879
880static inline void trace_access_lock_init(void)
881{
882 int cpu;
883
884 for_each_possible_cpu(cpu)
885 mutex_init(&per_cpu(cpu_access_lock, cpu));
886}
887
888#else
889
890static DEFINE_MUTEX(access_lock);
891
892static inline void trace_access_lock(int cpu)
893{
894 (void)cpu;
895 mutex_lock(&access_lock);
896}
897
898static inline void trace_access_unlock(int cpu)
899{
900 (void)cpu;
901 mutex_unlock(&access_lock);
902}
903
904static inline void trace_access_lock_init(void)
905{
906}
907
908#endif
909
910#ifdef CONFIG_STACKTRACE
Olivier Deprez157378f2022-04-04 15:47:50 +0200911static void __ftrace_trace_stack(struct trace_buffer *buffer,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000912 unsigned long flags,
913 int skip, int pc, struct pt_regs *regs);
914static inline void ftrace_trace_stack(struct trace_array *tr,
Olivier Deprez157378f2022-04-04 15:47:50 +0200915 struct trace_buffer *buffer,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000916 unsigned long flags,
917 int skip, int pc, struct pt_regs *regs);
918
919#else
Olivier Deprez157378f2022-04-04 15:47:50 +0200920static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000921 unsigned long flags,
922 int skip, int pc, struct pt_regs *regs)
923{
924}
925static inline void ftrace_trace_stack(struct trace_array *tr,
Olivier Deprez157378f2022-04-04 15:47:50 +0200926 struct trace_buffer *buffer,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000927 unsigned long flags,
928 int skip, int pc, struct pt_regs *regs)
929{
930}
931
932#endif
933
934static __always_inline void
935trace_event_setup(struct ring_buffer_event *event,
936 int type, unsigned long flags, int pc)
937{
938 struct trace_entry *ent = ring_buffer_event_data(event);
939
David Brazdil0f672f62019-12-10 10:32:29 +0000940 tracing_generic_entry_update(ent, type, flags, pc);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000941}
942
943static __always_inline struct ring_buffer_event *
Olivier Deprez157378f2022-04-04 15:47:50 +0200944__trace_buffer_lock_reserve(struct trace_buffer *buffer,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000945 int type,
946 unsigned long len,
947 unsigned long flags, int pc)
948{
949 struct ring_buffer_event *event;
950
951 event = ring_buffer_lock_reserve(buffer, len);
952 if (event != NULL)
953 trace_event_setup(event, type, flags, pc);
954
955 return event;
956}
957
958void tracer_tracing_on(struct trace_array *tr)
959{
Olivier Deprez157378f2022-04-04 15:47:50 +0200960 if (tr->array_buffer.buffer)
961 ring_buffer_record_on(tr->array_buffer.buffer);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000962 /*
963 * This flag is looked at when buffers haven't been allocated
964 * yet, or by some tracers (like irqsoff), that just want to
965 * know if the ring buffer has been disabled, but it can handle
 966 * races where it gets disabled but we still do a record.
967 * As the check is in the fast path of the tracers, it is more
968 * important to be fast than accurate.
969 */
970 tr->buffer_disabled = 0;
971 /* Make the flag seen by readers */
972 smp_wmb();
973}
974
975/**
976 * tracing_on - enable tracing buffers
977 *
978 * This function enables tracing buffers that may have been
979 * disabled with tracing_off.
980 */
981void tracing_on(void)
982{
983 tracer_tracing_on(&global_trace);
984}
985EXPORT_SYMBOL_GPL(tracing_on);
986
987
988static __always_inline void
Olivier Deprez157378f2022-04-04 15:47:50 +0200989__buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +0000990{
991 __this_cpu_write(trace_taskinfo_save, true);
992
993 /* If this is the temp buffer, we need to commit fully */
994 if (this_cpu_read(trace_buffered_event) == event) {
995 /* Length is in event->array[0] */
996 ring_buffer_write(buffer, event->array[0], &event->array[1]);
997 /* Release the temp buffer */
998 this_cpu_dec(trace_buffered_event_cnt);
999 } else
1000 ring_buffer_unlock_commit(buffer, event);
1001}
1002
1003/**
1004 * __trace_puts - write a constant string into the trace buffer.
1005 * @ip: The address of the caller
1006 * @str: The constant string to write
1007 * @size: The size of the string.
1008 */
1009int __trace_puts(unsigned long ip, const char *str, int size)
1010{
1011 struct ring_buffer_event *event;
Olivier Deprez157378f2022-04-04 15:47:50 +02001012 struct trace_buffer *buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001013 struct print_entry *entry;
1014 unsigned long irq_flags;
1015 int alloc;
1016 int pc;
1017
1018 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1019 return 0;
1020
1021 pc = preempt_count();
1022
1023 if (unlikely(tracing_selftest_running || tracing_disabled))
1024 return 0;
1025
1026 alloc = sizeof(*entry) + size + 2; /* possible \n added */
1027
1028 local_save_flags(irq_flags);
Olivier Deprez157378f2022-04-04 15:47:50 +02001029 buffer = global_trace.array_buffer.buffer;
1030 ring_buffer_nest_start(buffer);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001031 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1032 irq_flags, pc);
Olivier Deprez157378f2022-04-04 15:47:50 +02001033 if (!event) {
1034 size = 0;
1035 goto out;
1036 }
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001037
1038 entry = ring_buffer_event_data(event);
1039 entry->ip = ip;
1040
1041 memcpy(&entry->buf, str, size);
1042
1043 /* Add a newline if necessary */
1044 if (entry->buf[size - 1] != '\n') {
1045 entry->buf[size] = '\n';
1046 entry->buf[size + 1] = '\0';
1047 } else
1048 entry->buf[size] = '\0';
1049
1050 __buffer_unlock_commit(buffer, event);
1051 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
Olivier Deprez157378f2022-04-04 15:47:50 +02001052 out:
1053 ring_buffer_nest_end(buffer);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001054 return size;
1055}
1056EXPORT_SYMBOL_GPL(__trace_puts);
1057
1058/**
1059 * __trace_bputs - write the pointer to a constant string into trace buffer
1060 * @ip: The address of the caller
1061 * @str: The constant string to write to the buffer to
1062 */
1063int __trace_bputs(unsigned long ip, const char *str)
1064{
1065 struct ring_buffer_event *event;
Olivier Deprez157378f2022-04-04 15:47:50 +02001066 struct trace_buffer *buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001067 struct bputs_entry *entry;
1068 unsigned long irq_flags;
1069 int size = sizeof(struct bputs_entry);
Olivier Deprez157378f2022-04-04 15:47:50 +02001070 int ret = 0;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001071 int pc;
1072
1073 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1074 return 0;
1075
1076 pc = preempt_count();
1077
1078 if (unlikely(tracing_selftest_running || tracing_disabled))
1079 return 0;
1080
1081 local_save_flags(irq_flags);
Olivier Deprez157378f2022-04-04 15:47:50 +02001082 buffer = global_trace.array_buffer.buffer;
1083
1084 ring_buffer_nest_start(buffer);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001085 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1086 irq_flags, pc);
1087 if (!event)
Olivier Deprez157378f2022-04-04 15:47:50 +02001088 goto out;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001089
1090 entry = ring_buffer_event_data(event);
1091 entry->ip = ip;
1092 entry->str = str;
1093
1094 __buffer_unlock_commit(buffer, event);
1095 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
1096
Olivier Deprez157378f2022-04-04 15:47:50 +02001097 ret = 1;
1098 out:
1099 ring_buffer_nest_end(buffer);
1100 return ret;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001101}
1102EXPORT_SYMBOL_GPL(__trace_bputs);
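/*
 * Usage note (illustrative, not part of the original file): these two
 * helpers are normally reached through the trace_puts() macro, which is
 * expected to pick __trace_bputs() for compile-time constant strings and
 * __trace_puts() otherwise, e.g.
 *
 *	trace_puts("reached the slow path\n");
 */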
1103
1104#ifdef CONFIG_TRACER_SNAPSHOT
Olivier Deprez157378f2022-04-04 15:47:50 +02001105static void tracing_snapshot_instance_cond(struct trace_array *tr,
1106 void *cond_data)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001107{
1108 struct tracer *tracer = tr->current_trace;
1109 unsigned long flags;
1110
1111 if (in_nmi()) {
1112 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1113 internal_trace_puts("*** snapshot is being ignored ***\n");
1114 return;
1115 }
1116
1117 if (!tr->allocated_snapshot) {
1118 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1119 internal_trace_puts("*** stopping trace here! ***\n");
1120 tracing_off();
1121 return;
1122 }
1123
1124 /* Note, snapshot can not be used when the tracer uses it */
1125 if (tracer->use_max_tr) {
1126 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1127 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1128 return;
1129 }
1130
1131 local_irq_save(flags);
David Brazdil0f672f62019-12-10 10:32:29 +00001132 update_max_tr(tr, current, smp_processor_id(), cond_data);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001133 local_irq_restore(flags);
1134}
1135
David Brazdil0f672f62019-12-10 10:32:29 +00001136void tracing_snapshot_instance(struct trace_array *tr)
1137{
1138 tracing_snapshot_instance_cond(tr, NULL);
1139}
1140
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001141/**
1142 * tracing_snapshot - take a snapshot of the current buffer.
1143 *
1144 * This causes a swap between the snapshot buffer and the current live
1145 * tracing buffer. You can use this to take snapshots of the live
1146 * trace when some condition is triggered, but continue to trace.
1147 *
1148 * Note, make sure to allocate the snapshot with either
1149 * a tracing_snapshot_alloc(), or by doing it manually
1150 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1151 *
1152 * If the snapshot buffer is not allocated, it will stop tracing.
1153 * Basically making a permanent snapshot.
1154 */
1155void tracing_snapshot(void)
1156{
1157 struct trace_array *tr = &global_trace;
1158
1159 tracing_snapshot_instance(tr);
1160}
1161EXPORT_SYMBOL_GPL(tracing_snapshot);
1162
David Brazdil0f672f62019-12-10 10:32:29 +00001163/**
1164 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1165 * @tr: The tracing instance to snapshot
1166 * @cond_data: The data to be tested conditionally, and possibly saved
1167 *
1168 * This is the same as tracing_snapshot() except that the snapshot is
1169 * conditional - the snapshot will only happen if the
1170 * cond_snapshot.update() implementation receiving the cond_data
1171 * returns true, which means that the trace array's cond_snapshot
1172 * update() operation used the cond_data to determine whether the
1173 * snapshot should be taken, and if it was, presumably saved it along
1174 * with the snapshot.
1175 */
1176void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1177{
1178 tracing_snapshot_instance_cond(tr, cond_data);
1179}
1180EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1181
1182/**
1183 * tracing_snapshot_cond_data - get the user data associated with a snapshot
1184 * @tr: The tracing instance
1185 *
1186 * When the user enables a conditional snapshot using
1187 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1188 * with the snapshot. This accessor is used to retrieve it.
1189 *
1190 * Should not be called from cond_snapshot.update(), since it takes
1191 * the tr->max_lock lock, which the code calling
1192 * cond_snapshot.update() has already done.
1193 *
1194 * Returns the cond_data associated with the trace array's snapshot.
1195 */
1196void *tracing_cond_snapshot_data(struct trace_array *tr)
1197{
1198 void *cond_data = NULL;
1199
1200 arch_spin_lock(&tr->max_lock);
1201
1202 if (tr->cond_snapshot)
1203 cond_data = tr->cond_snapshot->cond_data;
1204
1205 arch_spin_unlock(&tr->max_lock);
1206
1207 return cond_data;
1208}
1209EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1210
Olivier Deprez157378f2022-04-04 15:47:50 +02001211static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1212 struct array_buffer *size_buf, int cpu_id);
1213static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001214
1215int tracing_alloc_snapshot_instance(struct trace_array *tr)
1216{
1217 int ret;
1218
1219 if (!tr->allocated_snapshot) {
1220
1221 /* allocate spare buffer */
1222 ret = resize_buffer_duplicate_size(&tr->max_buffer,
Olivier Deprez157378f2022-04-04 15:47:50 +02001223 &tr->array_buffer, RING_BUFFER_ALL_CPUS);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001224 if (ret < 0)
1225 return ret;
1226
1227 tr->allocated_snapshot = true;
1228 }
1229
1230 return 0;
1231}
1232
1233static void free_snapshot(struct trace_array *tr)
1234{
1235 /*
 1236 * We don't free the ring buffer; instead, we resize it because
 1237 * the max_tr ring buffer has some state (e.g. ring->clock) and
 1238 * we want to preserve it.
1239 */
1240 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1241 set_buffer_entries(&tr->max_buffer, 1);
1242 tracing_reset_online_cpus(&tr->max_buffer);
1243 tr->allocated_snapshot = false;
1244}
1245
1246/**
1247 * tracing_alloc_snapshot - allocate snapshot buffer.
1248 *
1249 * This only allocates the snapshot buffer if it isn't already
1250 * allocated - it doesn't also take a snapshot.
1251 *
1252 * This is meant to be used in cases where the snapshot buffer needs
1253 * to be set up for events that can't sleep but need to be able to
1254 * trigger a snapshot.
1255 */
1256int tracing_alloc_snapshot(void)
1257{
1258 struct trace_array *tr = &global_trace;
1259 int ret;
1260
1261 ret = tracing_alloc_snapshot_instance(tr);
1262 WARN_ON(ret < 0);
1263
1264 return ret;
1265}
1266EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1267
1268/**
1269 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1270 *
1271 * This is similar to tracing_snapshot(), but it will allocate the
1272 * snapshot buffer if it isn't already allocated. Use this only
1273 * where it is safe to sleep, as the allocation may sleep.
1274 *
1275 * This causes a swap between the snapshot buffer and the current live
1276 * tracing buffer. You can use this to take snapshots of the live
1277 * trace when some condition is triggered, but continue to trace.
1278 */
1279void tracing_snapshot_alloc(void)
1280{
1281 int ret;
1282
1283 ret = tracing_alloc_snapshot();
1284 if (ret < 0)
1285 return;
1286
1287 tracing_snapshot();
1288}
1289EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
David Brazdil0f672f62019-12-10 10:32:29 +00001290
1291/**
1292 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1293 * @tr: The tracing instance
1294 * @cond_data: User data to associate with the snapshot
1295 * @update: Implementation of the cond_snapshot update function
1296 *
1297 * Check whether the conditional snapshot for the given instance has
1298 * already been enabled, or if the current tracer is already using a
1299 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1300 * save the cond_data and update function inside.
1301 *
1302 * Returns 0 if successful, error otherwise.
1303 */
1304int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1305 cond_update_fn_t update)
1306{
1307 struct cond_snapshot *cond_snapshot;
1308 int ret = 0;
1309
1310 cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1311 if (!cond_snapshot)
1312 return -ENOMEM;
1313
1314 cond_snapshot->cond_data = cond_data;
1315 cond_snapshot->update = update;
1316
1317 mutex_lock(&trace_types_lock);
1318
1319 ret = tracing_alloc_snapshot_instance(tr);
1320 if (ret)
1321 goto fail_unlock;
1322
1323 if (tr->current_trace->use_max_tr) {
1324 ret = -EBUSY;
1325 goto fail_unlock;
1326 }
1327
1328 /*
1329 * The cond_snapshot can only change to NULL without the
1330 * trace_types_lock. We don't care if we race with it going
1331 * to NULL, but we want to make sure that it's not set to
1332 * something other than NULL when we get here, which we can
1333 * do safely with only holding the trace_types_lock and not
1334 * having to take the max_lock.
1335 */
1336 if (tr->cond_snapshot) {
1337 ret = -EBUSY;
1338 goto fail_unlock;
1339 }
1340
1341 arch_spin_lock(&tr->max_lock);
1342 tr->cond_snapshot = cond_snapshot;
1343 arch_spin_unlock(&tr->max_lock);
1344
1345 mutex_unlock(&trace_types_lock);
1346
1347 return ret;
1348
1349 fail_unlock:
1350 mutex_unlock(&trace_types_lock);
1351 kfree(cond_snapshot);
1352 return ret;
1353}
1354EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1355
1356/**
1357 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1358 * @tr: The tracing instance
1359 *
1360 * Check whether the conditional snapshot for the given instance is
1361 * enabled; if so, free the cond_snapshot associated with it,
1362 * otherwise return -EINVAL.
1363 *
1364 * Returns 0 if successful, error otherwise.
1365 */
1366int tracing_snapshot_cond_disable(struct trace_array *tr)
1367{
1368 int ret = 0;
1369
1370 arch_spin_lock(&tr->max_lock);
1371
1372 if (!tr->cond_snapshot)
1373 ret = -EINVAL;
1374 else {
1375 kfree(tr->cond_snapshot);
1376 tr->cond_snapshot = NULL;
1377 }
1378
1379 arch_spin_unlock(&tr->max_lock);
1380
1381 return ret;
1382}
1383EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
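/*
 * Illustrative sketch (not part of the original file): enabling a
 * conditional snapshot from kernel code. The update callback and its
 * condition are hypothetical; the callback signature mirrors the
 * tr->cond_snapshot->update(tr, cond_data) call made in update_max_tr().
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		// return true when the snapshot should really be taken
 *		return *(bool *)cond_data;
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, &my_flag, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_flag);	// at the point of interest
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */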
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001384#else
1385void tracing_snapshot(void)
1386{
1387 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1388}
1389EXPORT_SYMBOL_GPL(tracing_snapshot);
David Brazdil0f672f62019-12-10 10:32:29 +00001390void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1391{
1392 WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1393}
1394EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001395int tracing_alloc_snapshot(void)
1396{
1397 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1398 return -ENODEV;
1399}
1400EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1401void tracing_snapshot_alloc(void)
1402{
1403 /* Give warning */
1404 tracing_snapshot();
1405}
1406EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
David Brazdil0f672f62019-12-10 10:32:29 +00001407void *tracing_cond_snapshot_data(struct trace_array *tr)
1408{
1409 return NULL;
1410}
1411EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1412int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1413{
1414 return -ENODEV;
1415}
1416EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1417int tracing_snapshot_cond_disable(struct trace_array *tr)
1418{
1419 return false;
1420}
1421EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001422#endif /* CONFIG_TRACER_SNAPSHOT */
1423
1424void tracer_tracing_off(struct trace_array *tr)
1425{
Olivier Deprez157378f2022-04-04 15:47:50 +02001426 if (tr->array_buffer.buffer)
1427 ring_buffer_record_off(tr->array_buffer.buffer);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001428 /*
1429 * This flag is looked at when buffers haven't been allocated
1430 * yet, or by some tracers (like irqsoff), that just want to
1431 * know if the ring buffer has been disabled, but it can handle
 1432 * races where it gets disabled but we still do a record.
1433 * As the check is in the fast path of the tracers, it is more
1434 * important to be fast than accurate.
1435 */
1436 tr->buffer_disabled = 1;
1437 /* Make the flag seen by readers */
1438 smp_wmb();
1439}
1440
1441/**
1442 * tracing_off - turn off tracing buffers
1443 *
1444 * This function stops the tracing buffers from recording data.
1445 * It does not disable any overhead the tracers themselves may
1446 * be causing. This function simply causes all recording to
1447 * the ring buffers to fail.
1448 */
1449void tracing_off(void)
1450{
1451 tracer_tracing_off(&global_trace);
1452}
1453EXPORT_SYMBOL_GPL(tracing_off);
1454
1455void disable_trace_on_warning(void)
1456{
Olivier Deprez157378f2022-04-04 15:47:50 +02001457 if (__disable_trace_on_warning) {
1458 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1459 "Disabling tracing due to warning\n");
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001460 tracing_off();
Olivier Deprez157378f2022-04-04 15:47:50 +02001461 }
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001462}
1463
1464/**
1465 * tracer_tracing_is_on - show real state of ring buffer enabled
1466 * @tr : the trace array to know if ring buffer is enabled
1467 *
1468 * Shows real state of the ring buffer if it is enabled or not.
1469 */
1470bool tracer_tracing_is_on(struct trace_array *tr)
1471{
Olivier Deprez157378f2022-04-04 15:47:50 +02001472 if (tr->array_buffer.buffer)
1473 return ring_buffer_record_is_on(tr->array_buffer.buffer);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001474 return !tr->buffer_disabled;
1475}
1476
1477/**
1478 * tracing_is_on - show state of ring buffers enabled
1479 */
1480int tracing_is_on(void)
1481{
1482 return tracer_tracing_is_on(&global_trace);
1483}
1484EXPORT_SYMBOL_GPL(tracing_is_on);
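/*
 * Illustrative sketch (not part of the original file): a common debugging
 * pattern is to freeze the ring buffer from the code path being studied so
 * the events of interest are not overwritten:
 *
 *	if (suspicious_condition) {
 *		trace_printk("hit the bad case\n");
 *		tracing_off();
 *	}
 *
 * Recording can be re-enabled later with tracing_on() or via the
 * "tracing_on" control file in tracefs.
 */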
1485
1486static int __init set_buf_size(char *str)
1487{
1488 unsigned long buf_size;
1489
1490 if (!str)
1491 return 0;
1492 buf_size = memparse(str, &str);
Olivier Deprez157378f2022-04-04 15:47:50 +02001493 /*
1494 * nr_entries can not be zero and the startup
1495 * tests require some buffer space. Therefore
1496 * ensure we have at least 4096 bytes of buffer.
1497 */
1498 trace_buf_size = max(4096UL, buf_size);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001499 return 1;
1500}
1501__setup("trace_buf_size=", set_buf_size);
1502
1503static int __init set_tracing_thresh(char *str)
1504{
1505 unsigned long threshold;
1506 int ret;
1507
1508 if (!str)
1509 return 0;
1510 ret = kstrtoul(str, 0, &threshold);
1511 if (ret < 0)
1512 return 0;
1513 tracing_thresh = threshold * 1000;
1514 return 1;
1515}
1516__setup("tracing_thresh=", set_tracing_thresh);
1517
1518unsigned long nsecs_to_usecs(unsigned long nsecs)
1519{
1520 return nsecs / 1000;
1521}
1522
1523/*
1524 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1525 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1526 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1527 * of strings in the order that the evals (enum) were defined.
1528 */
1529#undef C
1530#define C(a, b) b
1531
 1532/* These must match the bit positions in trace_iterator_flags */
1533static const char *trace_options[] = {
1534 TRACE_FLAGS
1535 NULL
1536};
1537
1538static struct {
1539 u64 (*func)(void);
1540 const char *name;
1541 int in_ns; /* is this clock in nanoseconds? */
1542} trace_clocks[] = {
1543 { trace_clock_local, "local", 1 },
1544 { trace_clock_global, "global", 1 },
1545 { trace_clock_counter, "counter", 0 },
1546 { trace_clock_jiffies, "uptime", 0 },
1547 { trace_clock, "perf", 1 },
1548 { ktime_get_mono_fast_ns, "mono", 1 },
1549 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1550 { ktime_get_boot_fast_ns, "boot", 1 },
1551 ARCH_TRACE_CLOCKS
1552};
1553
1554bool trace_clock_in_ns(struct trace_array *tr)
1555{
1556 if (trace_clocks[tr->clock_id].in_ns)
1557 return true;
1558
1559 return false;
1560}
1561
1562/*
1563 * trace_parser_get_init - gets the buffer for trace parser
1564 */
1565int trace_parser_get_init(struct trace_parser *parser, int size)
1566{
1567 memset(parser, 0, sizeof(*parser));
1568
1569 parser->buffer = kmalloc(size, GFP_KERNEL);
1570 if (!parser->buffer)
1571 return 1;
1572
1573 parser->size = size;
1574 return 0;
1575}
1576
1577/*
1578 * trace_parser_put - frees the buffer for trace parser
1579 */
1580void trace_parser_put(struct trace_parser *parser)
1581{
1582 kfree(parser->buffer);
1583 parser->buffer = NULL;
1584}
1585
1586/*
1587 * trace_get_user - reads the user input string separated by space
1588 * (matched by isspace(ch))
1589 *
1590 * For each string found the 'struct trace_parser' is updated,
1591 * and the function returns.
1592 *
1593 * Returns number of bytes read.
1594 *
1595 * See kernel/trace/trace.h for 'struct trace_parser' details.
1596 */
1597int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1598 size_t cnt, loff_t *ppos)
1599{
1600 char ch;
1601 size_t read = 0;
1602 ssize_t ret;
1603
1604 if (!*ppos)
1605 trace_parser_clear(parser);
1606
1607 ret = get_user(ch, ubuf++);
1608 if (ret)
1609 goto out;
1610
1611 read++;
1612 cnt--;
1613
1614 /*
 1615 * The parser is not finished with the last write;
1616 * continue reading the user input without skipping spaces.
1617 */
1618 if (!parser->cont) {
1619 /* skip white space */
1620 while (cnt && isspace(ch)) {
1621 ret = get_user(ch, ubuf++);
1622 if (ret)
1623 goto out;
1624 read++;
1625 cnt--;
1626 }
1627
1628 parser->idx = 0;
1629
1630 /* only spaces were written */
1631 if (isspace(ch) || !ch) {
1632 *ppos += read;
1633 ret = read;
1634 goto out;
1635 }
1636 }
1637
1638 /* read the non-space input */
1639 while (cnt && !isspace(ch) && ch) {
1640 if (parser->idx < parser->size - 1)
1641 parser->buffer[parser->idx++] = ch;
1642 else {
1643 ret = -EINVAL;
1644 goto out;
1645 }
1646 ret = get_user(ch, ubuf++);
1647 if (ret)
1648 goto out;
1649 read++;
1650 cnt--;
1651 }
1652
1653 /* We either got finished input or we have to wait for another call. */
1654 if (isspace(ch) || !ch) {
1655 parser->buffer[parser->idx] = 0;
1656 parser->cont = false;
1657 } else if (parser->idx < parser->size - 1) {
1658 parser->cont = true;
1659 parser->buffer[parser->idx++] = ch;
1660 /* Make sure the parsed string always terminates with '\0'. */
1661 parser->buffer[parser->idx] = 0;
1662 } else {
1663 ret = -EINVAL;
1664 goto out;
1665 }
1666
1667 *ppos += read;
1668 ret = read;
1669
1670out:
1671 return ret;
1672}
1673
1674/* TODO add a seq_buf_to_buffer() */
1675static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1676{
1677 int len;
1678
1679 if (trace_seq_used(s) <= s->seq.readpos)
1680 return -EBUSY;
1681
1682 len = trace_seq_used(s) - s->seq.readpos;
1683 if (cnt > len)
1684 cnt = len;
1685 memcpy(buf, s->buffer + s->seq.readpos, cnt);
1686
1687 s->seq.readpos += cnt;
1688 return cnt;
1689}
1690
1691unsigned long __read_mostly tracing_thresh;
Olivier Deprez157378f2022-04-04 15:47:50 +02001692static const struct file_operations tracing_max_lat_fops;
1693
1694#if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1695 defined(CONFIG_FSNOTIFY)
1696
1697static struct workqueue_struct *fsnotify_wq;
1698
1699static void latency_fsnotify_workfn(struct work_struct *work)
1700{
1701 struct trace_array *tr = container_of(work, struct trace_array,
1702 fsnotify_work);
1703 fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1704}
1705
1706static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1707{
1708 struct trace_array *tr = container_of(iwork, struct trace_array,
1709 fsnotify_irqwork);
1710 queue_work(fsnotify_wq, &tr->fsnotify_work);
1711}
1712
1713static void trace_create_maxlat_file(struct trace_array *tr,
1714 struct dentry *d_tracer)
1715{
1716 INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1717 init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1718 tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1719 d_tracer, &tr->max_latency,
1720 &tracing_max_lat_fops);
1721}
1722
1723__init static int latency_fsnotify_init(void)
1724{
1725 fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1726 WQ_UNBOUND | WQ_HIGHPRI, 0);
1727 if (!fsnotify_wq) {
1728 pr_err("Unable to allocate tr_max_lat_wq\n");
1729 return -ENOMEM;
1730 }
1731 return 0;
1732}
1733
1734late_initcall_sync(latency_fsnotify_init);
1735
1736void latency_fsnotify(struct trace_array *tr)
1737{
1738 if (!fsnotify_wq)
1739 return;
1740 /*
1741 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1742 * possible that we are called from __schedule() or do_idle(), which
1743 * could cause a deadlock.
1744 */
1745 irq_work_queue(&tr->fsnotify_irqwork);
1746}
1747
1748/*
1749 * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1750 * defined(CONFIG_FSNOTIFY)
1751 */
1752#else
1753
1754#define trace_create_maxlat_file(tr, d_tracer) \
1755 trace_create_file("tracing_max_latency", 0644, d_tracer, \
1756 &tr->max_latency, &tracing_max_lat_fops)
1757
1758#endif
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001759
1760#ifdef CONFIG_TRACER_MAX_TRACE
1761/*
1762 * Copy the new maximum trace into the separate maximum-trace
1763 * structure. (this way the maximum trace is permanently saved,
1764 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1765 */
1766static void
1767__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1768{
Olivier Deprez157378f2022-04-04 15:47:50 +02001769 struct array_buffer *trace_buf = &tr->array_buffer;
1770 struct array_buffer *max_buf = &tr->max_buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001771 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1772 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1773
1774 max_buf->cpu = cpu;
1775 max_buf->time_start = data->preempt_timestamp;
1776
1777 max_data->saved_latency = tr->max_latency;
1778 max_data->critical_start = data->critical_start;
1779 max_data->critical_end = data->critical_end;
1780
David Brazdil0f672f62019-12-10 10:32:29 +00001781 strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001782 max_data->pid = tsk->pid;
1783 /*
1784 * If tsk == current, then use current_uid(), as that does not use
1785 * RCU. The irq tracer can be called out of RCU scope.
1786 */
1787 if (tsk == current)
1788 max_data->uid = current_uid();
1789 else
1790 max_data->uid = task_uid(tsk);
1791
1792 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1793 max_data->policy = tsk->policy;
1794 max_data->rt_priority = tsk->rt_priority;
1795
1796	/* record this task's comm */
1797 tracing_record_cmdline(tsk);
Olivier Deprez157378f2022-04-04 15:47:50 +02001798 latency_fsnotify(tr);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001799}
1800
1801/**
1802 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1803 * @tr: tracer
1804 * @tsk: the task with the latency
1805 * @cpu: The cpu that initiated the trace.
David Brazdil0f672f62019-12-10 10:32:29 +00001806 * @cond_data: User data associated with a conditional snapshot
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001807 *
1808 * Flip the buffers between the @tr and the max_tr and record information
1809 * about which task was the cause of this latency.
1810 */
1811void
David Brazdil0f672f62019-12-10 10:32:29 +00001812update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1813 void *cond_data)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001814{
1815 if (tr->stop_count)
1816 return;
1817
1818 WARN_ON_ONCE(!irqs_disabled());
1819
1820 if (!tr->allocated_snapshot) {
1821 /* Only the nop tracer should hit this when disabling */
1822 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1823 return;
1824 }
1825
1826 arch_spin_lock(&tr->max_lock);
1827
Olivier Deprez157378f2022-04-04 15:47:50 +02001828 /* Inherit the recordable setting from array_buffer */
1829 if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001830 ring_buffer_record_on(tr->max_buffer.buffer);
1831 else
1832 ring_buffer_record_off(tr->max_buffer.buffer);
1833
David Brazdil0f672f62019-12-10 10:32:29 +00001834#ifdef CONFIG_TRACER_SNAPSHOT
1835 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1836 goto out_unlock;
1837#endif
Olivier Deprez157378f2022-04-04 15:47:50 +02001838 swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001839
1840 __update_max_tr(tr, tsk, cpu);
David Brazdil0f672f62019-12-10 10:32:29 +00001841
1842 out_unlock:
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001843 arch_spin_unlock(&tr->max_lock);
1844}
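/*
 * Hedged sketch of a typical caller, loosely modeled on how the latency
 * tracers (irqsoff, wakeup) report a new maximum; the function name is made
 * up for illustration. Note that update_max_tr() expects interrupts to be
 * disabled (see the WARN_ON_ONCE() above), so this would run from the
 * tracer's own critical path.
 */
static void __maybe_unused example_report_latency(struct trace_array *tr,
						  u64 delta)
{
	if (tracing_thresh) {
		/* Threshold mode: report anything at or above the threshold. */
		if (delta < tracing_thresh)
			return;
	} else {
		/* Max mode: only report a new maximum. */
		if (delta <= tr->max_latency)
			return;
	}

	tr->max_latency = delta;
	update_max_tr(tr, current, smp_processor_id(), NULL);
}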
1845
1846/**
1847 * update_max_tr_single - only copy one trace over, and reset the rest
David Brazdil0f672f62019-12-10 10:32:29 +00001848 * @tr: tracer
1849 * @tsk: task with the latency
1850 * @cpu: the cpu of the buffer to copy.
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001851 *
1852 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1853 */
1854void
1855update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1856{
1857 int ret;
1858
1859 if (tr->stop_count)
1860 return;
1861
1862 WARN_ON_ONCE(!irqs_disabled());
1863 if (!tr->allocated_snapshot) {
1864 /* Only the nop tracer should hit this when disabling */
1865 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1866 return;
1867 }
1868
1869 arch_spin_lock(&tr->max_lock);
1870
Olivier Deprez157378f2022-04-04 15:47:50 +02001871 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001872
1873 if (ret == -EBUSY) {
1874 /*
1875 * We failed to swap the buffer due to a commit taking
1876 * place on this CPU. We fail to record, but we reset
1877 * the max trace buffer (no one writes directly to it)
1878 * and flag that it failed.
1879 */
1880 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1881 "Failed to swap buffers due to commit in progress\n");
1882 }
1883
1884 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1885
1886 __update_max_tr(tr, tsk, cpu);
1887 arch_spin_unlock(&tr->max_lock);
1888}
1889#endif /* CONFIG_TRACER_MAX_TRACE */
1890
David Brazdil0f672f62019-12-10 10:32:29 +00001891static int wait_on_pipe(struct trace_iterator *iter, int full)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001892{
1893	/* Iterators are static; they should be filled or empty */
1894 if (trace_buffer_iter(iter, iter->cpu_file))
1895 return 0;
1896
Olivier Deprez157378f2022-04-04 15:47:50 +02001897 return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001898 full);
1899}
1900
1901#ifdef CONFIG_FTRACE_STARTUP_TEST
1902static bool selftests_can_run;
1903
1904struct trace_selftests {
1905 struct list_head list;
1906 struct tracer *type;
1907};
1908
1909static LIST_HEAD(postponed_selftests);
1910
1911static int save_selftest(struct tracer *type)
1912{
1913 struct trace_selftests *selftest;
1914
1915 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1916 if (!selftest)
1917 return -ENOMEM;
1918
1919 selftest->type = type;
1920 list_add(&selftest->list, &postponed_selftests);
1921 return 0;
1922}
1923
1924static int run_tracer_selftest(struct tracer *type)
1925{
1926 struct trace_array *tr = &global_trace;
1927 struct tracer *saved_tracer = tr->current_trace;
1928 int ret;
1929
1930 if (!type->selftest || tracing_selftest_disabled)
1931 return 0;
1932
1933 /*
1934 * If a tracer registers early in boot up (before scheduling is
1935 * initialized and such), then do not run its selftests yet.
1936 * Instead, run it a little later in the boot process.
1937 */
1938 if (!selftests_can_run)
1939 return save_selftest(type);
1940
1941 /*
1942 * Run a selftest on this tracer.
1943 * Here we reset the trace buffer, and set the current
1944 * tracer to be this tracer. The tracer can then run some
1945 * internal tracing to verify that everything is in order.
1946 * If we fail, we do not register this tracer.
1947 */
Olivier Deprez157378f2022-04-04 15:47:50 +02001948 tracing_reset_online_cpus(&tr->array_buffer);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001949
1950 tr->current_trace = type;
1951
1952#ifdef CONFIG_TRACER_MAX_TRACE
1953 if (type->use_max_tr) {
1954 /* If we expanded the buffers, make sure the max is expanded too */
1955 if (ring_buffer_expanded)
1956 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1957 RING_BUFFER_ALL_CPUS);
1958 tr->allocated_snapshot = true;
1959 }
1960#endif
1961
1962 /* the test is responsible for initializing and enabling */
1963 pr_info("Testing tracer %s: ", type->name);
1964 ret = type->selftest(type, tr);
1965 /* the test is responsible for resetting too */
1966 tr->current_trace = saved_tracer;
1967 if (ret) {
1968 printk(KERN_CONT "FAILED!\n");
1969 /* Add the warning after printing 'FAILED' */
1970 WARN_ON(1);
1971 return -1;
1972 }
1973 /* Only reset on passing, to avoid touching corrupted buffers */
Olivier Deprez157378f2022-04-04 15:47:50 +02001974 tracing_reset_online_cpus(&tr->array_buffer);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00001975
1976#ifdef CONFIG_TRACER_MAX_TRACE
1977 if (type->use_max_tr) {
1978 tr->allocated_snapshot = false;
1979
1980 /* Shrink the max buffer again */
1981 if (ring_buffer_expanded)
1982 ring_buffer_resize(tr->max_buffer.buffer, 1,
1983 RING_BUFFER_ALL_CPUS);
1984 }
1985#endif
1986
1987 printk(KERN_CONT "PASSED\n");
1988 return 0;
1989}
1990
1991static __init int init_trace_selftests(void)
1992{
1993 struct trace_selftests *p, *n;
1994 struct tracer *t, **last;
1995 int ret;
1996
1997 selftests_can_run = true;
1998
1999 mutex_lock(&trace_types_lock);
2000
2001 if (list_empty(&postponed_selftests))
2002 goto out;
2003
2004 pr_info("Running postponed tracer tests:\n");
2005
Olivier Deprez0e641232021-09-23 10:07:05 +02002006 tracing_selftest_running = true;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002007 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
David Brazdil0f672f62019-12-10 10:32:29 +00002008 /* This loop can take minutes when sanitizers are enabled, so
2009	 * let's make sure we allow RCU processing.
2010 */
2011 cond_resched();
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002012 ret = run_tracer_selftest(p->type);
2013 /* If the test fails, then warn and remove from available_tracers */
2014 if (ret < 0) {
2015 WARN(1, "tracer: %s failed selftest, disabling\n",
2016 p->type->name);
2017 last = &trace_types;
2018 for (t = trace_types; t; t = t->next) {
2019 if (t == p->type) {
2020 *last = t->next;
2021 break;
2022 }
2023 last = &t->next;
2024 }
2025 }
2026 list_del(&p->list);
2027 kfree(p);
2028 }
Olivier Deprez0e641232021-09-23 10:07:05 +02002029 tracing_selftest_running = false;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002030
2031 out:
2032 mutex_unlock(&trace_types_lock);
2033
2034 return 0;
2035}
2036core_initcall(init_trace_selftests);
2037#else
2038static inline int run_tracer_selftest(struct tracer *type)
2039{
2040 return 0;
2041}
2042#endif /* CONFIG_FTRACE_STARTUP_TEST */
2043
2044static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2045
2046static void __init apply_trace_boot_options(void);
2047
2048/**
2049 * register_tracer - register a tracer with the ftrace system.
David Brazdil0f672f62019-12-10 10:32:29 +00002050 * @type: the plugin for the tracer
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002051 *
2052 * Register a new plugin tracer.
2053 */
2054int __init register_tracer(struct tracer *type)
2055{
2056 struct tracer *t;
2057 int ret = 0;
2058
2059 if (!type->name) {
2060 pr_info("Tracer must have a name\n");
2061 return -1;
2062 }
2063
2064 if (strlen(type->name) >= MAX_TRACER_SIZE) {
2065 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2066 return -1;
2067 }
2068
Olivier Deprez0e641232021-09-23 10:07:05 +02002069 if (security_locked_down(LOCKDOWN_TRACEFS)) {
Olivier Deprez157378f2022-04-04 15:47:50 +02002070 pr_warn("Can not register tracer %s due to lockdown\n",
Olivier Deprez0e641232021-09-23 10:07:05 +02002071 type->name);
2072 return -EPERM;
2073 }
2074
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002075 mutex_lock(&trace_types_lock);
2076
2077 tracing_selftest_running = true;
2078
2079 for (t = trace_types; t; t = t->next) {
2080 if (strcmp(type->name, t->name) == 0) {
2081 /* already found */
2082 pr_info("Tracer %s already registered\n",
2083 type->name);
2084 ret = -1;
2085 goto out;
2086 }
2087 }
2088
2089 if (!type->set_flag)
2090 type->set_flag = &dummy_set_flag;
2091 if (!type->flags) {
2092		/* allocate a dummy tracer_flags */
2093 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2094 if (!type->flags) {
2095 ret = -ENOMEM;
2096 goto out;
2097 }
2098 type->flags->val = 0;
2099 type->flags->opts = dummy_tracer_opt;
2100 } else
2101 if (!type->flags->opts)
2102 type->flags->opts = dummy_tracer_opt;
2103
2104 /* store the tracer for __set_tracer_option */
2105 type->flags->trace = type;
2106
2107 ret = run_tracer_selftest(type);
2108 if (ret < 0)
2109 goto out;
2110
2111 type->next = trace_types;
2112 trace_types = type;
2113 add_tracer_options(&global_trace, type);
2114
2115 out:
2116 tracing_selftest_running = false;
2117 mutex_unlock(&trace_types_lock);
2118
2119 if (ret || !default_bootup_tracer)
2120 goto out_unlock;
2121
2122 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2123 goto out_unlock;
2124
2125 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2126 /* Do we want this tracer to start on bootup? */
2127 tracing_set_tracer(&global_trace, type->name);
2128 default_bootup_tracer = NULL;
2129
2130 apply_trace_boot_options();
2131
2132	/* disable other selftests, since running this tracer will break them. */
Olivier Deprez157378f2022-04-04 15:47:50 +02002133 disable_tracing_selftest("running a tracer");
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002134
2135 out_unlock:
2136 return ret;
2137}
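/*
 * Hedged sketch of a minimal built-in tracer registration, modeled on the
 * simplest in-tree tracers; all "example" names are hypothetical. Since
 * register_tracer() is __init, this only works for code built into the
 * kernel, typically invoked from a core_initcall().
 */
static int example_tracer_init(struct trace_array *tr)
{
	/* Start whatever probes this tracer needs; nothing to do here. */
	return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
	/* Undo example_tracer_init(). */
}

static struct tracer example_tracer __read_mostly = {
	.name	= "example",
	.init	= example_tracer_init,
	.reset	= example_tracer_reset,
};

static __init int example_tracer_register(void)
{
	return register_tracer(&example_tracer);
}
core_initcall(example_tracer_register);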
2138
Olivier Deprez157378f2022-04-04 15:47:50 +02002139static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002140{
Olivier Deprez157378f2022-04-04 15:47:50 +02002141 struct trace_buffer *buffer = buf->buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002142
2143 if (!buffer)
2144 return;
2145
2146 ring_buffer_record_disable(buffer);
2147
2148 /* Make sure all commits have finished */
David Brazdil0f672f62019-12-10 10:32:29 +00002149 synchronize_rcu();
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002150 ring_buffer_reset_cpu(buffer, cpu);
2151
2152 ring_buffer_record_enable(buffer);
2153}
2154
Olivier Deprez157378f2022-04-04 15:47:50 +02002155void tracing_reset_online_cpus(struct array_buffer *buf)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002156{
Olivier Deprez157378f2022-04-04 15:47:50 +02002157 struct trace_buffer *buffer = buf->buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002158
2159 if (!buffer)
2160 return;
2161
2162 ring_buffer_record_disable(buffer);
2163
2164 /* Make sure all commits have finished */
David Brazdil0f672f62019-12-10 10:32:29 +00002165 synchronize_rcu();
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002166
2167 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2168
Olivier Deprez157378f2022-04-04 15:47:50 +02002169 ring_buffer_reset_online_cpus(buffer);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002170
2171 ring_buffer_record_enable(buffer);
2172}
2173
2174/* Must have trace_types_lock held */
2175void tracing_reset_all_online_cpus(void)
2176{
2177 struct trace_array *tr;
2178
2179 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2180 if (!tr->clear_trace)
2181 continue;
2182 tr->clear_trace = false;
Olivier Deprez157378f2022-04-04 15:47:50 +02002183 tracing_reset_online_cpus(&tr->array_buffer);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002184#ifdef CONFIG_TRACER_MAX_TRACE
2185 tracing_reset_online_cpus(&tr->max_buffer);
2186#endif
2187 }
2188}
2189
Olivier Deprez0e641232021-09-23 10:07:05 +02002190/*
2191 * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2192 * is the tgid last observed corresponding to pid=i.
2193 */
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002194static int *tgid_map;
2195
Olivier Deprez0e641232021-09-23 10:07:05 +02002196/* The maximum valid index into tgid_map. */
2197static size_t tgid_map_max;
2198
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002199#define SAVED_CMDLINES_DEFAULT 128
2200#define NO_CMDLINE_MAP UINT_MAX
2201static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2202struct saved_cmdlines_buffer {
2203 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2204 unsigned *map_cmdline_to_pid;
2205 unsigned cmdline_num;
2206 int cmdline_idx;
2207 char *saved_cmdlines;
2208};
2209static struct saved_cmdlines_buffer *savedcmd;
2210
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002211static inline char *get_saved_cmdlines(int idx)
2212{
2213 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2214}
2215
2216static inline void set_cmdline(int idx, const char *cmdline)
2217{
David Brazdil0f672f62019-12-10 10:32:29 +00002218 strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002219}
2220
2221static int allocate_cmdlines_buffer(unsigned int val,
2222 struct saved_cmdlines_buffer *s)
2223{
2224 s->map_cmdline_to_pid = kmalloc_array(val,
2225 sizeof(*s->map_cmdline_to_pid),
2226 GFP_KERNEL);
2227 if (!s->map_cmdline_to_pid)
2228 return -ENOMEM;
2229
2230 s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2231 if (!s->saved_cmdlines) {
2232 kfree(s->map_cmdline_to_pid);
2233 return -ENOMEM;
2234 }
2235
2236 s->cmdline_idx = 0;
2237 s->cmdline_num = val;
2238 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2239 sizeof(s->map_pid_to_cmdline));
2240 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2241 val * sizeof(*s->map_cmdline_to_pid));
2242
2243 return 0;
2244}
2245
2246static int trace_create_savedcmd(void)
2247{
2248 int ret;
2249
2250 savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2251 if (!savedcmd)
2252 return -ENOMEM;
2253
2254 ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2255 if (ret < 0) {
2256 kfree(savedcmd);
2257 savedcmd = NULL;
2258 return -ENOMEM;
2259 }
2260
2261 return 0;
2262}
2263
2264int is_tracing_stopped(void)
2265{
2266 return global_trace.stop_count;
2267}
2268
2269/**
2270 * tracing_start - quick start of the tracer
2271 *
2272 * If tracing is enabled but was stopped by tracing_stop,
2273 * this will start the tracer back up.
2274 */
2275void tracing_start(void)
2276{
Olivier Deprez157378f2022-04-04 15:47:50 +02002277 struct trace_buffer *buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002278 unsigned long flags;
2279
2280 if (tracing_disabled)
2281 return;
2282
2283 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2284 if (--global_trace.stop_count) {
2285 if (global_trace.stop_count < 0) {
2286 /* Someone screwed up their debugging */
2287 WARN_ON_ONCE(1);
2288 global_trace.stop_count = 0;
2289 }
2290 goto out;
2291 }
2292
2293 /* Prevent the buffers from switching */
2294 arch_spin_lock(&global_trace.max_lock);
2295
Olivier Deprez157378f2022-04-04 15:47:50 +02002296 buffer = global_trace.array_buffer.buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002297 if (buffer)
2298 ring_buffer_record_enable(buffer);
2299
2300#ifdef CONFIG_TRACER_MAX_TRACE
2301 buffer = global_trace.max_buffer.buffer;
2302 if (buffer)
2303 ring_buffer_record_enable(buffer);
2304#endif
2305
2306 arch_spin_unlock(&global_trace.max_lock);
2307
2308 out:
2309 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2310}
2311
2312static void tracing_start_tr(struct trace_array *tr)
2313{
Olivier Deprez157378f2022-04-04 15:47:50 +02002314 struct trace_buffer *buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002315 unsigned long flags;
2316
2317 if (tracing_disabled)
2318 return;
2319
2320 /* If global, we need to also start the max tracer */
2321 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2322 return tracing_start();
2323
2324 raw_spin_lock_irqsave(&tr->start_lock, flags);
2325
2326 if (--tr->stop_count) {
2327 if (tr->stop_count < 0) {
2328 /* Someone screwed up their debugging */
2329 WARN_ON_ONCE(1);
2330 tr->stop_count = 0;
2331 }
2332 goto out;
2333 }
2334
Olivier Deprez157378f2022-04-04 15:47:50 +02002335 buffer = tr->array_buffer.buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002336 if (buffer)
2337 ring_buffer_record_enable(buffer);
2338
2339 out:
2340 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2341}
2342
2343/**
2344 * tracing_stop - quick stop of the tracer
2345 *
2346 * Light weight way to stop tracing. Use in conjunction with
2347 * tracing_start.
2348 */
2349void tracing_stop(void)
2350{
Olivier Deprez157378f2022-04-04 15:47:50 +02002351 struct trace_buffer *buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002352 unsigned long flags;
2353
2354 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2355 if (global_trace.stop_count++)
2356 goto out;
2357
2358 /* Prevent the buffers from switching */
2359 arch_spin_lock(&global_trace.max_lock);
2360
Olivier Deprez157378f2022-04-04 15:47:50 +02002361 buffer = global_trace.array_buffer.buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002362 if (buffer)
2363 ring_buffer_record_disable(buffer);
2364
2365#ifdef CONFIG_TRACER_MAX_TRACE
2366 buffer = global_trace.max_buffer.buffer;
2367 if (buffer)
2368 ring_buffer_record_disable(buffer);
2369#endif
2370
2371 arch_spin_unlock(&global_trace.max_lock);
2372
2373 out:
2374 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2375}
2376
2377static void tracing_stop_tr(struct trace_array *tr)
2378{
Olivier Deprez157378f2022-04-04 15:47:50 +02002379 struct trace_buffer *buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002380 unsigned long flags;
2381
2382 /* If global, we need to also stop the max tracer */
2383 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2384 return tracing_stop();
2385
2386 raw_spin_lock_irqsave(&tr->start_lock, flags);
2387 if (tr->stop_count++)
2388 goto out;
2389
Olivier Deprez157378f2022-04-04 15:47:50 +02002390 buffer = tr->array_buffer.buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002391 if (buffer)
2392 ring_buffer_record_disable(buffer);
2393
2394 out:
2395 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2396}
2397
2398static int trace_save_cmdline(struct task_struct *tsk)
2399{
Olivier Deprez0e641232021-09-23 10:07:05 +02002400 unsigned tpid, idx;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002401
2402 /* treat recording of idle task as a success */
2403 if (!tsk->pid)
2404 return 1;
2405
Olivier Deprez0e641232021-09-23 10:07:05 +02002406 tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002407
2408 /*
2409 * It's not the end of the world if we don't get
2410 * the lock, but we also don't want to spin
2411 * nor do we want to disable interrupts,
2412 * so if we miss here, then better luck next time.
2413 */
2414 if (!arch_spin_trylock(&trace_cmdline_lock))
2415 return 0;
2416
Olivier Deprez0e641232021-09-23 10:07:05 +02002417 idx = savedcmd->map_pid_to_cmdline[tpid];
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002418 if (idx == NO_CMDLINE_MAP) {
2419 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2420
Olivier Deprez0e641232021-09-23 10:07:05 +02002421 savedcmd->map_pid_to_cmdline[tpid] = idx;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002422 savedcmd->cmdline_idx = idx;
2423 }
2424
Olivier Deprez0e641232021-09-23 10:07:05 +02002425 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002426 set_cmdline(idx, tsk->comm);
2427
2428 arch_spin_unlock(&trace_cmdline_lock);
2429
2430 return 1;
2431}
2432
2433static void __trace_find_cmdline(int pid, char comm[])
2434{
2435 unsigned map;
Olivier Deprez0e641232021-09-23 10:07:05 +02002436 int tpid;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002437
2438 if (!pid) {
2439 strcpy(comm, "<idle>");
2440 return;
2441 }
2442
2443 if (WARN_ON_ONCE(pid < 0)) {
2444 strcpy(comm, "<XXX>");
2445 return;
2446 }
2447
Olivier Deprez0e641232021-09-23 10:07:05 +02002448 tpid = pid & (PID_MAX_DEFAULT - 1);
2449 map = savedcmd->map_pid_to_cmdline[tpid];
2450 if (map != NO_CMDLINE_MAP) {
2451 tpid = savedcmd->map_cmdline_to_pid[map];
2452 if (tpid == pid) {
2453 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2454 return;
2455 }
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002456 }
Olivier Deprez0e641232021-09-23 10:07:05 +02002457 strcpy(comm, "<...>");
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002458}
2459
2460void trace_find_cmdline(int pid, char comm[])
2461{
2462 preempt_disable();
2463 arch_spin_lock(&trace_cmdline_lock);
2464
2465 __trace_find_cmdline(pid, comm);
2466
2467 arch_spin_unlock(&trace_cmdline_lock);
2468 preempt_enable();
2469}
2470
Olivier Deprez0e641232021-09-23 10:07:05 +02002471static int *trace_find_tgid_ptr(int pid)
2472{
2473 /*
2474 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2475 * if we observe a non-NULL tgid_map then we also observe the correct
2476 * tgid_map_max.
2477 */
2478 int *map = smp_load_acquire(&tgid_map);
2479
2480 if (unlikely(!map || pid > tgid_map_max))
2481 return NULL;
2482
2483 return &map[pid];
2484}
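/*
 * Hedged sketch of the writer side that the smp_load_acquire() above pairs
 * with. In the real code the map is allocated when the record-tgid option is
 * enabled; the helper name and allocation size here are illustrative. The
 * point is the ordering: publish tgid_map_max first, then the pointer with a
 * release, so a reader that sees a non-NULL map also sees a valid maximum
 * index.
 */
static int __maybe_unused example_publish_tgid_map(void)
{
	int *map = kvcalloc(PID_MAX_DEFAULT + 1, sizeof(*map), GFP_KERNEL);

	if (!map)
		return -ENOMEM;

	tgid_map_max = PID_MAX_DEFAULT;
	smp_store_release(&tgid_map, map); /* pairs with smp_load_acquire() */
	return 0;
}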
2485
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002486int trace_find_tgid(int pid)
2487{
Olivier Deprez0e641232021-09-23 10:07:05 +02002488 int *ptr = trace_find_tgid_ptr(pid);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002489
Olivier Deprez0e641232021-09-23 10:07:05 +02002490 return ptr ? *ptr : 0;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002491}
2492
2493static int trace_save_tgid(struct task_struct *tsk)
2494{
Olivier Deprez0e641232021-09-23 10:07:05 +02002495 int *ptr;
2496
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002497 /* treat recording of idle task as a success */
2498 if (!tsk->pid)
2499 return 1;
2500
Olivier Deprez0e641232021-09-23 10:07:05 +02002501 ptr = trace_find_tgid_ptr(tsk->pid);
2502 if (!ptr)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002503 return 0;
2504
Olivier Deprez0e641232021-09-23 10:07:05 +02002505 *ptr = tsk->tgid;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002506 return 1;
2507}
2508
2509static bool tracing_record_taskinfo_skip(int flags)
2510{
2511 if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2512 return true;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002513 if (!__this_cpu_read(trace_taskinfo_save))
2514 return true;
2515 return false;
2516}
2517
2518/**
2519 * tracing_record_taskinfo - record the task info of a task
2520 *
David Brazdil0f672f62019-12-10 10:32:29 +00002521 * @task: task to record
2522 * @flags: TRACE_RECORD_CMDLINE for recording comm
2523 * TRACE_RECORD_TGID for recording tgid
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002524 */
2525void tracing_record_taskinfo(struct task_struct *task, int flags)
2526{
2527 bool done;
2528
2529 if (tracing_record_taskinfo_skip(flags))
2530 return;
2531
2532 /*
2533 * Record as much task information as possible. If some fail, continue
2534 * to try to record the others.
2535 */
2536 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2537 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2538
2539 /* If recording any information failed, retry again soon. */
2540 if (!done)
2541 return;
2542
2543 __this_cpu_write(trace_taskinfo_save, false);
2544}
2545
2546/**
2547 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2548 *
David Brazdil0f672f62019-12-10 10:32:29 +00002549 * @prev: previous task during sched_switch
2550 * @next: next task during sched_switch
2551 * @flags: TRACE_RECORD_CMDLINE for recording comm
2552 * TRACE_RECORD_TGID for recording tgid
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002553 */
2554void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2555 struct task_struct *next, int flags)
2556{
2557 bool done;
2558
2559 if (tracing_record_taskinfo_skip(flags))
2560 return;
2561
2562 /*
2563 * Record as much task information as possible. If some fail, continue
2564 * to try to record the others.
2565 */
2566 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2567 done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2568 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2569 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2570
2571 /* If recording any information failed, retry again soon. */
2572 if (!done)
2573 return;
2574
2575 __this_cpu_write(trace_taskinfo_save, false);
2576}
2577
2578/* Helpers to record a specific task information */
2579void tracing_record_cmdline(struct task_struct *task)
2580{
2581 tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2582}
2583
2584void tracing_record_tgid(struct task_struct *task)
2585{
2586 tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2587}
2588
2589/*
2590 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2591 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2592 * simplifies those functions and keeps them in sync.
2593 */
2594enum print_line_t trace_handle_return(struct trace_seq *s)
2595{
2596 return trace_seq_has_overflowed(s) ?
2597 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2598}
2599EXPORT_SYMBOL_GPL(trace_handle_return);
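/*
 * Hedged sketch of how an event's output callback typically ends with
 * trace_handle_return(); the event name and format are illustrative only.
 */
static enum print_line_t __maybe_unused
example_event_output(struct trace_iterator *iter, int flags,
		     struct trace_event *event)
{
	struct trace_seq *s = &iter->seq;

	trace_seq_printf(s, "example: pid=%d\n", iter->ent->pid);

	/* One tail call replaces the open-coded overflow check. */
	return trace_handle_return(s);
}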
2600
2601void
David Brazdil0f672f62019-12-10 10:32:29 +00002602tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2603 unsigned long flags, int pc)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002604{
2605 struct task_struct *tsk = current;
2606
2607 entry->preempt_count = pc & 0xff;
2608 entry->pid = (tsk) ? tsk->pid : 0;
David Brazdil0f672f62019-12-10 10:32:29 +00002609 entry->type = type;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002610 entry->flags =
2611#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2612 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2613#else
2614 TRACE_FLAG_IRQS_NOSUPPORT |
2615#endif
2616 ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) |
2617 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2618 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2619 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2620 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2621}
2622EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2623
2624struct ring_buffer_event *
Olivier Deprez157378f2022-04-04 15:47:50 +02002625trace_buffer_lock_reserve(struct trace_buffer *buffer,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002626 int type,
2627 unsigned long len,
2628 unsigned long flags, int pc)
2629{
2630 return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2631}
2632
2633DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2634DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2635static int trace_buffered_event_ref;
2636
2637/**
2638 * trace_buffered_event_enable - enable buffering events
2639 *
2640 * When events are being filtered, it is quicker to use a temporary
2641 * buffer to write the event data into if there's a likely chance
2642	 * that it will not be committed. Discarding an event from the ring
2643	 * buffer is not as fast as committing, and is much slower than
2644	 * copying the data into a temporary buffer and then committing it.
2645 *
2646 * When an event is to be filtered, allocate per cpu buffers to
2647 * write the event data into, and if the event is filtered and discarded
2648 * it is simply dropped, otherwise, the entire data is to be committed
2649 * in one shot.
2650 */
2651void trace_buffered_event_enable(void)
2652{
2653 struct ring_buffer_event *event;
2654 struct page *page;
2655 int cpu;
2656
2657 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2658
2659 if (trace_buffered_event_ref++)
2660 return;
2661
2662 for_each_tracing_cpu(cpu) {
2663 page = alloc_pages_node(cpu_to_node(cpu),
2664 GFP_KERNEL | __GFP_NORETRY, 0);
2665 if (!page)
2666 goto failed;
2667
2668 event = page_address(page);
2669 memset(event, 0, sizeof(*event));
2670
2671 per_cpu(trace_buffered_event, cpu) = event;
2672
2673 preempt_disable();
2674 if (cpu == smp_processor_id() &&
Olivier Deprez157378f2022-04-04 15:47:50 +02002675 __this_cpu_read(trace_buffered_event) !=
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002676 per_cpu(trace_buffered_event, cpu))
2677 WARN_ON_ONCE(1);
2678 preempt_enable();
2679 }
2680
2681 return;
2682 failed:
2683 trace_buffered_event_disable();
2684}
2685
2686static void enable_trace_buffered_event(void *data)
2687{
2688 /* Probably not needed, but do it anyway */
2689 smp_rmb();
2690 this_cpu_dec(trace_buffered_event_cnt);
2691}
2692
2693static void disable_trace_buffered_event(void *data)
2694{
2695 this_cpu_inc(trace_buffered_event_cnt);
2696}
2697
2698/**
2699 * trace_buffered_event_disable - disable buffering events
2700 *
2701 * When a filter is removed, it is faster to not use the buffered
2702 * events, and to commit directly into the ring buffer. Free up
2703 * the temp buffers when there are no more users. This requires
2704 * special synchronization with current events.
2705 */
2706void trace_buffered_event_disable(void)
2707{
2708 int cpu;
2709
2710 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2711
2712 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2713 return;
2714
2715 if (--trace_buffered_event_ref)
2716 return;
2717
2718 preempt_disable();
2719 /* For each CPU, set the buffer as used. */
2720 smp_call_function_many(tracing_buffer_mask,
2721 disable_trace_buffered_event, NULL, 1);
2722 preempt_enable();
2723
2724 /* Wait for all current users to finish */
David Brazdil0f672f62019-12-10 10:32:29 +00002725 synchronize_rcu();
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002726
2727 for_each_tracing_cpu(cpu) {
2728 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2729 per_cpu(trace_buffered_event, cpu) = NULL;
2730 }
2731 /*
2732 * Make sure trace_buffered_event is NULL before clearing
2733 * trace_buffered_event_cnt.
2734 */
2735 smp_wmb();
2736
2737 preempt_disable();
2738 /* Do the work on each cpu */
2739 smp_call_function_many(tracing_buffer_mask,
2740 enable_trace_buffered_event, NULL, 1);
2741 preempt_enable();
2742}
2743
Olivier Deprez157378f2022-04-04 15:47:50 +02002744static struct trace_buffer *temp_buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002745
2746struct ring_buffer_event *
Olivier Deprez157378f2022-04-04 15:47:50 +02002747trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002748 struct trace_event_file *trace_file,
2749 int type, unsigned long len,
2750 unsigned long flags, int pc)
2751{
2752 struct ring_buffer_event *entry;
2753 int val;
2754
Olivier Deprez157378f2022-04-04 15:47:50 +02002755 *current_rb = trace_file->tr->array_buffer.buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002756
2757 if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2758 (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2759 (entry = this_cpu_read(trace_buffered_event))) {
2760 /* Try to use the per cpu buffer first */
2761 val = this_cpu_inc_return(trace_buffered_event_cnt);
Olivier Deprez0e641232021-09-23 10:07:05 +02002762 if ((len < (PAGE_SIZE - sizeof(*entry) - sizeof(entry->array[0]))) && val == 1) {
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002763 trace_event_setup(entry, type, flags, pc);
2764 entry->array[0] = len;
2765 return entry;
2766 }
2767 this_cpu_dec(trace_buffered_event_cnt);
2768 }
2769
2770 entry = __trace_buffer_lock_reserve(*current_rb,
2771 type, len, flags, pc);
2772 /*
2773	 * If tracing is off, but we have triggers enabled,
2774 * we still need to look at the event data. Use the temp_buffer
Olivier Deprez0e641232021-09-23 10:07:05 +02002775	 * to store the trace event for the trigger to use. It's recursion
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002776 * safe and will not be recorded anywhere.
2777 */
2778 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2779 *current_rb = temp_buffer;
2780 entry = __trace_buffer_lock_reserve(*current_rb,
2781 type, len, flags, pc);
2782 }
2783 return entry;
2784}
2785EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2786
2787static DEFINE_SPINLOCK(tracepoint_iter_lock);
2788static DEFINE_MUTEX(tracepoint_printk_mutex);
2789
2790static void output_printk(struct trace_event_buffer *fbuffer)
2791{
2792 struct trace_event_call *event_call;
Olivier Deprez157378f2022-04-04 15:47:50 +02002793 struct trace_event_file *file;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002794 struct trace_event *event;
2795 unsigned long flags;
2796 struct trace_iterator *iter = tracepoint_print_iter;
2797
2798 /* We should never get here if iter is NULL */
2799 if (WARN_ON_ONCE(!iter))
2800 return;
2801
2802 event_call = fbuffer->trace_file->event_call;
2803 if (!event_call || !event_call->event.funcs ||
2804 !event_call->event.funcs->trace)
2805 return;
2806
Olivier Deprez157378f2022-04-04 15:47:50 +02002807 file = fbuffer->trace_file;
2808 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2809 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2810 !filter_match_preds(file->filter, fbuffer->entry)))
2811 return;
2812
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002813 event = &fbuffer->trace_file->event_call->event;
2814
2815 spin_lock_irqsave(&tracepoint_iter_lock, flags);
2816 trace_seq_init(&iter->seq);
2817 iter->ent = fbuffer->entry;
2818 event_call->event.funcs->trace(iter, 0, event);
2819 trace_seq_putc(&iter->seq, 0);
2820 printk("%s", iter->seq.buffer);
2821
2822 spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2823}
2824
2825int tracepoint_printk_sysctl(struct ctl_table *table, int write,
Olivier Deprez157378f2022-04-04 15:47:50 +02002826 void *buffer, size_t *lenp,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002827 loff_t *ppos)
2828{
2829 int save_tracepoint_printk;
2830 int ret;
2831
2832 mutex_lock(&tracepoint_printk_mutex);
2833 save_tracepoint_printk = tracepoint_printk;
2834
2835 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2836
2837 /*
2838 * This will force exiting early, as tracepoint_printk
2839	 * is always zero when tracepoint_print_iter is not allocated
2840 */
2841 if (!tracepoint_print_iter)
2842 tracepoint_printk = 0;
2843
2844 if (save_tracepoint_printk == tracepoint_printk)
2845 goto out;
2846
2847 if (tracepoint_printk)
2848 static_key_enable(&tracepoint_printk_key.key);
2849 else
2850 static_key_disable(&tracepoint_printk_key.key);
2851
2852 out:
2853 mutex_unlock(&tracepoint_printk_mutex);
2854
2855 return ret;
2856}
2857
2858void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2859{
2860 if (static_key_false(&tracepoint_printk_key.key))
2861 output_printk(fbuffer);
2862
Olivier Deprez157378f2022-04-04 15:47:50 +02002863 if (static_branch_unlikely(&trace_event_exports_enabled))
2864 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2865 event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002866 fbuffer->event, fbuffer->entry,
Olivier Deprez157378f2022-04-04 15:47:50 +02002867 fbuffer->flags, fbuffer->pc, fbuffer->regs);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002868}
2869EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2870
2871/*
2872 * Skip 3:
2873 *
2874 * trace_buffer_unlock_commit_regs()
2875 * trace_event_buffer_commit()
2876 * trace_event_raw_event_xxx()
2877 */
2878# define STACK_SKIP 3
2879
2880void trace_buffer_unlock_commit_regs(struct trace_array *tr,
Olivier Deprez157378f2022-04-04 15:47:50 +02002881 struct trace_buffer *buffer,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002882 struct ring_buffer_event *event,
2883 unsigned long flags, int pc,
2884 struct pt_regs *regs)
2885{
2886 __buffer_unlock_commit(buffer, event);
2887
2888 /*
2889 * If regs is not set, then skip the necessary functions.
2890 * Note, we can still get here via blktrace, wakeup tracer
2891 * and mmiotrace, but that's ok if they lose a function or
2892 * two. They are not that meaningful.
2893 */
2894 ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
Olivier Deprez0e641232021-09-23 10:07:05 +02002895 ftrace_trace_userstack(tr, buffer, flags, pc);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002896}
2897
2898/*
2899	 * Similar to trace_buffer_unlock_commit_regs() but does not dump the stack.
2900 */
2901void
Olivier Deprez157378f2022-04-04 15:47:50 +02002902trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002903 struct ring_buffer_event *event)
2904{
2905 __buffer_unlock_commit(buffer, event);
2906}
2907
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002908void
2909trace_function(struct trace_array *tr,
2910 unsigned long ip, unsigned long parent_ip, unsigned long flags,
2911 int pc)
2912{
2913 struct trace_event_call *call = &event_function;
Olivier Deprez157378f2022-04-04 15:47:50 +02002914 struct trace_buffer *buffer = tr->array_buffer.buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002915 struct ring_buffer_event *event;
2916 struct ftrace_entry *entry;
2917
2918 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2919 flags, pc);
2920 if (!event)
2921 return;
2922 entry = ring_buffer_event_data(event);
2923 entry->ip = ip;
2924 entry->parent_ip = parent_ip;
2925
2926 if (!call_filter_check_discard(call, entry, buffer, event)) {
Olivier Deprez157378f2022-04-04 15:47:50 +02002927 if (static_branch_unlikely(&trace_function_exports_enabled))
2928 ftrace_exports(event, TRACE_EXPORT_FUNCTION);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002929 __buffer_unlock_commit(buffer, event);
2930 }
2931}
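/*
 * Hedged sketch of a caller: the function tracer's ftrace probe does roughly
 * this for every traced function (the helper name here is illustrative, not
 * the in-tree probe).
 */
static void __maybe_unused
example_trace_one_call(struct trace_array *tr, unsigned long ip,
		       unsigned long parent_ip)
{
	unsigned long flags;
	int pc;

	local_save_flags(flags);
	pc = preempt_count();
	trace_function(tr, ip, parent_ip, flags, pc);
}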
2932
2933#ifdef CONFIG_STACKTRACE
2934
David Brazdil0f672f62019-12-10 10:32:29 +00002935/* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2936#define FTRACE_KSTACK_NESTING 4
2937
2938#define FTRACE_KSTACK_ENTRIES (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2939
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002940struct ftrace_stack {
David Brazdil0f672f62019-12-10 10:32:29 +00002941 unsigned long calls[FTRACE_KSTACK_ENTRIES];
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002942};
2943
David Brazdil0f672f62019-12-10 10:32:29 +00002944
2945struct ftrace_stacks {
2946 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
2947};
2948
2949static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002950static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2951
Olivier Deprez157378f2022-04-04 15:47:50 +02002952static void __ftrace_trace_stack(struct trace_buffer *buffer,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002953 unsigned long flags,
2954 int skip, int pc, struct pt_regs *regs)
2955{
2956 struct trace_event_call *call = &event_kernel_stack;
2957 struct ring_buffer_event *event;
David Brazdil0f672f62019-12-10 10:32:29 +00002958 unsigned int size, nr_entries;
2959 struct ftrace_stack *fstack;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002960 struct stack_entry *entry;
David Brazdil0f672f62019-12-10 10:32:29 +00002961 int stackidx;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002962
2963 /*
2964	 * Add one, for this function and the call to stack_trace_save().
2965 * If regs is set, then these functions will not be in the way.
2966 */
2967#ifndef CONFIG_UNWINDER_ORC
2968 if (!regs)
David Brazdil0f672f62019-12-10 10:32:29 +00002969 skip++;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002970#endif
2971
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002972 preempt_disable_notrace();
2973
David Brazdil0f672f62019-12-10 10:32:29 +00002974 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2975
2976 /* This should never happen. If it does, yell once and skip */
Olivier Deprez0e641232021-09-23 10:07:05 +02002977 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
David Brazdil0f672f62019-12-10 10:32:29 +00002978 goto out;
2979
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002980 /*
David Brazdil0f672f62019-12-10 10:32:29 +00002981 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2982 * interrupt will either see the value pre increment or post
2983 * increment. If the interrupt happens pre increment it will have
2984 * restored the counter when it returns. We just need a barrier to
2985 * keep gcc from moving things around.
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002986 */
2987 barrier();
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002988
David Brazdil0f672f62019-12-10 10:32:29 +00002989 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2990 size = ARRAY_SIZE(fstack->calls);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002991
David Brazdil0f672f62019-12-10 10:32:29 +00002992 if (regs) {
2993 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2994 size, skip);
2995 } else {
2996 nr_entries = stack_trace_save(fstack->calls, size, skip);
2997 }
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00002998
David Brazdil0f672f62019-12-10 10:32:29 +00002999 size = nr_entries * sizeof(unsigned long);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003000 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
Olivier Deprez0e641232021-09-23 10:07:05 +02003001 (sizeof(*entry) - sizeof(entry->caller)) + size,
3002 flags, pc);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003003 if (!event)
3004 goto out;
3005 entry = ring_buffer_event_data(event);
3006
David Brazdil0f672f62019-12-10 10:32:29 +00003007 memcpy(&entry->caller, fstack->calls, size);
3008 entry->size = nr_entries;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003009
3010 if (!call_filter_check_discard(call, entry, buffer, event))
3011 __buffer_unlock_commit(buffer, event);
3012
3013 out:
3014 /* Again, don't let gcc optimize things here */
3015 barrier();
3016 __this_cpu_dec(ftrace_stack_reserve);
3017 preempt_enable_notrace();
3018
3019}
3020
3021static inline void ftrace_trace_stack(struct trace_array *tr,
Olivier Deprez157378f2022-04-04 15:47:50 +02003022 struct trace_buffer *buffer,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003023 unsigned long flags,
3024 int skip, int pc, struct pt_regs *regs)
3025{
3026 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3027 return;
3028
3029 __ftrace_trace_stack(buffer, flags, skip, pc, regs);
3030}
3031
3032void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
3033 int pc)
3034{
Olivier Deprez157378f2022-04-04 15:47:50 +02003035 struct trace_buffer *buffer = tr->array_buffer.buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003036
3037 if (rcu_is_watching()) {
3038 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3039 return;
3040 }
3041
3042 /*
3043 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3044 * but if the above rcu_is_watching() failed, then the NMI
3045 * triggered someplace critical, and rcu_irq_enter() should
3046 * not be called from NMI.
3047 */
3048 if (unlikely(in_nmi()))
3049 return;
3050
3051 rcu_irq_enter_irqson();
3052 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
3053 rcu_irq_exit_irqson();
3054}
3055
3056/**
3057 * trace_dump_stack - record a stack back trace in the trace buffer
3058 * @skip: Number of functions to skip (helper handlers)
3059 */
3060void trace_dump_stack(int skip)
3061{
3062 unsigned long flags;
3063
3064 if (tracing_disabled || tracing_selftest_running)
3065 return;
3066
3067 local_save_flags(flags);
3068
3069#ifndef CONFIG_UNWINDER_ORC
3070 /* Skip 1 to skip this function. */
3071 skip++;
3072#endif
Olivier Deprez157378f2022-04-04 15:47:50 +02003073 __ftrace_trace_stack(global_trace.array_buffer.buffer,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003074 flags, skip, preempt_count(), NULL);
3075}
David Brazdil0f672f62019-12-10 10:32:29 +00003076EXPORT_SYMBOL_GPL(trace_dump_stack);
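/*
 * Hedged usage sketch: drop this into a code path under investigation to
 * record, in the trace buffer, both a marker and the call chain that led
 * here. The helper name is made up; skip=0 keeps the immediate caller
 * visible (modulo the internal skipping handled above).
 */
static void __maybe_unused example_debug_backtrace(void)
{
	trace_printk("slow path hit\n");
	trace_dump_stack(0);
}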
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003077
David Brazdil0f672f62019-12-10 10:32:29 +00003078#ifdef CONFIG_USER_STACKTRACE_SUPPORT
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003079static DEFINE_PER_CPU(int, user_stack_count);
3080
David Brazdil0f672f62019-12-10 10:32:29 +00003081static void
Olivier Deprez0e641232021-09-23 10:07:05 +02003082ftrace_trace_userstack(struct trace_array *tr,
Olivier Deprez157378f2022-04-04 15:47:50 +02003083 struct trace_buffer *buffer, unsigned long flags, int pc)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003084{
3085 struct trace_event_call *call = &event_user_stack;
3086 struct ring_buffer_event *event;
3087 struct userstack_entry *entry;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003088
Olivier Deprez0e641232021-09-23 10:07:05 +02003089 if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003090 return;
3091
3092 /*
3093	 * NMIs cannot handle page faults, even with fixups.
3094	 * Saving the user stack can (and often does) fault.
3095 */
3096 if (unlikely(in_nmi()))
3097 return;
3098
3099 /*
3100 * prevent recursion, since the user stack tracing may
3101 * trigger other kernel events.
3102 */
3103 preempt_disable();
3104 if (__this_cpu_read(user_stack_count))
3105 goto out;
3106
3107 __this_cpu_inc(user_stack_count);
3108
3109 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3110 sizeof(*entry), flags, pc);
3111 if (!event)
3112 goto out_drop_count;
3113 entry = ring_buffer_event_data(event);
3114
3115 entry->tgid = current->tgid;
3116 memset(&entry->caller, 0, sizeof(entry->caller));
3117
David Brazdil0f672f62019-12-10 10:32:29 +00003118 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003119 if (!call_filter_check_discard(call, entry, buffer, event))
3120 __buffer_unlock_commit(buffer, event);
3121
3122 out_drop_count:
3123 __this_cpu_dec(user_stack_count);
3124 out:
3125 preempt_enable();
3126}
David Brazdil0f672f62019-12-10 10:32:29 +00003127#else /* CONFIG_USER_STACKTRACE_SUPPORT */
Olivier Deprez0e641232021-09-23 10:07:05 +02003128static void ftrace_trace_userstack(struct trace_array *tr,
Olivier Deprez157378f2022-04-04 15:47:50 +02003129 struct trace_buffer *buffer,
David Brazdil0f672f62019-12-10 10:32:29 +00003130 unsigned long flags, int pc)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003131{
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003132}
David Brazdil0f672f62019-12-10 10:32:29 +00003133#endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003134
3135#endif /* CONFIG_STACKTRACE */
3136
3137/* created for use with alloc_percpu */
3138struct trace_buffer_struct {
3139 int nesting;
3140 char buffer[4][TRACE_BUF_SIZE];
3141};
3142
Olivier Deprez157378f2022-04-04 15:47:50 +02003143static struct trace_buffer_struct __percpu *trace_percpu_buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003144
3145/*
3146	 * This allows for lockless recording. If we're nested too deeply, then
3147 * this returns NULL.
3148 */
3149static char *get_trace_buf(void)
3150{
3151 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3152
Olivier Deprez157378f2022-04-04 15:47:50 +02003153 if (!trace_percpu_buffer || buffer->nesting >= 4)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003154 return NULL;
3155
3156 buffer->nesting++;
3157
3158 /* Interrupts must see nesting incremented before we use the buffer */
3159 barrier();
Olivier Deprez0e641232021-09-23 10:07:05 +02003160 return &buffer->buffer[buffer->nesting - 1][0];
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003161}
3162
3163static void put_trace_buf(void)
3164{
3165 /* Don't let the decrement of nesting leak before this */
3166 barrier();
3167 this_cpu_dec(trace_percpu_buffer->nesting);
3168}
3169
3170static int alloc_percpu_trace_buffer(void)
3171{
Olivier Deprez157378f2022-04-04 15:47:50 +02003172 struct trace_buffer_struct __percpu *buffers;
3173
3174 if (trace_percpu_buffer)
3175 return 0;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003176
3177 buffers = alloc_percpu(struct trace_buffer_struct);
Olivier Deprez157378f2022-04-04 15:47:50 +02003178 if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003179 return -ENOMEM;
3180
3181 trace_percpu_buffer = buffers;
3182 return 0;
3183}
3184
3185static int buffers_allocated;
3186
3187void trace_printk_init_buffers(void)
3188{
3189 if (buffers_allocated)
3190 return;
3191
3192 if (alloc_percpu_trace_buffer())
3193 return;
3194
3195 /* trace_printk() is for debug use only. Don't use it in production. */
3196
3197 pr_warn("\n");
3198 pr_warn("**********************************************************\n");
3199 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3200 pr_warn("** **\n");
3201 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
3202 pr_warn("** **\n");
3203 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
3204 pr_warn("** unsafe for production use. **\n");
3205 pr_warn("** **\n");
3206 pr_warn("** If you see this message and you are not debugging **\n");
3207 pr_warn("** the kernel, report this immediately to your vendor! **\n");
3208 pr_warn("** **\n");
3209 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3210 pr_warn("**********************************************************\n");
3211
3212 /* Expand the buffers to set size */
3213 tracing_update_buffers();
3214
3215 buffers_allocated = 1;
3216
3217 /*
3218 * trace_printk_init_buffers() can be called by modules.
3219 * If that happens, then we need to start cmdline recording
3220	 * directly here. If the global_trace.array_buffer.buffer is already
3221 * allocated here, then this was called by module code.
3222 */
Olivier Deprez157378f2022-04-04 15:47:50 +02003223 if (global_trace.array_buffer.buffer)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003224 tracing_start_cmdline_record();
3225}
David Brazdil0f672f62019-12-10 10:32:29 +00003226EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003227
3228void trace_printk_start_comm(void)
3229{
3230 /* Start tracing comms if trace printk is set */
3231 if (!buffers_allocated)
3232 return;
3233 tracing_start_cmdline_record();
3234}
3235
3236static void trace_printk_start_stop_comm(int enabled)
3237{
3238 if (!buffers_allocated)
3239 return;
3240
3241 if (enabled)
3242 tracing_start_cmdline_record();
3243 else
3244 tracing_stop_cmdline_record();
3245}
3246
3247/**
3248 * trace_vbprintk - write binary msg to tracing buffer
David Brazdil0f672f62019-12-10 10:32:29 +00003249 * @ip: The address of the caller
3250 * @fmt: The string format to write to the buffer
3251 * @args: Arguments for @fmt
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003252 */
3253int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3254{
3255 struct trace_event_call *call = &event_bprint;
3256 struct ring_buffer_event *event;
Olivier Deprez157378f2022-04-04 15:47:50 +02003257 struct trace_buffer *buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003258 struct trace_array *tr = &global_trace;
3259 struct bprint_entry *entry;
3260 unsigned long flags;
3261 char *tbuffer;
3262 int len = 0, size, pc;
3263
3264 if (unlikely(tracing_selftest_running || tracing_disabled))
3265 return 0;
3266
3267 /* Don't pollute graph traces with trace_vprintk internals */
3268 pause_graph_tracing();
3269
3270 pc = preempt_count();
3271 preempt_disable_notrace();
3272
3273 tbuffer = get_trace_buf();
3274 if (!tbuffer) {
3275 len = 0;
3276 goto out_nobuffer;
3277 }
3278
3279 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3280
3281 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
Olivier Deprez157378f2022-04-04 15:47:50 +02003282 goto out_put;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003283
3284 local_save_flags(flags);
3285 size = sizeof(*entry) + sizeof(u32) * len;
Olivier Deprez157378f2022-04-04 15:47:50 +02003286 buffer = tr->array_buffer.buffer;
3287 ring_buffer_nest_start(buffer);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003288 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3289 flags, pc);
3290 if (!event)
3291 goto out;
3292 entry = ring_buffer_event_data(event);
3293 entry->ip = ip;
3294 entry->fmt = fmt;
3295
3296 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3297 if (!call_filter_check_discard(call, entry, buffer, event)) {
3298 __buffer_unlock_commit(buffer, event);
3299 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3300 }
3301
3302out:
Olivier Deprez157378f2022-04-04 15:47:50 +02003303 ring_buffer_nest_end(buffer);
3304out_put:
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003305 put_trace_buf();
3306
3307out_nobuffer:
3308 preempt_enable_notrace();
3309 unpause_graph_tracing();
3310
3311 return len;
3312}
3313EXPORT_SYMBOL_GPL(trace_vbprintk);
3314
3315__printf(3, 0)
3316static int
Olivier Deprez157378f2022-04-04 15:47:50 +02003317__trace_array_vprintk(struct trace_buffer *buffer,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003318 unsigned long ip, const char *fmt, va_list args)
3319{
3320 struct trace_event_call *call = &event_print;
3321 struct ring_buffer_event *event;
3322 int len = 0, size, pc;
3323 struct print_entry *entry;
3324 unsigned long flags;
3325 char *tbuffer;
3326
3327 if (tracing_disabled || tracing_selftest_running)
3328 return 0;
3329
3330 /* Don't pollute graph traces with trace_vprintk internals */
3331 pause_graph_tracing();
3332
3333 pc = preempt_count();
3334 preempt_disable_notrace();
3335
3336
3337 tbuffer = get_trace_buf();
3338 if (!tbuffer) {
3339 len = 0;
3340 goto out_nobuffer;
3341 }
3342
3343 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3344
3345 local_save_flags(flags);
3346 size = sizeof(*entry) + len + 1;
Olivier Deprez157378f2022-04-04 15:47:50 +02003347 ring_buffer_nest_start(buffer);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003348 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3349 flags, pc);
3350 if (!event)
3351 goto out;
3352 entry = ring_buffer_event_data(event);
3353 entry->ip = ip;
3354
3355 memcpy(&entry->buf, tbuffer, len + 1);
3356 if (!call_filter_check_discard(call, entry, buffer, event)) {
3357 __buffer_unlock_commit(buffer, event);
3358 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3359 }
3360
3361out:
Olivier Deprez157378f2022-04-04 15:47:50 +02003362 ring_buffer_nest_end(buffer);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003363 put_trace_buf();
3364
3365out_nobuffer:
3366 preempt_enable_notrace();
3367 unpause_graph_tracing();
3368
3369 return len;
3370}
3371
3372__printf(3, 0)
3373int trace_array_vprintk(struct trace_array *tr,
3374 unsigned long ip, const char *fmt, va_list args)
3375{
Olivier Deprez157378f2022-04-04 15:47:50 +02003376 return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003377}
3378
Olivier Deprez157378f2022-04-04 15:47:50 +02003379/**
3380 * trace_array_printk - Print a message to a specific instance
3381 * @tr: The instance trace_array descriptor
3382 * @ip: The instruction pointer that this is called from.
3383 * @fmt: The format to print (printf format)
3384 *
3385	 * If a subsystem sets up its own instance, it may printk strings
3386	 * into its tracing instance buffer using this function. Note, this
3387	 * function will not write into the top level buffer (use
3388	 * trace_printk() for that), as the top level buffer should only
3389	 * contain events that can be individually disabled. trace_printk()
3390	 * is only meant for debugging a kernel, and should never be
3391	 * incorporated into normal use.
3392 *
3393 * trace_array_printk() can be used, as it will not add noise to the
3394 * top level tracing buffer.
3395 *
3396 * Note, trace_array_init_printk() must be called on @tr before this
3397 * can be used.
3398 */
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003399__printf(3, 0)
3400int trace_array_printk(struct trace_array *tr,
3401 unsigned long ip, const char *fmt, ...)
3402{
3403 int ret;
3404 va_list ap;
3405
Olivier Deprez0e641232021-09-23 10:07:05 +02003406 if (!tr)
3407 return -ENOENT;
3408
Olivier Deprez157378f2022-04-04 15:47:50 +02003409 /* This is only allowed for created instances */
3410 if (tr == &global_trace)
3411 return 0;
3412
3413 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3414 return 0;
3415
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003416 va_start(ap, fmt);
3417 ret = trace_array_vprintk(tr, ip, fmt, ap);
3418 va_end(ap);
3419 return ret;
3420}
David Brazdil0f672f62019-12-10 10:32:29 +00003421EXPORT_SYMBOL_GPL(trace_array_printk);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003422
Olivier Deprez157378f2022-04-04 15:47:50 +02003423/**
3424 * trace_array_init_printk - Initialize buffers for trace_array_printk()
3425 * @tr: The trace array to initialize the buffers for
3426 *
3427 * As trace_array_printk() only writes into instances, calls to it are
3428 * OK to leave in the kernel (unlike trace_printk()). This needs to be
3429 * called before trace_array_printk() can be used on a trace_array.
3430 */
3431int trace_array_init_printk(struct trace_array *tr)
3432{
3433 if (!tr)
3434 return -ENOENT;
3435
3436 /* This is only allowed for created instances */
3437 if (tr == &global_trace)
3438 return -EINVAL;
3439
3440 return alloc_percpu_trace_buffer();
3441}
3442EXPORT_SYMBOL_GPL(trace_array_init_printk);
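/*
 * Illustrative sketch only (not part of this file): how a subsystem with
 * its own tracing instance might combine trace_array_init_printk() and
 * trace_array_printk().  The instance name "my_subsys" and the calling
 * context are hypothetical, and error handling is kept minimal.
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_subsys");
 *	if (!tr)
 *		return -ENOMEM;
 *	if (trace_array_init_printk(tr))
 *		return -ENOMEM;
 *	trace_array_printk(tr, _THIS_IP_, "my_subsys state: %d\n", 42);
 *
 * (trace_array_put() would be called once the subsystem is done with
 * the instance.)
 */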
3443
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003444__printf(3, 4)
Olivier Deprez157378f2022-04-04 15:47:50 +02003445int trace_array_printk_buf(struct trace_buffer *buffer,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003446 unsigned long ip, const char *fmt, ...)
3447{
3448 int ret;
3449 va_list ap;
3450
3451 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3452 return 0;
3453
3454 va_start(ap, fmt);
3455 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3456 va_end(ap);
3457 return ret;
3458}
3459
3460__printf(2, 0)
3461int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3462{
3463 return trace_array_vprintk(&global_trace, ip, fmt, args);
3464}
3465EXPORT_SYMBOL_GPL(trace_vprintk);
3466
3467static void trace_iterator_increment(struct trace_iterator *iter)
3468{
3469 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3470
3471 iter->idx++;
3472 if (buf_iter)
Olivier Deprez157378f2022-04-04 15:47:50 +02003473 ring_buffer_iter_advance(buf_iter);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003474}
3475
3476static struct trace_entry *
3477peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3478 unsigned long *lost_events)
3479{
3480 struct ring_buffer_event *event;
3481 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3482
Olivier Deprez157378f2022-04-04 15:47:50 +02003483 if (buf_iter) {
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003484 event = ring_buffer_iter_peek(buf_iter, ts);
Olivier Deprez157378f2022-04-04 15:47:50 +02003485 if (lost_events)
3486 *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3487 (unsigned long)-1 : 0;
3488 } else {
3489 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003490 lost_events);
Olivier Deprez157378f2022-04-04 15:47:50 +02003491 }
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003492
3493 if (event) {
3494 iter->ent_size = ring_buffer_event_length(event);
3495 return ring_buffer_event_data(event);
3496 }
3497 iter->ent_size = 0;
3498 return NULL;
3499}
3500
3501static struct trace_entry *
3502__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3503 unsigned long *missing_events, u64 *ent_ts)
3504{
Olivier Deprez157378f2022-04-04 15:47:50 +02003505 struct trace_buffer *buffer = iter->array_buffer->buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003506 struct trace_entry *ent, *next = NULL;
3507 unsigned long lost_events = 0, next_lost = 0;
3508 int cpu_file = iter->cpu_file;
3509 u64 next_ts = 0, ts;
3510 int next_cpu = -1;
3511 int next_size = 0;
3512 int cpu;
3513
3514 /*
3515 * If we are in a per_cpu trace file, don't bother iterating over
3516 * all CPUs; just peek at that CPU directly.
3517 */
3518 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3519 if (ring_buffer_empty_cpu(buffer, cpu_file))
3520 return NULL;
3521 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3522 if (ent_cpu)
3523 *ent_cpu = cpu_file;
3524
3525 return ent;
3526 }
3527
3528 for_each_tracing_cpu(cpu) {
3529
3530 if (ring_buffer_empty_cpu(buffer, cpu))
3531 continue;
3532
3533 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3534
3535 /*
3536 * Pick the entry with the smallest timestamp:
3537 */
3538 if (ent && (!next || ts < next_ts)) {
3539 next = ent;
3540 next_cpu = cpu;
3541 next_ts = ts;
3542 next_lost = lost_events;
3543 next_size = iter->ent_size;
3544 }
3545 }
3546
3547 iter->ent_size = next_size;
3548
3549 if (ent_cpu)
3550 *ent_cpu = next_cpu;
3551
3552 if (ent_ts)
3553 *ent_ts = next_ts;
3554
3555 if (missing_events)
3556 *missing_events = next_lost;
3557
3558 return next;
3559}
3560
Olivier Deprez157378f2022-04-04 15:47:50 +02003561#define STATIC_TEMP_BUF_SIZE 128
3562static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3563
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003564/* Find the next real entry, without updating the iterator itself */
3565struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3566 int *ent_cpu, u64 *ent_ts)
3567{
Olivier Deprez157378f2022-04-04 15:47:50 +02003568 /* __find_next_entry will reset ent_size */
3569 int ent_size = iter->ent_size;
3570 struct trace_entry *entry;
3571
3572 /*
3573 * If called from ftrace_dump(), then the iter->temp buffer
3574 * will be the static_temp_buf and not created from kmalloc.
3575 * If the entry size is greater than the buffer, we cannot
3576 * save it. Just return NULL in that case. This is only
3577 * used to add markers when two consecutive events' time
3578 * stamps have a large delta. See trace_print_lat_context().
3579 */
3580 if (iter->temp == static_temp_buf &&
3581 STATIC_TEMP_BUF_SIZE < ent_size)
3582 return NULL;
3583
3584 /*
3585 * __find_next_entry() may call peek_next_entry(), which may call
3586 * ring_buffer_peek(), and that may make the contents of iter->ent
3587 * undefined. We need to copy iter->ent now.
3588 */
3589 if (iter->ent && iter->ent != iter->temp) {
3590 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3591 !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3592 void *temp;
3593 temp = kmalloc(iter->ent_size, GFP_KERNEL);
3594 if (!temp)
3595 return NULL;
3596 kfree(iter->temp);
3597 iter->temp = temp;
3598 iter->temp_size = iter->ent_size;
3599 }
3600 memcpy(iter->temp, iter->ent, iter->ent_size);
3601 iter->ent = iter->temp;
3602 }
3603 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3604 /* Put back the original ent_size */
3605 iter->ent_size = ent_size;
3606
3607 return entry;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003608}
3609
3610/* Find the next real entry, and increment the iterator to the next entry */
3611void *trace_find_next_entry_inc(struct trace_iterator *iter)
3612{
3613 iter->ent = __find_next_entry(iter, &iter->cpu,
3614 &iter->lost_events, &iter->ts);
3615
3616 if (iter->ent)
3617 trace_iterator_increment(iter);
3618
3619 return iter->ent ? iter : NULL;
3620}
3621
3622static void trace_consume(struct trace_iterator *iter)
3623{
Olivier Deprez157378f2022-04-04 15:47:50 +02003624 ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003625 &iter->lost_events);
3626}
3627
3628static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3629{
3630 struct trace_iterator *iter = m->private;
3631 int i = (int)*pos;
3632 void *ent;
3633
3634 WARN_ON_ONCE(iter->leftover);
3635
3636 (*pos)++;
3637
3638 /* can't go backwards */
3639 if (iter->idx > i)
3640 return NULL;
3641
3642 if (iter->idx < 0)
3643 ent = trace_find_next_entry_inc(iter);
3644 else
3645 ent = iter;
3646
3647 while (ent && iter->idx < i)
3648 ent = trace_find_next_entry_inc(iter);
3649
3650 iter->pos = *pos;
3651
3652 return ent;
3653}
3654
3655void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3656{
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003657 struct ring_buffer_iter *buf_iter;
3658 unsigned long entries = 0;
3659 u64 ts;
3660
Olivier Deprez157378f2022-04-04 15:47:50 +02003661 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003662
3663 buf_iter = trace_buffer_iter(iter, cpu);
3664 if (!buf_iter)
3665 return;
3666
3667 ring_buffer_iter_reset(buf_iter);
3668
3669 /*
3670 * With the max latency tracers, it is possible that a reset
3671 * never took place on a CPU. This is evident by the timestamp
3672 * being before the start of the buffer.
3673 */
Olivier Deprez157378f2022-04-04 15:47:50 +02003674 while (ring_buffer_iter_peek(buf_iter, &ts)) {
3675 if (ts >= iter->array_buffer->time_start)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003676 break;
3677 entries++;
Olivier Deprez157378f2022-04-04 15:47:50 +02003678 ring_buffer_iter_advance(buf_iter);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003679 }
3680
Olivier Deprez157378f2022-04-04 15:47:50 +02003681 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003682}
3683
3684/*
3685 * The current tracer is copied to avoid taking a global lock
3686 * all around.
3687 */
3688static void *s_start(struct seq_file *m, loff_t *pos)
3689{
3690 struct trace_iterator *iter = m->private;
3691 struct trace_array *tr = iter->tr;
3692 int cpu_file = iter->cpu_file;
3693 void *p = NULL;
3694 loff_t l = 0;
3695 int cpu;
3696
3697 /*
3698 * copy the tracer to avoid using a global lock all around.
3699 * iter->trace is a copy of current_trace; the name pointer may be
3700 * compared instead of using strcmp(), as iter->trace->name will
3701 * point to the same string as current_trace->name.
3702 */
3703 mutex_lock(&trace_types_lock);
3704 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3705 *iter->trace = *tr->current_trace;
3706 mutex_unlock(&trace_types_lock);
3707
3708#ifdef CONFIG_TRACER_MAX_TRACE
3709 if (iter->snapshot && iter->trace->use_max_tr)
3710 return ERR_PTR(-EBUSY);
3711#endif
3712
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003713 if (*pos != iter->pos) {
3714 iter->ent = NULL;
3715 iter->cpu = 0;
3716 iter->idx = -1;
3717
3718 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3719 for_each_tracing_cpu(cpu)
3720 tracing_iter_reset(iter, cpu);
3721 } else
3722 tracing_iter_reset(iter, cpu_file);
3723
3724 iter->leftover = 0;
3725 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3726 ;
3727
3728 } else {
3729 /*
3730 * If we overflowed the seq_file before, then we want
3731 * to just reuse the trace_seq buffer again.
3732 */
3733 if (iter->leftover)
3734 p = iter;
3735 else {
3736 l = *pos - 1;
3737 p = s_next(m, p, &l);
3738 }
3739 }
3740
3741 trace_event_read_lock();
3742 trace_access_lock(cpu_file);
3743 return p;
3744}
3745
3746static void s_stop(struct seq_file *m, void *p)
3747{
3748 struct trace_iterator *iter = m->private;
3749
3750#ifdef CONFIG_TRACER_MAX_TRACE
3751 if (iter->snapshot && iter->trace->use_max_tr)
3752 return;
3753#endif
3754
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003755 trace_access_unlock(iter->cpu_file);
3756 trace_event_read_unlock();
3757}
3758
3759static void
Olivier Deprez157378f2022-04-04 15:47:50 +02003760get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
David Brazdil0f672f62019-12-10 10:32:29 +00003761 unsigned long *entries, int cpu)
3762{
3763 unsigned long count;
3764
3765 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3766 /*
3767 * If this buffer has skipped entries, then we hold all
3768 * entries for the trace and we need to ignore the
3769 * ones before the time stamp.
3770 */
3771 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3772 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3773 /* total is the same as the entries */
3774 *total = count;
3775 } else
3776 *total = count +
3777 ring_buffer_overrun_cpu(buf->buffer, cpu);
3778 *entries = count;
3779}
3780
3781static void
Olivier Deprez157378f2022-04-04 15:47:50 +02003782get_total_entries(struct array_buffer *buf,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003783 unsigned long *total, unsigned long *entries)
3784{
David Brazdil0f672f62019-12-10 10:32:29 +00003785 unsigned long t, e;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003786 int cpu;
3787
3788 *total = 0;
3789 *entries = 0;
3790
3791 for_each_tracing_cpu(cpu) {
David Brazdil0f672f62019-12-10 10:32:29 +00003792 get_total_entries_cpu(buf, &t, &e, cpu);
3793 *total += t;
3794 *entries += e;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003795 }
3796}
3797
David Brazdil0f672f62019-12-10 10:32:29 +00003798unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3799{
3800 unsigned long total, entries;
3801
3802 if (!tr)
3803 tr = &global_trace;
3804
Olivier Deprez157378f2022-04-04 15:47:50 +02003805 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
David Brazdil0f672f62019-12-10 10:32:29 +00003806
3807 return entries;
3808}
3809
3810unsigned long trace_total_entries(struct trace_array *tr)
3811{
3812 unsigned long total, entries;
3813
3814 if (!tr)
3815 tr = &global_trace;
3816
Olivier Deprez157378f2022-04-04 15:47:50 +02003817 get_total_entries(&tr->array_buffer, &total, &entries);
David Brazdil0f672f62019-12-10 10:32:29 +00003818
3819 return entries;
3820}
3821
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003822static void print_lat_help_header(struct seq_file *m)
3823{
Olivier Deprez0e641232021-09-23 10:07:05 +02003824 seq_puts(m, "# _------=> CPU# \n"
3825 "# / _-----=> irqs-off \n"
3826 "# | / _----=> need-resched \n"
3827 "# || / _---=> hardirq/softirq \n"
3828 "# ||| / _--=> preempt-depth \n"
3829 "# |||| / delay \n"
3830 "# cmd pid ||||| time | caller \n"
3831 "# \\ / ||||| \\ | / \n");
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003832}
3833
Olivier Deprez157378f2022-04-04 15:47:50 +02003834static void print_event_info(struct array_buffer *buf, struct seq_file *m)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003835{
3836 unsigned long total;
3837 unsigned long entries;
3838
3839 get_total_entries(buf, &total, &entries);
3840 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
3841 entries, total, num_online_cpus());
3842 seq_puts(m, "#\n");
3843}
3844
Olivier Deprez157378f2022-04-04 15:47:50 +02003845static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003846 unsigned int flags)
3847{
3848 bool tgid = flags & TRACE_ITER_RECORD_TGID;
3849
3850 print_event_info(buf, m);
3851
Olivier Deprez0e641232021-09-23 10:07:05 +02003852 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
3853 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003854}
3855
Olivier Deprez157378f2022-04-04 15:47:50 +02003856static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003857 unsigned int flags)
3858{
3859 bool tgid = flags & TRACE_ITER_RECORD_TGID;
Olivier Deprez0e641232021-09-23 10:07:05 +02003860 const char *space = " ";
3861 int prec = tgid ? 12 : 2;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003862
David Brazdil0f672f62019-12-10 10:32:29 +00003863 print_event_info(buf, m);
3864
Olivier Deprez0e641232021-09-23 10:07:05 +02003865 seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space);
3866 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
3867 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
3868 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
3869 seq_printf(m, "# %.*s||| / delay\n", prec, space);
3870 seq_printf(m, "# TASK-PID %.*s CPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID ");
3871 seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | ");
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003872}
3873
3874void
3875print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3876{
3877 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
Olivier Deprez157378f2022-04-04 15:47:50 +02003878 struct array_buffer *buf = iter->array_buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003879 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3880 struct tracer *type = iter->trace;
3881 unsigned long entries;
3882 unsigned long total;
3883	const char *name = type->name;
3886
3887 get_total_entries(buf, &total, &entries);
3888
3889 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3890 name, UTS_RELEASE);
3891 seq_puts(m, "# -----------------------------------"
3892 "---------------------------------\n");
3893 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3894 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3895 nsecs_to_usecs(data->saved_latency),
3896 entries,
3897 total,
3898 buf->cpu,
3899#if defined(CONFIG_PREEMPT_NONE)
3900 "server",
3901#elif defined(CONFIG_PREEMPT_VOLUNTARY)
3902 "desktop",
3903#elif defined(CONFIG_PREEMPT)
3904 "preempt",
Olivier Deprez157378f2022-04-04 15:47:50 +02003905#elif defined(CONFIG_PREEMPT_RT)
3906 "preempt_rt",
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003907#else
3908 "unknown",
3909#endif
3910 /* These are reserved for later use */
3911 0, 0, 0, 0);
3912#ifdef CONFIG_SMP
3913 seq_printf(m, " #P:%d)\n", num_online_cpus());
3914#else
3915 seq_puts(m, ")\n");
3916#endif
3917 seq_puts(m, "# -----------------\n");
3918 seq_printf(m, "# | task: %.16s-%d "
3919 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3920 data->comm, data->pid,
3921 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3922 data->policy, data->rt_priority);
3923 seq_puts(m, "# -----------------\n");
3924
3925 if (data->critical_start) {
3926 seq_puts(m, "# => started at: ");
3927 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3928 trace_print_seq(m, &iter->seq);
3929 seq_puts(m, "\n# => ended at: ");
3930 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3931 trace_print_seq(m, &iter->seq);
3932 seq_puts(m, "\n#\n");
3933 }
3934
3935 seq_puts(m, "#\n");
3936}
3937
3938static void test_cpu_buff_start(struct trace_iterator *iter)
3939{
3940 struct trace_seq *s = &iter->seq;
3941 struct trace_array *tr = iter->tr;
3942
3943 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3944 return;
3945
3946 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3947 return;
3948
3949 if (cpumask_available(iter->started) &&
3950 cpumask_test_cpu(iter->cpu, iter->started))
3951 return;
3952
Olivier Deprez157378f2022-04-04 15:47:50 +02003953 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00003954 return;
3955
3956 if (cpumask_available(iter->started))
3957 cpumask_set_cpu(iter->cpu, iter->started);
3958
3959 /* Don't print started cpu buffer for the first entry of the trace */
3960 if (iter->idx > 1)
3961 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3962 iter->cpu);
3963}
3964
3965static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3966{
3967 struct trace_array *tr = iter->tr;
3968 struct trace_seq *s = &iter->seq;
3969 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3970 struct trace_entry *entry;
3971 struct trace_event *event;
3972
3973 entry = iter->ent;
3974
3975 test_cpu_buff_start(iter);
3976
3977 event = ftrace_find_event(entry->type);
3978
3979 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3980 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3981 trace_print_lat_context(iter);
3982 else
3983 trace_print_context(iter);
3984 }
3985
3986 if (trace_seq_has_overflowed(s))
3987 return TRACE_TYPE_PARTIAL_LINE;
3988
3989 if (event)
3990 return event->funcs->trace(iter, sym_flags, event);
3991
3992 trace_seq_printf(s, "Unknown type %d\n", entry->type);
3993
3994 return trace_handle_return(s);
3995}
3996
3997static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3998{
3999 struct trace_array *tr = iter->tr;
4000 struct trace_seq *s = &iter->seq;
4001 struct trace_entry *entry;
4002 struct trace_event *event;
4003
4004 entry = iter->ent;
4005
4006 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4007 trace_seq_printf(s, "%d %d %llu ",
4008 entry->pid, iter->cpu, iter->ts);
4009
4010 if (trace_seq_has_overflowed(s))
4011 return TRACE_TYPE_PARTIAL_LINE;
4012
4013 event = ftrace_find_event(entry->type);
4014 if (event)
4015 return event->funcs->raw(iter, 0, event);
4016
4017 trace_seq_printf(s, "%d ?\n", entry->type);
4018
4019 return trace_handle_return(s);
4020}
4021
4022static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4023{
4024 struct trace_array *tr = iter->tr;
4025 struct trace_seq *s = &iter->seq;
4026 unsigned char newline = '\n';
4027 struct trace_entry *entry;
4028 struct trace_event *event;
4029
4030 entry = iter->ent;
4031
4032 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4033 SEQ_PUT_HEX_FIELD(s, entry->pid);
4034 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4035 SEQ_PUT_HEX_FIELD(s, iter->ts);
4036 if (trace_seq_has_overflowed(s))
4037 return TRACE_TYPE_PARTIAL_LINE;
4038 }
4039
4040 event = ftrace_find_event(entry->type);
4041 if (event) {
4042 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4043 if (ret != TRACE_TYPE_HANDLED)
4044 return ret;
4045 }
4046
4047 SEQ_PUT_FIELD(s, newline);
4048
4049 return trace_handle_return(s);
4050}
4051
4052static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4053{
4054 struct trace_array *tr = iter->tr;
4055 struct trace_seq *s = &iter->seq;
4056 struct trace_entry *entry;
4057 struct trace_event *event;
4058
4059 entry = iter->ent;
4060
4061 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4062 SEQ_PUT_FIELD(s, entry->pid);
4063 SEQ_PUT_FIELD(s, iter->cpu);
4064 SEQ_PUT_FIELD(s, iter->ts);
4065 if (trace_seq_has_overflowed(s))
4066 return TRACE_TYPE_PARTIAL_LINE;
4067 }
4068
4069 event = ftrace_find_event(entry->type);
4070 return event ? event->funcs->binary(iter, 0, event) :
4071 TRACE_TYPE_HANDLED;
4072}
4073
4074int trace_empty(struct trace_iterator *iter)
4075{
4076 struct ring_buffer_iter *buf_iter;
4077 int cpu;
4078
4079 /* If we are looking at one CPU buffer, only check that one */
4080 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4081 cpu = iter->cpu_file;
4082 buf_iter = trace_buffer_iter(iter, cpu);
4083 if (buf_iter) {
4084 if (!ring_buffer_iter_empty(buf_iter))
4085 return 0;
4086 } else {
Olivier Deprez157378f2022-04-04 15:47:50 +02004087 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004088 return 0;
4089 }
4090 return 1;
4091 }
4092
4093 for_each_tracing_cpu(cpu) {
4094 buf_iter = trace_buffer_iter(iter, cpu);
4095 if (buf_iter) {
4096 if (!ring_buffer_iter_empty(buf_iter))
4097 return 0;
4098 } else {
Olivier Deprez157378f2022-04-04 15:47:50 +02004099 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004100 return 0;
4101 }
4102 }
4103
4104 return 1;
4105}
4106
4107/* Called with trace_event_read_lock() held. */
4108enum print_line_t print_trace_line(struct trace_iterator *iter)
4109{
4110 struct trace_array *tr = iter->tr;
4111 unsigned long trace_flags = tr->trace_flags;
4112 enum print_line_t ret;
4113
4114 if (iter->lost_events) {
Olivier Deprez157378f2022-04-04 15:47:50 +02004115 if (iter->lost_events == (unsigned long)-1)
4116 trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4117 iter->cpu);
4118 else
4119 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4120 iter->cpu, iter->lost_events);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004121 if (trace_seq_has_overflowed(&iter->seq))
4122 return TRACE_TYPE_PARTIAL_LINE;
4123 }
4124
4125 if (iter->trace && iter->trace->print_line) {
4126 ret = iter->trace->print_line(iter);
4127 if (ret != TRACE_TYPE_UNHANDLED)
4128 return ret;
4129 }
4130
4131 if (iter->ent->type == TRACE_BPUTS &&
4132 trace_flags & TRACE_ITER_PRINTK &&
4133 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4134 return trace_print_bputs_msg_only(iter);
4135
4136 if (iter->ent->type == TRACE_BPRINT &&
4137 trace_flags & TRACE_ITER_PRINTK &&
4138 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4139 return trace_print_bprintk_msg_only(iter);
4140
4141 if (iter->ent->type == TRACE_PRINT &&
4142 trace_flags & TRACE_ITER_PRINTK &&
4143 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4144 return trace_print_printk_msg_only(iter);
4145
4146 if (trace_flags & TRACE_ITER_BIN)
4147 return print_bin_fmt(iter);
4148
4149 if (trace_flags & TRACE_ITER_HEX)
4150 return print_hex_fmt(iter);
4151
4152 if (trace_flags & TRACE_ITER_RAW)
4153 return print_raw_fmt(iter);
4154
4155 return print_trace_fmt(iter);
4156}
4157
4158void trace_latency_header(struct seq_file *m)
4159{
4160 struct trace_iterator *iter = m->private;
4161 struct trace_array *tr = iter->tr;
4162
4163 /* print nothing if the buffers are empty */
4164 if (trace_empty(iter))
4165 return;
4166
4167 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4168 print_trace_header(m, iter);
4169
4170 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4171 print_lat_help_header(m);
4172}
4173
4174void trace_default_header(struct seq_file *m)
4175{
4176 struct trace_iterator *iter = m->private;
4177 struct trace_array *tr = iter->tr;
4178 unsigned long trace_flags = tr->trace_flags;
4179
4180 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4181 return;
4182
4183 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4184 /* print nothing if the buffers are empty */
4185 if (trace_empty(iter))
4186 return;
4187 print_trace_header(m, iter);
4188 if (!(trace_flags & TRACE_ITER_VERBOSE))
4189 print_lat_help_header(m);
4190 } else {
4191 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4192 if (trace_flags & TRACE_ITER_IRQ_INFO)
Olivier Deprez157378f2022-04-04 15:47:50 +02004193 print_func_help_header_irq(iter->array_buffer,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004194 m, trace_flags);
4195 else
Olivier Deprez157378f2022-04-04 15:47:50 +02004196 print_func_help_header(iter->array_buffer, m,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004197 trace_flags);
4198 }
4199 }
4200}
4201
4202static void test_ftrace_alive(struct seq_file *m)
4203{
4204 if (!ftrace_is_dead())
4205 return;
4206 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4207 "# MAY BE MISSING FUNCTION EVENTS\n");
4208}
4209
4210#ifdef CONFIG_TRACER_MAX_TRACE
4211static void show_snapshot_main_help(struct seq_file *m)
4212{
4213 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4214 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4215 "# Takes a snapshot of the main buffer.\n"
4216 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4217 "# (Doesn't have to be '2'; works with any number that\n"
4218 "# is not a '0' or '1')\n");
4219}
4220
4221static void show_snapshot_percpu_help(struct seq_file *m)
4222{
4223 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4224#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4225 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4226 "# Takes a snapshot of the main buffer for this cpu.\n");
4227#else
4228 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4229 "# Must use main snapshot file to allocate.\n");
4230#endif
4231 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4232 "# (Doesn't have to be '2'; works with any number that\n"
4233 "# is not a '0' or '1')\n");
4234}
4235
4236static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4237{
4238 if (iter->tr->allocated_snapshot)
4239 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4240 else
4241 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4242
4243 seq_puts(m, "# Snapshot commands:\n");
4244 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4245 show_snapshot_main_help(m);
4246 else
4247 show_snapshot_percpu_help(m);
4248}
4249#else
4250/* Should never be called */
4251static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4252#endif
4253
4254static int s_show(struct seq_file *m, void *v)
4255{
4256 struct trace_iterator *iter = v;
4257 int ret;
4258
4259 if (iter->ent == NULL) {
4260 if (iter->tr) {
4261 seq_printf(m, "# tracer: %s\n", iter->trace->name);
4262 seq_puts(m, "#\n");
4263 test_ftrace_alive(m);
4264 }
4265 if (iter->snapshot && trace_empty(iter))
4266 print_snapshot_help(m, iter);
4267 else if (iter->trace && iter->trace->print_header)
4268 iter->trace->print_header(m);
4269 else
4270 trace_default_header(m);
4271
4272 } else if (iter->leftover) {
4273 /*
4274 * If we filled the seq_file buffer earlier, we
4275 * want to just show it now.
4276 */
4277 ret = trace_print_seq(m, &iter->seq);
4278
4279 /* ret should this time be zero, but you never know */
4280 iter->leftover = ret;
4281
4282 } else {
4283 print_trace_line(iter);
4284 ret = trace_print_seq(m, &iter->seq);
4285 /*
4286 * If we overflow the seq_file buffer, then it will
4287 * ask us for this data again at start up.
4288 * Use that instead.
4289 * ret is 0 if seq_file write succeeded.
4290 * -1 otherwise.
4291 */
4292 iter->leftover = ret;
4293 }
4294
4295 return 0;
4296}
4297
4298/*
4299 * Should be used after trace_array_get(); trace_types_lock
4300 * ensures that i_cdev was already initialized.
4301 */
4302static inline int tracing_get_cpu(struct inode *inode)
4303{
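	/*
	 * trace_create_cpu_file() stores cpu + 1 in i_cdev, so a NULL
	 * i_cdev means "no specific CPU"; undo that offset here.
	 */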
4304 if (inode->i_cdev) /* See trace_create_cpu_file() */
4305 return (long)inode->i_cdev - 1;
4306 return RING_BUFFER_ALL_CPUS;
4307}
4308
4309static const struct seq_operations tracer_seq_ops = {
4310 .start = s_start,
4311 .next = s_next,
4312 .stop = s_stop,
4313 .show = s_show,
4314};
4315
4316static struct trace_iterator *
4317__tracing_open(struct inode *inode, struct file *file, bool snapshot)
4318{
4319 struct trace_array *tr = inode->i_private;
4320 struct trace_iterator *iter;
4321 int cpu;
4322
4323 if (tracing_disabled)
4324 return ERR_PTR(-ENODEV);
4325
4326 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4327 if (!iter)
4328 return ERR_PTR(-ENOMEM);
4329
4330 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4331 GFP_KERNEL);
4332 if (!iter->buffer_iter)
4333 goto release;
4334
4335 /*
Olivier Deprez157378f2022-04-04 15:47:50 +02004336 * trace_find_next_entry() may need to save off iter->ent.
4337 * It will place it into the iter->temp buffer. As most
4338 * events are less than 128, allocate a buffer of that size.
4339 * If one is greater, then trace_find_next_entry() will
4340 * allocate a new buffer to adjust for the bigger iter->ent.
4341 * It's not critical if it fails to get allocated here.
4342 */
4343 iter->temp = kmalloc(128, GFP_KERNEL);
4344 if (iter->temp)
4345 iter->temp_size = 128;
4346
4347 /*
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004348 * We make a copy of the current tracer to avoid concurrent
4349 * changes on it while we are reading.
4350 */
4351 mutex_lock(&trace_types_lock);
4352 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4353 if (!iter->trace)
4354 goto fail;
4355
4356 *iter->trace = *tr->current_trace;
4357
4358 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4359 goto fail;
4360
4361 iter->tr = tr;
4362
4363#ifdef CONFIG_TRACER_MAX_TRACE
4364 /* Currently only the top directory has a snapshot */
4365 if (tr->current_trace->print_max || snapshot)
Olivier Deprez157378f2022-04-04 15:47:50 +02004366 iter->array_buffer = &tr->max_buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004367 else
4368#endif
Olivier Deprez157378f2022-04-04 15:47:50 +02004369 iter->array_buffer = &tr->array_buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004370 iter->snapshot = snapshot;
4371 iter->pos = -1;
4372 iter->cpu_file = tracing_get_cpu(inode);
4373 mutex_init(&iter->mutex);
4374
4375 /* Notify the tracer early; before we stop tracing. */
Olivier Deprez157378f2022-04-04 15:47:50 +02004376 if (iter->trace->open)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004377 iter->trace->open(iter);
4378
4379 /* Annotate start of buffers if we had overruns */
Olivier Deprez157378f2022-04-04 15:47:50 +02004380 if (ring_buffer_overruns(iter->array_buffer->buffer))
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004381 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4382
4383 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4384 if (trace_clocks[tr->clock_id].in_ns)
4385 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4386
Olivier Deprez157378f2022-04-04 15:47:50 +02004387 /*
4388 * If pause-on-trace is enabled, then stop the trace while
4389 * dumping, unless this is the "snapshot" file
4390 */
4391 if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004392 tracing_stop_tr(tr);
4393
4394 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4395 for_each_tracing_cpu(cpu) {
4396 iter->buffer_iter[cpu] =
Olivier Deprez157378f2022-04-04 15:47:50 +02004397 ring_buffer_read_prepare(iter->array_buffer->buffer,
David Brazdil0f672f62019-12-10 10:32:29 +00004398 cpu, GFP_KERNEL);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004399 }
4400 ring_buffer_read_prepare_sync();
4401 for_each_tracing_cpu(cpu) {
4402 ring_buffer_read_start(iter->buffer_iter[cpu]);
4403 tracing_iter_reset(iter, cpu);
4404 }
4405 } else {
4406 cpu = iter->cpu_file;
4407 iter->buffer_iter[cpu] =
Olivier Deprez157378f2022-04-04 15:47:50 +02004408 ring_buffer_read_prepare(iter->array_buffer->buffer,
David Brazdil0f672f62019-12-10 10:32:29 +00004409 cpu, GFP_KERNEL);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004410 ring_buffer_read_prepare_sync();
4411 ring_buffer_read_start(iter->buffer_iter[cpu]);
4412 tracing_iter_reset(iter, cpu);
4413 }
4414
4415 mutex_unlock(&trace_types_lock);
4416
4417 return iter;
4418
4419 fail:
4420 mutex_unlock(&trace_types_lock);
4421 kfree(iter->trace);
Olivier Deprez157378f2022-04-04 15:47:50 +02004422 kfree(iter->temp);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004423 kfree(iter->buffer_iter);
4424release:
4425 seq_release_private(inode, file);
4426 return ERR_PTR(-ENOMEM);
4427}
4428
4429int tracing_open_generic(struct inode *inode, struct file *filp)
4430{
David Brazdil0f672f62019-12-10 10:32:29 +00004431 int ret;
4432
4433 ret = tracing_check_open_get_tr(NULL);
4434 if (ret)
4435 return ret;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004436
4437 filp->private_data = inode->i_private;
4438 return 0;
4439}
4440
4441bool tracing_is_disabled(void)
4442{
4443	return (tracing_disabled) ? true : false;
4444}
4445
4446/*
4447 * Open and update trace_array ref count.
4448 * Must have the current trace_array passed to it.
4449 */
David Brazdil0f672f62019-12-10 10:32:29 +00004450int tracing_open_generic_tr(struct inode *inode, struct file *filp)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004451{
4452 struct trace_array *tr = inode->i_private;
David Brazdil0f672f62019-12-10 10:32:29 +00004453 int ret;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004454
David Brazdil0f672f62019-12-10 10:32:29 +00004455 ret = tracing_check_open_get_tr(tr);
4456 if (ret)
4457 return ret;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004458
4459 filp->private_data = inode->i_private;
4460
4461 return 0;
4462}
4463
4464static int tracing_release(struct inode *inode, struct file *file)
4465{
4466 struct trace_array *tr = inode->i_private;
4467 struct seq_file *m = file->private_data;
4468 struct trace_iterator *iter;
4469 int cpu;
4470
4471 if (!(file->f_mode & FMODE_READ)) {
4472 trace_array_put(tr);
4473 return 0;
4474 }
4475
4476 /* Writes do not use seq_file */
4477 iter = m->private;
4478 mutex_lock(&trace_types_lock);
4479
4480 for_each_tracing_cpu(cpu) {
4481 if (iter->buffer_iter[cpu])
4482 ring_buffer_read_finish(iter->buffer_iter[cpu]);
4483 }
4484
4485 if (iter->trace && iter->trace->close)
4486 iter->trace->close(iter);
4487
Olivier Deprez157378f2022-04-04 15:47:50 +02004488 if (!iter->snapshot && tr->stop_count)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004489 /* reenable tracing if it was previously enabled */
4490 tracing_start_tr(tr);
4491
4492 __trace_array_put(tr);
4493
4494 mutex_unlock(&trace_types_lock);
4495
4496 mutex_destroy(&iter->mutex);
4497 free_cpumask_var(iter->started);
Olivier Deprez157378f2022-04-04 15:47:50 +02004498 kfree(iter->temp);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004499 kfree(iter->trace);
4500 kfree(iter->buffer_iter);
4501 seq_release_private(inode, file);
4502
4503 return 0;
4504}
4505
4506static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4507{
4508 struct trace_array *tr = inode->i_private;
4509
4510 trace_array_put(tr);
4511 return 0;
4512}
4513
4514static int tracing_single_release_tr(struct inode *inode, struct file *file)
4515{
4516 struct trace_array *tr = inode->i_private;
4517
4518 trace_array_put(tr);
4519
4520 return single_release(inode, file);
4521}
4522
4523static int tracing_open(struct inode *inode, struct file *file)
4524{
4525 struct trace_array *tr = inode->i_private;
4526 struct trace_iterator *iter;
David Brazdil0f672f62019-12-10 10:32:29 +00004527 int ret;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004528
David Brazdil0f672f62019-12-10 10:32:29 +00004529 ret = tracing_check_open_get_tr(tr);
4530 if (ret)
4531 return ret;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004532
4533 /* If this file was open for write, then erase contents */
4534 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4535 int cpu = tracing_get_cpu(inode);
Olivier Deprez157378f2022-04-04 15:47:50 +02004536 struct array_buffer *trace_buf = &tr->array_buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004537
4538#ifdef CONFIG_TRACER_MAX_TRACE
4539 if (tr->current_trace->print_max)
4540 trace_buf = &tr->max_buffer;
4541#endif
4542
4543 if (cpu == RING_BUFFER_ALL_CPUS)
4544 tracing_reset_online_cpus(trace_buf);
4545 else
David Brazdil0f672f62019-12-10 10:32:29 +00004546 tracing_reset_cpu(trace_buf, cpu);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004547 }
4548
4549 if (file->f_mode & FMODE_READ) {
4550 iter = __tracing_open(inode, file, false);
4551 if (IS_ERR(iter))
4552 ret = PTR_ERR(iter);
4553 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4554 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4555 }
4556
4557 if (ret < 0)
4558 trace_array_put(tr);
4559
4560 return ret;
4561}
4562
4563/*
4564 * Some tracers are not suitable for instance buffers.
4565 * A tracer is always available for the global array (toplevel)
4566 * or if it explicitly states that it is.
4567 */
4568static bool
4569trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4570{
4571 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4572}
4573
4574/* Find the next tracer that this trace array may use */
4575static struct tracer *
4576get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4577{
4578 while (t && !trace_ok_for_array(t, tr))
4579 t = t->next;
4580
4581 return t;
4582}
4583
4584static void *
4585t_next(struct seq_file *m, void *v, loff_t *pos)
4586{
4587 struct trace_array *tr = m->private;
4588 struct tracer *t = v;
4589
4590 (*pos)++;
4591
4592 if (t)
4593 t = get_tracer_for_array(tr, t->next);
4594
4595 return t;
4596}
4597
4598static void *t_start(struct seq_file *m, loff_t *pos)
4599{
4600 struct trace_array *tr = m->private;
4601 struct tracer *t;
4602 loff_t l = 0;
4603
4604 mutex_lock(&trace_types_lock);
4605
4606 t = get_tracer_for_array(tr, trace_types);
4607 for (; t && l < *pos; t = t_next(m, t, &l))
4608 ;
4609
4610 return t;
4611}
4612
4613static void t_stop(struct seq_file *m, void *p)
4614{
4615 mutex_unlock(&trace_types_lock);
4616}
4617
4618static int t_show(struct seq_file *m, void *v)
4619{
4620 struct tracer *t = v;
4621
4622 if (!t)
4623 return 0;
4624
4625 seq_puts(m, t->name);
4626 if (t->next)
4627 seq_putc(m, ' ');
4628 else
4629 seq_putc(m, '\n');
4630
4631 return 0;
4632}
4633
4634static const struct seq_operations show_traces_seq_ops = {
4635 .start = t_start,
4636 .next = t_next,
4637 .stop = t_stop,
4638 .show = t_show,
4639};
4640
4641static int show_traces_open(struct inode *inode, struct file *file)
4642{
4643 struct trace_array *tr = inode->i_private;
4644 struct seq_file *m;
4645 int ret;
4646
David Brazdil0f672f62019-12-10 10:32:29 +00004647 ret = tracing_check_open_get_tr(tr);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004648 if (ret)
4649 return ret;
4650
David Brazdil0f672f62019-12-10 10:32:29 +00004651 ret = seq_open(file, &show_traces_seq_ops);
4652 if (ret) {
4653 trace_array_put(tr);
4654 return ret;
4655 }
4656
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004657 m = file->private_data;
4658 m->private = tr;
4659
4660 return 0;
4661}
4662
David Brazdil0f672f62019-12-10 10:32:29 +00004663static int show_traces_release(struct inode *inode, struct file *file)
4664{
4665 struct trace_array *tr = inode->i_private;
4666
4667 trace_array_put(tr);
4668 return seq_release(inode, file);
4669}
4670
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004671static ssize_t
4672tracing_write_stub(struct file *filp, const char __user *ubuf,
4673 size_t count, loff_t *ppos)
4674{
4675 return count;
4676}
4677
4678loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4679{
4680 int ret;
4681
4682 if (file->f_mode & FMODE_READ)
4683 ret = seq_lseek(file, offset, whence);
4684 else
4685 file->f_pos = ret = 0;
4686
4687 return ret;
4688}
4689
4690static const struct file_operations tracing_fops = {
4691 .open = tracing_open,
4692 .read = seq_read,
4693 .write = tracing_write_stub,
4694 .llseek = tracing_lseek,
4695 .release = tracing_release,
4696};
4697
4698static const struct file_operations show_traces_fops = {
4699 .open = show_traces_open,
4700 .read = seq_read,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004701 .llseek = seq_lseek,
David Brazdil0f672f62019-12-10 10:32:29 +00004702 .release = show_traces_release,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004703};
4704
4705static ssize_t
4706tracing_cpumask_read(struct file *filp, char __user *ubuf,
4707 size_t count, loff_t *ppos)
4708{
4709 struct trace_array *tr = file_inode(filp)->i_private;
4710 char *mask_str;
4711 int len;
4712
4713 len = snprintf(NULL, 0, "%*pb\n",
4714 cpumask_pr_args(tr->tracing_cpumask)) + 1;
4715 mask_str = kmalloc(len, GFP_KERNEL);
4716 if (!mask_str)
4717 return -ENOMEM;
4718
4719 len = snprintf(mask_str, len, "%*pb\n",
4720 cpumask_pr_args(tr->tracing_cpumask));
4721 if (len >= count) {
4722 count = -EINVAL;
4723 goto out_err;
4724 }
4725 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4726
4727out_err:
4728 kfree(mask_str);
4729
4730 return count;
4731}
4732
Olivier Deprez157378f2022-04-04 15:47:50 +02004733int tracing_set_cpumask(struct trace_array *tr,
4734 cpumask_var_t tracing_cpumask_new)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004735{
Olivier Deprez157378f2022-04-04 15:47:50 +02004736 int cpu;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004737
Olivier Deprez157378f2022-04-04 15:47:50 +02004738 if (!tr)
4739 return -EINVAL;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004740
4741 local_irq_disable();
4742 arch_spin_lock(&tr->max_lock);
4743 for_each_tracing_cpu(cpu) {
4744 /*
4745 * Increase/decrease the disabled counter if we are
4746 * about to flip a bit in the cpumask:
4747 */
4748 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4749 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
Olivier Deprez157378f2022-04-04 15:47:50 +02004750 atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4751 ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004752 }
4753 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4754 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
Olivier Deprez157378f2022-04-04 15:47:50 +02004755 atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
4756 ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004757 }
4758 }
4759 arch_spin_unlock(&tr->max_lock);
4760 local_irq_enable();
4761
4762 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
Olivier Deprez157378f2022-04-04 15:47:50 +02004763
4764 return 0;
4765}
4766
4767static ssize_t
4768tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4769 size_t count, loff_t *ppos)
4770{
4771 struct trace_array *tr = file_inode(filp)->i_private;
4772 cpumask_var_t tracing_cpumask_new;
4773 int err;
4774
4775 if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4776 return -ENOMEM;
4777
4778 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4779 if (err)
4780 goto err_free;
4781
4782 err = tracing_set_cpumask(tr, tracing_cpumask_new);
4783 if (err)
4784 goto err_free;
4785
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004786 free_cpumask_var(tracing_cpumask_new);
4787
4788 return count;
4789
Olivier Deprez157378f2022-04-04 15:47:50 +02004790err_free:
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004791 free_cpumask_var(tracing_cpumask_new);
4792
4793 return err;
4794}
4795
4796static const struct file_operations tracing_cpumask_fops = {
4797 .open = tracing_open_generic_tr,
4798 .read = tracing_cpumask_read,
4799 .write = tracing_cpumask_write,
4800 .release = tracing_release_generic_tr,
4801 .llseek = generic_file_llseek,
4802};
4803
4804static int tracing_trace_options_show(struct seq_file *m, void *v)
4805{
4806 struct tracer_opt *trace_opts;
4807 struct trace_array *tr = m->private;
4808 u32 tracer_flags;
4809 int i;
4810
4811 mutex_lock(&trace_types_lock);
4812 tracer_flags = tr->current_trace->flags->val;
4813 trace_opts = tr->current_trace->flags->opts;
4814
4815 for (i = 0; trace_options[i]; i++) {
4816 if (tr->trace_flags & (1 << i))
4817 seq_printf(m, "%s\n", trace_options[i]);
4818 else
4819 seq_printf(m, "no%s\n", trace_options[i]);
4820 }
4821
4822 for (i = 0; trace_opts[i].name; i++) {
4823 if (tracer_flags & trace_opts[i].bit)
4824 seq_printf(m, "%s\n", trace_opts[i].name);
4825 else
4826 seq_printf(m, "no%s\n", trace_opts[i].name);
4827 }
4828 mutex_unlock(&trace_types_lock);
4829
4830 return 0;
4831}
4832
4833static int __set_tracer_option(struct trace_array *tr,
4834 struct tracer_flags *tracer_flags,
4835 struct tracer_opt *opts, int neg)
4836{
4837 struct tracer *trace = tracer_flags->trace;
4838 int ret;
4839
4840 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4841 if (ret)
4842 return ret;
4843
4844 if (neg)
4845 tracer_flags->val &= ~opts->bit;
4846 else
4847 tracer_flags->val |= opts->bit;
4848 return 0;
4849}
4850
4851/* Try to assign a tracer specific option */
4852static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4853{
4854 struct tracer *trace = tr->current_trace;
4855 struct tracer_flags *tracer_flags = trace->flags;
4856 struct tracer_opt *opts = NULL;
4857 int i;
4858
4859 for (i = 0; tracer_flags->opts[i].name; i++) {
4860 opts = &tracer_flags->opts[i];
4861
4862 if (strcmp(cmp, opts->name) == 0)
4863 return __set_tracer_option(tr, trace->flags, opts, neg);
4864 }
4865
4866 return -EINVAL;
4867}
4868
4869/* Some tracers require overwrite to stay enabled */
4870int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4871{
4872 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4873 return -1;
4874
4875 return 0;
4876}
4877
4878int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4879{
Olivier Deprez0e641232021-09-23 10:07:05 +02004880 int *map;
4881
4882 if ((mask == TRACE_ITER_RECORD_TGID) ||
4883 (mask == TRACE_ITER_RECORD_CMD))
4884 lockdep_assert_held(&event_mutex);
4885
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004886 /* do nothing if flag is already set */
4887 if (!!(tr->trace_flags & mask) == !!enabled)
4888 return 0;
4889
4890 /* Give the tracer a chance to approve the change */
4891 if (tr->current_trace->flag_changed)
4892 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4893 return -EINVAL;
4894
4895 if (enabled)
4896 tr->trace_flags |= mask;
4897 else
4898 tr->trace_flags &= ~mask;
4899
4900 if (mask == TRACE_ITER_RECORD_CMD)
4901 trace_event_enable_cmd_record(enabled);
4902
4903 if (mask == TRACE_ITER_RECORD_TGID) {
Olivier Deprez0e641232021-09-23 10:07:05 +02004904 if (!tgid_map) {
4905 tgid_map_max = pid_max;
4906 map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
4907 GFP_KERNEL);
4908
4909 /*
4910 * Pairs with smp_load_acquire() in
4911 * trace_find_tgid_ptr() to ensure that if it observes
4912 * the tgid_map we just allocated then it also observes
4913 * the corresponding tgid_map_max value.
4914 */
4915 smp_store_release(&tgid_map, map);
4916 }
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004917 if (!tgid_map) {
4918 tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4919 return -ENOMEM;
4920 }
4921
4922 trace_event_enable_tgid_record(enabled);
4923 }
4924
4925 if (mask == TRACE_ITER_EVENT_FORK)
4926 trace_event_follow_fork(tr, enabled);
4927
4928 if (mask == TRACE_ITER_FUNC_FORK)
4929 ftrace_pid_follow_fork(tr, enabled);
4930
4931 if (mask == TRACE_ITER_OVERWRITE) {
Olivier Deprez157378f2022-04-04 15:47:50 +02004932 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004933#ifdef CONFIG_TRACER_MAX_TRACE
4934 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4935#endif
4936 }
4937
4938 if (mask == TRACE_ITER_PRINTK) {
4939 trace_printk_start_stop_comm(enabled);
4940 trace_printk_control(enabled);
4941 }
4942
4943 return 0;
4944}
4945
Olivier Deprez157378f2022-04-04 15:47:50 +02004946int trace_set_options(struct trace_array *tr, char *option)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004947{
4948 char *cmp;
4949 int neg = 0;
4950 int ret;
4951 size_t orig_len = strlen(option);
David Brazdil0f672f62019-12-10 10:32:29 +00004952 int len;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004953
4954 cmp = strstrip(option);
4955
David Brazdil0f672f62019-12-10 10:32:29 +00004956 len = str_has_prefix(cmp, "no");
4957 if (len)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004958 neg = 1;
David Brazdil0f672f62019-12-10 10:32:29 +00004959
4960 cmp += len;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004961
Olivier Deprez0e641232021-09-23 10:07:05 +02004962 mutex_lock(&event_mutex);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004963 mutex_lock(&trace_types_lock);
4964
4965 ret = match_string(trace_options, -1, cmp);
4966 /* If no option could be set, test the specific tracer options */
4967 if (ret < 0)
4968 ret = set_tracer_option(tr, cmp, neg);
4969 else
4970 ret = set_tracer_flag(tr, 1 << ret, !neg);
4971
4972 mutex_unlock(&trace_types_lock);
Olivier Deprez0e641232021-09-23 10:07:05 +02004973 mutex_unlock(&event_mutex);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00004974
4975 /*
4976 * If the first trailing whitespace is replaced with '\0' by strstrip,
4977 * turn it back into a space.
4978 */
4979 if (orig_len > strlen(option))
4980 option[strlen(option)] = ' ';
4981
4982 return ret;
4983}
4984
4985static void __init apply_trace_boot_options(void)
4986{
4987 char *buf = trace_boot_options_buf;
4988 char *option;
4989
4990 while (true) {
4991 option = strsep(&buf, ",");
4992
4993 if (!option)
4994 break;
4995
4996 if (*option)
4997 trace_set_options(&global_trace, option);
4998
4999 /* Put back the comma to allow this to be called again */
5000 if (buf)
5001 *(buf - 1) = ',';
5002 }
5003}
5004
5005static ssize_t
5006tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5007 size_t cnt, loff_t *ppos)
5008{
5009 struct seq_file *m = filp->private_data;
5010 struct trace_array *tr = m->private;
5011 char buf[64];
5012 int ret;
5013
5014 if (cnt >= sizeof(buf))
5015 return -EINVAL;
5016
5017 if (copy_from_user(buf, ubuf, cnt))
5018 return -EFAULT;
5019
5020 buf[cnt] = 0;
5021
5022 ret = trace_set_options(tr, buf);
5023 if (ret < 0)
5024 return ret;
5025
5026 *ppos += cnt;
5027
5028 return cnt;
5029}
5030
5031static int tracing_trace_options_open(struct inode *inode, struct file *file)
5032{
5033 struct trace_array *tr = inode->i_private;
5034 int ret;
5035
David Brazdil0f672f62019-12-10 10:32:29 +00005036 ret = tracing_check_open_get_tr(tr);
5037 if (ret)
5038 return ret;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005039
5040 ret = single_open(file, tracing_trace_options_show, inode->i_private);
5041 if (ret < 0)
5042 trace_array_put(tr);
5043
5044 return ret;
5045}
5046
5047static const struct file_operations tracing_iter_fops = {
5048 .open = tracing_trace_options_open,
5049 .read = seq_read,
5050 .llseek = seq_lseek,
5051 .release = tracing_single_release_tr,
5052 .write = tracing_trace_options_write,
5053};
5054
5055static const char readme_msg[] =
5056 "tracing mini-HOWTO:\n\n"
5057 "# echo 0 > tracing_on : quick way to disable tracing\n"
5058 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5059 " Important files:\n"
5060 " trace\t\t\t- The static contents of the buffer\n"
5061 "\t\t\t To clear the buffer write into this file: echo > trace\n"
5062 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5063 " current_tracer\t- function and latency tracers\n"
5064 " available_tracers\t- list of configured tracers for current_tracer\n"
David Brazdil0f672f62019-12-10 10:32:29 +00005065 " error_log\t- error log for failed commands (that support it)\n"
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005066 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
5067 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
5068 " trace_clock\t\t-change the clock used to order events\n"
5069 " local: Per cpu clock but may not be synced across CPUs\n"
5070 " global: Synced across CPUs but slows tracing down.\n"
5071 " counter: Not a clock, but just an increment\n"
5072 " uptime: Jiffy counter from time of boot\n"
5073 " perf: Same clock that perf events use\n"
5074#ifdef CONFIG_X86_64
5075 " x86-tsc: TSC cycle counter\n"
5076#endif
5077 "\n timestamp_mode\t-view the mode used to timestamp events\n"
5078 " delta: Delta difference against a buffer-wide timestamp\n"
5079 " absolute: Absolute (standalone) timestamp\n"
5080 "\n trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5081 "\n trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
5082 " tracing_cpumask\t- Limit which CPUs to trace\n"
5083 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5084 "\t\t\t Remove sub-buffer with rmdir\n"
5085 " trace_options\t\t- Set format or modify how tracing happens\n"
David Brazdil0f672f62019-12-10 10:32:29 +00005086 "\t\t\t Disable an option by prefixing 'no' to the\n"
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005087 "\t\t\t option name\n"
5088 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5089#ifdef CONFIG_DYNAMIC_FTRACE
5090 "\n available_filter_functions - list of functions that can be filtered on\n"
5091 " set_ftrace_filter\t- echo function name in here to only trace these\n"
5092 "\t\t\t functions\n"
5093 "\t accepts: func_full_name or glob-matching-pattern\n"
5094 "\t modules: Can select a group via module\n"
5095 "\t Format: :mod:<module-name>\n"
5096 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
5097 "\t triggers: a command to perform when function is hit\n"
5098 "\t Format: <function>:<trigger>[:count]\n"
5099 "\t trigger: traceon, traceoff\n"
5100 "\t\t enable_event:<system>:<event>\n"
5101 "\t\t disable_event:<system>:<event>\n"
5102#ifdef CONFIG_STACKTRACE
5103 "\t\t stacktrace\n"
5104#endif
5105#ifdef CONFIG_TRACER_SNAPSHOT
5106 "\t\t snapshot\n"
5107#endif
5108 "\t\t dump\n"
5109 "\t\t cpudump\n"
5110 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
5111 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
5112 "\t The first one will disable tracing every time do_fault is hit\n"
5113 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
5114 "\t The first time do trap is hit and it disables tracing, the\n"
5115 "\t counter will decrement to 2. If tracing is already disabled,\n"
5116 "\t the counter will not decrement. It only decrements when the\n"
5117 "\t trigger did work\n"
5118 "\t To remove a trigger without a count:\n"
5119 "\t echo '!<function>:<trigger>' > set_ftrace_filter\n"
5120 "\t To remove a trigger with a count:\n"
5121 "\t echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5122 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
5123 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5124 "\t modules: Can select a group via module command :mod:\n"
5125 "\t Does not accept triggers\n"
5126#endif /* CONFIG_DYNAMIC_FTRACE */
5127#ifdef CONFIG_FUNCTION_TRACER
5128 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5129 "\t\t (function)\n"
Olivier Deprez157378f2022-04-04 15:47:50 +02005130 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5131 "\t\t (function)\n"
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005132#endif
5133#ifdef CONFIG_FUNCTION_GRAPH_TRACER
5134 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5135 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5136 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5137#endif
5138#ifdef CONFIG_TRACER_SNAPSHOT
5139 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
5140 "\t\t\t snapshot buffer. Read the contents for more\n"
5141 "\t\t\t information\n"
5142#endif
5143#ifdef CONFIG_STACK_TRACER
5144 " stack_trace\t\t- Shows the max stack trace when active\n"
5145 " stack_max_size\t- Shows current max stack size that was traced\n"
5146 "\t\t\t Write into this file to reset the max size (trigger a\n"
5147 "\t\t\t new trace)\n"
5148#ifdef CONFIG_DYNAMIC_FTRACE
5149 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5150 "\t\t\t traces\n"
5151#endif
5152#endif /* CONFIG_STACK_TRACER */
David Brazdil0f672f62019-12-10 10:32:29 +00005153#ifdef CONFIG_DYNAMIC_EVENTS
5154 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5155 "\t\t\t Write into this file to define/undefine new trace events.\n"
5156#endif
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005157#ifdef CONFIG_KPROBE_EVENTS
David Brazdil0f672f62019-12-10 10:32:29 +00005158 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005159 "\t\t\t Write into this file to define/undefine new trace events.\n"
5160#endif
5161#ifdef CONFIG_UPROBE_EVENTS
David Brazdil0f672f62019-12-10 10:32:29 +00005162 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005163 "\t\t\t Write into this file to define/undefine new trace events.\n"
5164#endif
5165#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5166 "\t accepts: event-definitions (one definition per line)\n"
5167 "\t Format: p[:[<group>/]<event>] <place> [<args>]\n"
5168 "\t r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
David Brazdil0f672f62019-12-10 10:32:29 +00005169#ifdef CONFIG_HIST_TRIGGERS
5170 "\t s:[synthetic/]<event> <field> [<field>]\n"
5171#endif
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005172 "\t -:[<group>/]<event>\n"
5173#ifdef CONFIG_KPROBE_EVENTS
5174 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
Olivier Deprez157378f2022-04-04 15:47:50 +02005175	 "\t place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005176#endif
5177#ifdef CONFIG_UPROBE_EVENTS
Olivier Deprez157378f2022-04-04 15:47:50 +02005178	 "\t place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005179#endif
5180 "\t args: <name>=fetcharg[:type]\n"
5181 "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
David Brazdil0f672f62019-12-10 10:32:29 +00005182#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5183 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5184#else
5185 "\t $stack<index>, $stack, $retval, $comm,\n"
5186#endif
5187 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5188 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5189 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5190 "\t <type>\\[<array-size>\\]\n"
5191#ifdef CONFIG_HIST_TRIGGERS
5192 "\t field: <stype> <name>;\n"
5193 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5194 "\t [unsigned] char/int/long\n"
5195#endif
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005196#endif
5197 " events/\t\t- Directory containing all trace event subsystems:\n"
5198 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5199 " events/<system>/\t- Directory containing all trace events for <system>:\n"
5200 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5201 "\t\t\t events\n"
5202 " filter\t\t- If set, only events passing filter are traced\n"
5203 " events/<system>/<event>/\t- Directory containing control files for\n"
5204 "\t\t\t <event>:\n"
5205 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5206 " filter\t\t- If set, only events passing filter are traced\n"
5207 " trigger\t\t- If set, a command to perform when event is hit\n"
5208 "\t Format: <trigger>[:count][if <filter>]\n"
5209 "\t trigger: traceon, traceoff\n"
5210 "\t enable_event:<system>:<event>\n"
5211 "\t disable_event:<system>:<event>\n"
5212#ifdef CONFIG_HIST_TRIGGERS
5213 "\t enable_hist:<system>:<event>\n"
5214 "\t disable_hist:<system>:<event>\n"
5215#endif
5216#ifdef CONFIG_STACKTRACE
5217 "\t\t stacktrace\n"
5218#endif
5219#ifdef CONFIG_TRACER_SNAPSHOT
5220 "\t\t snapshot\n"
5221#endif
5222#ifdef CONFIG_HIST_TRIGGERS
5223 "\t\t hist (see below)\n"
5224#endif
5225 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
5226 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
5227 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5228 "\t events/block/block_unplug/trigger\n"
5229 "\t The first disables tracing every time block_unplug is hit.\n"
5230 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
5231 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
5232 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5233 "\t Like function triggers, the counter is only decremented if it\n"
5234 "\t enabled or disabled tracing.\n"
5235 "\t To remove a trigger without a count:\n"
   5236	 "\t echo '!<trigger>' > <system>/<event>/trigger\n"
5237 "\t To remove a trigger with a count:\n"
   5238	 "\t echo '!<trigger>:0' > <system>/<event>/trigger\n"
5239 "\t Filters can be ignored when removing a trigger.\n"
5240#ifdef CONFIG_HIST_TRIGGERS
5241 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
5242 "\t Format: hist:keys=<field1[,field2,...]>\n"
5243 "\t [:values=<field1[,field2,...]>]\n"
5244 "\t [:sort=<field1[,field2,...]>]\n"
5245 "\t [:size=#entries]\n"
5246 "\t [:pause][:continue][:clear]\n"
5247 "\t [:name=histname1]\n"
David Brazdil0f672f62019-12-10 10:32:29 +00005248 "\t [:<handler>.<action>]\n"
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005249 "\t [if <filter>]\n\n"
Olivier Deprez0e641232021-09-23 10:07:05 +02005250 "\t Note, special fields can be used as well:\n"
5251 "\t common_timestamp - to record current timestamp\n"
5252 "\t common_cpu - to record the CPU the event happened on\n"
5253 "\n"
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005254 "\t When a matching event is hit, an entry is added to a hash\n"
5255 "\t table using the key(s) and value(s) named, and the value of a\n"
5256 "\t sum called 'hitcount' is incremented. Keys and values\n"
5257 "\t correspond to fields in the event's format description. Keys\n"
5258 "\t can be any field, or the special string 'stacktrace'.\n"
5259 "\t Compound keys consisting of up to two fields can be specified\n"
5260 "\t by the 'keys' keyword. Values must correspond to numeric\n"
5261 "\t fields. Sort keys consisting of up to two fields can be\n"
5262 "\t specified using the 'sort' keyword. The sort direction can\n"
5263 "\t be modified by appending '.descending' or '.ascending' to a\n"
5264 "\t sort field. The 'size' parameter can be used to specify more\n"
5265 "\t or fewer than the default 2048 entries for the hashtable size.\n"
5266 "\t If a hist trigger is given a name using the 'name' parameter,\n"
5267 "\t its histogram data will be shared with other triggers of the\n"
5268 "\t same name, and trigger hits will update this common data.\n\n"
5269 "\t Reading the 'hist' file for the event will dump the hash\n"
5270 "\t table in its entirety to stdout. If there are multiple hist\n"
5271 "\t triggers attached to an event, there will be a table for each\n"
5272 "\t trigger in the output. The table displayed for a named\n"
5273 "\t trigger will be the same as any other instance having the\n"
5274 "\t same name. The default format used to display a given field\n"
5275 "\t can be modified by appending any of the following modifiers\n"
5276 "\t to the field name, as applicable:\n\n"
5277 "\t .hex display a number as a hex value\n"
5278 "\t .sym display an address as a symbol\n"
5279 "\t .sym-offset display an address as a symbol and offset\n"
5280 "\t .execname display a common_pid as a program name\n"
5281 "\t .syscall display a syscall id as a syscall name\n"
5282 "\t .log2 display log2 value rather than raw number\n"
5283 "\t .usecs display a common_timestamp in microseconds\n\n"
5284 "\t The 'pause' parameter can be used to pause an existing hist\n"
5285 "\t trigger or to start a hist trigger but not log any events\n"
5286 "\t until told to do so. 'continue' can be used to start or\n"
5287 "\t restart a paused hist trigger.\n\n"
5288 "\t The 'clear' parameter will clear the contents of a running\n"
5289 "\t hist trigger and leave its current paused/active state\n"
5290 "\t unchanged.\n\n"
5291 "\t The enable_hist and disable_hist triggers can be used to\n"
5292 "\t have one event conditionally start and stop another event's\n"
David Brazdil0f672f62019-12-10 10:32:29 +00005293 "\t already-attached hist trigger. The syntax is analogous to\n"
5294 "\t the enable_event and disable_event triggers.\n\n"
5295 "\t Hist trigger handlers and actions are executed whenever a\n"
   5296	 "\t histogram entry is added or updated. They take the form:\n\n"
5297 "\t <handler>.<action>\n\n"
5298 "\t The available handlers are:\n\n"
5299 "\t onmatch(matching.event) - invoke on addition or update\n"
5300 "\t onmax(var) - invoke if var exceeds current max\n"
5301 "\t onchange(var) - invoke action if var changes\n\n"
5302 "\t The available actions are:\n\n"
5303 "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
5304 "\t save(field,...) - save current event fields\n"
5305#ifdef CONFIG_TRACER_SNAPSHOT
Olivier Deprez157378f2022-04-04 15:47:50 +02005306 "\t snapshot() - snapshot the trace buffer\n\n"
5307#endif
5308#ifdef CONFIG_SYNTH_EVENTS
5309 " events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5310 "\t Write into this file to define/undefine new synthetic events.\n"
5311 "\t example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
David Brazdil0f672f62019-12-10 10:32:29 +00005312#endif
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005313#endif
5314;
5315
5316static ssize_t
5317tracing_readme_read(struct file *filp, char __user *ubuf,
5318 size_t cnt, loff_t *ppos)
5319{
5320 return simple_read_from_buffer(ubuf, cnt, ppos,
5321 readme_msg, strlen(readme_msg));
5322}
5323
5324static const struct file_operations tracing_readme_fops = {
5325 .open = tracing_open_generic,
5326 .read = tracing_readme_read,
5327 .llseek = generic_file_llseek,
5328};
5329
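/*
 * seq_file iterators for "saved_tgids": the seq_file position is used
 * directly as a PID index, so start() and next() simply look up the
 * recorded PID->TGID entry via trace_find_tgid_ptr().
 */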
5330static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5331{
Olivier Deprez0e641232021-09-23 10:07:05 +02005332 int pid = ++(*pos);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005333
Olivier Deprez0e641232021-09-23 10:07:05 +02005334 return trace_find_tgid_ptr(pid);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005335}
5336
5337static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5338{
Olivier Deprez0e641232021-09-23 10:07:05 +02005339 int pid = *pos;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005340
Olivier Deprez0e641232021-09-23 10:07:05 +02005341 return trace_find_tgid_ptr(pid);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005342}
5343
5344static void saved_tgids_stop(struct seq_file *m, void *v)
5345{
5346}
5347
5348static int saved_tgids_show(struct seq_file *m, void *v)
5349{
Olivier Deprez0e641232021-09-23 10:07:05 +02005350 int *entry = (int *)v;
5351 int pid = entry - tgid_map;
5352 int tgid = *entry;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005353
Olivier Deprez0e641232021-09-23 10:07:05 +02005354 if (tgid == 0)
5355 return SEQ_SKIP;
5356
5357 seq_printf(m, "%d %d\n", pid, tgid);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005358 return 0;
5359}
5360
5361static const struct seq_operations tracing_saved_tgids_seq_ops = {
5362 .start = saved_tgids_start,
5363 .stop = saved_tgids_stop,
5364 .next = saved_tgids_next,
5365 .show = saved_tgids_show,
5366};
5367
5368static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5369{
David Brazdil0f672f62019-12-10 10:32:29 +00005370 int ret;
5371
5372 ret = tracing_check_open_get_tr(NULL);
5373 if (ret)
5374 return ret;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005375
5376 return seq_open(filp, &tracing_saved_tgids_seq_ops);
5377}
5378
5379
5380static const struct file_operations tracing_saved_tgids_fops = {
5381 .open = tracing_saved_tgids_open,
5382 .read = seq_read,
5383 .llseek = seq_lseek,
5384 .release = seq_release,
5385};
5386
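/*
 * seq_file iterators for "saved_cmdlines": walk map_cmdline_to_pid and
 * skip unused slots (-1 / NO_CMDLINE_MAP). start() takes trace_cmdline_lock
 * with preemption disabled and stop() drops it, so show() can safely call
 * __trace_find_cmdline() for each entry.
 */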
5387static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5388{
5389 unsigned int *ptr = v;
5390
5391 if (*pos || m->count)
5392 ptr++;
5393
5394 (*pos)++;
5395
5396 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5397 ptr++) {
5398 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5399 continue;
5400
5401 return ptr;
5402 }
5403
5404 return NULL;
5405}
5406
5407static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5408{
5409 void *v;
5410 loff_t l = 0;
5411
5412 preempt_disable();
5413 arch_spin_lock(&trace_cmdline_lock);
5414
5415 v = &savedcmd->map_cmdline_to_pid[0];
5416 while (l <= *pos) {
5417 v = saved_cmdlines_next(m, v, &l);
5418 if (!v)
5419 return NULL;
5420 }
5421
5422 return v;
5423}
5424
5425static void saved_cmdlines_stop(struct seq_file *m, void *v)
5426{
5427 arch_spin_unlock(&trace_cmdline_lock);
5428 preempt_enable();
5429}
5430
5431static int saved_cmdlines_show(struct seq_file *m, void *v)
5432{
5433 char buf[TASK_COMM_LEN];
5434 unsigned int *pid = v;
5435
5436 __trace_find_cmdline(*pid, buf);
5437 seq_printf(m, "%d %s\n", *pid, buf);
5438 return 0;
5439}
5440
5441static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5442 .start = saved_cmdlines_start,
5443 .next = saved_cmdlines_next,
5444 .stop = saved_cmdlines_stop,
5445 .show = saved_cmdlines_show,
5446};
5447
5448static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5449{
David Brazdil0f672f62019-12-10 10:32:29 +00005450 int ret;
5451
5452 ret = tracing_check_open_get_tr(NULL);
5453 if (ret)
5454 return ret;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005455
5456 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5457}
5458
5459static const struct file_operations tracing_saved_cmdlines_fops = {
5460 .open = tracing_saved_cmdlines_open,
5461 .read = seq_read,
5462 .llseek = seq_lseek,
5463 .release = seq_release,
5464};
5465
5466static ssize_t
5467tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5468 size_t cnt, loff_t *ppos)
5469{
5470 char buf[64];
5471 int r;
5472
5473 arch_spin_lock(&trace_cmdline_lock);
5474 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5475 arch_spin_unlock(&trace_cmdline_lock);
5476
5477 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5478}
5479
5480static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5481{
5482 kfree(s->saved_cmdlines);
5483 kfree(s->map_cmdline_to_pid);
5484 kfree(s);
5485}
5486
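/*
 * Resize by allocating a complete replacement buffer up front, so the
 * swap under trace_cmdline_lock cannot fail; the old buffer is freed
 * only after the lock has been dropped.
 */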
5487static int tracing_resize_saved_cmdlines(unsigned int val)
5488{
5489 struct saved_cmdlines_buffer *s, *savedcmd_temp;
5490
5491 s = kmalloc(sizeof(*s), GFP_KERNEL);
5492 if (!s)
5493 return -ENOMEM;
5494
5495 if (allocate_cmdlines_buffer(val, s) < 0) {
5496 kfree(s);
5497 return -ENOMEM;
5498 }
5499
5500 arch_spin_lock(&trace_cmdline_lock);
5501 savedcmd_temp = savedcmd;
5502 savedcmd = s;
5503 arch_spin_unlock(&trace_cmdline_lock);
5504 free_saved_cmdlines_buffer(savedcmd_temp);
5505
5506 return 0;
5507}
5508
5509static ssize_t
5510tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5511 size_t cnt, loff_t *ppos)
5512{
5513 unsigned long val;
5514 int ret;
5515
5516 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5517 if (ret)
5518 return ret;
5519
   5520	 /* must have at least 1 entry and at most PID_MAX_DEFAULT */
5521 if (!val || val > PID_MAX_DEFAULT)
5522 return -EINVAL;
5523
5524 ret = tracing_resize_saved_cmdlines((unsigned int)val);
5525 if (ret < 0)
5526 return ret;
5527
5528 *ppos += cnt;
5529
5530 return cnt;
5531}
5532
5533static const struct file_operations tracing_saved_cmdlines_size_fops = {
5534 .open = tracing_open_generic,
5535 .read = tracing_saved_cmdlines_size_read,
5536 .write = tracing_saved_cmdlines_size_write,
5537};
5538
5539#ifdef CONFIG_TRACE_EVAL_MAP_FILE
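/*
 * Helper for the eval_map seq_file iterators: when @ptr has landed on a
 * block's tail item (no eval_string), follow tail.next to the next block
 * and step past that block's head item, or return NULL at the end of the
 * chain.
 */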
5540static union trace_eval_map_item *
5541update_eval_map(union trace_eval_map_item *ptr)
5542{
5543 if (!ptr->map.eval_string) {
5544 if (ptr->tail.next) {
5545 ptr = ptr->tail.next;
5546 /* Set ptr to the next real item (skip head) */
5547 ptr++;
5548 } else
5549 return NULL;
5550 }
5551 return ptr;
5552}
5553
5554static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5555{
5556 union trace_eval_map_item *ptr = v;
5557
5558 /*
5559 * Paranoid! If ptr points to end, we don't want to increment past it.
5560 * This really should never happen.
5561 */
Olivier Deprez157378f2022-04-04 15:47:50 +02005562 (*pos)++;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005563 ptr = update_eval_map(ptr);
5564 if (WARN_ON_ONCE(!ptr))
5565 return NULL;
5566
5567 ptr++;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005568 ptr = update_eval_map(ptr);
5569
5570 return ptr;
5571}
5572
5573static void *eval_map_start(struct seq_file *m, loff_t *pos)
5574{
5575 union trace_eval_map_item *v;
5576 loff_t l = 0;
5577
5578 mutex_lock(&trace_eval_mutex);
5579
5580 v = trace_eval_maps;
5581 if (v)
5582 v++;
5583
5584 while (v && l < *pos) {
5585 v = eval_map_next(m, v, &l);
5586 }
5587
5588 return v;
5589}
5590
5591static void eval_map_stop(struct seq_file *m, void *v)
5592{
5593 mutex_unlock(&trace_eval_mutex);
5594}
5595
5596static int eval_map_show(struct seq_file *m, void *v)
5597{
5598 union trace_eval_map_item *ptr = v;
5599
5600 seq_printf(m, "%s %ld (%s)\n",
5601 ptr->map.eval_string, ptr->map.eval_value,
5602 ptr->map.system);
5603
5604 return 0;
5605}
5606
5607static const struct seq_operations tracing_eval_map_seq_ops = {
5608 .start = eval_map_start,
5609 .next = eval_map_next,
5610 .stop = eval_map_stop,
5611 .show = eval_map_show,
5612};
5613
5614static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5615{
David Brazdil0f672f62019-12-10 10:32:29 +00005616 int ret;
5617
5618 ret = tracing_check_open_get_tr(NULL);
5619 if (ret)
5620 return ret;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005621
5622 return seq_open(filp, &tracing_eval_map_seq_ops);
5623}
5624
5625static const struct file_operations tracing_eval_map_fops = {
5626 .open = tracing_eval_map_open,
5627 .read = seq_read,
5628 .llseek = seq_lseek,
5629 .release = seq_release,
5630};
5631
5632static inline union trace_eval_map_item *
5633trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5634{
5635 /* Return tail of array given the head */
5636 return ptr + ptr->head.length + 1;
5637}
5638
5639static void
5640trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5641 int len)
5642{
5643 struct trace_eval_map **stop;
5644 struct trace_eval_map **map;
5645 union trace_eval_map_item *map_array;
5646 union trace_eval_map_item *ptr;
5647
5648 stop = start + len;
5649
5650 /*
5651 * The trace_eval_maps contains the map plus a head and tail item,
5652 * where the head holds the module and length of array, and the
5653 * tail holds a pointer to the next list.
5654 */
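	/*
	 * Layout of one block:
	 *   [ head | map[0] ... map[len - 1] | tail ]
	 * The zeroed tail entry terminates the block; tail.next chains to
	 * the next block, if any.
	 */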
5655 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5656 if (!map_array) {
5657 pr_warn("Unable to allocate trace eval mapping\n");
5658 return;
5659 }
5660
5661 mutex_lock(&trace_eval_mutex);
5662
5663 if (!trace_eval_maps)
5664 trace_eval_maps = map_array;
5665 else {
5666 ptr = trace_eval_maps;
5667 for (;;) {
5668 ptr = trace_eval_jmp_to_tail(ptr);
5669 if (!ptr->tail.next)
5670 break;
5671 ptr = ptr->tail.next;
5672
5673 }
5674 ptr->tail.next = map_array;
5675 }
5676 map_array->head.mod = mod;
5677 map_array->head.length = len;
5678 map_array++;
5679
5680 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5681 map_array->map = **map;
5682 map_array++;
5683 }
5684 memset(map_array, 0, sizeof(*map_array));
5685
5686 mutex_unlock(&trace_eval_mutex);
5687}
5688
5689static void trace_create_eval_file(struct dentry *d_tracer)
5690{
5691 trace_create_file("eval_map", 0444, d_tracer,
5692 NULL, &tracing_eval_map_fops);
5693}
5694
5695#else /* CONFIG_TRACE_EVAL_MAP_FILE */
5696static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5697static inline void trace_insert_eval_map_file(struct module *mod,
5698 struct trace_eval_map **start, int len) { }
5699#endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5700
5701static void trace_insert_eval_map(struct module *mod,
5702 struct trace_eval_map **start, int len)
5703{
5704 struct trace_eval_map **map;
5705
5706 if (len <= 0)
5707 return;
5708
5709 map = start;
5710
5711 trace_event_eval_update(map, len);
5712
5713 trace_insert_eval_map_file(mod, start, len);
5714}
5715
5716static ssize_t
5717tracing_set_trace_read(struct file *filp, char __user *ubuf,
5718 size_t cnt, loff_t *ppos)
5719{
5720 struct trace_array *tr = filp->private_data;
5721 char buf[MAX_TRACER_SIZE+2];
5722 int r;
5723
5724 mutex_lock(&trace_types_lock);
5725 r = sprintf(buf, "%s\n", tr->current_trace->name);
5726 mutex_unlock(&trace_types_lock);
5727
5728 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5729}
5730
5731int tracer_init(struct tracer *t, struct trace_array *tr)
5732{
Olivier Deprez157378f2022-04-04 15:47:50 +02005733 tracing_reset_online_cpus(&tr->array_buffer);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005734 return t->init(tr);
5735}
5736
Olivier Deprez157378f2022-04-04 15:47:50 +02005737static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005738{
5739 int cpu;
5740
5741 for_each_tracing_cpu(cpu)
5742 per_cpu_ptr(buf->data, cpu)->entries = val;
5743}
5744
5745#ifdef CONFIG_TRACER_MAX_TRACE
   5746	/* resize @trace_buf's per-cpu entries to match @size_buf's entries */
Olivier Deprez157378f2022-04-04 15:47:50 +02005747static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5748 struct array_buffer *size_buf, int cpu_id)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005749{
5750 int cpu, ret = 0;
5751
5752 if (cpu_id == RING_BUFFER_ALL_CPUS) {
5753 for_each_tracing_cpu(cpu) {
5754 ret = ring_buffer_resize(trace_buf->buffer,
5755 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5756 if (ret < 0)
5757 break;
5758 per_cpu_ptr(trace_buf->data, cpu)->entries =
5759 per_cpu_ptr(size_buf->data, cpu)->entries;
5760 }
5761 } else {
5762 ret = ring_buffer_resize(trace_buf->buffer,
5763 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5764 if (ret == 0)
5765 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5766 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5767 }
5768
5769 return ret;
5770}
5771#endif /* CONFIG_TRACER_MAX_TRACE */
5772
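/*
 * Resize the main array_buffer for one CPU (or all CPUs) and, when the
 * current tracer uses it, keep the max/snapshot buffer the same size.
 * If resizing the max buffer fails, try to restore the main buffer to
 * its previous size; if that also fails, disable tracing entirely.
 */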
5773static int __tracing_resize_ring_buffer(struct trace_array *tr,
5774 unsigned long size, int cpu)
5775{
5776 int ret;
5777
5778 /*
5779 * If kernel or user changes the size of the ring buffer
5780 * we use the size that was given, and we can forget about
5781 * expanding it later.
5782 */
5783 ring_buffer_expanded = true;
5784
5785 /* May be called before buffers are initialized */
Olivier Deprez157378f2022-04-04 15:47:50 +02005786 if (!tr->array_buffer.buffer)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005787 return 0;
5788
Olivier Deprez157378f2022-04-04 15:47:50 +02005789 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005790 if (ret < 0)
5791 return ret;
5792
5793#ifdef CONFIG_TRACER_MAX_TRACE
5794 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5795 !tr->current_trace->use_max_tr)
5796 goto out;
5797
5798 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5799 if (ret < 0) {
Olivier Deprez157378f2022-04-04 15:47:50 +02005800 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5801 &tr->array_buffer, cpu);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005802 if (r < 0) {
5803 /*
5804 * AARGH! We are left with different
5805 * size max buffer!!!!
5806 * The max buffer is our "snapshot" buffer.
5807 * When a tracer needs a snapshot (one of the
5808 * latency tracers), it swaps the max buffer
   5809	 * with the saved snapshot. We succeeded in updating
   5810	 * the size of the main buffer, but failed to
5811 * update the size of the max buffer. But when we tried
5812 * to reset the main buffer to the original size, we
5813 * failed there too. This is very unlikely to
5814 * happen, but if it does, warn and kill all
5815 * tracing.
5816 */
5817 WARN_ON(1);
5818 tracing_disabled = 1;
5819 }
5820 return ret;
5821 }
5822
5823 if (cpu == RING_BUFFER_ALL_CPUS)
5824 set_buffer_entries(&tr->max_buffer, size);
5825 else
5826 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5827
5828 out:
5829#endif /* CONFIG_TRACER_MAX_TRACE */
5830
5831 if (cpu == RING_BUFFER_ALL_CPUS)
Olivier Deprez157378f2022-04-04 15:47:50 +02005832 set_buffer_entries(&tr->array_buffer, size);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005833 else
Olivier Deprez157378f2022-04-04 15:47:50 +02005834 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005835
5836 return ret;
5837}
5838
Olivier Deprez157378f2022-04-04 15:47:50 +02005839ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5840 unsigned long size, int cpu_id)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005841{
5842 int ret = size;
5843
5844 mutex_lock(&trace_types_lock);
5845
5846 if (cpu_id != RING_BUFFER_ALL_CPUS) {
5847 /* make sure, this cpu is enabled in the mask */
5848 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5849 ret = -EINVAL;
5850 goto out;
5851 }
5852 }
5853
5854 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5855 if (ret < 0)
5856 ret = -ENOMEM;
5857
5858out:
5859 mutex_unlock(&trace_types_lock);
5860
5861 return ret;
5862}
5863
5864
5865/**
5866 * tracing_update_buffers - used by tracing facility to expand ring buffers
5867 *
   5868	 * To save memory when tracing is never used on a system that has it
   5869	 * configured in, the ring buffers are set to a minimum size. Once a
   5870	 * user starts to use the tracing facility, they need to grow
   5871	 * to their default size.
5872 *
5873 * This function is to be called when a tracer is about to be used.
5874 */
5875int tracing_update_buffers(void)
5876{
5877 int ret = 0;
5878
5879 mutex_lock(&trace_types_lock);
5880 if (!ring_buffer_expanded)
5881 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5882 RING_BUFFER_ALL_CPUS);
5883 mutex_unlock(&trace_types_lock);
5884
5885 return ret;
5886}
5887
5888struct trace_option_dentry;
5889
5890static void
5891create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5892
5893/*
5894 * Used to clear out the tracer before deletion of an instance.
5895 * Must have trace_types_lock held.
5896 */
5897static void tracing_set_nop(struct trace_array *tr)
5898{
5899 if (tr->current_trace == &nop_trace)
5900 return;
5901
5902 tr->current_trace->enabled--;
5903
5904 if (tr->current_trace->reset)
5905 tr->current_trace->reset(tr);
5906
5907 tr->current_trace = &nop_trace;
5908}
5909
5910static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5911{
5912 /* Only enable if the directory has been created already. */
5913 if (!tr->dir)
5914 return;
5915
5916 create_trace_option_files(tr, t);
5917}
5918
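/*
 * Switch @tr to the tracer named @buf: expand the ring buffer if it is
 * still at its boot-time minimum, look the tracer up in trace_types,
 * tear down the old tracer while running nop_trace, adjust the snapshot
 * buffer if the new tracer needs it, then call the new tracer's init().
 */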
Olivier Deprez157378f2022-04-04 15:47:50 +02005919int tracing_set_tracer(struct trace_array *tr, const char *buf)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005920{
5921 struct tracer *t;
5922#ifdef CONFIG_TRACER_MAX_TRACE
5923 bool had_max_tr;
5924#endif
5925 int ret = 0;
5926
5927 mutex_lock(&trace_types_lock);
5928
5929 if (!ring_buffer_expanded) {
5930 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5931 RING_BUFFER_ALL_CPUS);
5932 if (ret < 0)
5933 goto out;
5934 ret = 0;
5935 }
5936
5937 for (t = trace_types; t; t = t->next) {
5938 if (strcmp(t->name, buf) == 0)
5939 break;
5940 }
5941 if (!t) {
5942 ret = -EINVAL;
5943 goto out;
5944 }
5945 if (t == tr->current_trace)
5946 goto out;
5947
David Brazdil0f672f62019-12-10 10:32:29 +00005948#ifdef CONFIG_TRACER_SNAPSHOT
5949 if (t->use_max_tr) {
5950 arch_spin_lock(&tr->max_lock);
5951 if (tr->cond_snapshot)
5952 ret = -EBUSY;
5953 arch_spin_unlock(&tr->max_lock);
5954 if (ret)
5955 goto out;
5956 }
5957#endif
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005958 /* Some tracers won't work on kernel command line */
5959 if (system_state < SYSTEM_RUNNING && t->noboot) {
5960 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5961 t->name);
5962 goto out;
5963 }
5964
5965 /* Some tracers are only allowed for the top level buffer */
5966 if (!trace_ok_for_array(t, tr)) {
5967 ret = -EINVAL;
5968 goto out;
5969 }
5970
5971 /* If trace pipe files are being read, we can't change the tracer */
Olivier Deprez0e641232021-09-23 10:07:05 +02005972 if (tr->trace_ref) {
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005973 ret = -EBUSY;
5974 goto out;
5975 }
5976
5977 trace_branch_disable();
5978
5979 tr->current_trace->enabled--;
5980
5981 if (tr->current_trace->reset)
5982 tr->current_trace->reset(tr);
5983
David Brazdil0f672f62019-12-10 10:32:29 +00005984 /* Current trace needs to be nop_trace before synchronize_rcu */
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005985 tr->current_trace = &nop_trace;
5986
5987#ifdef CONFIG_TRACER_MAX_TRACE
5988 had_max_tr = tr->allocated_snapshot;
5989
5990 if (had_max_tr && !t->use_max_tr) {
5991 /*
5992 * We need to make sure that the update_max_tr sees that
5993 * current_trace changed to nop_trace to keep it from
5994 * swapping the buffers after we resize it.
5995 * The update_max_tr is called from interrupts disabled
   5996	 * update_max_tr() is called with interrupts disabled,
   5997	 * so a synchronize_rcu() is sufficient.
David Brazdil0f672f62019-12-10 10:32:29 +00005998 synchronize_rcu();
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00005999 free_snapshot(tr);
6000 }
6001#endif
6002
6003#ifdef CONFIG_TRACER_MAX_TRACE
6004 if (t->use_max_tr && !had_max_tr) {
6005 ret = tracing_alloc_snapshot_instance(tr);
6006 if (ret < 0)
6007 goto out;
6008 }
6009#endif
6010
6011 if (t->init) {
6012 ret = tracer_init(t, tr);
6013 if (ret)
6014 goto out;
6015 }
6016
6017 tr->current_trace = t;
6018 tr->current_trace->enabled++;
6019 trace_branch_enable(tr);
6020 out:
6021 mutex_unlock(&trace_types_lock);
6022
6023 return ret;
6024}
6025
6026static ssize_t
6027tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6028 size_t cnt, loff_t *ppos)
6029{
6030 struct trace_array *tr = filp->private_data;
6031 char buf[MAX_TRACER_SIZE+1];
6032 int i;
6033 size_t ret;
6034 int err;
6035
6036 ret = cnt;
6037
6038 if (cnt > MAX_TRACER_SIZE)
6039 cnt = MAX_TRACER_SIZE;
6040
6041 if (copy_from_user(buf, ubuf, cnt))
6042 return -EFAULT;
6043
6044 buf[cnt] = 0;
6045
6046 /* strip ending whitespace. */
6047 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6048 buf[i] = 0;
6049
6050 err = tracing_set_tracer(tr, buf);
6051 if (err)
6052 return err;
6053
6054 *ppos += ret;
6055
6056 return ret;
6057}
6058
6059static ssize_t
6060tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6061 size_t cnt, loff_t *ppos)
6062{
6063 char buf[64];
6064 int r;
6065
6066 r = snprintf(buf, sizeof(buf), "%ld\n",
6067 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6068 if (r > sizeof(buf))
6069 r = sizeof(buf);
6070 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6071}
6072
6073static ssize_t
6074tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6075 size_t cnt, loff_t *ppos)
6076{
6077 unsigned long val;
6078 int ret;
6079
6080 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6081 if (ret)
6082 return ret;
6083
6084 *ptr = val * 1000;
6085
6086 return cnt;
6087}
6088
6089static ssize_t
6090tracing_thresh_read(struct file *filp, char __user *ubuf,
6091 size_t cnt, loff_t *ppos)
6092{
6093 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6094}
6095
6096static ssize_t
6097tracing_thresh_write(struct file *filp, const char __user *ubuf,
6098 size_t cnt, loff_t *ppos)
6099{
6100 struct trace_array *tr = filp->private_data;
6101 int ret;
6102
6103 mutex_lock(&trace_types_lock);
6104 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6105 if (ret < 0)
6106 goto out;
6107
6108 if (tr->current_trace->update_thresh) {
6109 ret = tr->current_trace->update_thresh(tr);
6110 if (ret < 0)
6111 goto out;
6112 }
6113
6114 ret = cnt;
6115out:
6116 mutex_unlock(&trace_types_lock);
6117
6118 return ret;
6119}
6120
6121#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6122
6123static ssize_t
6124tracing_max_lat_read(struct file *filp, char __user *ubuf,
6125 size_t cnt, loff_t *ppos)
6126{
6127 return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6128}
6129
6130static ssize_t
6131tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6132 size_t cnt, loff_t *ppos)
6133{
6134 return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6135}
6136
6137#endif
6138
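/*
 * Open "trace_pipe": allocate a consuming iterator, mark every CPU as
 * already started (the pipe never prints buffer-start headers), and bump
 * tr->trace_ref so the current tracer cannot be changed while the pipe
 * is being read.
 */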
6139static int tracing_open_pipe(struct inode *inode, struct file *filp)
6140{
6141 struct trace_array *tr = inode->i_private;
6142 struct trace_iterator *iter;
David Brazdil0f672f62019-12-10 10:32:29 +00006143 int ret;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006144
David Brazdil0f672f62019-12-10 10:32:29 +00006145 ret = tracing_check_open_get_tr(tr);
6146 if (ret)
6147 return ret;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006148
6149 mutex_lock(&trace_types_lock);
6150
6151 /* create a buffer to store the information to pass to userspace */
6152 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6153 if (!iter) {
6154 ret = -ENOMEM;
6155 __trace_array_put(tr);
6156 goto out;
6157 }
6158
6159 trace_seq_init(&iter->seq);
6160 iter->trace = tr->current_trace;
6161
6162 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6163 ret = -ENOMEM;
6164 goto fail;
6165 }
6166
6167 /* trace pipe does not show start of buffer */
6168 cpumask_setall(iter->started);
6169
6170 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6171 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6172
6173 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6174 if (trace_clocks[tr->clock_id].in_ns)
6175 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6176
6177 iter->tr = tr;
Olivier Deprez157378f2022-04-04 15:47:50 +02006178 iter->array_buffer = &tr->array_buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006179 iter->cpu_file = tracing_get_cpu(inode);
6180 mutex_init(&iter->mutex);
6181 filp->private_data = iter;
6182
6183 if (iter->trace->pipe_open)
6184 iter->trace->pipe_open(iter);
6185
6186 nonseekable_open(inode, filp);
6187
Olivier Deprez0e641232021-09-23 10:07:05 +02006188 tr->trace_ref++;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006189out:
6190 mutex_unlock(&trace_types_lock);
6191 return ret;
6192
6193fail:
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006194 kfree(iter);
6195 __trace_array_put(tr);
6196 mutex_unlock(&trace_types_lock);
6197 return ret;
6198}
6199
6200static int tracing_release_pipe(struct inode *inode, struct file *file)
6201{
6202 struct trace_iterator *iter = file->private_data;
6203 struct trace_array *tr = inode->i_private;
6204
6205 mutex_lock(&trace_types_lock);
6206
Olivier Deprez0e641232021-09-23 10:07:05 +02006207 tr->trace_ref--;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006208
6209 if (iter->trace->pipe_close)
6210 iter->trace->pipe_close(iter);
6211
6212 mutex_unlock(&trace_types_lock);
6213
6214 free_cpumask_var(iter->started);
6215 mutex_destroy(&iter->mutex);
6216 kfree(iter);
6217
6218 trace_array_put(tr);
6219
6220 return 0;
6221}
6222
6223static __poll_t
6224trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6225{
6226 struct trace_array *tr = iter->tr;
6227
6228 /* Iterators are static, they should be filled or empty */
6229 if (trace_buffer_iter(iter, iter->cpu_file))
6230 return EPOLLIN | EPOLLRDNORM;
6231
6232 if (tr->trace_flags & TRACE_ITER_BLOCK)
6233 /*
6234 * Always select as readable when in blocking mode
6235 */
6236 return EPOLLIN | EPOLLRDNORM;
6237 else
Olivier Deprez157378f2022-04-04 15:47:50 +02006238 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006239 filp, poll_table);
6240}
6241
6242static __poll_t
6243tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6244{
6245 struct trace_iterator *iter = filp->private_data;
6246
6247 return trace_poll(iter, filp, poll_table);
6248}
6249
6250/* Must be called with iter->mutex held. */
6251static int tracing_wait_pipe(struct file *filp)
6252{
6253 struct trace_iterator *iter = filp->private_data;
6254 int ret;
6255
6256 while (trace_empty(iter)) {
6257
6258 if ((filp->f_flags & O_NONBLOCK)) {
6259 return -EAGAIN;
6260 }
6261
6262 /*
   6263	 * We block until we have read something and tracing is disabled.
   6264	 * We keep blocking if tracing is disabled but we have never
6265 * read anything. This allows a user to cat this file, and
6266 * then enable tracing. But after we have read something,
6267 * we give an EOF when tracing is again disabled.
6268 *
6269 * iter->pos will be 0 if we haven't read anything.
6270 */
6271 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6272 break;
6273
6274 mutex_unlock(&iter->mutex);
6275
David Brazdil0f672f62019-12-10 10:32:29 +00006276 ret = wait_on_pipe(iter, 0);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006277
6278 mutex_lock(&iter->mutex);
6279
6280 if (ret)
6281 return ret;
6282 }
6283
6284 return 1;
6285}
6286
6287/*
6288 * Consumer reader.
6289 */
6290static ssize_t
6291tracing_read_pipe(struct file *filp, char __user *ubuf,
6292 size_t cnt, loff_t *ppos)
6293{
6294 struct trace_iterator *iter = filp->private_data;
6295 ssize_t sret;
6296
6297 /*
6298 * Avoid more than one consumer on a single file descriptor
6299 * This is just a matter of traces coherency, the ring buffer itself
6300 * is protected.
6301 */
6302 mutex_lock(&iter->mutex);
6303
6304 /* return any leftover data */
6305 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6306 if (sret != -EBUSY)
6307 goto out;
6308
6309 trace_seq_init(&iter->seq);
6310
6311 if (iter->trace->read) {
6312 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6313 if (sret)
6314 goto out;
6315 }
6316
6317waitagain:
6318 sret = tracing_wait_pipe(filp);
6319 if (sret <= 0)
6320 goto out;
6321
6322 /* stop when tracing is finished */
6323 if (trace_empty(iter)) {
6324 sret = 0;
6325 goto out;
6326 }
6327
6328 if (cnt >= PAGE_SIZE)
6329 cnt = PAGE_SIZE - 1;
6330
6331 /* reset all but tr, trace, and overruns */
6332 memset(&iter->seq, 0,
6333 sizeof(struct trace_iterator) -
6334 offsetof(struct trace_iterator, seq));
6335 cpumask_clear(iter->started);
David Brazdil0f672f62019-12-10 10:32:29 +00006336 trace_seq_init(&iter->seq);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006337 iter->pos = -1;
6338
6339 trace_event_read_lock();
6340 trace_access_lock(iter->cpu_file);
6341 while (trace_find_next_entry_inc(iter) != NULL) {
6342 enum print_line_t ret;
6343 int save_len = iter->seq.seq.len;
6344
6345 ret = print_trace_line(iter);
6346 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6347 /* don't print partial lines */
6348 iter->seq.seq.len = save_len;
6349 break;
6350 }
6351 if (ret != TRACE_TYPE_NO_CONSUME)
6352 trace_consume(iter);
6353
6354 if (trace_seq_used(&iter->seq) >= cnt)
6355 break;
6356
6357 /*
6358 * Setting the full flag means we reached the trace_seq buffer
6359 * size and we should leave by partial output condition above.
6360 * One of the trace_seq_* functions is not used properly.
6361 */
6362 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6363 iter->ent->type);
6364 }
6365 trace_access_unlock(iter->cpu_file);
6366 trace_event_read_unlock();
6367
6368 /* Now copy what we have to the user */
6369 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6370 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6371 trace_seq_init(&iter->seq);
6372
6373 /*
6374 * If there was nothing to send to user, in spite of consuming trace
6375 * entries, go back to wait for more entries.
6376 */
6377 if (sret == -EBUSY)
6378 goto waitagain;
6379
6380out:
6381 mutex_unlock(&iter->mutex);
6382
6383 return sret;
6384}
6385
6386static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6387 unsigned int idx)
6388{
6389 __free_page(spd->pages[idx]);
6390}
6391
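/*
 * Fill iter->seq with up to @rem bytes of formatted trace output (at most
 * one page), consuming entries as they are printed. Returns how many of
 * the requested bytes are still left to produce.
 */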
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006392static size_t
6393tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6394{
6395 size_t count;
6396 int save_len;
6397 int ret;
6398
6399 /* Seq buffer is page-sized, exactly what we need. */
6400 for (;;) {
6401 save_len = iter->seq.seq.len;
6402 ret = print_trace_line(iter);
6403
6404 if (trace_seq_has_overflowed(&iter->seq)) {
6405 iter->seq.seq.len = save_len;
6406 break;
6407 }
6408
6409 /*
6410 * This should not be hit, because it should only
6411 * be set if the iter->seq overflowed. But check it
6412 * anyway to be safe.
6413 */
6414 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6415 iter->seq.seq.len = save_len;
6416 break;
6417 }
6418
6419 count = trace_seq_used(&iter->seq) - save_len;
6420 if (rem < count) {
6421 rem = 0;
6422 iter->seq.seq.len = save_len;
6423 break;
6424 }
6425
6426 if (ret != TRACE_TYPE_NO_CONSUME)
6427 trace_consume(iter);
6428 rem -= count;
6429 if (!trace_find_next_entry_inc(iter)) {
6430 rem = 0;
6431 iter->ent = NULL;
6432 break;
6433 }
6434 }
6435
6436 return rem;
6437}
6438
6439static ssize_t tracing_splice_read_pipe(struct file *filp,
6440 loff_t *ppos,
6441 struct pipe_inode_info *pipe,
6442 size_t len,
6443 unsigned int flags)
6444{
6445 struct page *pages_def[PIPE_DEF_BUFFERS];
6446 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6447 struct trace_iterator *iter = filp->private_data;
6448 struct splice_pipe_desc spd = {
6449 .pages = pages_def,
6450 .partial = partial_def,
6451 .nr_pages = 0, /* This gets updated below. */
6452 .nr_pages_max = PIPE_DEF_BUFFERS,
Olivier Deprez157378f2022-04-04 15:47:50 +02006453 .ops = &default_pipe_buf_ops,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006454 .spd_release = tracing_spd_release_pipe,
6455 };
6456 ssize_t ret;
6457 size_t rem;
6458 unsigned int i;
6459
6460 if (splice_grow_spd(pipe, &spd))
6461 return -ENOMEM;
6462
6463 mutex_lock(&iter->mutex);
6464
6465 if (iter->trace->splice_read) {
6466 ret = iter->trace->splice_read(iter, filp,
6467 ppos, pipe, len, flags);
6468 if (ret)
6469 goto out_err;
6470 }
6471
6472 ret = tracing_wait_pipe(filp);
6473 if (ret <= 0)
6474 goto out_err;
6475
6476 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6477 ret = -EFAULT;
6478 goto out_err;
6479 }
6480
6481 trace_event_read_lock();
6482 trace_access_lock(iter->cpu_file);
6483
6484 /* Fill as many pages as possible. */
6485 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6486 spd.pages[i] = alloc_page(GFP_KERNEL);
6487 if (!spd.pages[i])
6488 break;
6489
6490 rem = tracing_fill_pipe_page(rem, iter);
6491
6492 /* Copy the data into the page, so we can start over. */
6493 ret = trace_seq_to_buffer(&iter->seq,
6494 page_address(spd.pages[i]),
6495 trace_seq_used(&iter->seq));
6496 if (ret < 0) {
6497 __free_page(spd.pages[i]);
6498 break;
6499 }
6500 spd.partial[i].offset = 0;
6501 spd.partial[i].len = trace_seq_used(&iter->seq);
6502
6503 trace_seq_init(&iter->seq);
6504 }
6505
6506 trace_access_unlock(iter->cpu_file);
6507 trace_event_read_unlock();
6508 mutex_unlock(&iter->mutex);
6509
6510 spd.nr_pages = i;
6511
6512 if (i)
6513 ret = splice_to_pipe(pipe, &spd);
6514 else
6515 ret = 0;
6516out:
6517 splice_shrink_spd(&spd);
6518 return ret;
6519
6520out_err:
6521 mutex_unlock(&iter->mutex);
6522 goto out;
6523}
6524
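/*
 * "buffer_size_kb" read: for a per-CPU file report that CPU's size in KB;
 * for the all-CPUs file print a single value only if every CPU buffer is
 * the same size, otherwise print "X" (with the would-be expanded size
 * noted while the buffer is still at its boot-time minimum).
 */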
6525static ssize_t
6526tracing_entries_read(struct file *filp, char __user *ubuf,
6527 size_t cnt, loff_t *ppos)
6528{
6529 struct inode *inode = file_inode(filp);
6530 struct trace_array *tr = inode->i_private;
6531 int cpu = tracing_get_cpu(inode);
6532 char buf[64];
6533 int r = 0;
6534 ssize_t ret;
6535
6536 mutex_lock(&trace_types_lock);
6537
6538 if (cpu == RING_BUFFER_ALL_CPUS) {
6539 int cpu, buf_size_same;
6540 unsigned long size;
6541
6542 size = 0;
6543 buf_size_same = 1;
6544 /* check if all cpu sizes are same */
6545 for_each_tracing_cpu(cpu) {
6546 /* fill in the size from first enabled cpu */
6547 if (size == 0)
Olivier Deprez157378f2022-04-04 15:47:50 +02006548 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6549 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006550 buf_size_same = 0;
6551 break;
6552 }
6553 }
6554
6555 if (buf_size_same) {
6556 if (!ring_buffer_expanded)
6557 r = sprintf(buf, "%lu (expanded: %lu)\n",
6558 size >> 10,
6559 trace_buf_size >> 10);
6560 else
6561 r = sprintf(buf, "%lu\n", size >> 10);
6562 } else
6563 r = sprintf(buf, "X\n");
6564 } else
Olivier Deprez157378f2022-04-04 15:47:50 +02006565 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006566
6567 mutex_unlock(&trace_types_lock);
6568
6569 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6570 return ret;
6571}
6572
6573static ssize_t
6574tracing_entries_write(struct file *filp, const char __user *ubuf,
6575 size_t cnt, loff_t *ppos)
6576{
6577 struct inode *inode = file_inode(filp);
6578 struct trace_array *tr = inode->i_private;
6579 unsigned long val;
6580 int ret;
6581
6582 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6583 if (ret)
6584 return ret;
6585
6586 /* must have at least 1 entry */
6587 if (!val)
6588 return -EINVAL;
6589
6590 /* value is in KB */
6591 val <<= 10;
6592 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6593 if (ret < 0)
6594 return ret;
6595
6596 *ppos += cnt;
6597
6598 return cnt;
6599}
6600
6601static ssize_t
6602tracing_total_entries_read(struct file *filp, char __user *ubuf,
6603 size_t cnt, loff_t *ppos)
6604{
6605 struct trace_array *tr = filp->private_data;
6606 char buf[64];
6607 int r, cpu;
6608 unsigned long size = 0, expanded_size = 0;
6609
6610 mutex_lock(&trace_types_lock);
6611 for_each_tracing_cpu(cpu) {
Olivier Deprez157378f2022-04-04 15:47:50 +02006612 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006613 if (!ring_buffer_expanded)
6614 expanded_size += trace_buf_size >> 10;
6615 }
6616 if (ring_buffer_expanded)
6617 r = sprintf(buf, "%lu\n", size);
6618 else
6619 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6620 mutex_unlock(&trace_types_lock);
6621
6622 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6623}
6624
6625static ssize_t
6626tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6627 size_t cnt, loff_t *ppos)
6628{
6629 /*
   6630	 * There is no need to read what the user has written; this function
   6631	 * exists just so that an "echo" into this file does not report an error.
6632 */
6633
6634 *ppos += cnt;
6635
6636 return cnt;
6637}
6638
6639static int
6640tracing_free_buffer_release(struct inode *inode, struct file *filp)
6641{
6642 struct trace_array *tr = inode->i_private;
6643
6644 /* disable tracing ? */
6645 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6646 tracer_tracing_off(tr);
6647 /* resize the ring buffer to 0 */
6648 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6649
6650 trace_array_put(tr);
6651
6652 return 0;
6653}
6654
6655static ssize_t
6656tracing_mark_write(struct file *filp, const char __user *ubuf,
6657 size_t cnt, loff_t *fpos)
6658{
6659 struct trace_array *tr = filp->private_data;
6660 struct ring_buffer_event *event;
6661 enum event_trigger_type tt = ETT_NONE;
Olivier Deprez157378f2022-04-04 15:47:50 +02006662 struct trace_buffer *buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006663 struct print_entry *entry;
6664 unsigned long irq_flags;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006665 ssize_t written;
6666 int size;
6667 int len;
6668
6669/* Used in tracing_mark_raw_write() as well */
David Brazdil0f672f62019-12-10 10:32:29 +00006670#define FAULTED_STR "<faulted>"
6671#define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006672
6673 if (tracing_disabled)
6674 return -EINVAL;
6675
6676 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6677 return -EINVAL;
6678
6679 if (cnt > TRACE_BUF_SIZE)
6680 cnt = TRACE_BUF_SIZE;
6681
6682 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6683
6684 local_save_flags(irq_flags);
6685 size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6686
6687 /* If less than "<faulted>", then make sure we can still add that */
6688 if (cnt < FAULTED_SIZE)
6689 size += FAULTED_SIZE - cnt;
6690
Olivier Deprez157378f2022-04-04 15:47:50 +02006691 buffer = tr->array_buffer.buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006692 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6693 irq_flags, preempt_count());
6694 if (unlikely(!event))
6695 /* Ring buffer disabled, return as if not open for write */
6696 return -EBADF;
6697
6698 entry = ring_buffer_event_data(event);
6699 entry->ip = _THIS_IP_;
6700
6701 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6702 if (len) {
David Brazdil0f672f62019-12-10 10:32:29 +00006703 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006704 cnt = FAULTED_SIZE;
6705 written = -EFAULT;
6706 } else
6707 written = cnt;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006708
6709 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6710 /* do not add \n before testing triggers, but add \0 */
6711 entry->buf[cnt] = '\0';
6712 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6713 }
6714
6715 if (entry->buf[cnt - 1] != '\n') {
6716 entry->buf[cnt] = '\n';
6717 entry->buf[cnt + 1] = '\0';
6718 } else
6719 entry->buf[cnt] = '\0';
6720
Olivier Deprez157378f2022-04-04 15:47:50 +02006721 if (static_branch_unlikely(&trace_marker_exports_enabled))
6722 ftrace_exports(event, TRACE_EXPORT_MARKER);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006723 __buffer_unlock_commit(buffer, event);
6724
6725 if (tt)
6726 event_triggers_post_call(tr->trace_marker_file, tt);
6727
6728 if (written > 0)
6729 *fpos += written;
6730
6731 return written;
6732}
6733
6734/* Limit it for now to 3K (including tag) */
6735#define RAW_DATA_MAX_SIZE (1024*3)
6736
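/*
 * Raw trace_marker: the first sizeof(int) bytes of the write are taken as
 * a user-defined tag id (entry->id) and the remainder is stored verbatim
 * as binary payload in the ring buffer.
 */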
6737static ssize_t
6738tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6739 size_t cnt, loff_t *fpos)
6740{
6741 struct trace_array *tr = filp->private_data;
6742 struct ring_buffer_event *event;
Olivier Deprez157378f2022-04-04 15:47:50 +02006743 struct trace_buffer *buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006744 struct raw_data_entry *entry;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006745 unsigned long irq_flags;
6746 ssize_t written;
6747 int size;
6748 int len;
6749
6750#define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6751
6752 if (tracing_disabled)
6753 return -EINVAL;
6754
6755 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6756 return -EINVAL;
6757
6758 /* The marker must at least have a tag id */
6759 if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6760 return -EINVAL;
6761
6762 if (cnt > TRACE_BUF_SIZE)
6763 cnt = TRACE_BUF_SIZE;
6764
6765 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6766
6767 local_save_flags(irq_flags);
6768 size = sizeof(*entry) + cnt;
6769 if (cnt < FAULT_SIZE_ID)
6770 size += FAULT_SIZE_ID - cnt;
6771
Olivier Deprez157378f2022-04-04 15:47:50 +02006772 buffer = tr->array_buffer.buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006773 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6774 irq_flags, preempt_count());
6775 if (!event)
6776 /* Ring buffer disabled, return as if not open for write */
6777 return -EBADF;
6778
6779 entry = ring_buffer_event_data(event);
6780
6781 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6782 if (len) {
6783 entry->id = -1;
David Brazdil0f672f62019-12-10 10:32:29 +00006784 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006785 written = -EFAULT;
6786 } else
6787 written = cnt;
6788
6789 __buffer_unlock_commit(buffer, event);
6790
6791 if (written > 0)
6792 *fpos += written;
6793
6794 return written;
6795}
6796
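/*
 * "trace_clock" read: print every available clock on one line, with the
 * currently selected clock wrapped in square brackets.
 */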
6797static int tracing_clock_show(struct seq_file *m, void *v)
6798{
6799 struct trace_array *tr = m->private;
6800 int i;
6801
6802 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6803 seq_printf(m,
6804 "%s%s%s%s", i ? " " : "",
6805 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6806 i == tr->clock_id ? "]" : "");
6807 seq_putc(m, '\n');
6808
6809 return 0;
6810}
6811
6812int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6813{
6814 int i;
6815
6816 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6817 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6818 break;
6819 }
6820 if (i == ARRAY_SIZE(trace_clocks))
6821 return -EINVAL;
6822
6823 mutex_lock(&trace_types_lock);
6824
6825 tr->clock_id = i;
6826
Olivier Deprez157378f2022-04-04 15:47:50 +02006827 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006828
6829 /*
6830 * New clock may not be consistent with the previous clock.
6831 * Reset the buffer so that it doesn't have incomparable timestamps.
6832 */
Olivier Deprez157378f2022-04-04 15:47:50 +02006833 tracing_reset_online_cpus(&tr->array_buffer);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006834
6835#ifdef CONFIG_TRACER_MAX_TRACE
6836 if (tr->max_buffer.buffer)
6837 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6838 tracing_reset_online_cpus(&tr->max_buffer);
6839#endif
6840
6841 mutex_unlock(&trace_types_lock);
6842
6843 return 0;
6844}
6845
6846static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6847 size_t cnt, loff_t *fpos)
6848{
6849 struct seq_file *m = filp->private_data;
6850 struct trace_array *tr = m->private;
6851 char buf[64];
6852 const char *clockstr;
6853 int ret;
6854
6855 if (cnt >= sizeof(buf))
6856 return -EINVAL;
6857
6858 if (copy_from_user(buf, ubuf, cnt))
6859 return -EFAULT;
6860
6861 buf[cnt] = 0;
6862
6863 clockstr = strstrip(buf);
6864
6865 ret = tracing_set_clock(tr, clockstr);
6866 if (ret)
6867 return ret;
6868
6869 *fpos += cnt;
6870
6871 return cnt;
6872}
6873
6874static int tracing_clock_open(struct inode *inode, struct file *file)
6875{
6876 struct trace_array *tr = inode->i_private;
6877 int ret;
6878
David Brazdil0f672f62019-12-10 10:32:29 +00006879 ret = tracing_check_open_get_tr(tr);
6880 if (ret)
6881 return ret;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006882
6883 ret = single_open(file, tracing_clock_show, inode->i_private);
6884 if (ret < 0)
6885 trace_array_put(tr);
6886
6887 return ret;
6888}
6889
6890static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6891{
6892 struct trace_array *tr = m->private;
6893
6894 mutex_lock(&trace_types_lock);
6895
Olivier Deprez157378f2022-04-04 15:47:50 +02006896 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006897 seq_puts(m, "delta [absolute]\n");
6898 else
6899 seq_puts(m, "[delta] absolute\n");
6900
6901 mutex_unlock(&trace_types_lock);
6902
6903 return 0;
6904}
6905
6906static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6907{
6908 struct trace_array *tr = inode->i_private;
6909 int ret;
6910
David Brazdil0f672f62019-12-10 10:32:29 +00006911 ret = tracing_check_open_get_tr(tr);
6912 if (ret)
6913 return ret;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006914
6915 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6916 if (ret < 0)
6917 trace_array_put(tr);
6918
6919 return ret;
6920}
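/*
 * Enable/disable absolute timestamps for @tr's buffers. Requests are
 * reference counted: the ring buffer switches to absolute mode on the
 * first enable and back to delta mode only when the last user disables it.
 */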
6921
6922int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6923{
6924 int ret = 0;
6925
6926 mutex_lock(&trace_types_lock);
6927
6928 if (abs && tr->time_stamp_abs_ref++)
6929 goto out;
6930
6931 if (!abs) {
6932 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6933 ret = -EINVAL;
6934 goto out;
6935 }
6936
6937 if (--tr->time_stamp_abs_ref)
6938 goto out;
6939 }
6940
Olivier Deprez157378f2022-04-04 15:47:50 +02006941 ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006942
6943#ifdef CONFIG_TRACER_MAX_TRACE
6944 if (tr->max_buffer.buffer)
6945 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6946#endif
6947 out:
6948 mutex_unlock(&trace_types_lock);
6949
6950 return ret;
6951}
6952
6953struct ftrace_buffer_info {
6954 struct trace_iterator iter;
6955 void *spare;
6956 unsigned int spare_cpu;
6957 unsigned int read;
6958};
6959
6960#ifdef CONFIG_TRACER_SNAPSHOT
6961static int tracing_snapshot_open(struct inode *inode, struct file *file)
6962{
6963 struct trace_array *tr = inode->i_private;
6964 struct trace_iterator *iter;
6965 struct seq_file *m;
David Brazdil0f672f62019-12-10 10:32:29 +00006966 int ret;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006967
David Brazdil0f672f62019-12-10 10:32:29 +00006968 ret = tracing_check_open_get_tr(tr);
6969 if (ret)
6970 return ret;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006971
6972 if (file->f_mode & FMODE_READ) {
6973 iter = __tracing_open(inode, file, true);
6974 if (IS_ERR(iter))
6975 ret = PTR_ERR(iter);
6976 } else {
6977 /* Writes still need the seq_file to hold the private data */
6978 ret = -ENOMEM;
6979 m = kzalloc(sizeof(*m), GFP_KERNEL);
6980 if (!m)
6981 goto out;
6982 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6983 if (!iter) {
6984 kfree(m);
6985 goto out;
6986 }
6987 ret = 0;
6988
6989 iter->tr = tr;
Olivier Deprez157378f2022-04-04 15:47:50 +02006990 iter->array_buffer = &tr->max_buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00006991 iter->cpu_file = tracing_get_cpu(inode);
6992 m->private = iter;
6993 file->private_data = m;
6994 }
6995out:
6996 if (ret < 0)
6997 trace_array_put(tr);
6998
6999 return ret;
7000}
7001
7002static ssize_t
7003tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7004 loff_t *ppos)
7005{
7006 struct seq_file *m = filp->private_data;
7007 struct trace_iterator *iter = m->private;
7008 struct trace_array *tr = iter->tr;
7009 unsigned long val;
7010 int ret;
7011
7012 ret = tracing_update_buffers();
7013 if (ret < 0)
7014 return ret;
7015
7016 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7017 if (ret)
7018 return ret;
7019
7020 mutex_lock(&trace_types_lock);
7021
7022 if (tr->current_trace->use_max_tr) {
7023 ret = -EBUSY;
7024 goto out;
7025 }
7026
David Brazdil0f672f62019-12-10 10:32:29 +00007027 arch_spin_lock(&tr->max_lock);
7028 if (tr->cond_snapshot)
7029 ret = -EBUSY;
7030 arch_spin_unlock(&tr->max_lock);
7031 if (ret)
7032 goto out;
7033
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007034 switch (val) {
7035 case 0:
7036 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7037 ret = -EINVAL;
7038 break;
7039 }
7040 if (tr->allocated_snapshot)
7041 free_snapshot(tr);
7042 break;
7043 case 1:
7044/* Only allow per-cpu swap if the ring buffer supports it */
7045#ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7046 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7047 ret = -EINVAL;
7048 break;
7049 }
7050#endif
David Brazdil0f672f62019-12-10 10:32:29 +00007051 if (tr->allocated_snapshot)
7052 ret = resize_buffer_duplicate_size(&tr->max_buffer,
Olivier Deprez157378f2022-04-04 15:47:50 +02007053 &tr->array_buffer, iter->cpu_file);
David Brazdil0f672f62019-12-10 10:32:29 +00007054 else
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007055 ret = tracing_alloc_snapshot_instance(tr);
David Brazdil0f672f62019-12-10 10:32:29 +00007056 if (ret < 0)
7057 break;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007058 local_irq_disable();
7059 /* Now, we're going to swap */
7060 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
David Brazdil0f672f62019-12-10 10:32:29 +00007061 update_max_tr(tr, current, smp_processor_id(), NULL);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007062 else
7063 update_max_tr_single(tr, current, iter->cpu_file);
7064 local_irq_enable();
7065 break;
7066 default:
7067 if (tr->allocated_snapshot) {
7068 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7069 tracing_reset_online_cpus(&tr->max_buffer);
7070 else
David Brazdil0f672f62019-12-10 10:32:29 +00007071 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007072 }
7073 break;
7074 }
7075
7076 if (ret >= 0) {
7077 *ppos += cnt;
7078 ret = cnt;
7079 }
7080out:
7081 mutex_unlock(&trace_types_lock);
7082 return ret;
7083}
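
/*
 * Illustrative note: the write handler above gives the snapshot file its
 * usual semantics, roughly
 *
 *      echo 0 > snapshot   # free the snapshot buffer
 *      echo 1 > snapshot   # allocate if needed and take a snapshot
 *      echo 2 > snapshot   # any other value clears the snapshot contents
 *
 * with the per-cpu snapshot files accepting 1 only when per-cpu swap is
 * supported, as checked above.
 */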
7084
7085static int tracing_snapshot_release(struct inode *inode, struct file *file)
7086{
7087 struct seq_file *m = file->private_data;
7088 int ret;
7089
7090 ret = tracing_release(inode, file);
7091
7092 if (file->f_mode & FMODE_READ)
7093 return ret;
7094
7095 /* If write only, the seq_file is just a stub */
7096 if (m)
7097 kfree(m->private);
7098 kfree(m);
7099
7100 return 0;
7101}
7102
7103static int tracing_buffers_open(struct inode *inode, struct file *filp);
7104static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7105 size_t count, loff_t *ppos);
7106static int tracing_buffers_release(struct inode *inode, struct file *file);
7107static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7108 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7109
7110static int snapshot_raw_open(struct inode *inode, struct file *filp)
7111{
7112 struct ftrace_buffer_info *info;
7113 int ret;
7114
David Brazdil0f672f62019-12-10 10:32:29 +00007115 /* The following checks for tracefs lockdown */
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007116 ret = tracing_buffers_open(inode, filp);
7117 if (ret < 0)
7118 return ret;
7119
7120 info = filp->private_data;
7121
7122 if (info->iter.trace->use_max_tr) {
7123 tracing_buffers_release(inode, filp);
7124 return -EBUSY;
7125 }
7126
7127 info->iter.snapshot = true;
Olivier Deprez157378f2022-04-04 15:47:50 +02007128 info->iter.array_buffer = &info->iter.tr->max_buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007129
7130 return ret;
7131}
7132
7133#endif /* CONFIG_TRACER_SNAPSHOT */
7134
7135
7136static const struct file_operations tracing_thresh_fops = {
7137 .open = tracing_open_generic,
7138 .read = tracing_thresh_read,
7139 .write = tracing_thresh_write,
7140 .llseek = generic_file_llseek,
7141};
7142
7143#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7144static const struct file_operations tracing_max_lat_fops = {
7145 .open = tracing_open_generic,
7146 .read = tracing_max_lat_read,
7147 .write = tracing_max_lat_write,
7148 .llseek = generic_file_llseek,
7149};
7150#endif
7151
7152static const struct file_operations set_tracer_fops = {
7153 .open = tracing_open_generic,
7154 .read = tracing_set_trace_read,
7155 .write = tracing_set_trace_write,
7156 .llseek = generic_file_llseek,
7157};
7158
7159static const struct file_operations tracing_pipe_fops = {
7160 .open = tracing_open_pipe,
7161 .poll = tracing_poll_pipe,
7162 .read = tracing_read_pipe,
7163 .splice_read = tracing_splice_read_pipe,
7164 .release = tracing_release_pipe,
7165 .llseek = no_llseek,
7166};
7167
7168static const struct file_operations tracing_entries_fops = {
7169 .open = tracing_open_generic_tr,
7170 .read = tracing_entries_read,
7171 .write = tracing_entries_write,
7172 .llseek = generic_file_llseek,
7173 .release = tracing_release_generic_tr,
7174};
7175
7176static const struct file_operations tracing_total_entries_fops = {
7177 .open = tracing_open_generic_tr,
7178 .read = tracing_total_entries_read,
7179 .llseek = generic_file_llseek,
7180 .release = tracing_release_generic_tr,
7181};
7182
7183static const struct file_operations tracing_free_buffer_fops = {
7184 .open = tracing_open_generic_tr,
7185 .write = tracing_free_buffer_write,
7186 .release = tracing_free_buffer_release,
7187};
7188
7189static const struct file_operations tracing_mark_fops = {
7190 .open = tracing_open_generic_tr,
7191 .write = tracing_mark_write,
7192 .llseek = generic_file_llseek,
7193 .release = tracing_release_generic_tr,
7194};
7195
7196static const struct file_operations tracing_mark_raw_fops = {
7197 .open = tracing_open_generic_tr,
7198 .write = tracing_mark_raw_write,
7199 .llseek = generic_file_llseek,
7200 .release = tracing_release_generic_tr,
7201};
7202
7203static const struct file_operations trace_clock_fops = {
7204 .open = tracing_clock_open,
7205 .read = seq_read,
7206 .llseek = seq_lseek,
7207 .release = tracing_single_release_tr,
7208 .write = tracing_clock_write,
7209};
7210
7211static const struct file_operations trace_time_stamp_mode_fops = {
7212 .open = tracing_time_stamp_mode_open,
7213 .read = seq_read,
7214 .llseek = seq_lseek,
7215 .release = tracing_single_release_tr,
7216};
7217
7218#ifdef CONFIG_TRACER_SNAPSHOT
7219static const struct file_operations snapshot_fops = {
7220 .open = tracing_snapshot_open,
7221 .read = seq_read,
7222 .write = tracing_snapshot_write,
7223 .llseek = tracing_lseek,
7224 .release = tracing_snapshot_release,
7225};
7226
7227static const struct file_operations snapshot_raw_fops = {
7228 .open = snapshot_raw_open,
7229 .read = tracing_buffers_read,
7230 .release = tracing_buffers_release,
7231 .splice_read = tracing_buffers_splice_read,
7232 .llseek = no_llseek,
7233};
7234
7235#endif /* CONFIG_TRACER_SNAPSHOT */
7236
David Brazdil0f672f62019-12-10 10:32:29 +00007237#define TRACING_LOG_ERRS_MAX 8
7238#define TRACING_LOG_LOC_MAX 128
7239
7240#define CMD_PREFIX " Command: "
7241
7242struct err_info {
7243 const char **errs; /* ptr to loc-specific array of err strings */
7244 u8 type; /* index into errs -> specific err string */
7245 u8 pos; /* caret position in cmd (< MAX_FILTER_STR_VAL = 256) */
7246 u64 ts;
7247};
7248
7249struct tracing_log_err {
7250 struct list_head list;
7251 struct err_info info;
7252 char loc[TRACING_LOG_LOC_MAX]; /* err location */
7253 char cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7254};
7255
7256static DEFINE_MUTEX(tracing_err_log_lock);
7257
7258static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7259{
7260 struct tracing_log_err *err;
7261
7262 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7263 err = kzalloc(sizeof(*err), GFP_KERNEL);
7264 if (!err)
7265 err = ERR_PTR(-ENOMEM);
Olivier Deprez157378f2022-04-04 15:47:50 +02007266 else
7267 tr->n_err_log_entries++;
David Brazdil0f672f62019-12-10 10:32:29 +00007268
7269 return err;
7270 }
7271
7272 err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7273 list_del(&err->list);
7274
7275 return err;
7276}
7277
7278/**
7279 * err_pos - find the position of a string within a command for error careting
7280 * @cmd: The tracing command that caused the error
7281 * @str: The string to position the caret at within @cmd
7282 *
7283 * Finds the position of the first occurrence of @str within @cmd. The
7284 * return value can be passed to tracing_log_err() for caret placement
7285 * within @cmd.
7286 *
7287 * Returns the index within @cmd of the first occurrence of @str or 0
7288 * if @str was not found.
7289 */
7290unsigned int err_pos(char *cmd, const char *str)
7291{
7292 char *found;
7293
7294 if (WARN_ON(!strlen(cmd)))
7295 return 0;
7296
7297 found = strstr(cmd, str);
7298 if (found)
7299 return found - cmd;
7300
7301 return 0;
7302}
7303
7304/**
7305 * tracing_log_err - write an error to the tracing error log
7306 * @tr: The associated trace array for the error (NULL for top level array)
7307 * @loc: A string describing where the error occurred
7308 * @cmd: The tracing command that caused the error
7309 * @errs: The array of loc-specific static error strings
7310 * @type: The index into errs[], which produces the specific static err string
7311 * @pos: The position the caret should be placed in the cmd
7312 *
7313 * Writes an error into tracing/error_log of the form:
7314 *
7315 * <loc>: error: <text>
7316 * Command: <cmd>
7317 * ^
7318 *
7319 * tracing/error_log is a small log file containing the last
7320 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
7321 * unless there has been a tracing error, and the error log can be
7322 * cleared (and its memory freed) by writing the empty string to it in
7323 * truncation mode, i.e. echo > tracing/error_log.
7324 *
7325 * NOTE: the @errs array along with the @type param are used to
7326 * produce a static error string - this string is not copied and saved
7327 * when the error is logged - only a pointer to it is saved. See
7328 * existing callers for examples of how static strings are typically
7329 * defined for use with tracing_log_err().
7330 */
7331void tracing_log_err(struct trace_array *tr,
7332 const char *loc, const char *cmd,
7333 const char **errs, u8 type, u8 pos)
7334{
7335 struct tracing_log_err *err;
7336
7337 if (!tr)
7338 tr = &global_trace;
7339
7340 mutex_lock(&tracing_err_log_lock);
7341 err = get_tracing_log_err(tr);
7342 if (PTR_ERR(err) == -ENOMEM) {
7343 mutex_unlock(&tracing_err_log_lock);
7344 return;
7345 }
7346
7347 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7348 snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7349
7350 err->info.errs = errs;
7351 err->info.type = type;
7352 err->info.pos = pos;
7353 err->info.ts = local_clock();
7354
7355 list_add_tail(&err->list, &tr->err_log);
7356 mutex_unlock(&tracing_err_log_lock);
7357}
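
/*
 * Illustrative sketch: a hypothetical command parser reporting a bad
 * keyword through the error log.  The names my_cmd_errs, my_cmd and the
 * command text below are invented for this example; see the hist trigger
 * code for real callers.
 *
 *      static const char *my_cmd_errs[] = { "Unknown keyword" };
 *
 *      static void my_cmd_report_bad_key(struct trace_array *tr, char *cmd)
 *      {
 *              tracing_log_err(tr, "my_cmd", cmd, my_cmd_errs, 0,
 *                              err_pos(cmd, "bogus_field"));
 *      }
 *
 * For cmd == "keys=bogus_field" this appends roughly the following to
 * tracing/error_log, with the caret placed under "bogus_field":
 *
 *      [   12.345678] my_cmd: error: Unknown keyword
 *        Command: keys=bogus_field
 *                      ^
 */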
7358
7359static void clear_tracing_err_log(struct trace_array *tr)
7360{
7361 struct tracing_log_err *err, *next;
7362
7363 mutex_lock(&tracing_err_log_lock);
7364 list_for_each_entry_safe(err, next, &tr->err_log, list) {
7365 list_del(&err->list);
7366 kfree(err);
7367 }
7368
7369 tr->n_err_log_entries = 0;
7370 mutex_unlock(&tracing_err_log_lock);
7371}
7372
7373static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7374{
7375 struct trace_array *tr = m->private;
7376
7377 mutex_lock(&tracing_err_log_lock);
7378
7379 return seq_list_start(&tr->err_log, *pos);
7380}
7381
7382static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7383{
7384 struct trace_array *tr = m->private;
7385
7386 return seq_list_next(v, &tr->err_log, pos);
7387}
7388
7389static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7390{
7391 mutex_unlock(&tracing_err_log_lock);
7392}
7393
7394static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7395{
7396 u8 i;
7397
7398 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7399 seq_putc(m, ' ');
7400 for (i = 0; i < pos; i++)
7401 seq_putc(m, ' ');
7402 seq_puts(m, "^\n");
7403}
7404
7405static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7406{
7407 struct tracing_log_err *err = v;
7408
7409 if (err) {
7410 const char *err_text = err->info.errs[err->info.type];
7411 u64 sec = err->info.ts;
7412 u32 nsec;
7413
7414 nsec = do_div(sec, NSEC_PER_SEC);
7415 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7416 err->loc, err_text);
7417 seq_printf(m, "%s", err->cmd);
7418 tracing_err_log_show_pos(m, err->info.pos);
7419 }
7420
7421 return 0;
7422}
7423
7424static const struct seq_operations tracing_err_log_seq_ops = {
7425 .start = tracing_err_log_seq_start,
7426 .next = tracing_err_log_seq_next,
7427 .stop = tracing_err_log_seq_stop,
7428 .show = tracing_err_log_seq_show
7429};
7430
7431static int tracing_err_log_open(struct inode *inode, struct file *file)
7432{
7433 struct trace_array *tr = inode->i_private;
7434 int ret = 0;
7435
7436 ret = tracing_check_open_get_tr(tr);
7437 if (ret)
7438 return ret;
7439
7440 /* If this file was opened for write, then erase contents */
7441 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7442 clear_tracing_err_log(tr);
7443
7444 if (file->f_mode & FMODE_READ) {
7445 ret = seq_open(file, &tracing_err_log_seq_ops);
7446 if (!ret) {
7447 struct seq_file *m = file->private_data;
7448 m->private = tr;
7449 } else {
7450 trace_array_put(tr);
7451 }
7452 }
7453 return ret;
7454}
7455
7456static ssize_t tracing_err_log_write(struct file *file,
7457 const char __user *buffer,
7458 size_t count, loff_t *ppos)
7459{
7460 return count;
7461}
7462
7463static int tracing_err_log_release(struct inode *inode, struct file *file)
7464{
7465 struct trace_array *tr = inode->i_private;
7466
7467 trace_array_put(tr);
7468
7469 if (file->f_mode & FMODE_READ)
7470 seq_release(inode, file);
7471
7472 return 0;
7473}
7474
7475static const struct file_operations tracing_err_log_fops = {
7476 .open = tracing_err_log_open,
7477 .write = tracing_err_log_write,
7478 .read = seq_read,
7479 .llseek = seq_lseek,
7480 .release = tracing_err_log_release,
7481};
7482
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007483static int tracing_buffers_open(struct inode *inode, struct file *filp)
7484{
7485 struct trace_array *tr = inode->i_private;
7486 struct ftrace_buffer_info *info;
7487 int ret;
7488
David Brazdil0f672f62019-12-10 10:32:29 +00007489 ret = tracing_check_open_get_tr(tr);
7490 if (ret)
7491 return ret;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007492
Olivier Deprez157378f2022-04-04 15:47:50 +02007493 info = kvzalloc(sizeof(*info), GFP_KERNEL);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007494 if (!info) {
7495 trace_array_put(tr);
7496 return -ENOMEM;
7497 }
7498
7499 mutex_lock(&trace_types_lock);
7500
7501 info->iter.tr = tr;
7502 info->iter.cpu_file = tracing_get_cpu(inode);
7503 info->iter.trace = tr->current_trace;
Olivier Deprez157378f2022-04-04 15:47:50 +02007504 info->iter.array_buffer = &tr->array_buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007505 info->spare = NULL;
7506 /* Force reading ring buffer for first read */
7507 info->read = (unsigned int)-1;
7508
7509 filp->private_data = info;
7510
Olivier Deprez0e641232021-09-23 10:07:05 +02007511 tr->trace_ref++;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007512
7513 mutex_unlock(&trace_types_lock);
7514
7515 ret = nonseekable_open(inode, filp);
7516 if (ret < 0)
7517 trace_array_put(tr);
7518
7519 return ret;
7520}
7521
7522static __poll_t
7523tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7524{
7525 struct ftrace_buffer_info *info = filp->private_data;
7526 struct trace_iterator *iter = &info->iter;
7527
7528 return trace_poll(iter, filp, poll_table);
7529}
7530
7531static ssize_t
7532tracing_buffers_read(struct file *filp, char __user *ubuf,
7533 size_t count, loff_t *ppos)
7534{
7535 struct ftrace_buffer_info *info = filp->private_data;
7536 struct trace_iterator *iter = &info->iter;
7537 ssize_t ret = 0;
7538 ssize_t size;
7539
7540 if (!count)
7541 return 0;
7542
7543#ifdef CONFIG_TRACER_MAX_TRACE
7544 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7545 return -EBUSY;
7546#endif
7547
7548 if (!info->spare) {
Olivier Deprez157378f2022-04-04 15:47:50 +02007549 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007550 iter->cpu_file);
7551 if (IS_ERR(info->spare)) {
7552 ret = PTR_ERR(info->spare);
7553 info->spare = NULL;
7554 } else {
7555 info->spare_cpu = iter->cpu_file;
7556 }
7557 }
7558 if (!info->spare)
7559 return ret;
7560
7561 /* Do we have previous read data to read? */
7562 if (info->read < PAGE_SIZE)
7563 goto read;
7564
7565 again:
7566 trace_access_lock(iter->cpu_file);
Olivier Deprez157378f2022-04-04 15:47:50 +02007567 ret = ring_buffer_read_page(iter->array_buffer->buffer,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007568 &info->spare,
7569 count,
7570 iter->cpu_file, 0);
7571 trace_access_unlock(iter->cpu_file);
7572
7573 if (ret < 0) {
7574 if (trace_empty(iter)) {
7575 if ((filp->f_flags & O_NONBLOCK))
7576 return -EAGAIN;
7577
David Brazdil0f672f62019-12-10 10:32:29 +00007578 ret = wait_on_pipe(iter, 0);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007579 if (ret)
7580 return ret;
7581
7582 goto again;
7583 }
7584 return 0;
7585 }
7586
7587 info->read = 0;
7588 read:
7589 size = PAGE_SIZE - info->read;
7590 if (size > count)
7591 size = count;
7592
7593 ret = copy_to_user(ubuf, info->spare + info->read, size);
7594 if (ret == size)
7595 return -EFAULT;
7596
7597 size -= ret;
7598
7599 *ppos += size;
7600 info->read += size;
7601
7602 return size;
7603}
7604
7605static int tracing_buffers_release(struct inode *inode, struct file *file)
7606{
7607 struct ftrace_buffer_info *info = file->private_data;
7608 struct trace_iterator *iter = &info->iter;
7609
7610 mutex_lock(&trace_types_lock);
7611
Olivier Deprez0e641232021-09-23 10:07:05 +02007612 iter->tr->trace_ref--;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007613
7614 __trace_array_put(iter->tr);
7615
7616 if (info->spare)
Olivier Deprez157378f2022-04-04 15:47:50 +02007617 ring_buffer_free_read_page(iter->array_buffer->buffer,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007618 info->spare_cpu, info->spare);
Olivier Deprez157378f2022-04-04 15:47:50 +02007619 kvfree(info);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007620
7621 mutex_unlock(&trace_types_lock);
7622
7623 return 0;
7624}
7625
7626struct buffer_ref {
Olivier Deprez157378f2022-04-04 15:47:50 +02007627 struct trace_buffer *buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007628 void *page;
7629 int cpu;
David Brazdil0f672f62019-12-10 10:32:29 +00007630 refcount_t refcount;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007631};
7632
David Brazdil0f672f62019-12-10 10:32:29 +00007633static void buffer_ref_release(struct buffer_ref *ref)
7634{
7635 if (!refcount_dec_and_test(&ref->refcount))
7636 return;
7637 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7638 kfree(ref);
7639}
7640
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007641static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7642 struct pipe_buffer *buf)
7643{
7644 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7645
David Brazdil0f672f62019-12-10 10:32:29 +00007646 buffer_ref_release(ref);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007647 buf->private = 0;
7648}
7649
David Brazdil0f672f62019-12-10 10:32:29 +00007650static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007651 struct pipe_buffer *buf)
7652{
7653 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7654
David Brazdil0f672f62019-12-10 10:32:29 +00007655 if (refcount_read(&ref->refcount) > INT_MAX/2)
7656 return false;
7657
7658 refcount_inc(&ref->refcount);
7659 return true;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007660}
7661
7662/* Pipe buffer operations for a buffer. */
7663static const struct pipe_buf_operations buffer_pipe_buf_ops = {
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007664 .release = buffer_pipe_buf_release,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007665 .get = buffer_pipe_buf_get,
7666};
7667
7668/*
7669 * Callback from splice_to_pipe(), if we need to release some pages
7670 * at the end of the spd in case we errored out in filling the pipe.
7671 */
7672static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7673{
7674 struct buffer_ref *ref =
7675 (struct buffer_ref *)spd->partial[i].private;
7676
David Brazdil0f672f62019-12-10 10:32:29 +00007677 buffer_ref_release(ref);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007678 spd->partial[i].private = 0;
7679}
7680
7681static ssize_t
7682tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7683 struct pipe_inode_info *pipe, size_t len,
7684 unsigned int flags)
7685{
7686 struct ftrace_buffer_info *info = file->private_data;
7687 struct trace_iterator *iter = &info->iter;
7688 struct partial_page partial_def[PIPE_DEF_BUFFERS];
7689 struct page *pages_def[PIPE_DEF_BUFFERS];
7690 struct splice_pipe_desc spd = {
7691 .pages = pages_def,
7692 .partial = partial_def,
7693 .nr_pages_max = PIPE_DEF_BUFFERS,
7694 .ops = &buffer_pipe_buf_ops,
7695 .spd_release = buffer_spd_release,
7696 };
7697 struct buffer_ref *ref;
7698 int entries, i;
7699 ssize_t ret = 0;
7700
7701#ifdef CONFIG_TRACER_MAX_TRACE
7702 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7703 return -EBUSY;
7704#endif
7705
7706 if (*ppos & (PAGE_SIZE - 1))
7707 return -EINVAL;
7708
7709 if (len & (PAGE_SIZE - 1)) {
7710 if (len < PAGE_SIZE)
7711 return -EINVAL;
7712 len &= PAGE_MASK;
7713 }
7714
7715 if (splice_grow_spd(pipe, &spd))
7716 return -ENOMEM;
7717
7718 again:
7719 trace_access_lock(iter->cpu_file);
Olivier Deprez157378f2022-04-04 15:47:50 +02007720 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007721
7722 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7723 struct page *page;
7724 int r;
7725
7726 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7727 if (!ref) {
7728 ret = -ENOMEM;
7729 break;
7730 }
7731
David Brazdil0f672f62019-12-10 10:32:29 +00007732 refcount_set(&ref->refcount, 1);
Olivier Deprez157378f2022-04-04 15:47:50 +02007733 ref->buffer = iter->array_buffer->buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007734 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7735 if (IS_ERR(ref->page)) {
7736 ret = PTR_ERR(ref->page);
7737 ref->page = NULL;
7738 kfree(ref);
7739 break;
7740 }
7741 ref->cpu = iter->cpu_file;
7742
7743 r = ring_buffer_read_page(ref->buffer, &ref->page,
7744 len, iter->cpu_file, 1);
7745 if (r < 0) {
7746 ring_buffer_free_read_page(ref->buffer, ref->cpu,
7747 ref->page);
7748 kfree(ref);
7749 break;
7750 }
7751
7752 page = virt_to_page(ref->page);
7753
7754 spd.pages[i] = page;
7755 spd.partial[i].len = PAGE_SIZE;
7756 spd.partial[i].offset = 0;
7757 spd.partial[i].private = (unsigned long)ref;
7758 spd.nr_pages++;
7759 *ppos += PAGE_SIZE;
7760
Olivier Deprez157378f2022-04-04 15:47:50 +02007761 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007762 }
7763
7764 trace_access_unlock(iter->cpu_file);
7765 spd.nr_pages = i;
7766
7767 /* did we read anything? */
7768 if (!spd.nr_pages) {
7769 if (ret)
7770 goto out;
7771
7772 ret = -EAGAIN;
7773 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7774 goto out;
7775
David Brazdil0f672f62019-12-10 10:32:29 +00007776 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007777 if (ret)
7778 goto out;
7779
7780 goto again;
7781 }
7782
7783 ret = splice_to_pipe(pipe, &spd);
7784out:
7785 splice_shrink_spd(&spd);
7786
7787 return ret;
7788}
7789
7790static const struct file_operations tracing_buffers_fops = {
7791 .open = tracing_buffers_open,
7792 .read = tracing_buffers_read,
7793 .poll = tracing_buffers_poll,
7794 .release = tracing_buffers_release,
7795 .splice_read = tracing_buffers_splice_read,
7796 .llseek = no_llseek,
7797};
7798
7799static ssize_t
7800tracing_stats_read(struct file *filp, char __user *ubuf,
7801 size_t count, loff_t *ppos)
7802{
7803 struct inode *inode = file_inode(filp);
7804 struct trace_array *tr = inode->i_private;
Olivier Deprez157378f2022-04-04 15:47:50 +02007805 struct array_buffer *trace_buf = &tr->array_buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007806 int cpu = tracing_get_cpu(inode);
7807 struct trace_seq *s;
7808 unsigned long cnt;
7809 unsigned long long t;
7810 unsigned long usec_rem;
7811
7812 s = kmalloc(sizeof(*s), GFP_KERNEL);
7813 if (!s)
7814 return -ENOMEM;
7815
7816 trace_seq_init(s);
7817
7818 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7819 trace_seq_printf(s, "entries: %ld\n", cnt);
7820
7821 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7822 trace_seq_printf(s, "overrun: %ld\n", cnt);
7823
7824 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7825 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7826
7827 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7828 trace_seq_printf(s, "bytes: %ld\n", cnt);
7829
7830 if (trace_clocks[tr->clock_id].in_ns) {
7831 /* local or global for trace_clock */
7832 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7833 usec_rem = do_div(t, USEC_PER_SEC);
7834 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7835 t, usec_rem);
7836
7837 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7838 usec_rem = do_div(t, USEC_PER_SEC);
7839 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7840 } else {
7841 /* counter or tsc mode for trace_clock */
7842 trace_seq_printf(s, "oldest event ts: %llu\n",
7843 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7844
7845 trace_seq_printf(s, "now ts: %llu\n",
7846 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7847 }
7848
7849 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7850 trace_seq_printf(s, "dropped events: %ld\n", cnt);
7851
7852 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7853 trace_seq_printf(s, "read events: %ld\n", cnt);
7854
7855 count = simple_read_from_buffer(ubuf, count, ppos,
7856 s->buffer, trace_seq_used(s));
7857
7858 kfree(s);
7859
7860 return count;
7861}
7862
7863static const struct file_operations tracing_stats_fops = {
7864 .open = tracing_open_generic_tr,
7865 .read = tracing_stats_read,
7866 .llseek = generic_file_llseek,
7867 .release = tracing_release_generic_tr,
7868};
7869
7870#ifdef CONFIG_DYNAMIC_FTRACE
7871
7872static ssize_t
7873tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7874 size_t cnt, loff_t *ppos)
7875{
Olivier Deprez157378f2022-04-04 15:47:50 +02007876 ssize_t ret;
7877 char *buf;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007878 int r;
7879
Olivier Deprez157378f2022-04-04 15:47:50 +02007880 /* 256 should be plenty to hold the amount needed */
7881 buf = kmalloc(256, GFP_KERNEL);
7882 if (!buf)
7883 return -ENOMEM;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007884
Olivier Deprez157378f2022-04-04 15:47:50 +02007885 r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7886 ftrace_update_tot_cnt,
7887 ftrace_number_of_pages,
7888 ftrace_number_of_groups);
7889
7890 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7891 kfree(buf);
7892 return ret;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00007893}
7894
7895static const struct file_operations tracing_dyn_info_fops = {
7896 .open = tracing_open_generic,
7897 .read = tracing_read_dyn_info,
7898 .llseek = generic_file_llseek,
7899};
7900#endif /* CONFIG_DYNAMIC_FTRACE */
7901
7902#if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7903static void
7904ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7905 struct trace_array *tr, struct ftrace_probe_ops *ops,
7906 void *data)
7907{
7908 tracing_snapshot_instance(tr);
7909}
7910
7911static void
7912ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7913 struct trace_array *tr, struct ftrace_probe_ops *ops,
7914 void *data)
7915{
7916 struct ftrace_func_mapper *mapper = data;
7917 long *count = NULL;
7918
7919 if (mapper)
7920 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7921
7922 if (count) {
7923
7924 if (*count <= 0)
7925 return;
7926
7927 (*count)--;
7928 }
7929
7930 tracing_snapshot_instance(tr);
7931}
7932
7933static int
7934ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7935 struct ftrace_probe_ops *ops, void *data)
7936{
7937 struct ftrace_func_mapper *mapper = data;
7938 long *count = NULL;
7939
7940 seq_printf(m, "%ps:", (void *)ip);
7941
7942 seq_puts(m, "snapshot");
7943
7944 if (mapper)
7945 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7946
7947 if (count)
7948 seq_printf(m, ":count=%ld\n", *count);
7949 else
7950 seq_puts(m, ":unlimited\n");
7951
7952 return 0;
7953}
7954
7955static int
7956ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7957 unsigned long ip, void *init_data, void **data)
7958{
7959 struct ftrace_func_mapper *mapper = *data;
7960
7961 if (!mapper) {
7962 mapper = allocate_ftrace_func_mapper();
7963 if (!mapper)
7964 return -ENOMEM;
7965 *data = mapper;
7966 }
7967
7968 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7969}
7970
7971static void
7972ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7973 unsigned long ip, void *data)
7974{
7975 struct ftrace_func_mapper *mapper = data;
7976
7977 if (!ip) {
7978 if (!mapper)
7979 return;
7980 free_ftrace_func_mapper(mapper, NULL);
7981 return;
7982 }
7983
7984 ftrace_func_mapper_remove_ip(mapper, ip);
7985}
7986
7987static struct ftrace_probe_ops snapshot_probe_ops = {
7988 .func = ftrace_snapshot,
7989 .print = ftrace_snapshot_print,
7990};
7991
7992static struct ftrace_probe_ops snapshot_count_probe_ops = {
7993 .func = ftrace_count_snapshot,
7994 .print = ftrace_snapshot_print,
7995 .init = ftrace_snapshot_init,
7996 .free = ftrace_snapshot_free,
7997};
7998
7999static int
8000ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8001 char *glob, char *cmd, char *param, int enable)
8002{
8003 struct ftrace_probe_ops *ops;
8004 void *count = (void *)-1;
8005 char *number;
8006 int ret;
8007
8008 if (!tr)
8009 return -ENODEV;
8010
8011 /* hash funcs only work with set_ftrace_filter */
8012 if (!enable)
8013 return -EINVAL;
8014
8015 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8016
8017 if (glob[0] == '!')
8018 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8019
8020 if (!param)
8021 goto out_reg;
8022
8023 number = strsep(&param, ":");
8024
8025 if (!strlen(number))
8026 goto out_reg;
8027
8028 /*
8029 * We use the callback data field (which is a pointer)
8030 * as our counter.
8031 */
8032 ret = kstrtoul(number, 0, (unsigned long *)&count);
8033 if (ret)
8034 return ret;
8035
8036 out_reg:
8037 ret = tracing_alloc_snapshot_instance(tr);
8038 if (ret < 0)
8039 goto out;
8040
8041 ret = register_ftrace_function_probe(glob, tr, ops, count);
8042
8043 out:
8044 return ret < 0 ? ret : 0;
8045}
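
/*
 * Illustrative note: this callback implements the "snapshot" command of
 * set_ftrace_filter, used roughly like
 *
 *      echo 'schedule:snapshot' > set_ftrace_filter    # snapshot on every hit
 *      echo 'schedule:snapshot:3' > set_ftrace_filter  # only the first 3 hits
 *      echo '!schedule:snapshot' >> set_ftrace_filter  # remove the probe
 *
 * where "schedule" is just an example function.  An optional count such as
 * ":3" arrives here as @param and is stored, cast to a pointer, as the
 * probe's callback data (see the comment above).
 */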
8046
8047static struct ftrace_func_command ftrace_snapshot_cmd = {
8048 .name = "snapshot",
8049 .func = ftrace_trace_snapshot_callback,
8050};
8051
8052static __init int register_snapshot_cmd(void)
8053{
8054 return register_ftrace_command(&ftrace_snapshot_cmd);
8055}
8056#else
8057static inline __init int register_snapshot_cmd(void) { return 0; }
8058#endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8059
8060static struct dentry *tracing_get_dentry(struct trace_array *tr)
8061{
8062 if (WARN_ON(!tr->dir))
8063 return ERR_PTR(-ENODEV);
8064
8065 /* Top directory uses NULL as the parent */
8066 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8067 return NULL;
8068
8069 /* All sub buffers have a descriptor */
8070 return tr->dir;
8071}
8072
8073static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8074{
8075 struct dentry *d_tracer;
8076
8077 if (tr->percpu_dir)
8078 return tr->percpu_dir;
8079
8080 d_tracer = tracing_get_dentry(tr);
8081 if (IS_ERR(d_tracer))
8082 return NULL;
8083
8084 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8085
Olivier Deprez157378f2022-04-04 15:47:50 +02008086 MEM_FAIL(!tr->percpu_dir,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008087 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8088
8089 return tr->percpu_dir;
8090}
8091
8092static struct dentry *
8093trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8094 void *data, long cpu, const struct file_operations *fops)
8095{
8096 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8097
8098 if (ret) /* See tracing_get_cpu() */
8099 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8100 return ret;
8101}
8102
8103static void
8104tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8105{
8106 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8107 struct dentry *d_cpu;
8108 char cpu_dir[30]; /* 30 characters should be more than enough */
8109
8110 if (!d_percpu)
8111 return;
8112
8113 snprintf(cpu_dir, 30, "cpu%ld", cpu);
8114 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8115 if (!d_cpu) {
8116 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8117 return;
8118 }
8119
8120 /* per cpu trace_pipe */
8121 trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8122 tr, cpu, &tracing_pipe_fops);
8123
8124 /* per cpu trace */
8125 trace_create_cpu_file("trace", 0644, d_cpu,
8126 tr, cpu, &tracing_fops);
8127
8128 trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8129 tr, cpu, &tracing_buffers_fops);
8130
8131 trace_create_cpu_file("stats", 0444, d_cpu,
8132 tr, cpu, &tracing_stats_fops);
8133
8134 trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8135 tr, cpu, &tracing_entries_fops);
8136
8137#ifdef CONFIG_TRACER_SNAPSHOT
8138 trace_create_cpu_file("snapshot", 0644, d_cpu,
8139 tr, cpu, &snapshot_fops);
8140
8141 trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8142 tr, cpu, &snapshot_raw_fops);
8143#endif
8144}
8145
8146#ifdef CONFIG_FTRACE_SELFTEST
8147/* Let selftest have access to static functions in this file */
8148#include "trace_selftest.c"
8149#endif
8150
8151static ssize_t
8152trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8153 loff_t *ppos)
8154{
8155 struct trace_option_dentry *topt = filp->private_data;
8156 char *buf;
8157
8158 if (topt->flags->val & topt->opt->bit)
8159 buf = "1\n";
8160 else
8161 buf = "0\n";
8162
8163 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8164}
8165
8166static ssize_t
8167trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8168 loff_t *ppos)
8169{
8170 struct trace_option_dentry *topt = filp->private_data;
8171 unsigned long val;
8172 int ret;
8173
8174 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8175 if (ret)
8176 return ret;
8177
8178 if (val != 0 && val != 1)
8179 return -EINVAL;
8180
8181 if (!!(topt->flags->val & topt->opt->bit) != val) {
8182 mutex_lock(&trace_types_lock);
8183 ret = __set_tracer_option(topt->tr, topt->flags,
8184 topt->opt, !val);
8185 mutex_unlock(&trace_types_lock);
8186 if (ret)
8187 return ret;
8188 }
8189
8190 *ppos += cnt;
8191
8192 return cnt;
8193}
8194
8195
8196static const struct file_operations trace_options_fops = {
8197 .open = tracing_open_generic,
8198 .read = trace_options_read,
8199 .write = trace_options_write,
8200 .llseek = generic_file_llseek,
8201};
8202
8203/*
8204 * In order to pass in both the trace_array descriptor as well as the index
8205 * to the flag that the trace option file represents, the trace_array
8206 * has a character array of trace_flags_index[], which holds the index
8207 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8208 * The address of the index element for a given flag is what gets passed
8209 * to that flag option file's read/write callbacks.
8210 *
8211 * In order to extract both the index and the trace_array descriptor,
8212 * get_tr_index() uses the following algorithm.
8213 *
8214 * idx = *ptr;
8215 *
8216 * The pointer passed in is the address of one of these index bytes, and
8217 * each byte's value is its own position (remember index[1] == 1).
8218 *
8219 * Subtracting that index from the pointer then takes us back to the
8220 * start of the index array:
8221 *
8222 * ptr - idx == &index[0]
8223 *
8224 * Then a simple container_of() from that pointer gets us to the
8225 * trace_array descriptor.
8226 */
8227static void get_tr_index(void *data, struct trace_array **ptr,
8228 unsigned int *pindex)
8229{
8230 *pindex = *(unsigned char *)data;
8231
8232 *ptr = container_of(data - *pindex, struct trace_array,
8233 trace_flags_index);
8234}
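
/*
 * Illustrative worked example: suppose a flag file represents bit 4, so it
 * was created with data == &tr->trace_flags_index[4], where
 * trace_flags_index[4] == 4.  The callbacks below then recover both pieces:
 *
 *      idx = *(unsigned char *)data;       // == 4
 *      tr  = container_of(data - idx,      // data - 4 == &trace_flags_index[0]
 *                         struct trace_array, trace_flags_index);
 *
 * after which (1 << idx) selects the flag's bit within tr->trace_flags.
 */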
8235
8236static ssize_t
8237trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8238 loff_t *ppos)
8239{
8240 void *tr_index = filp->private_data;
8241 struct trace_array *tr;
8242 unsigned int index;
8243 char *buf;
8244
8245 get_tr_index(tr_index, &tr, &index);
8246
8247 if (tr->trace_flags & (1 << index))
8248 buf = "1\n";
8249 else
8250 buf = "0\n";
8251
8252 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8253}
8254
8255static ssize_t
8256trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8257 loff_t *ppos)
8258{
8259 void *tr_index = filp->private_data;
8260 struct trace_array *tr;
8261 unsigned int index;
8262 unsigned long val;
8263 int ret;
8264
8265 get_tr_index(tr_index, &tr, &index);
8266
8267 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8268 if (ret)
8269 return ret;
8270
8271 if (val != 0 && val != 1)
8272 return -EINVAL;
8273
Olivier Deprez0e641232021-09-23 10:07:05 +02008274 mutex_lock(&event_mutex);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008275 mutex_lock(&trace_types_lock);
8276 ret = set_tracer_flag(tr, 1 << index, val);
8277 mutex_unlock(&trace_types_lock);
Olivier Deprez0e641232021-09-23 10:07:05 +02008278 mutex_unlock(&event_mutex);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008279
8280 if (ret < 0)
8281 return ret;
8282
8283 *ppos += cnt;
8284
8285 return cnt;
8286}
8287
8288static const struct file_operations trace_options_core_fops = {
8289 .open = tracing_open_generic,
8290 .read = trace_options_core_read,
8291 .write = trace_options_core_write,
8292 .llseek = generic_file_llseek,
8293};
8294
8295struct dentry *trace_create_file(const char *name,
8296 umode_t mode,
8297 struct dentry *parent,
8298 void *data,
8299 const struct file_operations *fops)
8300{
8301 struct dentry *ret;
8302
8303 ret = tracefs_create_file(name, mode, parent, data, fops);
8304 if (!ret)
8305 pr_warn("Could not create tracefs '%s' entry\n", name);
8306
8307 return ret;
8308}
8309
8310
8311static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8312{
8313 struct dentry *d_tracer;
8314
8315 if (tr->options)
8316 return tr->options;
8317
8318 d_tracer = tracing_get_dentry(tr);
8319 if (IS_ERR(d_tracer))
8320 return NULL;
8321
8322 tr->options = tracefs_create_dir("options", d_tracer);
8323 if (!tr->options) {
8324 pr_warn("Could not create tracefs directory 'options'\n");
8325 return NULL;
8326 }
8327
8328 return tr->options;
8329}
8330
8331static void
8332create_trace_option_file(struct trace_array *tr,
8333 struct trace_option_dentry *topt,
8334 struct tracer_flags *flags,
8335 struct tracer_opt *opt)
8336{
8337 struct dentry *t_options;
8338
8339 t_options = trace_options_init_dentry(tr);
8340 if (!t_options)
8341 return;
8342
8343 topt->flags = flags;
8344 topt->opt = opt;
8345 topt->tr = tr;
8346
8347 topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8348 &trace_options_fops);
8349
8350}
8351
8352static void
8353create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8354{
8355 struct trace_option_dentry *topts;
8356 struct trace_options *tr_topts;
8357 struct tracer_flags *flags;
8358 struct tracer_opt *opts;
8359 int cnt;
8360 int i;
8361
8362 if (!tracer)
8363 return;
8364
8365 flags = tracer->flags;
8366
8367 if (!flags || !flags->opts)
8368 return;
8369
8370 /*
8371 * If this is an instance, only create flags for tracers
8372 * the instance may have.
8373 */
8374 if (!trace_ok_for_array(tracer, tr))
8375 return;
8376
8377 for (i = 0; i < tr->nr_topts; i++) {
8378 /* Make sure there are no duplicate flags. */
8379 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8380 return;
8381 }
8382
8383 opts = flags->opts;
8384
8385 for (cnt = 0; opts[cnt].name; cnt++)
8386 ;
8387
8388 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8389 if (!topts)
8390 return;
8391
8392 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8393 GFP_KERNEL);
8394 if (!tr_topts) {
8395 kfree(topts);
8396 return;
8397 }
8398
8399 tr->topts = tr_topts;
8400 tr->topts[tr->nr_topts].tracer = tracer;
8401 tr->topts[tr->nr_topts].topts = topts;
8402 tr->nr_topts++;
8403
8404 for (cnt = 0; opts[cnt].name; cnt++) {
8405 create_trace_option_file(tr, &topts[cnt], flags,
8406 &opts[cnt]);
Olivier Deprez157378f2022-04-04 15:47:50 +02008407 MEM_FAIL(topts[cnt].entry == NULL,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008408 "Failed to create trace option: %s",
8409 opts[cnt].name);
8410 }
8411}
8412
8413static struct dentry *
8414create_trace_option_core_file(struct trace_array *tr,
8415 const char *option, long index)
8416{
8417 struct dentry *t_options;
8418
8419 t_options = trace_options_init_dentry(tr);
8420 if (!t_options)
8421 return NULL;
8422
8423 return trace_create_file(option, 0644, t_options,
8424 (void *)&tr->trace_flags_index[index],
8425 &trace_options_core_fops);
8426}
8427
8428static void create_trace_options_dir(struct trace_array *tr)
8429{
8430 struct dentry *t_options;
8431 bool top_level = tr == &global_trace;
8432 int i;
8433
8434 t_options = trace_options_init_dentry(tr);
8435 if (!t_options)
8436 return;
8437
8438 for (i = 0; trace_options[i]; i++) {
8439 if (top_level ||
8440 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8441 create_trace_option_core_file(tr, trace_options[i], i);
8442 }
8443}
8444
8445static ssize_t
8446rb_simple_read(struct file *filp, char __user *ubuf,
8447 size_t cnt, loff_t *ppos)
8448{
8449 struct trace_array *tr = filp->private_data;
8450 char buf[64];
8451 int r;
8452
8453 r = tracer_tracing_is_on(tr);
8454 r = sprintf(buf, "%d\n", r);
8455
8456 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8457}
8458
8459static ssize_t
8460rb_simple_write(struct file *filp, const char __user *ubuf,
8461 size_t cnt, loff_t *ppos)
8462{
8463 struct trace_array *tr = filp->private_data;
Olivier Deprez157378f2022-04-04 15:47:50 +02008464 struct trace_buffer *buffer = tr->array_buffer.buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008465 unsigned long val;
8466 int ret;
8467
8468 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8469 if (ret)
8470 return ret;
8471
8472 if (buffer) {
8473 mutex_lock(&trace_types_lock);
8474 if (!!val == tracer_tracing_is_on(tr)) {
8475 val = 0; /* do nothing */
8476 } else if (val) {
8477 tracer_tracing_on(tr);
8478 if (tr->current_trace->start)
8479 tr->current_trace->start(tr);
8480 } else {
8481 tracer_tracing_off(tr);
8482 if (tr->current_trace->stop)
8483 tr->current_trace->stop(tr);
8484 }
8485 mutex_unlock(&trace_types_lock);
8486 }
8487
8488 (*ppos)++;
8489
8490 return cnt;
8491}
8492
8493static const struct file_operations rb_simple_fops = {
8494 .open = tracing_open_generic_tr,
8495 .read = rb_simple_read,
8496 .write = rb_simple_write,
8497 .release = tracing_release_generic_tr,
8498 .llseek = default_llseek,
8499};
8500
David Brazdil0f672f62019-12-10 10:32:29 +00008501static ssize_t
8502buffer_percent_read(struct file *filp, char __user *ubuf,
8503 size_t cnt, loff_t *ppos)
8504{
8505 struct trace_array *tr = filp->private_data;
8506 char buf[64];
8507 int r;
8508
8509 r = tr->buffer_percent;
8510 r = sprintf(buf, "%d\n", r);
8511
8512 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8513}
8514
8515static ssize_t
8516buffer_percent_write(struct file *filp, const char __user *ubuf,
8517 size_t cnt, loff_t *ppos)
8518{
8519 struct trace_array *tr = filp->private_data;
8520 unsigned long val;
8521 int ret;
8522
8523 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8524 if (ret)
8525 return ret;
8526
8527 if (val > 100)
8528 return -EINVAL;
8529
8530 if (!val)
8531 val = 1;
8532
8533 tr->buffer_percent = val;
8534
8535 (*ppos)++;
8536
8537 return cnt;
8538}
8539
8540static const struct file_operations buffer_percent_fops = {
8541 .open = tracing_open_generic_tr,
8542 .read = buffer_percent_read,
8543 .write = buffer_percent_write,
8544 .release = tracing_release_generic_tr,
8545 .llseek = default_llseek,
8546};
8547
8548static struct dentry *trace_instance_dir;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008549
8550static void
8551init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8552
8553static int
Olivier Deprez157378f2022-04-04 15:47:50 +02008554allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008555{
8556 enum ring_buffer_flags rb_flags;
8557
8558 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8559
8560 buf->tr = tr;
8561
8562 buf->buffer = ring_buffer_alloc(size, rb_flags);
8563 if (!buf->buffer)
8564 return -ENOMEM;
8565
8566 buf->data = alloc_percpu(struct trace_array_cpu);
8567 if (!buf->data) {
8568 ring_buffer_free(buf->buffer);
8569 buf->buffer = NULL;
8570 return -ENOMEM;
8571 }
8572
8573 /* Allocate the first page for all buffers */
Olivier Deprez157378f2022-04-04 15:47:50 +02008574 set_buffer_entries(&tr->array_buffer,
8575 ring_buffer_size(tr->array_buffer.buffer, 0));
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008576
8577 return 0;
8578}
8579
8580static int allocate_trace_buffers(struct trace_array *tr, int size)
8581{
8582 int ret;
8583
Olivier Deprez157378f2022-04-04 15:47:50 +02008584 ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008585 if (ret)
8586 return ret;
8587
8588#ifdef CONFIG_TRACER_MAX_TRACE
8589 ret = allocate_trace_buffer(tr, &tr->max_buffer,
8590 allocate_snapshot ? size : 1);
Olivier Deprez157378f2022-04-04 15:47:50 +02008591 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8592 ring_buffer_free(tr->array_buffer.buffer);
8593 tr->array_buffer.buffer = NULL;
8594 free_percpu(tr->array_buffer.data);
8595 tr->array_buffer.data = NULL;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008596 return -ENOMEM;
8597 }
8598 tr->allocated_snapshot = allocate_snapshot;
8599
8600 /*
8601 * Only the top level trace array gets its snapshot allocated
8602 * from the kernel command line.
8603 */
8604 allocate_snapshot = false;
8605#endif
Olivier Deprez0e641232021-09-23 10:07:05 +02008606
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008607 return 0;
8608}
8609
Olivier Deprez157378f2022-04-04 15:47:50 +02008610static void free_trace_buffer(struct array_buffer *buf)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008611{
8612 if (buf->buffer) {
8613 ring_buffer_free(buf->buffer);
8614 buf->buffer = NULL;
8615 free_percpu(buf->data);
8616 buf->data = NULL;
8617 }
8618}
8619
8620static void free_trace_buffers(struct trace_array *tr)
8621{
8622 if (!tr)
8623 return;
8624
Olivier Deprez157378f2022-04-04 15:47:50 +02008625 free_trace_buffer(&tr->array_buffer);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008626
8627#ifdef CONFIG_TRACER_MAX_TRACE
8628 free_trace_buffer(&tr->max_buffer);
8629#endif
8630}
8631
8632static void init_trace_flags_index(struct trace_array *tr)
8633{
8634 int i;
8635
8636 /* Used by the trace options files */
8637 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8638 tr->trace_flags_index[i] = i;
8639}
8640
8641static void __update_tracer_options(struct trace_array *tr)
8642{
8643 struct tracer *t;
8644
8645 for (t = trace_types; t; t = t->next)
8646 add_tracer_options(tr, t);
8647}
8648
8649static void update_tracer_options(struct trace_array *tr)
8650{
8651 mutex_lock(&trace_types_lock);
8652 __update_tracer_options(tr);
8653 mutex_unlock(&trace_types_lock);
8654}
8655
Olivier Deprez157378f2022-04-04 15:47:50 +02008656/* Must have trace_types_lock held */
8657struct trace_array *trace_array_find(const char *instance)
8658{
8659 struct trace_array *tr, *found = NULL;
8660
8661 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8662 if (tr->name && strcmp(tr->name, instance) == 0) {
8663 found = tr;
8664 break;
8665 }
8666 }
8667
8668 return found;
8669}
8670
8671struct trace_array *trace_array_find_get(const char *instance)
8672{
8673 struct trace_array *tr;
8674
8675 mutex_lock(&trace_types_lock);
8676 tr = trace_array_find(instance);
8677 if (tr)
8678 tr->ref++;
8679 mutex_unlock(&trace_types_lock);
8680
8681 return tr;
8682}
8683
8684static int trace_array_create_dir(struct trace_array *tr)
8685{
8686 int ret;
8687
8688 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
8689 if (!tr->dir)
8690 return -EINVAL;
8691
8692 ret = event_trace_add_tracer(tr->dir, tr);
8693 if (ret) {
8694 tracefs_remove(tr->dir);
8695 return ret;
8696 }
8697
8698 init_tracer_tracefs(tr, tr->dir);
8699 __update_tracer_options(tr);
8700
8701 return ret;
8702}
8703
8704static struct trace_array *trace_array_create(const char *name)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008705{
8706 struct trace_array *tr;
8707 int ret;
8708
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008709 ret = -ENOMEM;
8710 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8711 if (!tr)
Olivier Deprez157378f2022-04-04 15:47:50 +02008712 return ERR_PTR(ret);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008713
8714 tr->name = kstrdup(name, GFP_KERNEL);
8715 if (!tr->name)
8716 goto out_free_tr;
8717
8718 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8719 goto out_free_tr;
8720
8721 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8722
8723 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8724
8725 raw_spin_lock_init(&tr->start_lock);
8726
8727 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8728
8729 tr->current_trace = &nop_trace;
8730
8731 INIT_LIST_HEAD(&tr->systems);
8732 INIT_LIST_HEAD(&tr->events);
8733 INIT_LIST_HEAD(&tr->hist_vars);
David Brazdil0f672f62019-12-10 10:32:29 +00008734 INIT_LIST_HEAD(&tr->err_log);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008735
8736 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8737 goto out_free_tr;
8738
Olivier Deprez157378f2022-04-04 15:47:50 +02008739 if (ftrace_allocate_ftrace_ops(tr) < 0)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008740 goto out_free_tr;
8741
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008742 ftrace_init_trace_array(tr);
8743
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008744 init_trace_flags_index(tr);
Olivier Deprez157378f2022-04-04 15:47:50 +02008745
8746 if (trace_instance_dir) {
8747 ret = trace_array_create_dir(tr);
8748 if (ret)
8749 goto out_free_tr;
8750 } else
8751 __trace_early_add_events(tr);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008752
8753 list_add(&tr->list, &ftrace_trace_arrays);
8754
Olivier Deprez157378f2022-04-04 15:47:50 +02008755 tr->ref++;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008756
David Brazdil0f672f62019-12-10 10:32:29 +00008757 return tr;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008758
8759 out_free_tr:
Olivier Deprez157378f2022-04-04 15:47:50 +02008760 ftrace_free_ftrace_ops(tr);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008761 free_trace_buffers(tr);
8762 free_cpumask_var(tr->tracing_cpumask);
8763 kfree(tr->name);
8764 kfree(tr);
8765
David Brazdil0f672f62019-12-10 10:32:29 +00008766 return ERR_PTR(ret);
8767}
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008768
David Brazdil0f672f62019-12-10 10:32:29 +00008769static int instance_mkdir(const char *name)
8770{
Olivier Deprez157378f2022-04-04 15:47:50 +02008771 struct trace_array *tr;
8772 int ret;
8773
8774 mutex_lock(&event_mutex);
8775 mutex_lock(&trace_types_lock);
8776
8777 ret = -EEXIST;
8778 if (trace_array_find(name))
8779 goto out_unlock;
8780
8781 tr = trace_array_create(name);
8782
8783 ret = PTR_ERR_OR_ZERO(tr);
8784
8785out_unlock:
8786 mutex_unlock(&trace_types_lock);
8787 mutex_unlock(&event_mutex);
8788 return ret;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008789}
8790
Olivier Deprez157378f2022-04-04 15:47:50 +02008791/**
8792 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8793 * @name: The name of the trace array to be looked up/created.
8794 *
8795 * Returns a pointer to the trace array with the given name, or
8796 * NULL if it cannot be created.
8797 *
8798 * NOTE: This function increments the reference counter associated with the
8799 * trace array returned. This makes sure it cannot be freed while in use.
8800 * Use trace_array_put() once the trace array is no longer needed.
8801 * If the trace_array is to be freed, trace_array_destroy() needs to
8802 * be called after the trace_array_put(), or simply let user space delete
8803 * it from the tracefs instances directory. But until the
8804 * trace_array_put() is called, user space can not delete it.
8805 *
8806 */
8807struct trace_array *trace_array_get_by_name(const char *name)
8808{
8809 struct trace_array *tr;
8810
8811 mutex_lock(&event_mutex);
8812 mutex_lock(&trace_types_lock);
8813
8814 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8815 if (tr->name && strcmp(tr->name, name) == 0)
8816 goto out_unlock;
8817 }
8818
8819 tr = trace_array_create(name);
8820
8821 if (IS_ERR(tr))
8822 tr = NULL;
8823out_unlock:
8824 if (tr)
8825 tr->ref++;
8826
8827 mutex_unlock(&trace_types_lock);
8828 mutex_unlock(&event_mutex);
8829 return tr;
8830}
8831EXPORT_SYMBOL_GPL(trace_array_get_by_name);
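
/*
 * Illustrative sketch: a hypothetical in-kernel user creating and later
 * removing a named instance through this API.  The instance name and the
 * surrounding error handling are invented for this example.
 *
 *      struct trace_array *tr;
 *
 *      tr = trace_array_get_by_name("my_instance");    // created or found,
 *      if (!tr)                                        // reference now held
 *              return -ENOMEM;
 *
 *      // ... trace into the instance ...
 *
 *      trace_array_put(tr);            // drop the reference ...
 *      trace_array_destroy(tr);        // ... then remove the instance
 */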
8832
David Brazdil0f672f62019-12-10 10:32:29 +00008833static int __remove_instance(struct trace_array *tr)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008834{
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008835 int i;
8836
Olivier Deprez157378f2022-04-04 15:47:50 +02008837 /* Reference counter for a newly created trace array = 1. */
8838 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
David Brazdil0f672f62019-12-10 10:32:29 +00008839 return -EBUSY;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008840
8841 list_del(&tr->list);
8842
8843 /* Disable all the flags that were enabled coming in */
8844 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8845 if ((1 << i) & ZEROED_TRACE_FLAGS)
8846 set_tracer_flag(tr, 1 << i, 0);
8847 }
8848
8849 tracing_set_nop(tr);
8850 clear_ftrace_function_probes(tr);
8851 event_trace_del_tracer(tr);
8852 ftrace_clear_pids(tr);
8853 ftrace_destroy_function_files(tr);
Olivier Deprez157378f2022-04-04 15:47:50 +02008854 tracefs_remove(tr->dir);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008855 free_trace_buffers(tr);
8856
8857 for (i = 0; i < tr->nr_topts; i++) {
8858 kfree(tr->topts[i].topts);
8859 }
8860 kfree(tr->topts);
8861
8862 free_cpumask_var(tr->tracing_cpumask);
8863 kfree(tr->name);
8864 kfree(tr);
8865
David Brazdil0f672f62019-12-10 10:32:29 +00008866 return 0;
8867}
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008868
Olivier Deprez0e641232021-09-23 10:07:05 +02008869int trace_array_destroy(struct trace_array *this_tr)
David Brazdil0f672f62019-12-10 10:32:29 +00008870{
Olivier Deprez0e641232021-09-23 10:07:05 +02008871 struct trace_array *tr;
David Brazdil0f672f62019-12-10 10:32:29 +00008872 int ret;
8873
Olivier Deprez0e641232021-09-23 10:07:05 +02008874 if (!this_tr)
David Brazdil0f672f62019-12-10 10:32:29 +00008875 return -EINVAL;
8876
8877 mutex_lock(&event_mutex);
8878 mutex_lock(&trace_types_lock);
8879
Olivier Deprez0e641232021-09-23 10:07:05 +02008880 ret = -ENODEV;
8881
8882 /* Making sure trace array exists before destroying it. */
8883 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8884 if (tr == this_tr) {
8885 ret = __remove_instance(tr);
8886 break;
8887 }
8888 }
David Brazdil0f672f62019-12-10 10:32:29 +00008889
8890 mutex_unlock(&trace_types_lock);
8891 mutex_unlock(&event_mutex);
8892
8893 return ret;
8894}
8895EXPORT_SYMBOL_GPL(trace_array_destroy);
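/*
 * Lifecycle sketch (illustrative only): a kernel user that created an
 * instance with trace_array_get_by_name() must drop its reference with
 * trace_array_put() before trace_array_destroy() can succeed, matching
 * the rules documented above. Error handling is trimmed and the name
 * "sample-instance" is an example.
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("sample-instance");
 *	if (tr) {
 *		... use the instance ...
 *		trace_array_put(tr);
 *		trace_array_destroy(tr);
 *	}
 *
 * trace_array_destroy() returns -EBUSY while other references or readers
 * still hold the instance, and -ENODEV if it is not on the
 * ftrace_trace_arrays list.
 */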
8896
8897static int instance_rmdir(const char *name)
8898{
8899 struct trace_array *tr;
8900 int ret;
8901
8902 mutex_lock(&event_mutex);
8903 mutex_lock(&trace_types_lock);
8904
8905 ret = -ENODEV;
Olivier Deprez157378f2022-04-04 15:47:50 +02008906 tr = trace_array_find(name);
8907 if (tr)
8908 ret = __remove_instance(tr);
David Brazdil0f672f62019-12-10 10:32:29 +00008909
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008910 mutex_unlock(&trace_types_lock);
8911 mutex_unlock(&event_mutex);
8912
8913 return ret;
8914}
8915
8916static __init void create_trace_instances(struct dentry *d_tracer)
8917{
Olivier Deprez157378f2022-04-04 15:47:50 +02008918 struct trace_array *tr;
8919
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008920 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8921 instance_mkdir,
8922 instance_rmdir);
Olivier Deprez157378f2022-04-04 15:47:50 +02008923 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008924 return;
Olivier Deprez157378f2022-04-04 15:47:50 +02008925
8926 mutex_lock(&event_mutex);
8927 mutex_lock(&trace_types_lock);
8928
8929 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8930 if (!tr->name)
8931 continue;
8932 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
8933 "Failed to create instance directory\n"))
8934 break;
8935 }
8936
8937 mutex_unlock(&trace_types_lock);
8938 mutex_unlock(&event_mutex);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00008939}
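/*
 * The "instances" directory registered above lets user space create and
 * remove trace arrays with plain mkdir()/rmdir(). Illustrative sketch of
 * the calls a tool might make; the path assumes tracefs is mounted at
 * /sys/kernel/tracing and the instance name "foo" is an example:
 *
 *	#include <sys/stat.h>
 *	#include <unistd.h>
 *
 *	mkdir("/sys/kernel/tracing/instances/foo", 0755);
 *	rmdir("/sys/kernel/tracing/instances/foo");
 *
 * The mkdir() ends up in instance_mkdir() and the rmdir() in
 * instance_rmdir(), both defined above.
 */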
8940
8941static void
8942init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8943{
8944 struct trace_event_file *file;
8945 int cpu;
8946
8947 trace_create_file("available_tracers", 0444, d_tracer,
8948 tr, &show_traces_fops);
8949
8950 trace_create_file("current_tracer", 0644, d_tracer,
8951 tr, &set_tracer_fops);
8952
8953 trace_create_file("tracing_cpumask", 0644, d_tracer,
8954 tr, &tracing_cpumask_fops);
8955
8956 trace_create_file("trace_options", 0644, d_tracer,
8957 tr, &tracing_iter_fops);
8958
8959 trace_create_file("trace", 0644, d_tracer,
8960 tr, &tracing_fops);
8961
8962 trace_create_file("trace_pipe", 0444, d_tracer,
8963 tr, &tracing_pipe_fops);
8964
8965 trace_create_file("buffer_size_kb", 0644, d_tracer,
8966 tr, &tracing_entries_fops);
8967
8968 trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8969 tr, &tracing_total_entries_fops);
8970
8971 trace_create_file("free_buffer", 0200, d_tracer,
8972 tr, &tracing_free_buffer_fops);
8973
8974 trace_create_file("trace_marker", 0220, d_tracer,
8975 tr, &tracing_mark_fops);
8976
8977 file = __find_event_file(tr, "ftrace", "print");
8978 if (file && file->dir)
8979 trace_create_file("trigger", 0644, file->dir, file,
8980 &event_trigger_fops);
8981 tr->trace_marker_file = file;
8982
8983 trace_create_file("trace_marker_raw", 0220, d_tracer,
8984 tr, &tracing_mark_raw_fops);
8985
8986 trace_create_file("trace_clock", 0644, d_tracer, tr,
8987 &trace_clock_fops);
8988
8989 trace_create_file("tracing_on", 0644, d_tracer,
8990 tr, &rb_simple_fops);
8991
8992 trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8993 &trace_time_stamp_mode_fops);
8994
David Brazdil0f672f62019-12-10 10:32:29 +00008995 tr->buffer_percent = 50;
8996
8997 trace_create_file("buffer_percent", 0444, d_tracer,
8998 tr, &buffer_percent_fops);
8999
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009000 create_trace_options_dir(tr);
9001
9002#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
Olivier Deprez157378f2022-04-04 15:47:50 +02009003 trace_create_maxlat_file(tr, d_tracer);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009004#endif
9005
9006 if (ftrace_create_function_files(tr, d_tracer))
Olivier Deprez157378f2022-04-04 15:47:50 +02009007 MEM_FAIL(1, "Could not allocate function filter files");
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009008
9009#ifdef CONFIG_TRACER_SNAPSHOT
9010 trace_create_file("snapshot", 0644, d_tracer,
9011 tr, &snapshot_fops);
9012#endif
9013
David Brazdil0f672f62019-12-10 10:32:29 +00009014 trace_create_file("error_log", 0644, d_tracer,
9015 tr, &tracing_err_log_fops);
9016
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009017 for_each_tracing_cpu(cpu)
9018 tracing_init_tracefs_percpu(tr, cpu);
9019
9020 ftrace_init_tracefs(tr, d_tracer);
9021}
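/*
 * The files created above form the per-instance control interface in
 * tracefs. As an illustration (not part of this file), selecting a tracer
 * and enabling the ring buffer from user space is just a matter of
 * writing those files; the paths assume the usual /sys/kernel/tracing
 * mount point:
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/sys/kernel/tracing/current_tracer", O_WRONLY);
 *	write(fd, "nop", 3);
 *	close(fd);
 *
 *	fd = open("/sys/kernel/tracing/tracing_on", O_WRONLY);
 *	write(fd, "1", 1);
 *	close(fd);
 *
 * "current_tracer" is backed by set_tracer_fops and "tracing_on" by
 * rb_simple_fops, both wired up in init_tracer_tracefs() above.
 */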
9022
9023static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9024{
9025 struct vfsmount *mnt;
9026 struct file_system_type *type;
9027
9028 /*
9029 * To maintain backward compatibility for tools that mount
9030 * debugfs to get to the tracing facility, tracefs is automatically
9031 * mounted to the debugfs/tracing directory.
9032 */
9033 type = get_fs_type("tracefs");
9034 if (!type)
9035 return NULL;
9036 mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9037 put_filesystem(type);
9038 if (IS_ERR(mnt))
9039 return NULL;
9040 mntget(mnt);
9041
9042 return mnt;
9043}
9044
9045/**
9046 * tracing_init_dentry - initialize top level trace array
9047 *
9048 * This is called when creating files or directories in the tracing
9049 * directory. It is called via fs_initcall() by any of the boot up code
9050 * and expects to return the dentry of the top level tracing directory.
9051 */
Olivier Deprez157378f2022-04-04 15:47:50 +02009052int tracing_init_dentry(void)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009053{
9054 struct trace_array *tr = &global_trace;
9055
Olivier Deprez0e641232021-09-23 10:07:05 +02009056 if (security_locked_down(LOCKDOWN_TRACEFS)) {
Olivier Deprez157378f2022-04-04 15:47:50 +02009057 pr_warn("Tracing disabled due to lockdown\n");
9058 return -EPERM;
Olivier Deprez0e641232021-09-23 10:07:05 +02009059 }
9060
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009061 /* The top level trace array uses NULL as parent */
9062 if (tr->dir)
Olivier Deprez157378f2022-04-04 15:47:50 +02009063 return 0;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009064
Olivier Deprez157378f2022-04-04 15:47:50 +02009065 if (WARN_ON(!tracefs_initialized()))
9066 return -ENODEV;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009067
9068 /*
9069 * As there may still be users that expect the tracing
9070 * files to exist in debugfs/tracing, we must automount
9071 * the tracefs file system there, so older tools still
9072 * work with the newer kernel.
9073 */
9074 tr->dir = debugfs_create_automount("tracing", NULL,
9075 trace_automount, NULL);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009076
Olivier Deprez157378f2022-04-04 15:47:50 +02009077 return 0;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009078}
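/*
 * Compatibility sketch (illustrative only): because of the automount set
 * up above, a tool that still opens the old debugfs path sees the same
 * tracefs files as one using /sys/kernel/tracing directly.
 *
 *	#include <fcntl.h>
 *
 *	int old_fd = open("/sys/kernel/debug/tracing/trace", O_RDONLY);
 *	int new_fd = open("/sys/kernel/tracing/trace", O_RDONLY);
 *
 * Walking into debugfs/tracing triggers trace_automount(), so both
 * descriptors end up on the same tracefs "trace" file.
 */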
9079
9080extern struct trace_eval_map *__start_ftrace_eval_maps[];
9081extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9082
9083static void __init trace_eval_init(void)
9084{
9085 int len;
9086
9087 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9088 trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9089}
9090
9091#ifdef CONFIG_MODULES
9092static void trace_module_add_evals(struct module *mod)
9093{
9094 if (!mod->num_trace_evals)
9095 return;
9096
9097 /*
9098 * Modules with bad taint do not have events created; do
9099 * not bother with enums either.
9100 */
9101 if (trace_module_has_bad_taint(mod))
9102 return;
9103
9104 trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9105}
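/*
 * Where module eval maps come from (illustrative sketch, not part of this
 * file): a module's trace event header can use TRACE_DEFINE_ENUM() so
 * that enum symbols are printed by name in the trace output. Those
 * entries are what mod->trace_evals points to when the module notifier
 * below calls trace_module_add_evals(). The enum and its values here are
 * made up for the example.
 *
 *	enum sample_state { SAMPLE_IDLE, SAMPLE_BUSY };
 *
 *	TRACE_DEFINE_ENUM(SAMPLE_IDLE);
 *	TRACE_DEFINE_ENUM(SAMPLE_BUSY);
 *
 * On MODULE_STATE_COMING the maps are fed into trace_insert_eval_map();
 * on MODULE_STATE_GOING they are removed again (CONFIG_TRACE_EVAL_MAP_FILE).
 */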
9106
9107#ifdef CONFIG_TRACE_EVAL_MAP_FILE
9108static void trace_module_remove_evals(struct module *mod)
9109{
9110 union trace_eval_map_item *map;
9111 union trace_eval_map_item **last = &trace_eval_maps;
9112
9113 if (!mod->num_trace_evals)
9114 return;
9115
9116 mutex_lock(&trace_eval_mutex);
9117
9118 map = trace_eval_maps;
9119
9120 while (map) {
9121 if (map->head.mod == mod)
9122 break;
9123 map = trace_eval_jmp_to_tail(map);
9124 last = &map->tail.next;
9125 map = map->tail.next;
9126 }
9127 if (!map)
9128 goto out;
9129
9130 *last = trace_eval_jmp_to_tail(map)->tail.next;
9131 kfree(map);
9132 out:
9133 mutex_unlock(&trace_eval_mutex);
9134}
9135#else
9136static inline void trace_module_remove_evals(struct module *mod) { }
9137#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9138
9139static int trace_module_notify(struct notifier_block *self,
9140 unsigned long val, void *data)
9141{
9142 struct module *mod = data;
9143
9144 switch (val) {
9145 case MODULE_STATE_COMING:
9146 trace_module_add_evals(mod);
9147 break;
9148 case MODULE_STATE_GOING:
9149 trace_module_remove_evals(mod);
9150 break;
9151 }
9152
Olivier Deprez157378f2022-04-04 15:47:50 +02009153 return NOTIFY_OK;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009154}
9155
9156static struct notifier_block trace_module_nb = {
9157 .notifier_call = trace_module_notify,
9158 .priority = 0,
9159};
9160#endif /* CONFIG_MODULES */
9161
9162static __init int tracer_init_tracefs(void)
9163{
Olivier Deprez157378f2022-04-04 15:47:50 +02009164 int ret;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009165
9166 trace_access_lock_init();
9167
Olivier Deprez157378f2022-04-04 15:47:50 +02009168 ret = tracing_init_dentry();
9169 if (ret)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009170 return 0;
9171
9172 event_trace_init();
9173
Olivier Deprez157378f2022-04-04 15:47:50 +02009174 init_tracer_tracefs(&global_trace, NULL);
9175 ftrace_init_tracefs_toplevel(&global_trace, NULL);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009176
Olivier Deprez157378f2022-04-04 15:47:50 +02009177 trace_create_file("tracing_thresh", 0644, NULL,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009178 &global_trace, &tracing_thresh_fops);
9179
Olivier Deprez157378f2022-04-04 15:47:50 +02009180 trace_create_file("README", 0444, NULL,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009181 NULL, &tracing_readme_fops);
9182
Olivier Deprez157378f2022-04-04 15:47:50 +02009183 trace_create_file("saved_cmdlines", 0444, NULL,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009184 NULL, &tracing_saved_cmdlines_fops);
9185
Olivier Deprez157378f2022-04-04 15:47:50 +02009186 trace_create_file("saved_cmdlines_size", 0644, NULL,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009187 NULL, &tracing_saved_cmdlines_size_fops);
9188
Olivier Deprez157378f2022-04-04 15:47:50 +02009189 trace_create_file("saved_tgids", 0444, NULL,
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009190 NULL, &tracing_saved_tgids_fops);
9191
9192 trace_eval_init();
9193
Olivier Deprez157378f2022-04-04 15:47:50 +02009194 trace_create_eval_file(NULL);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009195
9196#ifdef CONFIG_MODULES
9197 register_module_notifier(&trace_module_nb);
9198#endif
9199
9200#ifdef CONFIG_DYNAMIC_FTRACE
Olivier Deprez157378f2022-04-04 15:47:50 +02009201 trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9202 NULL, &tracing_dyn_info_fops);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009203#endif
9204
Olivier Deprez157378f2022-04-04 15:47:50 +02009205 create_trace_instances(NULL);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009206
9207 update_tracer_options(&global_trace);
9208
9209 return 0;
9210}
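/*
 * A few of the files registered above exist only at the top level and are
 * not duplicated per instance. Illustrative user-space access, assuming
 * the usual /sys/kernel/tracing mount point:
 *
 *	#include <stdio.h>
 *
 *	FILE *f = fopen("/sys/kernel/tracing/saved_cmdlines_size", "w");
 *	fprintf(f, "1024\n");
 *	fclose(f);
 *
 * This resizes the saved cmdlines buffer backing "saved_cmdlines", while
 * "saved_tgids", "README" and (with CONFIG_DYNAMIC_FTRACE)
 * "dyn_ftrace_total_info" are read-only counterparts created above.
 */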
9211
9212static int trace_panic_handler(struct notifier_block *this,
9213 unsigned long event, void *unused)
9214{
9215 if (ftrace_dump_on_oops)
9216 ftrace_dump(ftrace_dump_on_oops);
9217 return NOTIFY_OK;
9218}
9219
9220static struct notifier_block trace_panic_notifier = {
9221 .notifier_call = trace_panic_handler,
9222 .next = NULL,
9223 .priority = 150 /* priority: INT_MAX >= x >= 0 */
9224};
9225
9226static int trace_die_handler(struct notifier_block *self,
9227 unsigned long val,
9228 void *data)
9229{
9230 switch (val) {
9231 case DIE_OOPS:
9232 if (ftrace_dump_on_oops)
9233 ftrace_dump(ftrace_dump_on_oops);
9234 break;
9235 default:
9236 break;
9237 }
9238 return NOTIFY_OK;
9239}
9240
9241static struct notifier_block trace_die_notifier = {
9242 .notifier_call = trace_die_handler,
9243 .priority = 200
9244};
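/*
 * The two notifiers above only act when ftrace_dump_on_oops is non-zero.
 * That is normally set with the "ftrace_dump_on_oops" (or
 * "ftrace_dump_on_oops=orig_cpu") kernel command line option; it can also
 * be flipped at run time through the sysctl. Illustrative only:
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/proc/sys/kernel/ftrace_dump_on_oops", O_WRONLY);
 *	write(fd, "1", 1);
 *	close(fd);
 *
 * A value of 1 dumps every CPU buffer on an oops or panic (DUMP_ALL); a
 * value of 2 dumps only the originating CPU's buffer (DUMP_ORIG).
 */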
9245
9246/*
9247 * printk is limited to a max of 1024; we really don't need it that big.
9248 * Nothing should be printing 1000 characters anyway.
9249 */
9250#define TRACE_MAX_PRINT 1000
9251
9252/*
9253 * Define here KERN_TRACE so that we have one place to modify
9254 * it if we decide to change what log level the ftrace dump
9255 * should be at.
9256 */
9257#define KERN_TRACE KERN_EMERG
9258
9259void
9260trace_printk_seq(struct trace_seq *s)
9261{
9262 /* Probably should print a warning here. */
9263 if (s->seq.len >= TRACE_MAX_PRINT)
9264 s->seq.len = TRACE_MAX_PRINT;
9265
9266 /*
9267 * More paranoid code. Although the buffer size is set to
9268 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9269 * an extra layer of protection.
9270 */
9271 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9272 s->seq.len = s->seq.size - 1;
9273
9274	/* should be zero terminated, but we are paranoid. */
9275 s->buffer[s->seq.len] = 0;
9276
9277 printk(KERN_TRACE "%s", s->buffer);
9278
9279 trace_seq_init(s);
9280}
9281
9282void trace_init_global_iter(struct trace_iterator *iter)
9283{
9284 iter->tr = &global_trace;
9285 iter->trace = iter->tr->current_trace;
9286 iter->cpu_file = RING_BUFFER_ALL_CPUS;
Olivier Deprez157378f2022-04-04 15:47:50 +02009287 iter->array_buffer = &global_trace.array_buffer;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009288
9289 if (iter->trace && iter->trace->open)
9290 iter->trace->open(iter);
9291
9292 /* Annotate start of buffers if we had overruns */
Olivier Deprez157378f2022-04-04 15:47:50 +02009293 if (ring_buffer_overruns(iter->array_buffer->buffer))
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009294 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9295
9296 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9297 if (trace_clocks[iter->tr->clock_id].in_ns)
9298 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9299}
9300
9301void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9302{
9303 /* use static because iter can be a bit big for the stack */
9304 static struct trace_iterator iter;
9305 static atomic_t dump_running;
9306 struct trace_array *tr = &global_trace;
9307 unsigned int old_userobj;
9308 unsigned long flags;
9309 int cnt = 0, cpu;
9310
9311 /* Only allow one dump user at a time. */
9312 if (atomic_inc_return(&dump_running) != 1) {
9313 atomic_dec(&dump_running);
9314 return;
9315 }
9316
9317 /*
9318 * Always turn off tracing when we dump.
9319 * We don't need to show trace output of what happens
9320 * between multiple crashes.
9321 *
9322 * If the user does a sysrq-z, then they can re-enable
9323 * tracing with echo 1 > tracing_on.
9324 */
9325 tracing_off();
9326
9327 local_irq_save(flags);
9328 printk_nmi_direct_enter();
9329
9330 /* Simulate the iterator */
9331 trace_init_global_iter(&iter);
Olivier Deprez157378f2022-04-04 15:47:50 +02009332 /* Can not use kmalloc for iter.temp */
9333 iter.temp = static_temp_buf;
9334 iter.temp_size = STATIC_TEMP_BUF_SIZE;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009335
9336 for_each_tracing_cpu(cpu) {
Olivier Deprez157378f2022-04-04 15:47:50 +02009337 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009338 }
9339
9340 old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9341
9342 /* don't look at user memory in panic mode */
9343 tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9344
9345 switch (oops_dump_mode) {
9346 case DUMP_ALL:
9347 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9348 break;
9349 case DUMP_ORIG:
9350 iter.cpu_file = raw_smp_processor_id();
9351 break;
9352 case DUMP_NONE:
9353 goto out_enable;
9354 default:
9355 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9356 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9357 }
9358
9359 printk(KERN_TRACE "Dumping ftrace buffer:\n");
9360
9361 /* Did function tracer already get disabled? */
9362 if (ftrace_is_dead()) {
9363 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9364 printk("# MAY BE MISSING FUNCTION EVENTS\n");
9365 }
9366
9367 /*
Olivier Deprez157378f2022-04-04 15:47:50 +02009368	 * We need to stop all tracing on all CPUs to read
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009369	 * the next buffer. This is a bit expensive, but it is
9370	 * not done often. We read all that we can,
9371	 * and then release the locks again.
9372 */
9373
9374 while (!trace_empty(&iter)) {
9375
9376 if (!cnt)
9377 printk(KERN_TRACE "---------------------------------\n");
9378
9379 cnt++;
9380
David Brazdil0f672f62019-12-10 10:32:29 +00009381 trace_iterator_reset(&iter);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009382 iter.iter_flags |= TRACE_FILE_LAT_FMT;
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009383
9384 if (trace_find_next_entry_inc(&iter) != NULL) {
9385 int ret;
9386
9387 ret = print_trace_line(&iter);
9388 if (ret != TRACE_TYPE_NO_CONSUME)
9389 trace_consume(&iter);
9390 }
9391 touch_nmi_watchdog();
9392
9393 trace_printk_seq(&iter.seq);
9394 }
9395
9396 if (!cnt)
9397 printk(KERN_TRACE " (ftrace buffer empty)\n");
9398 else
9399 printk(KERN_TRACE "---------------------------------\n");
9400
9401 out_enable:
9402 tr->trace_flags |= old_userobj;
9403
9404 for_each_tracing_cpu(cpu) {
Olivier Deprez157378f2022-04-04 15:47:50 +02009405 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009406 }
9407 atomic_dec(&dump_running);
9408 printk_nmi_direct_exit();
9409 local_irq_restore(flags);
9410}
9411EXPORT_SYMBOL_GPL(ftrace_dump);
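/*
 * Debugging sketch (illustrative only): ftrace_dump() can be called
 * directly from kernel code that has hit an unexpected state; as
 * described above it dumps the ring buffer to the console and leaves
 * tracing turned off. "broken_invariant" is a stand-in condition, not a
 * real symbol.
 *
 *	if (WARN_ON_ONCE(broken_invariant))
 *		ftrace_dump(DUMP_ALL);
 *
 * From user space the same dump can be triggered with sysrq-z, e.g. by
 * writing 'z' to /proc/sysrq-trigger while the sysrq handler is enabled.
 */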
9412
9413int trace_run_command(const char *buf, int (*createfn)(int, char **))
9414{
9415 char **argv;
9416 int argc, ret;
9417
9418 argc = 0;
9419 ret = 0;
9420 argv = argv_split(GFP_KERNEL, buf, &argc);
9421 if (!argv)
9422 return -ENOMEM;
9423
9424 if (argc)
9425 ret = createfn(argc, argv);
9426
9427 argv_free(argv);
9428
9429 return ret;
9430}
9431
9432#define WRITE_BUFSIZE 4096
9433
9434ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9435 size_t count, loff_t *ppos,
9436 int (*createfn)(int, char **))
9437{
9438 char *kbuf, *buf, *tmp;
9439 int ret = 0;
9440 size_t done = 0;
9441 size_t size;
9442
9443 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9444 if (!kbuf)
9445 return -ENOMEM;
9446
9447 while (done < count) {
9448 size = count - done;
9449
9450 if (size >= WRITE_BUFSIZE)
9451 size = WRITE_BUFSIZE - 1;
9452
9453 if (copy_from_user(kbuf, buffer + done, size)) {
9454 ret = -EFAULT;
9455 goto out;
9456 }
9457 kbuf[size] = '\0';
9458 buf = kbuf;
9459 do {
9460 tmp = strchr(buf, '\n');
9461 if (tmp) {
9462 *tmp = '\0';
9463 size = tmp - buf + 1;
9464 } else {
9465 size = strlen(buf);
9466 if (done + size < count) {
9467 if (buf != kbuf)
9468 break;
9469 /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9470 pr_warn("Line length is too long: Should be less than %d\n",
9471 WRITE_BUFSIZE - 2);
9472 ret = -EINVAL;
9473 goto out;
9474 }
9475 }
9476 done += size;
9477
9478 /* Remove comments */
9479 tmp = strchr(buf, '#');
9480
9481 if (tmp)
9482 *tmp = '\0';
9483
9484 ret = trace_run_command(buf, createfn);
9485 if (ret)
9486 goto out;
9487 buf += size;
9488
9489 } while (done < count);
9490 }
9491 ret = done;
9492
9493out:
9494 kfree(kbuf);
9495
9496 return ret;
9497}
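/*
 * Sketch of a createfn callback (illustrative only, not used by this
 * file): trace_parse_run_command() splits the user buffer into lines,
 * strips '#' comments, and hands each remaining line to the callback as
 * an argc/argv pair produced by argv_split(). A write of
 * "add foo 10\nadd bar 20\n" therefore results in two callbacks.
 *
 *	static int sample_createfn(int argc, char **argv)
 *	{
 *		if (argc < 2)
 *			return -EINVAL;
 *		pr_info("cmd=%s arg0=%s\n", argv[0], argv[1]);
 *		return 0;
 *	}
 *
 * A non-zero return value aborts processing of the rest of the input, as
 * the loop above shows.
 */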
9498
9499__init static int tracer_alloc_buffers(void)
9500{
9501 int ring_buf_size;
9502 int ret = -ENOMEM;
9503
Olivier Deprez0e641232021-09-23 10:07:05 +02009504
9505 if (security_locked_down(LOCKDOWN_TRACEFS)) {
Olivier Deprez157378f2022-04-04 15:47:50 +02009506 pr_warn("Tracing disabled due to lockdown\n");
Olivier Deprez0e641232021-09-23 10:07:05 +02009507 return -EPERM;
9508 }
9509
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009510 /*
Olivier Deprez157378f2022-04-04 15:47:50 +02009511 * Make sure we don't accidentally add more trace options
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009512 * than we have bits for.
9513 */
9514 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9515
9516 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9517 goto out;
9518
9519 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9520 goto out_free_buffer_mask;
9521
9522 /* Only allocate trace_printk buffers if a trace_printk exists */
Olivier Deprez0e641232021-09-23 10:07:05 +02009523 if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009524 /* Must be called before global_trace.buffer is allocated */
9525 trace_printk_init_buffers();
9526
9527 /* To save memory, keep the ring buffer size to its minimum */
9528 if (ring_buffer_expanded)
9529 ring_buf_size = trace_buf_size;
9530 else
9531 ring_buf_size = 1;
9532
9533 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9534 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9535
9536 raw_spin_lock_init(&global_trace.start_lock);
9537
9538 /*
9539	 * The prepare callback allocates some memory for the ring buffer. We
Olivier Deprez157378f2022-04-04 15:47:50 +02009540 * don't free the buffer if the CPU goes down. If we were to free
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009541 * the buffer, then the user would lose any trace that was in the
9542 * buffer. The memory will be removed once the "instance" is removed.
9543 */
9544 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9545				      "trace/RB:prepare", trace_rb_cpu_prepare,
9546 NULL);
9547 if (ret < 0)
9548 goto out_free_cpumask;
9549 /* Used for event triggers */
9550 ret = -ENOMEM;
9551 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9552 if (!temp_buffer)
9553 goto out_rm_hp_state;
9554
9555 if (trace_create_savedcmd() < 0)
9556 goto out_free_temp_buffer;
9557
9558 /* TODO: make the number of buffers hot pluggable with CPUS */
9559 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
Olivier Deprez157378f2022-04-04 15:47:50 +02009560 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009561 goto out_free_savedcmd;
9562 }
9563
9564 if (global_trace.buffer_disabled)
9565 tracing_off();
9566
9567 if (trace_boot_clock) {
9568 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9569 if (ret < 0)
9570 pr_warn("Trace clock %s not defined, going back to default\n",
9571 trace_boot_clock);
9572 }
9573
9574 /*
9575 * register_tracer() might reference current_trace, so it
9576 * needs to be set before we register anything. This is
9577 * just a bootstrap of current_trace anyway.
9578 */
9579 global_trace.current_trace = &nop_trace;
9580
9581 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9582
9583 ftrace_init_global_array_ops(&global_trace);
9584
9585 init_trace_flags_index(&global_trace);
9586
9587 register_tracer(&nop_trace);
9588
9589 /* Function tracing may start here (via kernel command line) */
9590 init_function_trace();
9591
9592 /* All seems OK, enable tracing */
9593 tracing_disabled = 0;
9594
9595 atomic_notifier_chain_register(&panic_notifier_list,
9596 &trace_panic_notifier);
9597
9598 register_die_notifier(&trace_die_notifier);
9599
9600 global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9601
9602 INIT_LIST_HEAD(&global_trace.systems);
9603 INIT_LIST_HEAD(&global_trace.events);
9604 INIT_LIST_HEAD(&global_trace.hist_vars);
David Brazdil0f672f62019-12-10 10:32:29 +00009605 INIT_LIST_HEAD(&global_trace.err_log);
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009606 list_add(&global_trace.list, &ftrace_trace_arrays);
9607
9608 apply_trace_boot_options();
9609
9610 register_snapshot_cmd();
9611
9612 return 0;
9613
9614out_free_savedcmd:
9615 free_saved_cmdlines_buffer(savedcmd);
9616out_free_temp_buffer:
9617 ring_buffer_free(temp_buffer);
9618out_rm_hp_state:
9619 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9620out_free_cpumask:
9621 free_cpumask_var(global_trace.tracing_cpumask);
9622out_free_buffer_mask:
9623 free_cpumask_var(tracing_buffer_mask);
9624out:
9625 return ret;
9626}
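/*
 * Note on the minimal boot-time buffer (see ring_buf_size above): the
 * ring buffer stays at its minimum size until tracing is actually used.
 * It is expanded automatically when a tracer or event is enabled, or
 * explicitly from user space, e.g. (illustrative only, usual tracefs
 * mount point assumed):
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/sys/kernel/tracing/buffer_size_kb", O_WRONLY);
 *	write(fd, "4096", 4);
 *	close(fd);
 *
 * This sets each per-CPU buffer to 4 MB and marks ring_buffer_expanded.
 */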
9627
9628void __init early_trace_init(void)
9629{
9630 if (tracepoint_printk) {
9631 tracepoint_print_iter =
9632 kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
Olivier Deprez157378f2022-04-04 15:47:50 +02009633 if (MEM_FAIL(!tracepoint_print_iter,
9634 "Failed to allocate trace iterator\n"))
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009635 tracepoint_printk = 0;
9636 else
9637 static_key_enable(&tracepoint_printk_key.key);
9638 }
9639 tracer_alloc_buffers();
9640}
9641
9642void __init trace_init(void)
9643{
9644 trace_event_init();
9645}
9646
9647__init static int clear_boot_tracer(void)
9648{
9649 /*
9650 * The name of the default bootup tracer lives in an init section,
9651 * which is freed after boot. This function runs as a late initcall;
9652 * if the boot tracer was never registered, clear the pointer so that
9653 * a later registration does not access the init memory that is
9654 * about to be freed.
9655 */
9656 if (!default_bootup_tracer)
9657 return 0;
9658
9659 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9660 default_bootup_tracer);
9661 default_bootup_tracer = NULL;
9662
9663 return 0;
9664}
9665
9666fs_initcall(tracer_init_tracefs);
9667late_initcall_sync(clear_boot_tracer);
9668
9669#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9670__init static int tracing_set_default_clock(void)
9671{
9672 /* sched_clock_stable() is determined in late_initcall */
9673 if (!trace_boot_clock && !sched_clock_stable()) {
Olivier Deprez0e641232021-09-23 10:07:05 +02009674 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9675 pr_warn("Can not set tracing clock due to lockdown\n");
9676 return -EPERM;
9677 }
9678
Andrew Scullb4b6d4a2019-01-02 15:54:55 +00009679 printk(KERN_WARNING
9680 "Unstable clock detected, switching default tracing clock to \"global\"\n"
9681 "If you want to keep using the local clock, then add:\n"
9682 " \"trace_clock=local\"\n"
9683 "on the kernel command line\n");
9684 tracing_set_clock(&global_trace, "global");
9685 }
9686
9687 return 0;
9688}
9689late_initcall_sync(tracing_set_default_clock);
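/*
 * Related usage (illustrative only): besides the "trace_clock=" boot
 * parameter mentioned above, the clock can be changed at run time through
 * the "trace_clock" file created in init_tracer_tracefs():
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fd = open("/sys/kernel/tracing/trace_clock", O_WRONLY);
 *	write(fd, "global", 6);
 *	close(fd);
 *
 * Reading the same file lists the available clocks, with the current one
 * shown in brackets.
 */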
9690#endif