Update Linux to v5.4.148
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.4.148.tar.gz
Change-Id: Ib3d26c5ba9b022e2e03533005c4fed4d7c30b61b
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 6a0ee91..5240ba9 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -160,7 +160,8 @@
#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
static int tracing_set_tracer(struct trace_array *tr, const char *buf);
-static void ftrace_trace_userstack(struct ring_buffer *buffer,
+static void ftrace_trace_userstack(struct trace_array *tr,
+ struct ring_buffer *buffer,
unsigned long flags, int pc);
#define MAX_TRACER_SIZE 100
@@ -1743,6 +1744,7 @@
pr_info("Running postponed tracer tests:\n");
+ tracing_selftest_running = true;
list_for_each_entry_safe(p, n, &postponed_selftests, list) {
/* This loop can take minutes when sanitizers are enabled, so
* lets make sure we allow RCU processing.
@@ -1765,6 +1767,7 @@
list_del(&p->list);
kfree(p);
}
+ tracing_selftest_running = false;
out:
mutex_unlock(&trace_types_lock);
@@ -1804,6 +1807,12 @@
return -1;
}
+ if (security_locked_down(LOCKDOWN_TRACEFS)) {
+ pr_warning("Can not register tracer %s due to lockdown\n",
+ type->name);
+ return -EPERM;
+ }
+
mutex_lock(&trace_types_lock);
tracing_selftest_running = true;
@@ -1925,8 +1934,15 @@
}
}
+/*
+ * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
+ * is the tgid last observed corresponding to pid=i.
+ */
static int *tgid_map;
+/* The maximum valid index into tgid_map. */
+static size_t tgid_map_max;
+
#define SAVED_CMDLINES_DEFAULT 128
#define NO_CMDLINE_MAP UINT_MAX
static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
@@ -1939,9 +1955,6 @@
};
static struct saved_cmdlines_buffer *savedcmd;
-/* temporary disable recording */
-static atomic_t trace_record_taskinfo_disabled __read_mostly;
-
static inline char *get_saved_cmdlines(int idx)
{
return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
@@ -2131,14 +2144,13 @@
static int trace_save_cmdline(struct task_struct *tsk)
{
- unsigned pid, idx;
+ unsigned tpid, idx;
/* treat recording of idle task as a success */
if (!tsk->pid)
return 1;
- if (unlikely(tsk->pid > PID_MAX_DEFAULT))
- return 0;
+ tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
/*
* It's not the end of the world if we don't get
@@ -2149,26 +2161,15 @@
if (!arch_spin_trylock(&trace_cmdline_lock))
return 0;
- idx = savedcmd->map_pid_to_cmdline[tsk->pid];
+ idx = savedcmd->map_pid_to_cmdline[tpid];
if (idx == NO_CMDLINE_MAP) {
idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
- /*
- * Check whether the cmdline buffer at idx has a pid
- * mapped. We are going to overwrite that entry so we
- * need to clear the map_pid_to_cmdline. Otherwise we
- * would read the new comm for the old pid.
- */
- pid = savedcmd->map_cmdline_to_pid[idx];
- if (pid != NO_CMDLINE_MAP)
- savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
-
- savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
- savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
-
+ savedcmd->map_pid_to_cmdline[tpid] = idx;
savedcmd->cmdline_idx = idx;
}
+ savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
set_cmdline(idx, tsk->comm);
arch_spin_unlock(&trace_cmdline_lock);
@@ -2179,6 +2180,7 @@
static void __trace_find_cmdline(int pid, char comm[])
{
unsigned map;
+ int tpid;
if (!pid) {
strcpy(comm, "<idle>");
@@ -2190,16 +2192,16 @@
return;
}
- if (pid > PID_MAX_DEFAULT) {
- strcpy(comm, "<...>");
- return;
+ tpid = pid & (PID_MAX_DEFAULT - 1);
+ map = savedcmd->map_pid_to_cmdline[tpid];
+ if (map != NO_CMDLINE_MAP) {
+ tpid = savedcmd->map_cmdline_to_pid[map];
+ if (tpid == pid) {
+ strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
+ return;
+ }
}
-
- map = savedcmd->map_pid_to_cmdline[pid];
- if (map != NO_CMDLINE_MAP)
- strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
- else
- strcpy(comm, "<...>");
+ strcpy(comm, "<...>");
}
void trace_find_cmdline(int pid, char comm[])
@@ -2213,24 +2215,41 @@
preempt_enable();
}
+static int *trace_find_tgid_ptr(int pid)
+{
+ /*
+ * Pairs with the smp_store_release in set_tracer_flag() to ensure that
+ * if we observe a non-NULL tgid_map then we also observe the correct
+ * tgid_map_max.
+ */
+ int *map = smp_load_acquire(&tgid_map);
+
+ if (unlikely(!map || pid > tgid_map_max))
+ return NULL;
+
+ return &map[pid];
+}
+
int trace_find_tgid(int pid)
{
- if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
- return 0;
+ int *ptr = trace_find_tgid_ptr(pid);
- return tgid_map[pid];
+ return ptr ? *ptr : 0;
}
static int trace_save_tgid(struct task_struct *tsk)
{
+ int *ptr;
+
/* treat recording of idle task as a success */
if (!tsk->pid)
return 1;
- if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
+ ptr = trace_find_tgid_ptr(tsk->pid);
+ if (!ptr)
return 0;
- tgid_map[tsk->pid] = tsk->tgid;
+ *ptr = tsk->tgid;
return 1;
}
@@ -2238,8 +2257,6 @@
{
if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
return true;
- if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
- return true;
if (!__this_cpu_read(trace_taskinfo_save))
return true;
return false;
@@ -2489,7 +2506,7 @@
(entry = this_cpu_read(trace_buffered_event))) {
/* Try to use the per cpu buffer first */
val = this_cpu_inc_return(trace_buffered_event_cnt);
- if (val == 1) {
+ if ((len < (PAGE_SIZE - sizeof(*entry) - sizeof(entry->array[0]))) && val == 1) {
trace_event_setup(entry, type, flags, pc);
entry->array[0] = len;
return entry;
@@ -2502,7 +2519,7 @@
/*
* If tracing is off, but we have triggers enabled
* we still need to look at the event data. Use the temp_buffer
- * to store the trace event for the tigger to use. It's recusive
+ * to store the trace event for the trigger to use. It's recursive
* safe and will not be recorded anywhere.
*/
if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
@@ -2613,7 +2630,7 @@
* two. They are not that meaningful.
*/
ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
- ftrace_trace_userstack(buffer, flags, pc);
+ ftrace_trace_userstack(tr, buffer, flags, pc);
}
/*
@@ -2824,7 +2841,7 @@
stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
/* This should never happen. If it does, yell once and skip */
- if (WARN_ON_ONCE(stackidx > FTRACE_KSTACK_NESTING))
+ if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
goto out;
/*
@@ -2848,7 +2865,8 @@
size = nr_entries * sizeof(unsigned long);
event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
- sizeof(*entry) + size, flags, pc);
+ (sizeof(*entry) - sizeof(entry->caller)) + size,
+ flags, pc);
if (!event)
goto out;
entry = ring_buffer_event_data(event);
@@ -2928,13 +2946,14 @@
static DEFINE_PER_CPU(int, user_stack_count);
static void
-ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
+ftrace_trace_userstack(struct trace_array *tr,
+ struct ring_buffer *buffer, unsigned long flags, int pc)
{
struct trace_event_call *call = &event_user_stack;
struct ring_buffer_event *event;
struct userstack_entry *entry;
- if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
+ if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
return;
/*
@@ -2973,7 +2992,8 @@
preempt_enable();
}
#else /* CONFIG_USER_STACKTRACE_SUPPORT */
-static void ftrace_trace_userstack(struct ring_buffer *buffer,
+static void ftrace_trace_userstack(struct trace_array *tr,
+ struct ring_buffer *buffer,
unsigned long flags, int pc)
{
}
@@ -3004,7 +3024,7 @@
/* Interrupts must see nesting incremented before we use the buffer */
barrier();
- return &buffer->buffer[buffer->nesting][0];
+ return &buffer->buffer[buffer->nesting - 1][0];
}
static void put_trace_buf(void)
@@ -3225,6 +3245,9 @@
if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
return 0;
+ if (!tr)
+ return -ENOENT;
+
va_start(ap, fmt);
ret = trace_array_vprintk(tr, ip, fmt, ap);
va_end(ap);
@@ -3456,9 +3479,6 @@
return ERR_PTR(-EBUSY);
#endif
- if (!iter->snapshot)
- atomic_inc(&trace_record_taskinfo_disabled);
-
if (*pos != iter->pos) {
iter->ent = NULL;
iter->cpu = 0;
@@ -3501,9 +3521,6 @@
return;
#endif
- if (!iter->snapshot)
- atomic_dec(&trace_record_taskinfo_disabled);
-
trace_access_unlock(iter->cpu_file);
trace_event_read_unlock();
}
@@ -3573,14 +3590,14 @@
static void print_lat_help_header(struct seq_file *m)
{
- seq_puts(m, "# _------=> CPU# \n"
- "# / _-----=> irqs-off \n"
- "# | / _----=> need-resched \n"
- "# || / _---=> hardirq/softirq \n"
- "# ||| / _--=> preempt-depth \n"
- "# |||| / delay \n"
- "# cmd pid ||||| time | caller \n"
- "# \\ / ||||| \\ | / \n");
+ seq_puts(m, "# _------=> CPU# \n"
+ "# / _-----=> irqs-off \n"
+ "# | / _----=> need-resched \n"
+ "# || / _---=> hardirq/softirq \n"
+ "# ||| / _--=> preempt-depth \n"
+ "# |||| / delay \n"
+ "# cmd pid ||||| time | caller \n"
+ "# \\ / ||||| \\ | / \n");
}
static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
@@ -3601,26 +3618,26 @@
print_event_info(buf, m);
- seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? "TGID " : "");
- seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
+ seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
+ seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
}
static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
unsigned int flags)
{
bool tgid = flags & TRACE_ITER_RECORD_TGID;
- const char *space = " ";
- int prec = tgid ? 10 : 2;
+ const char *space = " ";
+ int prec = tgid ? 12 : 2;
print_event_info(buf, m);
- seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space);
- seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
- seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
- seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
- seq_printf(m, "# %.*s||| / delay\n", prec, space);
- seq_printf(m, "# TASK-PID %.*sCPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID ");
- seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | ");
+ seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space);
+ seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
+ seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
+ seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
+ seq_printf(m, "# %.*s||| / delay\n", prec, space);
+ seq_printf(m, "# TASK-PID %.*s CPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID ");
+ seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | ");
}
void
@@ -4590,6 +4607,12 @@
int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
{
+ int *map;
+
+ if ((mask == TRACE_ITER_RECORD_TGID) ||
+ (mask == TRACE_ITER_RECORD_CMD))
+ lockdep_assert_held(&event_mutex);
+
/* do nothing if flag is already set */
if (!!(tr->trace_flags & mask) == !!enabled)
return 0;
@@ -4608,10 +4631,19 @@
trace_event_enable_cmd_record(enabled);
if (mask == TRACE_ITER_RECORD_TGID) {
- if (!tgid_map)
- tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
- sizeof(*tgid_map),
- GFP_KERNEL);
+ if (!tgid_map) {
+ tgid_map_max = pid_max;
+ map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
+ GFP_KERNEL);
+
+ /*
+ * Pairs with smp_load_acquire() in
+ * trace_find_tgid_ptr() to ensure that if it observes
+ * the tgid_map we just allocated then it also observes
+ * the corresponding tgid_map_max value.
+ */
+ smp_store_release(&tgid_map, map);
+ }
if (!tgid_map) {
tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
return -ENOMEM;
@@ -4657,6 +4689,7 @@
cmp += len;
+ mutex_lock(&event_mutex);
mutex_lock(&trace_types_lock);
ret = match_string(trace_options, -1, cmp);
@@ -4667,6 +4700,7 @@
ret = set_tracer_flag(tr, 1 << ret, !neg);
mutex_unlock(&trace_types_lock);
+ mutex_unlock(&event_mutex);
/*
* If the first trailing whitespace is replaced with '\0' by strstrip,
@@ -4941,6 +4975,10 @@
"\t [:name=histname1]\n"
"\t [:<handler>.<action>]\n"
"\t [if <filter>]\n\n"
+ "\t Note, special fields can be used as well:\n"
+ "\t common_timestamp - to record current timestamp\n"
+ "\t common_cpu - to record the CPU the event happened on\n"
+ "\n"
"\t When a matching event is hit, an entry is added to a hash\n"
"\t table using the key(s) and value(s) named, and the value of a\n"
"\t sum called 'hitcount' is incremented. Keys and values\n"
@@ -5014,37 +5052,16 @@
static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
{
- int *ptr = v;
+ int pid = ++(*pos);
- if (*pos || m->count)
- ptr++;
-
- (*pos)++;
-
- for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
- if (trace_find_tgid(*ptr))
- return ptr;
- }
-
- return NULL;
+ return trace_find_tgid_ptr(pid);
}
static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
{
- void *v;
- loff_t l = 0;
+ int pid = *pos;
- if (!tgid_map)
- return NULL;
-
- v = &tgid_map[0];
- while (l <= *pos) {
- v = saved_tgids_next(m, v, &l);
- if (!v)
- return NULL;
- }
-
- return v;
+ return trace_find_tgid_ptr(pid);
}
static void saved_tgids_stop(struct seq_file *m, void *v)
@@ -5053,9 +5070,14 @@
static int saved_tgids_show(struct seq_file *m, void *v)
{
- int pid = (int *)v - tgid_map;
+ int *entry = (int *)v;
+ int pid = entry - tgid_map;
+ int tgid = *entry;
- seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
+ if (tgid == 0)
+ return SEQ_SKIP;
+
+ seq_printf(m, "%d %d\n", pid, tgid);
return 0;
}
@@ -5672,7 +5694,7 @@
}
/* If trace pipe files are being read, we can't change the tracer */
- if (tr->current_trace->ref) {
+ if (tr->trace_ref) {
ret = -EBUSY;
goto out;
}
@@ -5888,7 +5910,7 @@
nonseekable_open(inode, filp);
- tr->current_trace->ref++;
+ tr->trace_ref++;
out:
mutex_unlock(&trace_types_lock);
return ret;
@@ -5907,7 +5929,7 @@
mutex_lock(&trace_types_lock);
- tr->current_trace->ref--;
+ tr->trace_ref--;
if (iter->trace->pipe_close)
iter->trace->pipe_close(iter);
@@ -7216,7 +7238,7 @@
filp->private_data = info;
- tr->current_trace->ref++;
+ tr->trace_ref++;
mutex_unlock(&trace_types_lock);
@@ -7317,7 +7339,7 @@
mutex_lock(&trace_types_lock);
- iter->tr->current_trace->ref--;
+ iter->tr->trace_ref--;
__trace_array_put(iter->tr);
@@ -7972,9 +7994,11 @@
if (val != 0 && val != 1)
return -EINVAL;
+ mutex_lock(&event_mutex);
mutex_lock(&trace_types_lock);
ret = set_tracer_flag(tr, 1 << index, val);
mutex_unlock(&trace_types_lock);
+ mutex_unlock(&event_mutex);
if (ret < 0)
return ret;
@@ -8302,6 +8326,19 @@
*/
allocate_snapshot = false;
#endif
+
+ /*
+ * Because of some magic with the way alloc_percpu() works on
+ * x86_64, we need to synchronize the pgd of all the tables,
+ * otherwise the trace events that happen in x86_64 page fault
+ * handlers can't cope with accessing the chance that a
+ * alloc_percpu()'d memory might be touched in the page fault trace
+ * event. Oh, and we need to audit all other alloc_percpu() and vmalloc()
+ * calls in tracing, because something might get triggered within a
+ * page fault trace event!
+ */
+ vmalloc_sync_mappings();
+
return 0;
}
@@ -8441,7 +8478,7 @@
{
int i;
- if (tr->ref || (tr->current_trace && tr->current_trace->ref))
+ if (tr->ref || (tr->current_trace && tr->trace_ref))
return -EBUSY;
list_del(&tr->list);
@@ -8473,17 +8510,26 @@
return 0;
}
-int trace_array_destroy(struct trace_array *tr)
+int trace_array_destroy(struct trace_array *this_tr)
{
+ struct trace_array *tr;
int ret;
- if (!tr)
+ if (!this_tr)
return -EINVAL;
mutex_lock(&event_mutex);
mutex_lock(&trace_types_lock);
- ret = __remove_instance(tr);
+ ret = -ENODEV;
+
+ /* Making sure trace array exists before destroying it. */
+ list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+ if (tr == this_tr) {
+ ret = __remove_instance(tr);
+ break;
+ }
+ }
mutex_unlock(&trace_types_lock);
mutex_unlock(&event_mutex);
@@ -8639,6 +8685,11 @@
{
struct trace_array *tr = &global_trace;
+ if (security_locked_down(LOCKDOWN_TRACEFS)) {
+ pr_warning("Tracing disabled due to lockdown\n");
+ return ERR_PTR(-EPERM);
+ }
+
/* The top level trace array uses NULL as parent */
if (tr->dir)
return NULL;
@@ -9081,6 +9132,12 @@
int ring_buf_size;
int ret = -ENOMEM;
+
+ if (security_locked_down(LOCKDOWN_TRACEFS)) {
+ pr_warning("Tracing disabled due to lockdown\n");
+ return -EPERM;
+ }
+
/*
* Make sure we don't accidently add more trace options
* than we have bits for.
@@ -9094,7 +9151,7 @@
goto out_free_buffer_mask;
/* Only allocate trace_printk buffers if a trace_printk exists */
- if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
+ if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
/* Must be called before global_trace.buffer is allocated */
trace_printk_init_buffers();
@@ -9245,6 +9302,11 @@
{
/* sched_clock_stable() is determined in late_initcall */
if (!trace_boot_clock && !sched_clock_stable()) {
+ if (security_locked_down(LOCKDOWN_TRACEFS)) {
+ pr_warn("Can not set tracing clock due to lockdown\n");
+ return -EPERM;
+ }
+
printk(KERN_WARNING
"Unstable clock detected, switching default tracing clock to \"global\"\n"
"If you want to keep using the local clock, then add:\n"