Update Linux to v5.10.157
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.157.tar.xz
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
Change-Id: I7b30d9e98d8c465d6b44de8e7433b4a40b3289ba
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index b89ff18..15a376f 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -800,12 +800,12 @@
#endif
static u64
-blk_trace_request_get_cgid(struct request_queue *q, struct request *rq)
+blk_trace_request_get_cgid(struct request *rq)
{
if (!rq->bio)
return 0;
/* Use the first bio */
- return blk_trace_bio_get_cgid(q, rq->bio);
+ return blk_trace_bio_get_cgid(rq->q, rq->bio);
}
/*
@@ -846,40 +846,35 @@
rcu_read_unlock();
}
-static void blk_add_trace_rq_insert(void *ignore,
- struct request_queue *q, struct request *rq)
+static void blk_add_trace_rq_insert(void *ignore, struct request *rq)
{
blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_INSERT,
- blk_trace_request_get_cgid(q, rq));
+ blk_trace_request_get_cgid(rq));
}
-static void blk_add_trace_rq_issue(void *ignore,
- struct request_queue *q, struct request *rq)
+static void blk_add_trace_rq_issue(void *ignore, struct request *rq)
{
blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_ISSUE,
- blk_trace_request_get_cgid(q, rq));
+ blk_trace_request_get_cgid(rq));
}
-static void blk_add_trace_rq_merge(void *ignore,
- struct request_queue *q, struct request *rq)
+static void blk_add_trace_rq_merge(void *ignore, struct request *rq)
{
blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_BACKMERGE,
- blk_trace_request_get_cgid(q, rq));
+ blk_trace_request_get_cgid(rq));
}
-static void blk_add_trace_rq_requeue(void *ignore,
- struct request_queue *q,
- struct request *rq)
+static void blk_add_trace_rq_requeue(void *ignore, struct request *rq)
{
blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_REQUEUE,
- blk_trace_request_get_cgid(q, rq));
+ blk_trace_request_get_cgid(rq));
}
static void blk_add_trace_rq_complete(void *ignore, struct request *rq,
int error, unsigned int nr_bytes)
{
blk_add_trace_rq(rq, error, nr_bytes, BLK_TA_COMPLETE,
- blk_trace_request_get_cgid(rq->q, rq));
+ blk_trace_request_get_cgid(rq));
}
/**
@@ -1087,16 +1082,14 @@
* Add a trace for that action.
*
**/
-static void blk_add_trace_rq_remap(void *ignore,
- struct request_queue *q,
- struct request *rq, dev_t dev,
+static void blk_add_trace_rq_remap(void *ignore, struct request *rq, dev_t dev,
sector_t from)
{
struct blk_trace *bt;
struct blk_io_trace_remap r;
rcu_read_lock();
- bt = rcu_dereference(q->blk_trace);
+ bt = rcu_dereference(rq->q->blk_trace);
if (likely(!bt)) {
rcu_read_unlock();
return;
@@ -1107,14 +1100,13 @@
r.sector_from = cpu_to_be64(from);
__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
- rq_data_dir(rq), 0, BLK_TA_REMAP, 0,
- sizeof(r), &r, blk_trace_request_get_cgid(q, rq));
+ req_op(rq), rq->cmd_flags, BLK_TA_REMAP, 0,
+ sizeof(r), &r, blk_trace_request_get_cgid(rq));
rcu_read_unlock();
}
/**
* blk_add_driver_data - Add binary message with driver-specific data
- * @q: queue the io is for
* @rq: io request
* @data: driver-specific data
* @len: length of driver-specific data
@@ -1123,14 +1115,12 @@
* Some drivers might want to write driver-specific data per request.
*
**/
-void blk_add_driver_data(struct request_queue *q,
- struct request *rq,
- void *data, size_t len)
+void blk_add_driver_data(struct request *rq, void *data, size_t len)
{
struct blk_trace *bt;
rcu_read_lock();
- bt = rcu_dereference(q->blk_trace);
+ bt = rcu_dereference(rq->q->blk_trace);
if (likely(!bt)) {
rcu_read_unlock();
return;
@@ -1138,7 +1128,7 @@
__blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0,
BLK_TA_DRV_DATA, 0, len, data,
- blk_trace_request_get_cgid(q, rq));
+ blk_trace_request_get_cgid(rq));
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(blk_add_driver_data);
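
As background for the blktrace hunks above: every struct request already carries a pointer to its owning queue in rq->q, so the explicit request_queue parameter was redundant. A minimal userspace sketch of the idea (illustrative only, not part of the patch; the types and the cgid value are made-up stand-ins, not the kernel's definitions):

    #include <stdio.h>

    struct request_queue { int id; };
    struct bio { int unused; };
    struct request {
        struct request_queue *q;   /* owning queue, as in the kernel */
        struct bio *bio;           /* first bio of the request */
    };

    /* stand-in for blk_trace_bio_get_cgid() */
    static unsigned long long bio_get_cgid(struct request_queue *q, struct bio *bio)
    {
        (void)q; (void)bio;
        return 42;                 /* made-up cgroup id */
    }

    /* after the change: the queue is read from rq->q instead of a parameter */
    static unsigned long long request_get_cgid(struct request *rq)
    {
        if (!rq->bio)
            return 0;
        return bio_get_cgid(rq->q, rq->bio);   /* use the first bio */
    }

    int main(void)
    {
        struct request_queue q = { .id = 1 };
        struct bio b = { 0 };
        struct request rq = { .q = &q, .bio = &b };

        printf("cgid=%llu\n", request_get_cgid(&rq));
        return 0;
    }
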
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 4a5d35d..d97c189 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1295,6 +1295,7 @@
if (!ftrace_mod)
return -ENOMEM;
+ INIT_LIST_HEAD(&ftrace_mod->list);
ftrace_mod->func = kstrdup(func, GFP_KERNEL);
ftrace_mod->module = kstrdup(module, GFP_KERNEL);
ftrace_mod->enable = enable;
@@ -2899,6 +2900,16 @@
ftrace_startup_enable(command);
+ /*
+ * If ftrace is in an undefined state, we just remove the ops from the
+ * list to prevent the NULL pointer dereference, instead of totally
+ * rolling it back and freeing the trampoline, because those actions
+ * could cause further damage.
+ */
+ if (unlikely(ftrace_disabled)) {
+ __unregister_ftrace_function(ops);
+ return -ENODEV;
+ }
+
ops->flags &= ~FTRACE_OPS_FL_ADDING;
return 0;
@@ -2936,18 +2947,8 @@
command |= FTRACE_UPDATE_TRACE_FUNC;
}
- if (!command || !ftrace_enabled) {
- /*
- * If these are dynamic or per_cpu ops, they still
- * need their data freed. Since, function tracing is
- * not currently active, we can just free them
- * without synchronizing all CPUs.
- */
- if (ops->flags & FTRACE_OPS_FL_DYNAMIC)
- goto free_ops;
-
- return 0;
- }
+ if (!command || !ftrace_enabled)
+ goto out;
/*
* If the ops uses a trampoline, then it needs to be
@@ -2984,6 +2985,7 @@
removed_ops = NULL;
ops->flags &= ~FTRACE_OPS_FL_REMOVING;
+out:
/*
* Dynamic ops may be freed, we must make sure that all
* callers are done before leaving this function.
@@ -3011,7 +3013,6 @@
if (IS_ENABLED(CONFIG_PREEMPTION))
synchronize_rcu_tasks();
- free_ops:
ftrace_trampoline_free(ops);
}
@@ -3178,7 +3179,7 @@
/* if we can't allocate this size, try something smaller */
if (!order)
return -ENOMEM;
- order >>= 1;
+ order--;
goto again;
}
@@ -4427,7 +4428,7 @@
* @ip: The instruction pointer address to remove the data from
*
* Returns the data if it is found, otherwise NULL.
- * Note, if the data pointer is used as the data itself, (see
+ * Note, if the data pointer is used as the data itself, (see
* ftrace_func_mapper_find_ip(), then the return value may be meaningless,
* if the data pointer was set to zero.
*/
@@ -5153,8 +5154,6 @@
__add_hash_entry(direct_functions, entry);
ret = ftrace_set_filter_ip(&direct_ops, ip, 0, 0);
- if (ret)
- remove_hash_entry(direct_functions, entry);
if (!ret && !(direct_ops.flags & FTRACE_OPS_FL_ENABLED)) {
ret = register_ftrace_function(&direct_ops);
@@ -5163,6 +5162,7 @@
}
if (ret) {
+ remove_hash_entry(direct_functions, entry);
kfree(entry);
if (!direct->count) {
list_del_rcu(&direct->next);
@@ -5653,8 +5653,12 @@
if (filter_hash) {
orig_hash = &iter->ops->func_hash->filter_hash;
- if (iter->tr && !list_empty(&iter->tr->mod_trace))
- iter->hash->flags |= FTRACE_HASH_FL_MOD;
+ if (iter->tr) {
+ if (list_empty(&iter->tr->mod_trace))
+ iter->hash->flags &= ~FTRACE_HASH_FL_MOD;
+ else
+ iter->hash->flags |= FTRACE_HASH_FL_MOD;
+ }
} else
orig_hash = &iter->ops->func_hash->notrace_hash;
@@ -6874,7 +6878,7 @@
}
pr_info("ftrace: allocating %ld entries in %ld pages\n",
- count, count / ENTRIES_PER_PAGE + 1);
+ count, DIV_ROUND_UP(count, ENTRIES_PER_PAGE));
last_ftrace_enabled = ftrace_enabled = 1;
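
Two of the ftrace hunks above are pure arithmetic fixes: the page-count message used count / ENTRIES_PER_PAGE + 1, which over-reports by one whenever count is an exact multiple of a page, and the allocation fallback halved order (skipping sizes) instead of decrementing it. A small userspace demo of both (not part of the patch; the entries-per-page value is made up):

    #include <stdio.h>

    /* same macro as the kernel's DIV_ROUND_UP */
    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
        long entries_per_page = 455;   /* hypothetical */
        long count = 455 * 4;          /* exact multiple of a page */

        printf("old message: %ld pages\n", count / entries_per_page + 1); /* 5 */
        printf("fixed message: %ld pages\n",
               DIV_ROUND_UP(count, entries_per_page));                    /* 4 */

        /* fallback after a failed allocation of 2^order pages: */
        for (int order = 7; order > 0; order >>= 1)
            printf("old path retries order %d\n", order);   /* 7, 3, 1 */
        for (int order = 7; order > 0; order--)
            printf("new path retries order %d\n", order);   /* 7, 6, ..., 1 */
        return 0;
    }
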
diff --git a/kernel/trace/kprobe_event_gen_test.c b/kernel/trace/kprobe_event_gen_test.c
index 18b0f1c..c736487 100644
--- a/kernel/trace/kprobe_event_gen_test.c
+++ b/kernel/trace/kprobe_event_gen_test.c
@@ -35,6 +35,49 @@
static struct trace_event_file *gen_kprobe_test;
static struct trace_event_file *gen_kretprobe_test;
+#define KPROBE_GEN_TEST_FUNC "do_sys_open"
+
+/* X86 */
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_32)
+#define KPROBE_GEN_TEST_ARG0 "dfd=%ax"
+#define KPROBE_GEN_TEST_ARG1 "filename=%dx"
+#define KPROBE_GEN_TEST_ARG2 "flags=%cx"
+#define KPROBE_GEN_TEST_ARG3 "mode=+4($stack)"
+
+/* ARM64 */
+#elif defined(CONFIG_ARM64)
+#define KPROBE_GEN_TEST_ARG0 "dfd=%x0"
+#define KPROBE_GEN_TEST_ARG1 "filename=%x1"
+#define KPROBE_GEN_TEST_ARG2 "flags=%x2"
+#define KPROBE_GEN_TEST_ARG3 "mode=%x3"
+
+/* ARM */
+#elif defined(CONFIG_ARM)
+#define KPROBE_GEN_TEST_ARG0 "dfd=%r0"
+#define KPROBE_GEN_TEST_ARG1 "filename=%r1"
+#define KPROBE_GEN_TEST_ARG2 "flags=%r2"
+#define KPROBE_GEN_TEST_ARG3 "mode=%r3"
+
+/* RISCV */
+#elif defined(CONFIG_RISCV)
+#define KPROBE_GEN_TEST_ARG0 "dfd=%a0"
+#define KPROBE_GEN_TEST_ARG1 "filename=%a1"
+#define KPROBE_GEN_TEST_ARG2 "flags=%a2"
+#define KPROBE_GEN_TEST_ARG3 "mode=%a3"
+
+/* others */
+#else
+#define KPROBE_GEN_TEST_ARG0 NULL
+#define KPROBE_GEN_TEST_ARG1 NULL
+#define KPROBE_GEN_TEST_ARG2 NULL
+#define KPROBE_GEN_TEST_ARG3 NULL
+#endif
+
+static bool trace_event_file_is_valid(struct trace_event_file *input)
+{
+ return input && !IS_ERR(input);
+}
+
/*
* Test to make sure we can create a kprobe event, then add more
* fields.
@@ -58,23 +101,23 @@
* fields.
*/
ret = kprobe_event_gen_cmd_start(&cmd, "gen_kprobe_test",
- "do_sys_open",
- "dfd=%ax", "filename=%dx");
+ KPROBE_GEN_TEST_FUNC,
+ KPROBE_GEN_TEST_ARG0, KPROBE_GEN_TEST_ARG1);
if (ret)
- goto free;
+ goto out;
/* Use kprobe_event_add_fields to add the rest of the fields */
- ret = kprobe_event_add_fields(&cmd, "flags=%cx", "mode=+4($stack)");
+ ret = kprobe_event_add_fields(&cmd, KPROBE_GEN_TEST_ARG2, KPROBE_GEN_TEST_ARG3);
if (ret)
- goto free;
+ goto out;
/*
* This actually creates the event.
*/
ret = kprobe_event_gen_cmd_end(&cmd);
if (ret)
- goto free;
+ goto out;
/*
* Now get the gen_kprobe_test event file. We need to prevent
@@ -97,13 +140,13 @@
goto delete;
}
out:
+ kfree(buf);
return ret;
delete:
+ if (trace_event_file_is_valid(gen_kprobe_test))
+ gen_kprobe_test = NULL;
/* We got an error after creating the event, delete it */
ret = kprobe_event_delete("gen_kprobe_test");
- free:
- kfree(buf);
-
goto out;
}
@@ -128,17 +171,17 @@
* Define the kretprobe event.
*/
ret = kretprobe_event_gen_cmd_start(&cmd, "gen_kretprobe_test",
- "do_sys_open",
+ KPROBE_GEN_TEST_FUNC,
"$retval");
if (ret)
- goto free;
+ goto out;
/*
* This actually creates the event.
*/
ret = kretprobe_event_gen_cmd_end(&cmd);
if (ret)
- goto free;
+ goto out;
/*
* Now get the gen_kretprobe_test event file. We need to
@@ -162,13 +205,13 @@
goto delete;
}
out:
+ kfree(buf);
return ret;
delete:
+ if (trace_event_file_is_valid(gen_kretprobe_test))
+ gen_kretprobe_test = NULL;
/* We got an error after creating the event, delete it */
ret = kprobe_event_delete("gen_kretprobe_test");
- free:
- kfree(buf);
-
goto out;
}
@@ -182,10 +225,12 @@
ret = test_gen_kretprobe_cmd();
if (ret) {
- WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr,
- "kprobes",
- "gen_kretprobe_test", false));
- trace_put_event_file(gen_kretprobe_test);
+ if (trace_event_file_is_valid(gen_kretprobe_test)) {
+ WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr,
+ "kprobes",
+ "gen_kretprobe_test", false));
+ trace_put_event_file(gen_kretprobe_test);
+ }
WARN_ON(kprobe_event_delete("gen_kretprobe_test"));
}
@@ -194,24 +239,30 @@
static void __exit kprobe_event_gen_test_exit(void)
{
- /* Disable the event or you can't remove it */
- WARN_ON(trace_array_set_clr_event(gen_kprobe_test->tr,
- "kprobes",
- "gen_kprobe_test", false));
+ if (trace_event_file_is_valid(gen_kprobe_test)) {
+ /* Disable the event or you can't remove it */
+ WARN_ON(trace_array_set_clr_event(gen_kprobe_test->tr,
+ "kprobes",
+ "gen_kprobe_test", false));
- /* Now give the file and instance back */
- trace_put_event_file(gen_kprobe_test);
+ /* Now give the file and instance back */
+ trace_put_event_file(gen_kprobe_test);
+ }
+
/* Now unregister and free the event */
WARN_ON(kprobe_event_delete("gen_kprobe_test"));
- /* Disable the event or you can't remove it */
- WARN_ON(trace_array_set_clr_event(gen_kprobe_test->tr,
- "kprobes",
- "gen_kretprobe_test", false));
+ if (trace_event_file_is_valid(gen_kretprobe_test)) {
+ /* Disable the event or you can't remove it */
+ WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr,
+ "kprobes",
+ "gen_kretprobe_test", false));
- /* Now give the file and instance back */
- trace_put_event_file(gen_kretprobe_test);
+ /* Now give the file and instance back */
+ trace_put_event_file(gen_kretprobe_test);
+ }
+
/* Now unregister and free the event */
WARN_ON(kprobe_event_delete("gen_kretprobe_test"));
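
The recurring guard in the test changes above, trace_event_file_is_valid(), exists because the file lookup can hand back either NULL or an ERR_PTR-encoded errno, and both must be rejected before use. A self-contained userspace model (the ERR_PTR/IS_ERR encoding is re-implemented here for illustration):

    #include <stdio.h>
    #include <errno.h>

    /* kernel convention: the top 4095 pointer values encode -errno */
    #define MAX_ERRNO 4095
    static inline void *ERR_PTR(long error) { return (void *)error; }
    static inline int IS_ERR(const void *ptr)
    {
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
    }

    struct trace_event_file { int unused; };

    static int trace_event_file_is_valid(struct trace_event_file *input)
    {
        return input && !IS_ERR(input);
    }

    int main(void)
    {
        struct trace_event_file ok = { 0 };

        printf("real file: %d\n", trace_event_file_is_valid(&ok));            /* 1 */
        printf("NULL: %d\n", trace_event_file_is_valid(NULL));                /* 0 */
        printf("ERR_PTR: %d\n", trace_event_file_is_valid(ERR_PTR(-ENODEV))); /* 0 */
        return 0;
    }
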
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 6deac66..49ebb8c 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -414,6 +414,7 @@
struct irq_work work;
wait_queue_head_t waiters;
wait_queue_head_t full_waiters;
+ long wait_index;
bool waiters_pending;
bool full_waiters_pending;
bool wakeup_full;
@@ -516,6 +517,7 @@
local_t committing;
local_t commits;
local_t pages_touched;
+ local_t pages_lost;
local_t pages_read;
long last_pages_touch;
size_t shortest_full;
@@ -770,10 +772,18 @@
size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu)
{
size_t read;
+ size_t lost;
size_t cnt;
read = local_read(&buffer->buffers[cpu]->pages_read);
+ lost = local_read(&buffer->buffers[cpu]->pages_lost);
cnt = local_read(&buffer->buffers[cpu]->pages_touched);
+
+ if (WARN_ON_ONCE(cnt < lost))
+ return 0;
+
+ cnt -= lost;
+
/* The reader can read an empty page, but not more than that */
if (cnt < read) {
WARN_ON_ONCE(read > cnt + 1);
@@ -783,6 +793,21 @@
return cnt - read;
}
+static __always_inline bool full_hit(struct trace_buffer *buffer, int cpu, int full)
+{
+ struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
+ size_t nr_pages;
+ size_t dirty;
+
+ nr_pages = cpu_buffer->nr_pages;
+ if (!nr_pages || !full)
+ return true;
+
+ dirty = ring_buffer_nr_dirty_pages(buffer, cpu);
+
+ return (dirty * 100) > (full * nr_pages);
+}
+
/*
* rb_wake_up_waiters - wake up tasks waiting for ring buffer input
*
@@ -794,13 +819,45 @@
struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work);
wake_up_all(&rbwork->waiters);
- if (rbwork->wakeup_full) {
+ if (rbwork->full_waiters_pending || rbwork->wakeup_full) {
rbwork->wakeup_full = false;
+ rbwork->full_waiters_pending = false;
wake_up_all(&rbwork->full_waiters);
}
}
/**
+ * ring_buffer_wake_waiters - wake up any waiters on this ring buffer
+ * @buffer: The ring buffer to wake waiters on
+ * @cpu: The CPU buffer to wake waiters on, or RING_BUFFER_ALL_CPUS for all
+ *
+ * When a file that represents a ring buffer is closing,
+ * it is prudent to wake up any waiters that are on it.
+ */
+void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ struct rb_irq_work *rbwork;
+
+ if (cpu == RING_BUFFER_ALL_CPUS) {
+
+ /* Wake up individual ones too. One level recursion */
+ for_each_buffer_cpu(buffer, cpu)
+ ring_buffer_wake_waiters(buffer, cpu);
+
+ rbwork = &buffer->irq_work;
+ } else {
+ cpu_buffer = buffer->buffers[cpu];
+ rbwork = &cpu_buffer->irq_work;
+ }
+
+ rbwork->wait_index++;
+ /* make sure the waiters see the new index */
+ smp_wmb();
+
+ rb_wake_up_waiters(&rbwork->work);
+}
+
+/**
* ring_buffer_wait - wait for input to the ring buffer
* @buffer: buffer to wait on
* @cpu: the cpu buffer to wait on
@@ -815,6 +872,7 @@
struct ring_buffer_per_cpu *cpu_buffer;
DEFINE_WAIT(wait);
struct rb_irq_work *work;
+ long wait_index;
int ret = 0;
/*
@@ -833,6 +891,7 @@
work = &cpu_buffer->irq_work;
}
+ wait_index = READ_ONCE(work->wait_index);
while (true) {
if (full)
@@ -877,26 +936,29 @@
!ring_buffer_empty_cpu(buffer, cpu)) {
unsigned long flags;
bool pagebusy;
- size_t nr_pages;
- size_t dirty;
+ bool done;
if (!full)
break;
raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
- nr_pages = cpu_buffer->nr_pages;
- dirty = ring_buffer_nr_dirty_pages(buffer, cpu);
+ done = !pagebusy && full_hit(buffer, cpu, full);
+
if (!cpu_buffer->shortest_full ||
- cpu_buffer->shortest_full < full)
+ cpu_buffer->shortest_full > full)
cpu_buffer->shortest_full = full;
raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
- if (!pagebusy &&
- (!nr_pages || (dirty * 100) > full * nr_pages))
+ if (done)
break;
}
schedule();
+
+ /* Make sure to see the new wait index */
+ smp_rmb();
+ if (wait_index != work->wait_index)
+ break;
}
if (full)
@@ -913,6 +975,7 @@
* @cpu: the cpu buffer to wait on
* @filp: the file descriptor
* @poll_table: The poll descriptor
+ * @full: wait until this percentage of pages is available, if @cpu != RING_BUFFER_ALL_CPUS
*
* If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
* as data is added to any of the @buffer's cpu buffers. Otherwise
@@ -922,14 +985,15 @@
* zero otherwise.
*/
__poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
- struct file *filp, poll_table *poll_table)
+ struct file *filp, poll_table *poll_table, int full)
{
struct ring_buffer_per_cpu *cpu_buffer;
struct rb_irq_work *work;
- if (cpu == RING_BUFFER_ALL_CPUS)
+ if (cpu == RING_BUFFER_ALL_CPUS) {
work = &buffer->irq_work;
- else {
+ full = 0;
+ } else {
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return -EINVAL;
@@ -937,8 +1001,14 @@
work = &cpu_buffer->irq_work;
}
- poll_wait(filp, &work->waiters, poll_table);
- work->waiters_pending = true;
+ if (full) {
+ poll_wait(filp, &work->full_waiters, poll_table);
+ work->full_waiters_pending = true;
+ } else {
+ poll_wait(filp, &work->waiters, poll_table);
+ work->waiters_pending = true;
+ }
+
/*
* There's a tight race between setting the waiters_pending and
* checking if the ring buffer is empty. Once the waiters_pending bit
@@ -954,6 +1024,9 @@
*/
smp_mb();
+ if (full)
+ return full_hit(buffer, cpu, full) ? EPOLLIN | EPOLLRDNORM : 0;
+
if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
(cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
return EPOLLIN | EPOLLRDNORM;
@@ -1595,9 +1668,9 @@
free_buffer_page(cpu_buffer->reader_page);
- rb_head_page_deactivate(cpu_buffer);
-
if (head) {
+ rb_head_page_deactivate(cpu_buffer);
+
list_for_each_entry_safe(bpage, tmp, head, list) {
list_del_init(&bpage->list);
free_buffer_page(bpage);
@@ -1833,6 +1906,7 @@
*/
local_add(page_entries, &cpu_buffer->overrun);
local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
+ local_inc(&cpu_buffer->pages_lost);
}
/*
@@ -2323,6 +2397,7 @@
*/
local_add(entries, &cpu_buffer->overrun);
local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
+ local_inc(&cpu_buffer->pages_lost);
/*
* The entries will be zeroed out when we move the
@@ -2491,6 +2566,9 @@
/* Mark the rest of the page with padding */
rb_event_set_padding(event);
+ /* Make sure the padding is visible before the write update */
+ smp_wmb();
+
/* Set the write back to the previous setting */
local_sub(length, &tail_page->write);
return;
@@ -2502,6 +2580,9 @@
/* time delta must be non zero */
event->time_delta = 1;
+ /* Make sure the padding is visible before the tail_page->write update */
+ smp_wmb();
+
/* Set write to end of buffer */
length = (tail + length) - BUF_PAGE_SIZE;
local_sub(length, &tail_page->write);
@@ -2987,10 +3068,6 @@
static __always_inline void
rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
{
- size_t nr_pages;
- size_t dirty;
- size_t full;
-
if (buffer->irq_work.waiters_pending) {
buffer->irq_work.waiters_pending = false;
/* irq_work_queue() supplies it's own memory barriers */
@@ -3014,10 +3091,7 @@
cpu_buffer->last_pages_touch = local_read(&cpu_buffer->pages_touched);
- full = cpu_buffer->shortest_full;
- nr_pages = cpu_buffer->nr_pages;
- dirty = ring_buffer_nr_dirty_pages(buffer, cpu_buffer->cpu);
- if (full && nr_pages && (dirty * 100) <= full * nr_pages)
+ if (!full_hit(buffer, cpu_buffer->cpu, cpu_buffer->shortest_full))
return;
cpu_buffer->irq_work.wakeup_full = true;
@@ -4316,6 +4390,33 @@
arch_spin_unlock(&cpu_buffer->lock);
local_irq_restore(flags);
+ /*
+ * The writer has preemption disabled, so wait for it, but not
+ * forever: 1 second is pretty much "forever".
+ */
+#define USECS_WAIT 1000000
+ for (nr_loops = 0; nr_loops < USECS_WAIT; nr_loops++) {
+ /* If the write is past the end of page, a writer is still updating it */
+ if (likely(!reader || rb_page_write(reader) <= BUF_PAGE_SIZE))
+ break;
+
+ udelay(1);
+
+ /* Get the latest version of the reader write value */
+ smp_rmb();
+ }
+
+ /* The writer is not moving forward? Something is wrong */
+ if (RB_WARN_ON(cpu_buffer, nr_loops == USECS_WAIT))
+ reader = NULL;
+
+ /*
+ * Make sure we see any padding after the write update
+ * (see rb_reset_tail())
+ */
+ smp_rmb();
+
+
return reader;
}
@@ -4891,6 +4992,7 @@
local_set(&cpu_buffer->committing, 0);
local_set(&cpu_buffer->commits, 0);
local_set(&cpu_buffer->pages_touched, 0);
+ local_set(&cpu_buffer->pages_lost, 0);
local_set(&cpu_buffer->pages_read, 0);
cpu_buffer->last_pages_touch = 0;
cpu_buffer->shortest_full = 0;
@@ -5341,7 +5443,15 @@
unsigned int pos = 0;
unsigned int size;
- if (full)
+ /*
+ * If a full page is expected, this can still be returned
+ * if there's been a previous partial read and the
+ * rest of the page can be read and the commit page is off
+ * the reader page.
+ */
+ if (full &&
+ (!read || (len < (commit - read)) ||
+ cpu_buffer->reader_page == cpu_buffer->commit_page))
goto out_unlock;
if (len > (commit - read))
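
The new full_hit() helper above centralizes the watermark test that the wait, poll, and wakeup paths previously each open-coded (inconsistently). A userspace sketch of the math, taking the dirty count as an input (the kernel computes it via ring_buffer_nr_dirty_pages(), now net of dropped pages; the buffer size here is arbitrary):

    #include <stdio.h>
    #include <stdbool.h>

    /* wake a waiter asking for `full` percent once dirty pages exceed
     * that share of the buffer; full == 0 means "any data at all" */
    static bool full_hit(size_t nr_pages, size_t dirty, int full)
    {
        if (!nr_pages || !full)
            return true;
        return (dirty * 100) > ((size_t)full * nr_pages);
    }

    int main(void)
    {
        size_t nr_pages = 8;

        for (size_t dirty = 0; dirty <= nr_pages; dirty++)
            printf("dirty %zu/%zu at full=50%%: %s\n", dirty, nr_pages,
                   full_hit(nr_pages, dirty, 50) ? "wake" : "wait");
        /* crosses from "wait" to "wake" at dirty == 5: 500 > 400 */
        return 0;
    }
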
diff --git a/kernel/trace/synth_event_gen_test.c b/kernel/trace/synth_event_gen_test.c
index edd912c..a6a2813 100644
--- a/kernel/trace/synth_event_gen_test.c
+++ b/kernel/trace/synth_event_gen_test.c
@@ -120,15 +120,13 @@
/* Now generate a gen_synth_test event */
ret = synth_event_trace_array(gen_synth_test, vals, ARRAY_SIZE(vals));
- out:
+ free:
+ kfree(buf);
return ret;
delete:
/* We got an error after creating the event, delete it */
synth_event_delete("gen_synth_test");
- free:
- kfree(buf);
-
- goto out;
+ goto free;
}
/*
@@ -227,15 +225,13 @@
/* Now trace an empty_synth_test event */
ret = synth_event_trace_array(empty_synth_test, vals, ARRAY_SIZE(vals));
- out:
+ free:
+ kfree(buf);
return ret;
delete:
/* We got an error after creating the event, delete it */
synth_event_delete("empty_synth_test");
- free:
- kfree(buf);
-
- goto out;
+ goto free;
}
static struct synth_field_desc create_synth_test_fields[] = {
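
The synth_event_gen_test hunks above rework the error unwinding so the command buffer is freed exactly once on every path, instead of leaking on the old out: return. A compact userspace rendition of the pattern (malloc/free stand in for kzalloc/kfree; the steps are placeholders):

    #include <stdio.h>
    #include <stdlib.h>

    static int do_step(int fail) { return fail ? -1 : 0; }

    static int run_test(int fail_after_create)
    {
        char *buf = malloc(64);    /* stands in for the dynevent cmd buffer */
        int ret;

        if (!buf)
            return -1;

        ret = do_step(0);                  /* build the command */
        if (ret)
            goto free;
        ret = do_step(fail_after_create);  /* use the created event */
        if (ret)
            goto delete;
    free:
        free(buf);                 /* the single point that frees buf */
        return ret;
    delete:
        /* error after the event was created: tear it down, then free */
        fprintf(stderr, "deleting half-created event\n");
        goto free;
    }

    int main(void)
    {
        printf("success path: %d\n", run_test(0));
        printf("error path: %d\n", run_test(1));
        return 0;
    }
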
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 953dd95..146771d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1197,12 +1197,14 @@
{
void *cond_data = NULL;
+ local_irq_disable();
arch_spin_lock(&tr->max_lock);
if (tr->cond_snapshot)
cond_data = tr->cond_snapshot->cond_data;
arch_spin_unlock(&tr->max_lock);
+ local_irq_enable();
return cond_data;
}
@@ -1338,9 +1340,11 @@
goto fail_unlock;
}
+ local_irq_disable();
arch_spin_lock(&tr->max_lock);
tr->cond_snapshot = cond_snapshot;
arch_spin_unlock(&tr->max_lock);
+ local_irq_enable();
mutex_unlock(&trace_types_lock);
@@ -1367,6 +1371,7 @@
{
int ret = 0;
+ local_irq_disable();
arch_spin_lock(&tr->max_lock);
if (!tr->cond_snapshot)
@@ -1377,6 +1382,7 @@
}
arch_spin_unlock(&tr->max_lock);
+ local_irq_enable();
return ret;
}
@@ -2198,6 +2204,11 @@
#define SAVED_CMDLINES_DEFAULT 128
#define NO_CMDLINE_MAP UINT_MAX
+/*
+ * Preemption must be disabled before acquiring trace_cmdline_lock.
+ * The various trace_arrays' max_lock must be acquired in a context
+ * where interrupt is disabled.
+ */
static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
struct saved_cmdlines_buffer {
unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
@@ -2410,7 +2421,11 @@
* the lock, but we also don't want to spin
* nor do we want to disable interrupts,
* so if we miss here, then better luck next time.
+ *
+ * This is called from within the scheduler and the wakeup path, so
+ * interrupts had better be disabled and the run queue lock held.
*/
+ lockdep_assert_preemption_disabled();
if (!arch_spin_trylock(&trace_cmdline_lock))
return 0;
@@ -2784,7 +2799,7 @@
}
EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
-static DEFINE_SPINLOCK(tracepoint_iter_lock);
+static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
static DEFINE_MUTEX(tracepoint_printk_mutex);
static void output_printk(struct trace_event_buffer *fbuffer)
@@ -2812,14 +2827,14 @@
event = &fbuffer->trace_file->event_call->event;
- spin_lock_irqsave(&tracepoint_iter_lock, flags);
+ raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
trace_seq_init(&iter->seq);
iter->ent = fbuffer->entry;
event_call->event.funcs->trace(iter, 0, event);
trace_seq_putc(&iter->seq, 0);
printk("%s", iter->seq.buffer);
- spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
+ raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
}
int tracepoint_printk_sysctl(struct ctl_table *table, int write,
@@ -5470,9 +5485,11 @@
char buf[64];
int r;
+ preempt_disable();
arch_spin_lock(&trace_cmdline_lock);
r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
arch_spin_unlock(&trace_cmdline_lock);
+ preempt_enable();
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
@@ -5497,10 +5514,12 @@
return -ENOMEM;
}
+ preempt_disable();
arch_spin_lock(&trace_cmdline_lock);
savedcmd_temp = savedcmd;
savedcmd = s;
arch_spin_unlock(&trace_cmdline_lock);
+ preempt_enable();
free_saved_cmdlines_buffer(savedcmd_temp);
return 0;
@@ -5907,12 +5926,18 @@
tr->current_trace = &nop_trace;
}
+static bool tracer_options_updated;
+
static void add_tracer_options(struct trace_array *tr, struct tracer *t)
{
/* Only enable if the directory has been created already. */
if (!tr->dir)
return;
+ /* Only create trace option files after update_tracer_options finish */
+ if (!tracer_options_updated)
+ return;
+
create_trace_option_files(tr, t);
}
@@ -5947,10 +5972,12 @@
#ifdef CONFIG_TRACER_SNAPSHOT
if (t->use_max_tr) {
+ local_irq_disable();
arch_spin_lock(&tr->max_lock);
if (tr->cond_snapshot)
ret = -EBUSY;
arch_spin_unlock(&tr->max_lock);
+ local_irq_enable();
if (ret)
goto out;
}
@@ -5981,12 +6008,12 @@
if (tr->current_trace->reset)
tr->current_trace->reset(tr);
+#ifdef CONFIG_TRACER_MAX_TRACE
+ had_max_tr = tr->current_trace->use_max_tr;
+
/* Current trace needs to be nop_trace before synchronize_rcu */
tr->current_trace = &nop_trace;
-#ifdef CONFIG_TRACER_MAX_TRACE
- had_max_tr = tr->allocated_snapshot;
-
if (had_max_tr && !t->use_max_tr) {
/*
* We need to make sure that the update_max_tr sees that
@@ -5998,14 +6025,14 @@
synchronize_rcu();
free_snapshot(tr);
}
-#endif
-#ifdef CONFIG_TRACER_MAX_TRACE
- if (t->use_max_tr && !had_max_tr) {
+ if (t->use_max_tr && !tr->allocated_snapshot) {
ret = tracing_alloc_snapshot_instance(tr);
if (ret < 0)
goto out;
}
+#else
+ tr->current_trace = &nop_trace;
#endif
if (t->init) {
@@ -6236,7 +6263,7 @@
return EPOLLIN | EPOLLRDNORM;
else
return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
- filp, poll_table);
+ filp, poll_table, iter->tr->buffer_percent);
}
static __poll_t
@@ -7024,10 +7051,12 @@
goto out;
}
+ local_irq_disable();
arch_spin_lock(&tr->max_lock);
if (tr->cond_snapshot)
ret = -EBUSY;
arch_spin_unlock(&tr->max_lock);
+ local_irq_enable();
if (ret)
goto out;
@@ -8649,6 +8678,7 @@
static void update_tracer_options(struct trace_array *tr)
{
mutex_lock(&trace_types_lock);
+ tracer_options_updated = true;
__update_tracer_options(tr);
mutex_unlock(&trace_types_lock);
}
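
Most of the trace.c hunks above wrap arch_spin_lock(&tr->max_lock) in local_irq_disable()/local_irq_enable() (or preempt_disable() for trace_cmdline_lock): arch spinlocks do not disable preemption or interrupts themselves, so a holder can otherwise be preempted, or re-entered from interrupt context, and deadlock. A userspace sketch of the pattern, with a pthread spinlock standing in for arch_spinlock_t and no-op placeholders for the kernel-only irq primitives:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_spinlock_t max_lock;   /* stand-in for tr->max_lock */

    static void local_irq_disable(void) { /* kernel primitive; no-op here */ }
    static void local_irq_enable(void)  { /* kernel primitive; no-op here */ }

    /* modeled on tracing_cond_snapshot_data(), heavily simplified */
    static void *cond_snapshot_data(void *snapshot)
    {
        void *cond_data;

        local_irq_disable();               /* added by the patch */
        pthread_spin_lock(&max_lock);
        cond_data = snapshot;              /* the read done under the lock */
        pthread_spin_unlock(&max_lock);
        local_irq_enable();                /* added by the patch */
        return cond_data;
    }

    int main(void)
    {
        int data = 7;

        pthread_spin_init(&max_lock, PTHREAD_PROCESS_PRIVATE);
        printf("cond_data=%d\n", *(int *)cond_snapshot_data(&data));
        pthread_spin_destroy(&max_lock);
        return 0;
    }
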
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 7cc5f0a..826ecf0 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -168,6 +168,7 @@
__generic_field(int, CPU, FILTER_CPU);
__generic_field(int, cpu, FILTER_CPU);
+ __generic_field(int, common_cpu, FILTER_CPU);
__generic_field(char *, COMM, FILTER_COMM);
__generic_field(char *, comm, FILTER_COMM);
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index eb72006..fd54168 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -1789,8 +1789,11 @@
return err;
free:
kfree(ref_field->system);
+ ref_field->system = NULL;
kfree(ref_field->event_name);
+ ref_field->event_name = NULL;
kfree(ref_field->name);
+ ref_field->name = NULL;
goto out;
}
@@ -3940,6 +3943,8 @@
s = kstrdup(field_str, GFP_KERNEL);
if (!s) {
+ kfree(hist_data->attrs->var_defs.name[n_vars]);
+ hist_data->attrs->var_defs.name[n_vars] = NULL;
ret = -ENOMEM;
goto free;
}
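
The trace_events_hist.c changes above are double-free hardening: an error path frees fields of a structure that a later, common teardown frees again, so each pointer is NULLed immediately after kfree() (kfree(NULL), like free(NULL), is a no-op). In userspace terms (field names mirror the kernel's; the values are made up):

    #include <stdlib.h>
    #include <string.h>

    struct ref_field {
        char *system;
        char *event_name;
        char *name;
    };

    static void ref_field_reset(struct ref_field *ref)
    {
        free(ref->system);     ref->system = NULL;
        free(ref->event_name); ref->event_name = NULL;
        free(ref->name);       ref->name = NULL;
    }

    int main(void)
    {
        struct ref_field ref = {
            .system = strdup("sched"),
            .event_name = strdup("sched_switch"),
            .name = strdup("prev_pid"),
        };

        ref_field_reset(&ref);  /* error-path cleanup */
        ref_field_reset(&ref);  /* later teardown: safe, all NULL */
        return 0;
    }
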
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index 881df99..18291ab 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -791,10 +791,9 @@
}
ret = set_synth_event_print_fmt(call);
- if (ret < 0) {
+ /* unregister_trace_event() will be called inside */
+ if (ret < 0)
trace_remove_event_call(call);
- goto err;
- }
out:
return ret;
err:
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index d0309de..4bc9096 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -1219,7 +1219,14 @@
stacktrace_trigger(struct event_trigger_data *data, void *rec,
struct ring_buffer_event *event)
{
- trace_dump_stack(STACK_SKIP);
+ struct trace_event_file *file = data->private_data;
+ unsigned long flags;
+
+ if (file) {
+ local_save_flags(flags);
+ __trace_stack(file->tr, flags, STACK_SKIP, preempt_count());
+ } else
+ trace_dump_stack(STACK_SKIP);
}
static void
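
The stacktrace trigger fix above makes the dump land in the trace instance that owns the trigger instead of always the global buffer, falling back only when no file context exists. Sketched in userspace (struct names model the kernel's, heavily simplified):

    #include <stdio.h>

    struct trace_array { const char *name; };
    struct trace_event_file { struct trace_array *tr; };

    static void trace_dump_stack(void)
    {
        puts("stack -> global buffer");
    }

    static void __trace_stack(struct trace_array *tr)
    {
        printf("stack -> instance %s\n", tr->name);
    }

    static void stacktrace_trigger(struct trace_event_file *file)
    {
        if (file)
            __trace_stack(file->tr);   /* dump into the owning instance */
        else
            trace_dump_stack();        /* no file context: global buffer */
    }

    int main(void)
    {
        struct trace_array tr = { .name = "instance_a" };
        struct trace_event_file f = { .tr = &tr };

        stacktrace_trigger(&f);
        stacktrace_trigger(NULL);
        return 0;
    }
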
diff --git a/kernel/trace/trace_preemptirq.c b/kernel/trace/trace_preemptirq.c
index f493804..3aa55b8 100644
--- a/kernel/trace/trace_preemptirq.c
+++ b/kernel/trace/trace_preemptirq.c
@@ -94,15 +94,15 @@
this_cpu_write(tracing_irq_cpu, 0);
}
- lockdep_hardirqs_on_prepare(CALLER_ADDR0);
- lockdep_hardirqs_on(CALLER_ADDR0);
+ lockdep_hardirqs_on_prepare(caller_addr);
+ lockdep_hardirqs_on(caller_addr);
}
EXPORT_SYMBOL(trace_hardirqs_on_caller);
NOKPROBE_SYMBOL(trace_hardirqs_on_caller);
__visible void trace_hardirqs_off_caller(unsigned long caller_addr)
{
- lockdep_hardirqs_off(CALLER_ADDR0);
+ lockdep_hardirqs_off(caller_addr);
if (!this_cpu_read(tracing_irq_cpu)) {
this_cpu_write(tracing_irq_cpu, 1);
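
The trace_preemptirq.c fix above is a classic wrong-constant bug: the wrappers received the real call site in caller_addr but reported CALLER_ADDR0, their own return address, to lockdep. A tiny demo of the difference (CALLER_ADDR0 is defined from __builtin_return_address(0), as in the kernel's ftrace.h; the 0xdeadbeef cookie is made up):

    #include <stdio.h>

    #define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))

    static void lockdep_report(unsigned long ip)
    {
        printf("irqs toggled at %#lx\n", ip);
    }

    static void trace_hardirqs_on_caller(unsigned long caller_addr)
    {
        lockdep_report(CALLER_ADDR0);  /* bug: this wrapper's own return address */
        lockdep_report(caller_addr);   /* fix: the address the caller passed in */
    }

    int main(void)
    {
        trace_hardirqs_on_caller(0xdeadbeef);  /* pretend call-site cookie */
        return 0;
    }
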
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 1d31bc4..073abbe 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -300,7 +300,7 @@
}
} else
goto inval_var;
- } else if (strcmp(arg, "comm") == 0) {
+ } else if (strcmp(arg, "comm") == 0 || strcmp(arg, "COMM") == 0) {
code->op = FETCH_OP_COMM;
#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
} else if (((flags & TPARG_FL_MASK) ==
@@ -595,7 +595,8 @@
* Since $comm and immediate string cannot be dereferenced,
* we can find those by strcmp.
*/
- if (strcmp(arg, "$comm") == 0 || strncmp(arg, "\\\"", 2) == 0) {
+ if (strcmp(arg, "$comm") == 0 || strcmp(arg, "$COMM") == 0 ||
+ strncmp(arg, "\\\"", 2) == 0) {
/* The type of $comm must be "string", and not an array. */
if (parg->count || (t && strcmp(t, "string")))
return -EINVAL;
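
Finally, the trace_probe.c hunks above accept the upper-case spellings COMM/$COMM wherever the lower-case forms were already special-cased. The acceptance test reduces to plain strcmp, as this small sketch (illustrative only) shows:

    #include <stdio.h>
    #include <string.h>

    static int is_comm_arg(const char *arg)
    {
        return strcmp(arg, "comm") == 0 || strcmp(arg, "COMM") == 0;
    }

    int main(void)
    {
        const char *args[] = { "comm", "COMM", "Comm", "retval" };

        for (unsigned i = 0; i < sizeof(args) / sizeof(args[0]); i++)
            printf("%-6s -> %s\n", args[i],
                   is_comm_arg(args[i]) ? "FETCH_OP_COMM" : "other");
        return 0;
    }
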