Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 884333b..b89ff18 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -67,19 +67,18 @@
* Send out a notify message.
*/
static void trace_note(struct blk_trace *bt, pid_t pid, int action,
- const void *data, size_t len,
- union kernfs_node_id *cgid)
+ const void *data, size_t len, u64 cgid)
{
struct blk_io_trace *t;
struct ring_buffer_event *event = NULL;
- struct ring_buffer *buffer = NULL;
+ struct trace_buffer *buffer = NULL;
int pc = 0;
int cpu = smp_processor_id();
bool blk_tracer = blk_tracer_enabled;
- ssize_t cgid_len = cgid ? sizeof(*cgid) : 0;
+ ssize_t cgid_len = cgid ? sizeof(cgid) : 0;
if (blk_tracer) {
- buffer = blk_tr->trace_buffer.buffer;
+ buffer = blk_tr->array_buffer.buffer;
pc = preempt_count();
event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
sizeof(*t) + len + cgid_len,
@@ -103,8 +102,8 @@
t->pid = pid;
t->cpu = cpu;
t->pdu_len = len + cgid_len;
- if (cgid)
- memcpy((void *)t + sizeof(*t), cgid, cgid_len);
+ if (cgid_len)
+ memcpy((void *)t + sizeof(*t), &cgid, cgid_len);
memcpy((void *) t + sizeof(*t) + cgid_len, data, len);
if (blk_tracer)
@@ -125,7 +124,7 @@
spin_lock_irqsave(&running_trace_lock, flags);
list_for_each_entry(bt, &running_trace_list, running_list) {
trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm,
- sizeof(tsk->comm), NULL);
+ sizeof(tsk->comm), 0);
}
spin_unlock_irqrestore(&running_trace_lock, flags);
}
@@ -142,7 +141,7 @@
words[1] = now.tv_nsec;
local_irq_save(flags);
- trace_note(bt, 0, BLK_TN_TIMESTAMP, words, sizeof(words), NULL);
+ trace_note(bt, 0, BLK_TN_TIMESTAMP, words, sizeof(words), 0);
local_irq_restore(flags);
}
@@ -174,10 +173,10 @@
if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
blkcg = NULL;
#ifdef CONFIG_BLK_CGROUP
- trace_note(bt, 0, BLK_TN_MESSAGE, buf, n,
- blkcg ? cgroup_get_kernfs_id(blkcg->css.cgroup) : NULL);
+ trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n,
+ blkcg ? cgroup_id(blkcg->css.cgroup) : 1);
#else
- trace_note(bt, 0, BLK_TN_MESSAGE, buf, n, NULL);
+ trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n, 0);
#endif
local_irq_restore(flags);
}
@@ -215,18 +214,18 @@
*/
static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
int op, int op_flags, u32 what, int error, int pdu_len,
- void *pdu_data, union kernfs_node_id *cgid)
+ void *pdu_data, u64 cgid)
{
struct task_struct *tsk = current;
struct ring_buffer_event *event = NULL;
- struct ring_buffer *buffer = NULL;
+ struct trace_buffer *buffer = NULL;
struct blk_io_trace *t;
unsigned long flags = 0;
unsigned long *sequence;
pid_t pid;
int cpu, pc = 0;
bool blk_tracer = blk_tracer_enabled;
- ssize_t cgid_len = cgid ? sizeof(*cgid) : 0;
+ ssize_t cgid_len = cgid ? sizeof(cgid) : 0;
if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer))
return;
@@ -252,7 +251,7 @@
if (blk_tracer) {
tracing_record_cmdline(current);
- buffer = blk_tr->trace_buffer.buffer;
+ buffer = blk_tr->array_buffer.buffer;
pc = preempt_count();
event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
sizeof(*t) + pdu_len + cgid_len,
@@ -297,7 +296,7 @@
t->pdu_len = pdu_len + cgid_len;
if (cgid_len)
- memcpy((void *)t + sizeof(*t), cgid, cgid_len);
+ memcpy((void *)t + sizeof(*t), &cgid, cgid_len);
if (pdu_len)
memcpy((void *)t + sizeof(*t) + cgid_len, pdu_data, pdu_len);
@@ -348,7 +347,8 @@
{
struct blk_trace *bt;
- bt = xchg(&q->blk_trace, NULL);
+ bt = rcu_replace_pointer(q->blk_trace, NULL,
+ lockdep_is_held(&q->debugfs_mutex));
if (!bt)
return -EINVAL;
@@ -362,9 +362,9 @@
{
int ret;
- mutex_lock(&q->blk_trace_mutex);
+ mutex_lock(&q->debugfs_mutex);
ret = __blk_trace_remove(q);
- mutex_unlock(&q->blk_trace_mutex);
+ mutex_unlock(&q->debugfs_mutex);
return ret;
}
@@ -483,12 +483,11 @@
struct dentry *dir = NULL;
int ret;
+ lockdep_assert_held(&q->debugfs_mutex);
+
if (!buts->buf_size || !buts->buf_nr)
return -EINVAL;
- if (!blk_debugfs_root)
- return -ENOENT;
-
strncpy(buts->name, name, BLKTRACE_BDEV_SIZE);
buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0';
@@ -502,7 +501,8 @@
* bdev can be NULL, as with scsi-generic, this is a helpful as
* we can be.
*/
- if (q->blk_trace) {
+ if (rcu_dereference_protected(q->blk_trace,
+ lockdep_is_held(&q->debugfs_mutex))) {
pr_warn("Concurrent blktraces are not allowed on %s\n",
buts->name);
return -EBUSY;
@@ -521,18 +521,15 @@
if (!bt->msg_data)
goto err;
-#ifdef CONFIG_BLK_DEBUG_FS
/*
- * When tracing whole make_request drivers (multiqueue) block devices,
- * reuse the existing debugfs directory created by the block layer on
- * init. For request-based block devices, all partitions block devices,
+ * When tracing the whole disk reuse the existing debugfs directory
+ * created by the block layer on init. For partitions block devices,
* and scsi-generic block devices we create a temporary new debugfs
* directory that will be removed once the trace ends.
*/
- if (queue_is_mq(q) && bdev && bdev == bdev->bd_contains)
+ if (bdev && !bdev_is_partition(bdev))
dir = q->debugfs_dir;
else
-#endif
bt->dir = dir = debugfs_create_dir(buts->name, blk_debugfs_root);
/*
@@ -577,10 +574,7 @@
bt->pid = buts->pid;
bt->trace_state = Blktrace_setup;
- ret = -EBUSY;
- if (cmpxchg(&q->blk_trace, NULL, bt))
- goto err;
-
+ rcu_assign_pointer(q->blk_trace, bt);
get_probe_ref();
ret = 0;
@@ -617,9 +611,9 @@
{
int ret;
- mutex_lock(&q->blk_trace_mutex);
+ mutex_lock(&q->debugfs_mutex);
ret = __blk_trace_setup(q, name, dev, bdev, arg);
- mutex_unlock(&q->blk_trace_mutex);
+ mutex_unlock(&q->debugfs_mutex);
return ret;
}
@@ -665,7 +659,7 @@
struct blk_trace *bt;
bt = rcu_dereference_protected(q->blk_trace,
- lockdep_is_held(&q->blk_trace_mutex));
+ lockdep_is_held(&q->debugfs_mutex));
if (bt == NULL)
return -EINVAL;
@@ -705,9 +699,9 @@
{
int ret;
- mutex_lock(&q->blk_trace_mutex);
+ mutex_lock(&q->debugfs_mutex);
ret = __blk_trace_startstop(q, start);
- mutex_unlock(&q->blk_trace_mutex);
+ mutex_unlock(&q->debugfs_mutex);
return ret;
}
@@ -736,7 +730,7 @@
if (!q)
return -ENXIO;
- mutex_lock(&q->blk_trace_mutex);
+ mutex_lock(&q->debugfs_mutex);
switch (cmd) {
case BLKTRACESETUP:
@@ -751,7 +745,7 @@
#endif
case BLKTRACESTART:
start = 1;
- /* fall through */
+ fallthrough;
case BLKTRACESTOP:
ret = __blk_trace_startstop(q, start);
break;
@@ -763,7 +757,7 @@
break;
}
- mutex_unlock(&q->blk_trace_mutex);
+ mutex_unlock(&q->debugfs_mutex);
return ret;
}
@@ -774,44 +768,42 @@
**/
void blk_trace_shutdown(struct request_queue *q)
{
- mutex_lock(&q->blk_trace_mutex);
+ mutex_lock(&q->debugfs_mutex);
if (rcu_dereference_protected(q->blk_trace,
- lockdep_is_held(&q->blk_trace_mutex))) {
+ lockdep_is_held(&q->debugfs_mutex))) {
__blk_trace_startstop(q, 0);
__blk_trace_remove(q);
}
- mutex_unlock(&q->blk_trace_mutex);
+ mutex_unlock(&q->debugfs_mutex);
}
#ifdef CONFIG_BLK_CGROUP
-static union kernfs_node_id *
-blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
+static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
{
struct blk_trace *bt;
/* We don't use the 'bt' value here except as an optimization... */
bt = rcu_dereference_protected(q->blk_trace, 1);
if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
- return NULL;
+ return 0;
if (!bio->bi_blkg)
- return NULL;
- return cgroup_get_kernfs_id(bio_blkcg(bio)->css.cgroup);
+ return 0;
+ return cgroup_id(bio_blkcg(bio)->css.cgroup);
}
#else
-static union kernfs_node_id *
-blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
+static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
{
- return NULL;
+ return 0;
}
#endif
-static union kernfs_node_id *
+static u64
blk_trace_request_get_cgid(struct request_queue *q, struct request *rq)
{
if (!rq->bio)
- return NULL;
+ return 0;
/* Use the first bio */
return blk_trace_bio_get_cgid(q, rq->bio);
}
@@ -833,8 +825,7 @@
*
**/
static void blk_add_trace_rq(struct request *rq, int error,
- unsigned int nr_bytes, u32 what,
- union kernfs_node_id *cgid)
+ unsigned int nr_bytes, u32 what, u64 cgid)
{
struct blk_trace *bt;
@@ -869,6 +860,13 @@
blk_trace_request_get_cgid(q, rq));
}
+static void blk_add_trace_rq_merge(void *ignore,
+ struct request_queue *q, struct request *rq)
+{
+ blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_BACKMERGE,
+ blk_trace_request_get_cgid(q, rq));
+}
+
static void blk_add_trace_rq_requeue(void *ignore,
struct request_queue *q,
struct request *rq)
@@ -920,10 +918,10 @@
}
static void blk_add_trace_bio_complete(void *ignore,
- struct request_queue *q, struct bio *bio,
- int error)
+ struct request_queue *q, struct bio *bio)
{
- blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error);
+ blk_add_trace_bio(q, bio, BLK_TA_COMPLETE,
+ blk_status_to_errno(bio->bi_status));
}
static void blk_add_trace_bio_backmerge(void *ignore,
@@ -961,7 +959,7 @@
bt = rcu_dereference(q->blk_trace);
if (bt)
__blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_GETRQ, 0, 0,
- NULL, NULL);
+ NULL, 0);
rcu_read_unlock();
}
}
@@ -980,7 +978,7 @@
bt = rcu_dereference(q->blk_trace);
if (bt)
__blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_SLEEPRQ,
- 0, 0, NULL, NULL);
+ 0, 0, NULL, 0);
rcu_read_unlock();
}
}
@@ -992,7 +990,7 @@
rcu_read_lock();
bt = rcu_dereference(q->blk_trace);
if (bt)
- __blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, NULL);
+ __blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, 0);
rcu_read_unlock();
}
@@ -1012,7 +1010,7 @@
else
what = BLK_TA_UNPLUG_TIMER;
- __blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, NULL);
+ __blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, 0);
}
rcu_read_unlock();
}
@@ -1153,6 +1151,8 @@
WARN_ON(ret);
ret = register_trace_block_rq_issue(blk_add_trace_rq_issue, NULL);
WARN_ON(ret);
+ ret = register_trace_block_rq_merge(blk_add_trace_rq_merge, NULL);
+ WARN_ON(ret);
ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL);
WARN_ON(ret);
ret = register_trace_block_rq_complete(blk_add_trace_rq_complete, NULL);
@@ -1199,6 +1199,7 @@
unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce, NULL);
unregister_trace_block_rq_complete(blk_add_trace_rq_complete, NULL);
unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL);
+ unregister_trace_block_rq_merge(blk_add_trace_rq_merge, NULL);
unregister_trace_block_rq_issue(blk_add_trace_rq_issue, NULL);
unregister_trace_block_rq_insert(blk_add_trace_rq_insert, NULL);
@@ -1251,19 +1252,17 @@
static inline const void *pdu_start(const struct trace_entry *ent, bool has_cg)
{
- return (void *)(te_blk_io_trace(ent) + 1) +
- (has_cg ? sizeof(union kernfs_node_id) : 0);
+ return (void *)(te_blk_io_trace(ent) + 1) + (has_cg ? sizeof(u64) : 0);
}
-static inline const void *cgid_start(const struct trace_entry *ent)
+static inline u64 t_cgid(const struct trace_entry *ent)
{
- return (void *)(te_blk_io_trace(ent) + 1);
+ return *(u64 *)(te_blk_io_trace(ent) + 1);
}
static inline int pdu_real_len(const struct trace_entry *ent, bool has_cg)
{
- return te_blk_io_trace(ent)->pdu_len -
- (has_cg ? sizeof(union kernfs_node_id) : 0);
+ return te_blk_io_trace(ent)->pdu_len - (has_cg ? sizeof(u64) : 0);
}
static inline u32 t_action(const struct trace_entry *ent)
@@ -1325,7 +1324,7 @@
fill_rwbs(rwbs, t);
if (has_cg) {
- const union kernfs_node_id *id = cgid_start(iter->ent);
+ u64 id = t_cgid(iter->ent);
if (blk_tracer_flags.val & TRACE_BLK_OPT_CGNAME) {
char blkcg_name_buf[NAME_MAX + 1] = "<...>";
@@ -1335,11 +1334,25 @@
trace_seq_printf(&iter->seq, "%3d,%-3d %s %2s %3s ",
MAJOR(t->device), MINOR(t->device),
blkcg_name_buf, act, rwbs);
- } else
+ } else {
+ /*
+ * The cgid portion used to be "INO,GEN". Userland
+ * builds a FILEID_INO32_GEN fid out of them and
+ * opens the cgroup using open_by_handle_at(2).
+ * While 32bit ino setups are still the same, 64bit
+ * ones now use the 64bit ino as the whole ID and
+ * no longer use generation.
+ *
+ * Regarldess of the content, always output
+ * "LOW32,HIGH32" so that FILEID_INO32_GEN fid can
+ * be mapped back to @id on both 64 and 32bit ino
+ * setups. See __kernfs_fh_to_dentry().
+ */
trace_seq_printf(&iter->seq,
- "%3d,%-3d %x,%-x %2s %3s ",
+ "%3d,%-3d %llx,%-llx %2s %3s ",
MAJOR(t->device), MINOR(t->device),
- id->ino, id->generation, act, rwbs);
+ id & U32_MAX, id >> 32, act, rwbs);
+ }
} else
trace_seq_printf(&iter->seq, "%3d,%-3d %2s %3s ",
MAJOR(t->device), MINOR(t->device), act, rwbs);
@@ -1652,10 +1665,19 @@
{
struct blk_trace *bt;
- bt = xchg(&q->blk_trace, NULL);
+ bt = rcu_replace_pointer(q->blk_trace, NULL,
+ lockdep_is_held(&q->debugfs_mutex));
if (bt == NULL)
return -EINVAL;
+ if (bt->trace_state == Blktrace_running) {
+ bt->trace_state = Blktrace_stopped;
+ spin_lock_irq(&running_trace_lock);
+ list_del_init(&bt->running_list);
+ spin_unlock_irq(&running_trace_lock);
+ relay_flush(bt->rchan);
+ }
+
put_probe_ref();
synchronize_rcu();
blk_trace_free(bt);
@@ -1684,10 +1706,7 @@
blk_trace_setup_lba(bt, bdev);
- ret = -EBUSY;
- if (cmpxchg(&q->blk_trace, NULL, bt))
- goto free_bt;
-
+ rcu_assign_pointer(q->blk_trace, bt);
get_probe_ref();
return 0;
@@ -1816,13 +1835,11 @@
struct device_attribute *attr,
char *buf)
{
- struct hd_struct *p = dev_to_part(dev);
+ struct block_device *bdev = bdget_part(dev_to_part(dev));
struct request_queue *q;
- struct block_device *bdev;
struct blk_trace *bt;
ssize_t ret = -ENXIO;
- bdev = bdget(part_devt(p));
if (bdev == NULL)
goto out;
@@ -1830,10 +1847,10 @@
if (q == NULL)
goto out_bdput;
- mutex_lock(&q->blk_trace_mutex);
+ mutex_lock(&q->debugfs_mutex);
bt = rcu_dereference_protected(q->blk_trace,
- lockdep_is_held(&q->blk_trace_mutex));
+ lockdep_is_held(&q->debugfs_mutex));
if (attr == &dev_attr_enable) {
ret = sprintf(buf, "%u\n", !!bt);
goto out_unlock_bdev;
@@ -1851,7 +1868,7 @@
ret = sprintf(buf, "%llu\n", bt->end_lba);
out_unlock_bdev:
- mutex_unlock(&q->blk_trace_mutex);
+ mutex_unlock(&q->debugfs_mutex);
out_bdput:
bdput(bdev);
out:
@@ -1864,7 +1881,6 @@
{
struct block_device *bdev;
struct request_queue *q;
- struct hd_struct *p;
struct blk_trace *bt;
u64 value;
ssize_t ret = -EINVAL;
@@ -1884,9 +1900,7 @@
goto out;
ret = -ENXIO;
-
- p = dev_to_part(dev);
- bdev = bdget(part_devt(p));
+ bdev = bdget_part(dev_to_part(dev));
if (bdev == NULL)
goto out;
@@ -1894,10 +1908,10 @@
if (q == NULL)
goto out_bdput;
- mutex_lock(&q->blk_trace_mutex);
+ mutex_lock(&q->debugfs_mutex);
bt = rcu_dereference_protected(q->blk_trace,
- lockdep_is_held(&q->blk_trace_mutex));
+ lockdep_is_held(&q->debugfs_mutex));
if (attr == &dev_attr_enable) {
if (!!value == !!bt) {
ret = 0;
@@ -1914,7 +1928,7 @@
if (bt == NULL) {
ret = blk_trace_setup_queue(q, bdev);
bt = rcu_dereference_protected(q->blk_trace,
- lockdep_is_held(&q->blk_trace_mutex));
+ lockdep_is_held(&q->debugfs_mutex));
}
if (ret == 0) {
@@ -1929,7 +1943,7 @@
}
out_unlock_bdev:
- mutex_unlock(&q->blk_trace_mutex);
+ mutex_unlock(&q->debugfs_mutex);
out_bdput:
bdput(bdev);
out: