Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 38b8ce0..9c073db 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/fs/block_dev.c
*
@@ -25,12 +26,11 @@
#include <linux/writeback.h>
#include <linux/mpage.h>
#include <linux/mount.h>
+#include <linux/pseudo_fs.h>
#include <linux/uio.h>
#include <linux/namei.h>
#include <linux/log2.h>
#include <linux/cleancache.h>
-#include <linux/dax.h>
-#include <linux/badblocks.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/falloc.h>
#include <linux/uaccess.h>
@@ -104,6 +104,20 @@
}
EXPORT_SYMBOL(invalidate_bdev);
+static void set_init_blocksize(struct block_device *bdev)
+{
+ unsigned bsize = bdev_logical_block_size(bdev);
+ loff_t size = i_size_read(bdev->bd_inode);
+
+ while (bsize < PAGE_SIZE) {
+ if (size & bsize)
+ break;
+ bsize <<= 1;
+ }
+ bdev->bd_block_size = bsize;
+ bdev->bd_inode->i_blkbits = blksize_bits(bsize);
+}
+
int set_blocksize(struct block_device *bdev, int size)
{
/* Size must be a power of two, and between 512 and PAGE_SIZE */
@@ -181,7 +195,7 @@
struct task_struct *waiter = bio->bi_private;
WRITE_ONCE(bio->bi_private, NULL);
- wake_up_process(waiter);
+ blk_wake_io_task(waiter);
}
static ssize_t
@@ -190,13 +204,12 @@
{
struct file *file = iocb->ki_filp;
struct block_device *bdev = I_BDEV(bdev_file_inode(file));
- struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs, *bvec;
+ struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs;
loff_t pos = iocb->ki_pos;
bool should_dirty = false;
struct bio bio;
ssize_t ret;
blk_qc_t qc;
- int i;
if ((pos | iov_iter_alignment(iter)) &
(bdev_logical_block_size(bdev) - 1))
@@ -232,6 +245,8 @@
bio.bi_opf = dio_bio_write_op(iocb);
task_io_account_write(ret);
}
+ if (iocb->ki_flags & IOCB_HIPRI)
+ bio_set_polled(&bio, iocb);
qc = submit_bio(&bio);
for (;;) {
@@ -239,17 +254,12 @@
if (!READ_ONCE(bio.bi_private))
break;
if (!(iocb->ki_flags & IOCB_HIPRI) ||
- !blk_poll(bdev_get_queue(bdev), qc))
+ !blk_poll(bdev_get_queue(bdev), qc, true))
io_schedule();
}
__set_current_state(TASK_RUNNING);
- bio_for_each_segment_all(bvec, &bio, i) {
- if (should_dirty && !PageCompound(bvec->bv_page))
- set_page_dirty_lock(bvec->bv_page);
- put_page(bvec->bv_page);
- }
-
+ bio_release_pages(&bio, should_dirty);
if (unlikely(bio.bi_status))
ret = blk_status_to_errno(bio.bi_status);
@@ -277,15 +287,23 @@
static struct bio_set blkdev_dio_pool;
+static int blkdev_iopoll(struct kiocb *kiocb, bool wait)
+{
+ struct block_device *bdev = I_BDEV(kiocb->ki_filp->f_mapping->host);
+ struct request_queue *q = bdev_get_queue(bdev);
+
+ return blk_poll(q, READ_ONCE(kiocb->ki_cookie), wait);
+}
+
static void blkdev_bio_end_io(struct bio *bio)
{
struct blkdev_dio *dio = bio->bi_private;
bool should_dirty = dio->should_dirty;
- if (dio->multi_bio && !atomic_dec_and_test(&dio->ref)) {
- if (bio->bi_status && !dio->bio.bi_status)
- dio->bio.bi_status = bio->bi_status;
- } else {
+ if (bio->bi_status && !dio->bio.bi_status)
+ dio->bio.bi_status = bio->bi_status;
+
+ if (!dio->multi_bio || atomic_dec_and_test(&dio->ref)) {
if (!dio->is_sync) {
struct kiocb *iocb = dio->iocb;
ssize_t ret;
@@ -298,23 +316,20 @@
}
dio->iocb->ki_complete(iocb, ret, 0);
- bio_put(&dio->bio);
+ if (dio->multi_bio)
+ bio_put(&dio->bio);
} else {
struct task_struct *waiter = dio->waiter;
WRITE_ONCE(dio->waiter, NULL);
- wake_up_process(waiter);
+ blk_wake_io_task(waiter);
}
}
if (should_dirty) {
bio_check_pages_dirty(bio);
} else {
- struct bio_vec *bvec;
- int i;
-
- bio_for_each_segment_all(bvec, bio, i)
- put_page(bvec->bv_page);
+ bio_release_pages(bio, false);
bio_put(bio);
}
}
@@ -328,6 +343,7 @@
struct blk_plug plug;
struct blkdev_dio *dio;
struct bio *bio;
+ bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0;
bool is_read = (iov_iter_rw(iter) == READ), is_sync;
loff_t pos = iocb->ki_pos;
blk_qc_t qc = BLK_QC_T_NONE;
@@ -338,20 +354,27 @@
return -EINVAL;
bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool);
- bio_get(bio); /* extra ref for the completion handler */
dio = container_of(bio, struct blkdev_dio, bio);
dio->is_sync = is_sync = is_sync_kiocb(iocb);
- if (dio->is_sync)
+ if (dio->is_sync) {
dio->waiter = current;
- else
+ bio_get(bio);
+ } else {
dio->iocb = iocb;
+ }
dio->size = 0;
dio->multi_bio = false;
- dio->should_dirty = is_read && (iter->type == ITER_IOVEC);
+ dio->should_dirty = is_read && iter_is_iovec(iter);
- blk_start_plug(&plug);
+ /*
+ * Don't plug for HIPRI/polled IO, as those should go straight
+ * to issue
+ */
+ if (!is_poll)
+ blk_start_plug(&plug);
+
for (;;) {
bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = pos >> 9;
@@ -381,11 +404,28 @@
nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES);
if (!nr_pages) {
+ bool polled = false;
+
+ if (iocb->ki_flags & IOCB_HIPRI) {
+ bio_set_polled(bio, iocb);
+ polled = true;
+ }
+
qc = submit_bio(bio);
+
+ if (polled)
+ WRITE_ONCE(iocb->ki_cookie, qc);
break;
}
if (!dio->multi_bio) {
+ /*
+ * AIO needs an extra reference to ensure the dio
+ * structure which is embedded into the first bio
+ * stays around.
+ */
+ if (!is_sync)
+ bio_get(bio);
dio->multi_bio = true;
atomic_set(&dio->ref, 2);
} else {
@@ -395,7 +435,9 @@
submit_bio(bio);
bio = bio_alloc(GFP_KERNEL, nr_pages);
}
- blk_finish_plug(&plug);
+
+ if (!is_poll)
+ blk_finish_plug(&plug);
if (!is_sync)
return -EIOCBQUEUED;
@@ -406,7 +448,7 @@
break;
if (!(iocb->ki_flags & IOCB_HIPRI) ||
- !blk_poll(bdev_get_queue(bdev), qc))
+ !blk_poll(bdev_get_queue(bdev), qc, true))
io_schedule();
}
__set_current_state(TASK_RUNNING);
@@ -733,17 +775,9 @@
return &ei->vfs_inode;
}
-static void bdev_i_callback(struct rcu_head *head)
+static void bdev_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
- struct bdev_inode *bdi = BDEV_I(inode);
-
- kmem_cache_free(bdev_cachep, bdi);
-}
-
-static void bdev_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, bdev_i_callback);
+ kmem_cache_free(bdev_cachep, BDEV_I(inode));
}
static void init_once(void *foo)
@@ -783,24 +817,24 @@
static const struct super_operations bdev_sops = {
.statfs = simple_statfs,
.alloc_inode = bdev_alloc_inode,
- .destroy_inode = bdev_destroy_inode,
+ .free_inode = bdev_free_inode,
.drop_inode = generic_delete_inode,
.evict_inode = bdev_evict_inode,
};
-static struct dentry *bd_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+static int bd_init_fs_context(struct fs_context *fc)
{
- struct dentry *dent;
- dent = mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC);
- if (!IS_ERR(dent))
- dent->d_sb->s_iflags |= SB_I_CGROUPWB;
- return dent;
+ struct pseudo_fs_context *ctx = init_pseudo(fc, BDEVFS_MAGIC);
+ if (!ctx)
+ return -ENOMEM;
+ fc->s_iflags |= SB_I_CGROUPWB;
+ ctx->ops = &bdev_sops;
+ return 0;
}
static struct file_system_type bd_type = {
.name = "bdev",
- .mount = bd_mount,
+ .init_fs_context = bd_init_fs_context,
.kill_sb = kill_anon_super,
};
@@ -1105,8 +1139,7 @@
* Pointer to the block device containing @bdev on success, ERR_PTR()
* value on failure.
*/
-static struct block_device *bd_start_claiming(struct block_device *bdev,
- void *holder)
+struct block_device *bd_start_claiming(struct block_device *bdev, void *holder)
{
struct gendisk *disk;
struct block_device *whole;
@@ -1153,6 +1186,62 @@
return ERR_PTR(err);
}
}
+EXPORT_SYMBOL(bd_start_claiming);
+
+static void bd_clear_claiming(struct block_device *whole, void *holder)
+{
+ lockdep_assert_held(&bdev_lock);
+ /* tell others that we're done */
+ BUG_ON(whole->bd_claiming != holder);
+ whole->bd_claiming = NULL;
+ wake_up_bit(&whole->bd_claiming, 0);
+}
+
+/**
+ * bd_finish_claiming - finish claiming of a block device
+ * @bdev: block device of interest
+ * @whole: whole block device (returned from bd_start_claiming())
+ * @holder: holder that has claimed @bdev
+ *
+ * Finish exclusive open of a block device. Mark the device as exclusively
+ * open by the holder and wake up all waiters for exclusive open to finish.
+ */
+void bd_finish_claiming(struct block_device *bdev, struct block_device *whole,
+ void *holder)
+{
+ spin_lock(&bdev_lock);
+ BUG_ON(!bd_may_claim(bdev, whole, holder));
+ /*
+ * Note that for a whole device bd_holders will be incremented twice,
+ * and bd_holder will be set to bd_may_claim before being set to holder
+ */
+ whole->bd_holders++;
+ whole->bd_holder = bd_may_claim;
+ bdev->bd_holders++;
+ bdev->bd_holder = holder;
+ bd_clear_claiming(whole, holder);
+ spin_unlock(&bdev_lock);
+}
+EXPORT_SYMBOL(bd_finish_claiming);
+
+/**
+ * bd_abort_claiming - abort claiming of a block device
+ * @bdev: block device of interest
+ * @whole: whole block device (returned from bd_start_claiming())
+ * @holder: holder that has claimed @bdev
+ *
+ * Abort claiming of a block device when the exclusive open failed. This can
+ * also be used when exclusive open is not actually desired and we just needed
+ * to block other exclusive openers for a while.
+ */
+void bd_abort_claiming(struct block_device *bdev, struct block_device *whole,
+ void *holder)
+{
+ spin_lock(&bdev_lock);
+ bd_clear_claiming(whole, holder);
+ spin_unlock(&bdev_lock);
+}
+EXPORT_SYMBOL(bd_abort_claiming);
#ifdef CONFIG_SYSFS
struct bd_holder_disk {
@@ -1360,20 +1449,27 @@
*/
int revalidate_disk(struct gendisk *disk)
{
- struct block_device *bdev;
int ret = 0;
if (disk->fops->revalidate_disk)
ret = disk->fops->revalidate_disk(disk);
- bdev = bdget_disk(disk, 0);
- if (!bdev)
- return ret;
- mutex_lock(&bdev->bd_mutex);
- check_disk_size_change(disk, bdev, ret == 0);
- bdev->bd_invalidated = 0;
- mutex_unlock(&bdev->bd_mutex);
- bdput(bdev);
+ /*
+ * Hidden disks don't have an associated bdev, so there's no point in
+ * revalidating them.
+ */
+ if (!(disk->flags & GENHD_FL_HIDDEN)) {
+ struct block_device *bdev = bdget_disk(disk, 0);
+
+ if (!bdev)
+ return ret;
+
+ mutex_lock(&bdev->bd_mutex);
+ check_disk_size_change(disk, bdev, ret == 0);
+ bdev->bd_invalidated = 0;
+ mutex_unlock(&bdev->bd_mutex);
+ bdput(bdev);
+ }
return ret;
}
EXPORT_SYMBOL(revalidate_disk);
@@ -1408,18 +1504,9 @@
void bd_set_size(struct block_device *bdev, loff_t size)
{
- unsigned bsize = bdev_logical_block_size(bdev);
-
inode_lock(bdev->bd_inode);
i_size_write(bdev->bd_inode, size);
inode_unlock(bdev->bd_inode);
- while (bsize < PAGE_SIZE) {
- if (size & bsize)
- break;
- bsize <<= 1;
- }
- bdev->bd_block_size = bsize;
- bdev->bd_inode->i_blkbits = blksize_bits(bsize);
}
EXPORT_SYMBOL(bd_set_size);
@@ -1496,8 +1583,10 @@
}
}
- if (!ret)
+ if (!ret) {
bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
+ set_init_blocksize(bdev);
+ }
/*
* If the device is invalidated, rescan partition
@@ -1532,6 +1621,7 @@
goto out_clear;
}
bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
+ set_init_blocksize(bdev);
}
if (bdev->bd_bdi == &noop_backing_dev_info)
@@ -1621,29 +1711,10 @@
/* finish claiming */
mutex_lock(&bdev->bd_mutex);
- spin_lock(&bdev_lock);
-
- if (!res) {
- BUG_ON(!bd_may_claim(bdev, whole, holder));
- /*
- * Note that for a whole device bd_holders
- * will be incremented twice, and bd_holder
- * will be set to bd_may_claim before being
- * set to holder
- */
- whole->bd_holders++;
- whole->bd_holder = bd_may_claim;
- bdev->bd_holders++;
- bdev->bd_holder = holder;
- }
-
- /* tell others that we're done */
- BUG_ON(whole->bd_claiming != holder);
- whole->bd_claiming = NULL;
- wake_up_bit(&whole->bd_claiming, 0);
-
- spin_unlock(&bdev_lock);
-
+ if (!res)
+ bd_finish_claiming(bdev, whole, holder);
+ else
+ bd_abort_claiming(bdev, whole, holder);
/*
* Block event polling for write claims if requested. Any
* write holder makes the write_holder state stick until
@@ -1901,6 +1972,9 @@
if (bdev_read_only(I_BDEV(bd_inode)))
return -EPERM;
+ if (IS_SWAPFILE(bd_inode))
+ return -ETXTBSY;
+
if (!iov_iter_count(from))
return 0;
@@ -1966,6 +2040,7 @@
.writepages = blkdev_writepages,
.releasepage = blkdev_releasepage,
.direct_IO = blkdev_direct_IO,
+ .migratepage = buffer_migrate_page_norefs,
.is_dirty_writeback = buffer_check_dirty_writeback,
};
@@ -2044,6 +2119,7 @@
.llseek = block_llseek,
.read_iter = blkdev_read_iter,
.write_iter = blkdev_write_iter,
+ .iopoll = blkdev_iopoll,
.mmap = generic_file_mmap,
.fsync = blkdev_fsync,
.unlocked_ioctl = block_ioctl,