Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 61a5ad2..1ffb179 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -10,14 +10,11 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
-#include "xfs_error.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_ioctl.h"
@@ -28,11 +25,10 @@
#include "xfs_iomap.h"
#include "xfs_reflink.h"
-#include <linux/dcache.h>
#include <linux/falloc.h>
-#include <linux/pagevec.h>
#include <linux/backing-dev.h>
#include <linux/mman.h>
+#include <linux/fadvise.h>
static const struct vm_operations_struct xfs_file_vm_ops;
@@ -367,40 +363,30 @@
* lock above. Eventually we should look into a way to avoid
* the pointless lock roundtrip.
*/
- if (likely(!(file->f_mode & FMODE_NOCMTIME))) {
- error = file_update_time(file);
- if (error)
- return error;
- }
-
- /*
- * If we're writing the file then make sure to clear the setuid and
- * setgid bits if the process is not being run by root. This keeps
- * people from modifying setuid and setgid binaries.
- */
- if (!IS_NOSEC(inode))
- return file_remove_privs(file);
- return 0;
+ return file_modified(file);
}
static int
xfs_dio_write_end_io(
struct kiocb *iocb,
ssize_t size,
+ int error,
unsigned flags)
{
struct inode *inode = file_inode(iocb->ki_filp);
struct xfs_inode *ip = XFS_I(inode);
loff_t offset = iocb->ki_pos;
- int error = 0;
+ unsigned int nofs_flag;
trace_xfs_end_io_direct_write(ip, offset, size);
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -EIO;
- if (size <= 0)
- return size;
+ if (error)
+ return error;
+ if (!size)
+ return 0;
/*
* Capture amount written on completion as we can't reliably account
@@ -408,10 +394,17 @@
*/
XFS_STATS_ADD(ip->i_mount, xs_write_bytes, size);
+ /*
+ * We can allocate memory here while doing writeback on behalf of
+ * memory reclaim. To avoid memory allocation deadlocks set the
+ * task-wide nofs context for the following operations.
+ */
+ nofs_flag = memalloc_nofs_save();
+
if (flags & IOMAP_DIO_COW) {
error = xfs_reflink_end_cow(ip, offset, size);
if (error)
- return error;
+ goto out;
}
/*
@@ -420,8 +413,10 @@
* earlier allows a racing dio read to find unwritten extents before
* they are converted.
*/
- if (flags & IOMAP_DIO_UNWRITTEN)
- return xfs_iomap_write_unwritten(ip, offset, size, true);
+ if (flags & IOMAP_DIO_UNWRITTEN) {
+ error = xfs_iomap_write_unwritten(ip, offset, size, true);
+ goto out;
+ }
/*
* We need to update the in-core inode size here so that we don't end up
@@ -443,9 +438,15 @@
spin_unlock(&ip->i_flags_lock);
}
+out:
+ memalloc_nofs_restore(nofs_flag);
return error;
}
+static const struct iomap_dio_ops xfs_dio_write_ops = {
+ .end_io = xfs_dio_write_end_io,
+};
+
/*
* xfs_file_dio_aio_write - handle direct IO writes
*
@@ -507,7 +508,7 @@
* We can't properly handle unaligned direct I/O to reflink
* files yet, as we can't unshare a partial block.
*/
- if (xfs_is_reflink_inode(ip)) {
+ if (xfs_is_cow_inode(ip)) {
trace_xfs_reflink_bounce_dio_write(ip, iocb->ki_pos, count);
return -EREMCHG;
}
@@ -517,6 +518,9 @@
}
if (iocb->ki_flags & IOCB_NOWAIT) {
+ /* unaligned dio always waits, bail */
+ if (unaligned_io)
+ return -EAGAIN;
if (!xfs_ilock_nowait(ip, iolock))
return -EAGAIN;
} else {
@@ -529,25 +533,29 @@
count = iov_iter_count(from);
/*
- * If we are doing unaligned IO, wait for all other IO to drain,
- * otherwise demote the lock if we had to take the exclusive lock
- * for other reasons in xfs_file_aio_write_checks.
+ * If we are doing unaligned IO, we can't allow any other overlapping IO
+ * in-flight at the same time or we risk data corruption. Wait for all
+ * other IO to drain before we submit. If the IO is aligned, demote the
+ * iolock if we had to take the exclusive lock in
+ * xfs_file_aio_write_checks() for other reasons.
*/
if (unaligned_io) {
- /* If we are going to wait for other DIO to finish, bail */
- if (iocb->ki_flags & IOCB_NOWAIT) {
- if (atomic_read(&inode->i_dio_count))
- return -EAGAIN;
- } else {
- inode_dio_wait(inode);
- }
+ inode_dio_wait(inode);
} else if (iolock == XFS_IOLOCK_EXCL) {
xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
iolock = XFS_IOLOCK_SHARED;
}
trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
- ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io);
+ ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, &xfs_dio_write_ops);
+
+ /*
+ * If unaligned, this is the only IO in-flight. If it has not yet
+ * completed, wait on it before we release the iolock to prevent
+ * subsequent overlapping IO.
+ */
+ if (ret == -EIOCBQUEUED && unaligned_io)
+ inode_dio_wait(inode);
out:
xfs_iunlock(ip, iolock);
@@ -872,14 +880,27 @@
goto out_unlock;
}
- if (mode & FALLOC_FL_ZERO_RANGE)
+ if (mode & FALLOC_FL_ZERO_RANGE) {
error = xfs_zero_file_space(ip, offset, len);
- else {
- if (mode & FALLOC_FL_UNSHARE_RANGE) {
- error = xfs_reflink_unshare(ip, offset, len);
- if (error)
- goto out_unlock;
+ } else if (mode & FALLOC_FL_UNSHARE_RANGE) {
+ error = xfs_reflink_unshare(ip, offset, len);
+ if (error)
+ goto out_unlock;
+
+ if (!xfs_is_always_cow_inode(ip)) {
+ error = xfs_alloc_file_space(ip, offset, len,
+ XFS_BMAPI_PREALLOC);
}
+ } else {
+ /*
+ * In always_cow mode we can't use preallocations and
+ * thus should not create them.
+ */
+ if (xfs_is_always_cow_inode(ip)) {
+ error = -EOPNOTSUPP;
+ goto out_unlock;
+ }
+
error = xfs_alloc_file_space(ip, offset, len,
XFS_BMAPI_PREALLOC);
}
@@ -920,27 +941,90 @@
}
STATIC int
-xfs_file_clone_range(
- struct file *file_in,
- loff_t pos_in,
- struct file *file_out,
- loff_t pos_out,
- u64 len)
+xfs_file_fadvise(
+ struct file *file,
+ loff_t start,
+ loff_t end,
+ int advice)
{
- return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
- len, false);
+ struct xfs_inode *ip = XFS_I(file_inode(file));
+ int ret;
+ int lockflags = 0;
+
+ /*
+ * Operations creating pages in page cache need protection from hole
+ * punching and similar ops
+ */
+ if (advice == POSIX_FADV_WILLNEED) {
+ lockflags = XFS_IOLOCK_SHARED;
+ xfs_ilock(ip, lockflags);
+ }
+ ret = generic_fadvise(file, start, end, advice);
+ if (lockflags)
+ xfs_iunlock(ip, lockflags);
+ return ret;
}
-STATIC int
-xfs_file_dedupe_range(
- struct file *file_in,
- loff_t pos_in,
- struct file *file_out,
- loff_t pos_out,
- u64 len)
+STATIC loff_t
+xfs_file_remap_range(
+ struct file *file_in,
+ loff_t pos_in,
+ struct file *file_out,
+ loff_t pos_out,
+ loff_t len,
+ unsigned int remap_flags)
{
- return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
- len, true);
+ struct inode *inode_in = file_inode(file_in);
+ struct xfs_inode *src = XFS_I(inode_in);
+ struct inode *inode_out = file_inode(file_out);
+ struct xfs_inode *dest = XFS_I(inode_out);
+ struct xfs_mount *mp = src->i_mount;
+ loff_t remapped = 0;
+ xfs_extlen_t cowextsize;
+ int ret;
+
+ if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
+ return -EINVAL;
+
+ if (!xfs_sb_version_hasreflink(&mp->m_sb))
+ return -EOPNOTSUPP;
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return -EIO;
+
+ /* Prepare and then clone file data. */
+ ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out,
+ &len, remap_flags);
+ if (ret < 0 || len == 0)
+ return ret;
+
+ trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
+
+ ret = xfs_reflink_remap_blocks(src, pos_in, dest, pos_out, len,
+ &remapped);
+ if (ret)
+ goto out_unlock;
+
+ /*
+ * Carry the cowextsize hint from src to dest if we're sharing the
+ * entire source file to the entire destination file, the source file
+ * has a cowextsize hint, and the destination file does not.
+ */
+ cowextsize = 0;
+ if (pos_in == 0 && len == i_size_read(inode_in) &&
+ (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) &&
+ pos_out == 0 && len >= i_size_read(inode_out) &&
+ !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE))
+ cowextsize = src->i_d.di_cowextsize;
+
+ ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
+ remap_flags);
+
+out_unlock:
+ xfs_reflink_remap_unlock(file_in, file_out);
+ if (ret)
+ trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
+ return remapped > 0 ? remapped : ret;
}
STATIC int
@@ -1029,10 +1113,10 @@
default:
return generic_file_llseek(file, offset, whence);
case SEEK_HOLE:
- offset = iomap_seek_hole(inode, offset, &xfs_iomap_ops);
+ offset = iomap_seek_hole(inode, offset, &xfs_seek_iomap_ops);
break;
case SEEK_DATA:
- offset = iomap_seek_data(inode, offset, &xfs_iomap_ops);
+ offset = iomap_seek_data(inode, offset, &xfs_seek_iomap_ops);
break;
}
@@ -1144,11 +1228,14 @@
struct file *filp,
struct vm_area_struct *vma)
{
+ struct dax_device *dax_dev;
+
+ dax_dev = xfs_find_daxdev_for_inode(file_inode(filp));
/*
- * We don't support synchronous mappings for non-DAX files. At least
- * until someone comes with a sensible use case.
+ * We don't support synchronous mappings for non-DAX files and
+ * for DAX files if the underlying dax_device is not synchronous.
*/
- if (!IS_DAX(file_inode(filp)) && (vma->vm_flags & VM_SYNC))
+ if (!daxdev_mapping_supported(vma, dax_dev))
return -EOPNOTSUPP;
file_accessed(filp);
@@ -1164,6 +1251,7 @@
.write_iter = xfs_file_write_iter,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
+ .iopoll = iomap_dio_iopoll,
.unlocked_ioctl = xfs_file_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = xfs_file_compat_ioctl,
@@ -1175,8 +1263,8 @@
.fsync = xfs_file_fsync,
.get_unmapped_area = thp_get_unmapped_area,
.fallocate = xfs_file_fallocate,
- .clone_file_range = xfs_file_clone_range,
- .dedupe_file_range = xfs_file_dedupe_range,
+ .fadvise = xfs_file_fadvise,
+ .remap_file_range = xfs_file_remap_range,
};
const struct file_operations xfs_dir_file_operations = {