Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index ab5e928..b019f27 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -10,10 +10,20 @@
#include <linux/uio.h>
#include <linux/uaccess.h>
#include <linux/splice.h>
+#include <linux/security.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include "overlayfs.h"
+struct ovl_aio_req {
+ struct kiocb iocb;
+ refcount_t ref;
+ struct kiocb *orig_iocb;
+ struct fd fd;
+};
+
+static struct kmem_cache *ovl_aio_request_cachep;
+
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
{
if (realinode != ovl_inode_upper(inode))
@@ -34,10 +44,22 @@
struct file *realfile;
const struct cred *old_cred;
int flags = file->f_flags | OVL_OPEN_FLAGS;
+ int acc_mode = ACC_MODE(flags);
+ int err;
+
+ if (flags & O_APPEND)
+ acc_mode |= MAY_APPEND;
old_cred = ovl_override_creds(inode->i_sb);
- realfile = open_with_fake_path(&file->f_path, flags, realinode,
- current_cred());
+ err = inode_permission(realinode, MAY_OPEN | acc_mode);
+ if (err) {
+ realfile = ERR_PTR(err);
+ } else if (!inode_owner_or_capable(realinode)) {
+ realfile = ERR_PTR(-EPERM);
+ } else {
+ realfile = open_with_fake_path(&file->f_path, flags, realinode,
+ current_cred());
+ }
revert_creds(old_cred);
pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
@@ -115,6 +137,13 @@
static int ovl_real_fdget(const struct file *file, struct fd *real)
{
+ if (d_is_dir(file_dentry(file))) {
+ real->flags = 0;
+ real->file = ovl_dir_real_file(file, false);
+
+ return PTR_ERR_OR_ZERO(real->file);
+ }
+
return ovl_real_fdget_meta(file, real, false);
}
@@ -176,7 +205,7 @@
* limitations that are more strict than ->s_maxbytes for specific
* files, so we use the real file to perform seeks.
*/
- inode_lock(inode);
+ ovl_inode_lock(inode);
real.file->f_pos = file->f_pos;
old_cred = ovl_override_creds(inode->i_sb);
@@ -184,7 +213,7 @@
revert_creds(old_cred);
file->f_pos = real.file->f_pos;
- inode_unlock(inode);
+ ovl_inode_unlock(inode);
fdput(real);
@@ -213,9 +242,8 @@
touch_atime(&file->f_path);
}
-static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb)
+static rwf_t ovl_iocb_to_rwf(int ifl)
{
- int ifl = iocb->ki_flags;
rwf_t flags = 0;
if (ifl & IOCB_NOWAIT)
@@ -230,6 +258,43 @@
return flags;
}
+static inline void ovl_aio_put(struct ovl_aio_req *aio_req)
+{
+ if (refcount_dec_and_test(&aio_req->ref)) {
+ fdput(aio_req->fd);
+ kmem_cache_free(ovl_aio_request_cachep, aio_req);
+ }
+}
+
+static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)
+{
+ struct kiocb *iocb = &aio_req->iocb;
+ struct kiocb *orig_iocb = aio_req->orig_iocb;
+
+ if (iocb->ki_flags & IOCB_WRITE) {
+ struct inode *inode = file_inode(orig_iocb->ki_filp);
+
+ /* Actually acquired in ovl_write_iter() */
+ __sb_writers_acquired(file_inode(iocb->ki_filp)->i_sb,
+ SB_FREEZE_WRITE);
+ file_end_write(iocb->ki_filp);
+ ovl_copyattr(ovl_inode_real(inode), inode);
+ }
+
+ orig_iocb->ki_pos = iocb->ki_pos;
+ ovl_aio_put(aio_req);
+}
+
+static void ovl_aio_rw_complete(struct kiocb *iocb, long res, long res2)
+{
+ struct ovl_aio_req *aio_req = container_of(iocb,
+ struct ovl_aio_req, iocb);
+ struct kiocb *orig_iocb = aio_req->orig_iocb;
+
+ ovl_aio_cleanup_handler(aio_req);
+ orig_iocb->ki_complete(orig_iocb, res, res2);
+}
+
static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
struct file *file = iocb->ki_filp;
@@ -244,13 +309,39 @@
if (ret)
return ret;
+ ret = -EINVAL;
+ if (iocb->ki_flags & IOCB_DIRECT &&
+ (!real.file->f_mapping->a_ops ||
+ !real.file->f_mapping->a_ops->direct_IO))
+ goto out_fdput;
+
old_cred = ovl_override_creds(file_inode(file)->i_sb);
- ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
- ovl_iocb_to_rwf(iocb));
+ if (is_sync_kiocb(iocb)) {
+ ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
+ ovl_iocb_to_rwf(iocb->ki_flags));
+ } else {
+ struct ovl_aio_req *aio_req;
+
+ ret = -ENOMEM;
+ aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
+ if (!aio_req)
+ goto out;
+
+ aio_req->fd = real;
+ real.flags = 0;
+ aio_req->orig_iocb = iocb;
+ kiocb_clone(&aio_req->iocb, iocb, real.file);
+ aio_req->iocb.ki_complete = ovl_aio_rw_complete;
+ refcount_set(&aio_req->ref, 2);
+ ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter);
+ ovl_aio_put(aio_req);
+ if (ret != -EIOCBQUEUED)
+ ovl_aio_cleanup_handler(aio_req);
+ }
+out:
revert_creds(old_cred);
-
ovl_file_accessed(file);
-
+out_fdput:
fdput(real);
return ret;
@@ -263,6 +354,7 @@
struct fd real;
const struct cred *old_cred;
ssize_t ret;
+ int ifl = iocb->ki_flags;
if (!iov_iter_count(iter))
return 0;
@@ -278,16 +370,50 @@
if (ret)
goto out_unlock;
+ ret = -EINVAL;
+ if (iocb->ki_flags & IOCB_DIRECT &&
+ (!real.file->f_mapping->a_ops ||
+ !real.file->f_mapping->a_ops->direct_IO))
+ goto out_fdput;
+
+ if (!ovl_should_sync(OVL_FS(inode->i_sb)))
+ ifl &= ~(IOCB_DSYNC | IOCB_SYNC);
+
old_cred = ovl_override_creds(file_inode(file)->i_sb);
- file_start_write(real.file);
- ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
- ovl_iocb_to_rwf(iocb));
- file_end_write(real.file);
+ if (is_sync_kiocb(iocb)) {
+ file_start_write(real.file);
+ ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
+ ovl_iocb_to_rwf(ifl));
+ file_end_write(real.file);
+ /* Update size */
+ ovl_copyattr(ovl_inode_real(inode), inode);
+ } else {
+ struct ovl_aio_req *aio_req;
+
+ ret = -ENOMEM;
+ aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
+ if (!aio_req)
+ goto out;
+
+ file_start_write(real.file);
+ /* Pacify lockdep, same trick as done in aio_write() */
+ __sb_writers_release(file_inode(real.file)->i_sb,
+ SB_FREEZE_WRITE);
+ aio_req->fd = real;
+ real.flags = 0;
+ aio_req->orig_iocb = iocb;
+ kiocb_clone(&aio_req->iocb, iocb, real.file);
+ aio_req->iocb.ki_flags = ifl;
+ aio_req->iocb.ki_complete = ovl_aio_rw_complete;
+ refcount_set(&aio_req->ref, 2);
+ ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
+ ovl_aio_put(aio_req);
+ if (ret != -EIOCBQUEUED)
+ ovl_aio_cleanup_handler(aio_req);
+ }
+out:
revert_creds(old_cred);
-
- /* Update size */
- ovl_copyattr(ovl_inode_real(inode), inode);
-
+out_fdput:
fdput(real);
out_unlock:
@@ -296,45 +422,48 @@
return ret;
}
-static ssize_t ovl_splice_read(struct file *in, loff_t *ppos,
- struct pipe_inode_info *pipe, size_t len,
- unsigned int flags)
+/*
+ * Calling iter_file_splice_write() directly from overlay's f_op may deadlock
+ * due to lock order inversion between pipe->mutex in iter_file_splice_write()
+ * and file_start_write(real.file) in ovl_write_iter().
+ *
+ * So do everything ovl_write_iter() does and call iter_file_splice_write() on
+ * the real file.
+ */
+static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
+ loff_t *ppos, size_t len, unsigned int flags)
{
- ssize_t ret;
struct fd real;
const struct cred *old_cred;
+ struct inode *inode = file_inode(out);
+ struct inode *realinode = ovl_inode_real(inode);
+ ssize_t ret;
- ret = ovl_real_fdget(in, &real);
+ inode_lock(inode);
+ /* Update mode */
+ ovl_copyattr(realinode, inode);
+ ret = file_remove_privs(out);
if (ret)
- return ret;
-
- old_cred = ovl_override_creds(file_inode(in)->i_sb);
- ret = generic_file_splice_read(real.file, ppos, pipe, len, flags);
- revert_creds(old_cred);
-
- ovl_file_accessed(in);
- fdput(real);
- return ret;
-}
-
-static ssize_t
-ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
- loff_t *ppos, size_t len, unsigned int flags)
-{
- struct fd real;
- const struct cred *old_cred;
- ssize_t ret;
+ goto out_unlock;
ret = ovl_real_fdget(out, &real);
if (ret)
- return ret;
+ goto out_unlock;
- old_cred = ovl_override_creds(file_inode(out)->i_sb);
+ old_cred = ovl_override_creds(inode->i_sb);
+ file_start_write(real.file);
+
ret = iter_file_splice_write(pipe, real.file, ppos, len, flags);
- revert_creds(old_cred);
- ovl_file_accessed(out);
+ file_end_write(real.file);
+ /* Update size */
+ ovl_copyattr(realinode, inode);
+ revert_creds(old_cred);
fdput(real);
+
+out_unlock:
+ inode_unlock(inode);
+
return ret;
}
@@ -344,6 +473,10 @@
const struct cred *old_cred;
int ret;
+ ret = ovl_sync_status(OVL_FS(file_inode(file)->i_sb));
+ if (ret <= 0)
+ return ret;
+
ret = ovl_real_fdget_meta(file, &real, !datasync);
if (ret)
return ret;
@@ -437,16 +570,20 @@
unsigned long arg)
{
struct fd real;
- const struct cred *old_cred;
long ret;
ret = ovl_real_fdget(file, &real);
if (ret)
return ret;
- old_cred = ovl_override_creds(file_inode(file)->i_sb);
- ret = vfs_ioctl(real.file, cmd, arg);
- revert_creds(old_cred);
+ ret = security_file_ioctl(real.file, cmd, arg);
+ if (!ret) {
+ /*
+ * Don't override creds, since we currently can't safely check
+ * permissions before doing so.
+ */
+ ret = vfs_ioctl(real.file, cmd, arg);
+ }
fdput(real);
@@ -454,11 +591,10 @@
}
static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd,
- unsigned long arg, unsigned int iflags)
+ unsigned long arg)
{
long ret;
struct inode *inode = file_inode(file);
- unsigned int old_iflags;
if (!inode_owner_or_capable(inode))
return -EACCES;
@@ -469,10 +605,12 @@
inode_lock(inode);
- /* Check the capability before cred override */
+ /*
+ * Prevent copy up if immutable and has no CAP_LINUX_IMMUTABLE
+ * capability.
+ */
ret = -EPERM;
- old_iflags = READ_ONCE(inode->i_flags);
- if (((iflags ^ old_iflags) & (S_APPEND | S_IMMUTABLE)) &&
+ if (!ovl_has_upperdata(inode) && IS_IMMUTABLE(inode) &&
!capable(CAP_LINUX_IMMUTABLE))
goto unlock;
@@ -492,64 +630,7 @@
}
-static unsigned int ovl_fsflags_to_iflags(unsigned int flags)
-{
- unsigned int iflags = 0;
-
- if (flags & FS_SYNC_FL)
- iflags |= S_SYNC;
- if (flags & FS_APPEND_FL)
- iflags |= S_APPEND;
- if (flags & FS_IMMUTABLE_FL)
- iflags |= S_IMMUTABLE;
- if (flags & FS_NOATIME_FL)
- iflags |= S_NOATIME;
-
- return iflags;
-}
-
-static long ovl_ioctl_set_fsflags(struct file *file, unsigned int cmd,
- unsigned long arg)
-{
- unsigned int flags;
-
- if (get_user(flags, (int __user *) arg))
- return -EFAULT;
-
- return ovl_ioctl_set_flags(file, cmd, arg,
- ovl_fsflags_to_iflags(flags));
-}
-
-static unsigned int ovl_fsxflags_to_iflags(unsigned int xflags)
-{
- unsigned int iflags = 0;
-
- if (xflags & FS_XFLAG_SYNC)
- iflags |= S_SYNC;
- if (xflags & FS_XFLAG_APPEND)
- iflags |= S_APPEND;
- if (xflags & FS_XFLAG_IMMUTABLE)
- iflags |= S_IMMUTABLE;
- if (xflags & FS_XFLAG_NOATIME)
- iflags |= S_NOATIME;
-
- return iflags;
-}
-
-static long ovl_ioctl_set_fsxflags(struct file *file, unsigned int cmd,
- unsigned long arg)
-{
- struct fsxattr fa;
-
- memset(&fa, 0, sizeof(fa));
- if (copy_from_user(&fa, (void __user *) arg, sizeof(fa)))
- return -EFAULT;
-
- return ovl_ioctl_set_flags(file, cmd, arg,
- ovl_fsxflags_to_iflags(fa.fsx_xflags));
-}
-
-static long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
long ret;
@@ -559,12 +640,9 @@
ret = ovl_real_ioctl(file, cmd, arg);
break;
- case FS_IOC_SETFLAGS:
- ret = ovl_ioctl_set_fsflags(file, cmd, arg);
- break;
-
case FS_IOC_FSSETXATTR:
- ret = ovl_ioctl_set_fsxflags(file, cmd, arg);
+ case FS_IOC_SETFLAGS:
+ ret = ovl_ioctl_set_flags(file, cmd, arg);
break;
default:
@@ -574,8 +652,8 @@
return ret;
}
-static long ovl_compat_ioctl(struct file *file, unsigned int cmd,
- unsigned long arg)
+#ifdef CONFIG_COMPAT
+long ovl_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
switch (cmd) {
case FS_IOC32_GETFLAGS:
@@ -592,6 +670,7 @@
return ovl_ioctl(file, cmd, arg);
}
+#endif
enum ovl_copyop {
OVL_COPY,
@@ -693,10 +772,28 @@
.fallocate = ovl_fallocate,
.fadvise = ovl_fadvise,
.unlocked_ioctl = ovl_ioctl,
+#ifdef CONFIG_COMPAT
.compat_ioctl = ovl_compat_ioctl,
- .splice_read = ovl_splice_read,
+#endif
+ .splice_read = generic_file_splice_read,
.splice_write = ovl_splice_write,
.copy_file_range = ovl_copy_file_range,
.remap_file_range = ovl_remap_file_range,
};
+
+int __init ovl_aio_request_cache_init(void)
+{
+ ovl_aio_request_cachep = kmem_cache_create("ovl_aio_req",
+ sizeof(struct ovl_aio_req),
+ 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (!ovl_aio_request_cachep)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void ovl_aio_request_cache_destroy(void)
+{
+ kmem_cache_destroy(ovl_aio_request_cachep);
+}