Blame - fs/btrfs/ioctl.c - hafnium/third_party/linux

blob: e9d3eb7f0e2b871ae1c7a995772d668f7d548c9e [file] [log] [blame]

Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
				2	/*
				3	* Copyright (C) 2007 Oracle. All rights reserved.
				4	*/
				5
				6	#include <linux/kernel.h>
				7	#include <linux/bio.h>
				8	#include <linux/file.h>
				9	#include <linux/fs.h>
				10	#include <linux/fsnotify.h>
				11	#include <linux/pagemap.h>
				12	#include <linux/highmem.h>
				13	#include <linux/time.h>
				14	#include <linux/string.h>
				15	#include <linux/backing-dev.h>
				16	#include <linux/mount.h>
				17	#include <linux/namei.h>
				18	#include <linux/writeback.h>
				19	#include <linux/compat.h>
				20	#include <linux/security.h>
				21	#include <linux/xattr.h>
				22	#include <linux/mm.h>
				23	#include <linux/slab.h>
				24	#include <linux/blkdev.h>
				25	#include <linux/uuid.h>
				26	#include <linux/btrfs.h>
				27	#include <linux/uaccess.h>
				28	#include <linux/iversion.h>
				29	#include "ctree.h"
				30	#include "disk-io.h"
				31	#include "transaction.h"
				32	#include "btrfs_inode.h"
				33	#include "print-tree.h"
				34	#include "volumes.h"
				35	#include "locking.h"
				36	#include "inode-map.h"
				37	#include "backref.h"
				38	#include "rcu-string.h"
				39	#include "send.h"
				40	#include "dev-replace.h"
				41	#include "props.h"
				42	#include "sysfs.h"
				43	#include "qgroup.h"
				44	#include "tree-log.h"
				45	#include "compression.h"
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	46	#include "space-info.h"
				47	#include "delalloc-space.h"
				48	#include "block-group.h"
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	49
				50	#ifdef CONFIG_64BIT
				51	/* If we have a 32-bit userspace and 64-bit kernel, then the UAPI
				52	* structures are incorrect, as the timespec structure from userspace
				53	* is 4 bytes too small. We define these alternatives here to teach
				54	* the kernel about the 32-bit struct packing.
				55	*/
				56	struct btrfs_ioctl_timespec_32 {
				57	__u64 sec;
				58	__u32 nsec;
				59	} __attribute__ ((__packed__));
				60
				61	struct btrfs_ioctl_received_subvol_args_32 {
				62	char uuid[BTRFS_UUID_SIZE]; /* in */
				63	__u64 stransid; /* in */
				64	__u64 rtransid; /* out */
				65	struct btrfs_ioctl_timespec_32 stime; /* in */
				66	struct btrfs_ioctl_timespec_32 rtime; /* out */
				67	__u64 flags; /* in */
				68	__u64 reserved[16]; /* in */
				69	} __attribute__ ((__packed__));
				70
				71	#define BTRFS_IOC_SET_RECEIVED_SUBVOL_32 _IOWR(BTRFS_IOCTL_MAGIC, 37, \
				72	struct btrfs_ioctl_received_subvol_args_32)
				73	#endif
				74
				75	#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
				76	struct btrfs_ioctl_send_args_32 {
				77	__s64 send_fd; /* in */
				78	__u64 clone_sources_count; /* in */
				79	compat_uptr_t clone_sources; /* in */
				80	__u64 parent_root; /* in */
				81	__u64 flags; /* in */
				82	__u64 reserved[4]; /* in */
				83	} __attribute__ ((__packed__));
				84
				85	#define BTRFS_IOC_SEND_32 _IOW(BTRFS_IOCTL_MAGIC, 38, \
				86	struct btrfs_ioctl_send_args_32)
				87	#endif
				88
				89	static int btrfs_clone(struct inode src, struct inode inode,
				90	u64 off, u64 olen, u64 olen_aligned, u64 destoff,
				91	int no_time_update);
				92
				93	/* Mask out flags that are inappropriate for the given type of inode. */
				94	static unsigned int btrfs_mask_fsflags_for_type(struct inode *inode,
				95	unsigned int flags)
				96	{
				97	if (S_ISDIR(inode->i_mode))
				98	return flags;
				99	else if (S_ISREG(inode->i_mode))
				100	return flags & ~FS_DIRSYNC_FL;
				101	else
				102	return flags & (FS_NODUMP_FL \| FS_NOATIME_FL);
				103	}
				104
				105	/*
				106	* Export internal inode flags to the format expected by the FS_IOC_GETFLAGS
				107	* ioctl.
				108	*/
				109	static unsigned int btrfs_inode_flags_to_fsflags(unsigned int flags)
				110	{
				111	unsigned int iflags = 0;
				112
				113	if (flags & BTRFS_INODE_SYNC)
				114	iflags \|= FS_SYNC_FL;
				115	if (flags & BTRFS_INODE_IMMUTABLE)
				116	iflags \|= FS_IMMUTABLE_FL;
				117	if (flags & BTRFS_INODE_APPEND)
				118	iflags \|= FS_APPEND_FL;
				119	if (flags & BTRFS_INODE_NODUMP)
				120	iflags \|= FS_NODUMP_FL;
				121	if (flags & BTRFS_INODE_NOATIME)
				122	iflags \|= FS_NOATIME_FL;
				123	if (flags & BTRFS_INODE_DIRSYNC)
				124	iflags \|= FS_DIRSYNC_FL;
				125	if (flags & BTRFS_INODE_NODATACOW)
				126	iflags \|= FS_NOCOW_FL;
				127
				128	if (flags & BTRFS_INODE_NOCOMPRESS)
				129	iflags \|= FS_NOCOMP_FL;
				130	else if (flags & BTRFS_INODE_COMPRESS)
				131	iflags \|= FS_COMPR_FL;
				132
				133	return iflags;
				134	}
				135
				136	/*
				137	* Update inode->i_flags based on the btrfs internal flags.
				138	*/
				139	void btrfs_sync_inode_flags_to_i_flags(struct inode *inode)
				140	{
				141	struct btrfs_inode *binode = BTRFS_I(inode);
				142	unsigned int new_fl = 0;
				143
				144	if (binode->flags & BTRFS_INODE_SYNC)
				145	new_fl \|= S_SYNC;
				146	if (binode->flags & BTRFS_INODE_IMMUTABLE)
				147	new_fl \|= S_IMMUTABLE;
				148	if (binode->flags & BTRFS_INODE_APPEND)
				149	new_fl \|= S_APPEND;
				150	if (binode->flags & BTRFS_INODE_NOATIME)
				151	new_fl \|= S_NOATIME;
				152	if (binode->flags & BTRFS_INODE_DIRSYNC)
				153	new_fl \|= S_DIRSYNC;
				154
				155	set_mask_bits(&inode->i_flags,
				156	S_SYNC \| S_APPEND \| S_IMMUTABLE \| S_NOATIME \| S_DIRSYNC,
				157	new_fl);
				158	}
				159
				160	static int btrfs_ioctl_getflags(struct file file, void __user arg)
				161	{
				162	struct btrfs_inode *binode = BTRFS_I(file_inode(file));
				163	unsigned int flags = btrfs_inode_flags_to_fsflags(binode->flags);
				164
				165	if (copy_to_user(arg, &flags, sizeof(flags)))
				166	return -EFAULT;
				167	return 0;
				168	}
				169
/*
 * Check if @flags are a supported and valid set of FS_*_FL flags and that
 * the old and new flags are not conflicting
 *
 * @old_flags: FS_*_FL flags currently set on the inode
 * @flags:     FS_*_FL flags being requested
 *
 * Returns 0 if the request is acceptable, -EOPNOTSUPP if @flags contains a
 * bit outside the supported set, or -EINVAL for a conflicting combination.
 */
static int check_fsflags(unsigned int old_flags, unsigned int flags)
{
	if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL |
		      FS_NOATIME_FL | FS_NODUMP_FL |
		      FS_SYNC_FL | FS_DIRSYNC_FL |
		      FS_NOCOMP_FL | FS_COMPR_FL |
		      FS_NOCOW_FL))
		return -EOPNOTSUPP;

	/*
	 * COMPR and NOCOMP on new/old are valid: only requesting both in the
	 * new flags at the same time is rejected.
	 */
	if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
		return -EINVAL;

	if ((flags & FS_COMPR_FL) && (flags & FS_NOCOW_FL))
		return -EINVAL;

	/* NOCOW and compression options are mutually exclusive */
	if ((old_flags & FS_NOCOW_FL) && (flags & (FS_COMPR_FL | FS_NOCOMP_FL)))
		return -EINVAL;
	if ((flags & FS_NOCOW_FL) && (old_flags & (FS_COMPR_FL | FS_NOCOMP_FL)))
		return -EINVAL;

	return 0;
}
				198
				199	static int btrfs_ioctl_setflags(struct file file, void __user arg)
				200	{
				201	struct inode *inode = file_inode(file);
				202	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
				203	struct btrfs_inode *binode = BTRFS_I(inode);
				204	struct btrfs_root *root = binode->root;
				205	struct btrfs_trans_handle *trans;
				206	unsigned int fsflags, old_fsflags;
				207	int ret;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	208	const char *comp = NULL;
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	209	u32 binode_flags;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	210
				211	if (!inode_owner_or_capable(inode))
				212	return -EPERM;
				213
				214	if (btrfs_root_readonly(root))
				215	return -EROFS;
				216
				217	if (copy_from_user(&fsflags, arg, sizeof(fsflags)))
				218	return -EFAULT;
				219
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	220	ret = mnt_want_write_file(file);
				221	if (ret)
				222	return ret;
				223
				224	inode_lock(inode);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	225	fsflags = btrfs_mask_fsflags_for_type(inode, fsflags);
				226	old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags);
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	227
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	228	ret = vfs_ioc_setflags_prepare(inode, old_fsflags, fsflags);
				229	if (ret)
				230	goto out_unlock;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	231
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	232	ret = check_fsflags(old_fsflags, fsflags);
				233	if (ret)
				234	goto out_unlock;
				235
				236	binode_flags = binode->flags;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	237	if (fsflags & FS_SYNC_FL)
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	238	binode_flags \|= BTRFS_INODE_SYNC;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	239	else
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	240	binode_flags &= ~BTRFS_INODE_SYNC;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	241	if (fsflags & FS_IMMUTABLE_FL)
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	242	binode_flags \|= BTRFS_INODE_IMMUTABLE;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	243	else
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	244	binode_flags &= ~BTRFS_INODE_IMMUTABLE;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	245	if (fsflags & FS_APPEND_FL)
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	246	binode_flags \|= BTRFS_INODE_APPEND;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	247	else
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	248	binode_flags &= ~BTRFS_INODE_APPEND;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	249	if (fsflags & FS_NODUMP_FL)
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	250	binode_flags \|= BTRFS_INODE_NODUMP;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	251	else
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	252	binode_flags &= ~BTRFS_INODE_NODUMP;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	253	if (fsflags & FS_NOATIME_FL)
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	254	binode_flags \|= BTRFS_INODE_NOATIME;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	255	else
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	256	binode_flags &= ~BTRFS_INODE_NOATIME;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	257	if (fsflags & FS_DIRSYNC_FL)
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	258	binode_flags \|= BTRFS_INODE_DIRSYNC;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	259	else
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	260	binode_flags &= ~BTRFS_INODE_DIRSYNC;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	261	if (fsflags & FS_NOCOW_FL) {
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	262	if (S_ISREG(inode->i_mode)) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	263	/*
				264	* It's safe to turn csums off here, no extents exist.
				265	* Otherwise we want the flag to reflect the real COW
				266	* status of the file and will not set it.
				267	*/
				268	if (inode->i_size == 0)
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	269	binode_flags \|= BTRFS_INODE_NODATACOW \|
				270	BTRFS_INODE_NODATASUM;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	271	} else {
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	272	binode_flags \|= BTRFS_INODE_NODATACOW;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	273	}
				274	} else {
				275	/*
				276	* Revert back under same assumptions as above
				277	*/
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	278	if (S_ISREG(inode->i_mode)) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	279	if (inode->i_size == 0)
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	280	binode_flags &= ~(BTRFS_INODE_NODATACOW \|
				281	BTRFS_INODE_NODATASUM);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	282	} else {
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	283	binode_flags &= ~BTRFS_INODE_NODATACOW;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	284	}
				285	}
				286
				287	/*
				288	* The COMPRESS flag can only be changed by users, while the NOCOMPRESS
				289	* flag may be changed automatically if compression code won't make
				290	* things smaller.
				291	*/
				292	if (fsflags & FS_NOCOMP_FL) {
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	293	binode_flags &= ~BTRFS_INODE_COMPRESS;
				294	binode_flags \|= BTRFS_INODE_NOCOMPRESS;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	295	} else if (fsflags & FS_COMPR_FL) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	296
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	297	if (IS_SWAPFILE(inode)) {
				298	ret = -ETXTBSY;
				299	goto out_unlock;
				300	}
				301
				302	binode_flags \|= BTRFS_INODE_COMPRESS;
				303	binode_flags &= ~BTRFS_INODE_NOCOMPRESS;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	304
				305	comp = btrfs_compress_type2str(fs_info->compress_type);
				306	if (!comp \|\| comp[0] == 0)
				307	comp = btrfs_compress_type2str(BTRFS_COMPRESS_ZLIB);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	308	} else {
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	309	binode_flags &= ~(BTRFS_INODE_COMPRESS \| BTRFS_INODE_NOCOMPRESS);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	310	}
				311
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	312	/*
				313	* 1 for inode item
				314	* 2 for properties
				315	*/
				316	trans = btrfs_start_transaction(root, 3);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	317	if (IS_ERR(trans)) {
				318	ret = PTR_ERR(trans);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	319	goto out_unlock;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	320	}
				321
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	322	if (comp) {
				323	ret = btrfs_set_prop(trans, inode, "btrfs.compression", comp,
				324	strlen(comp), 0);
				325	if (ret) {
				326	btrfs_abort_transaction(trans, ret);
				327	goto out_end_trans;
				328	}
				329	} else {
				330	ret = btrfs_set_prop(trans, inode, "btrfs.compression", NULL,
				331	0, 0);
				332	if (ret && ret != -ENODATA) {
				333	btrfs_abort_transaction(trans, ret);
				334	goto out_end_trans;
				335	}
				336	}
				337
				338	binode->flags = binode_flags;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	339	btrfs_sync_inode_flags_to_i_flags(inode);
				340	inode_inc_iversion(inode);
				341	inode->i_ctime = current_time(inode);
				342	ret = btrfs_update_inode(trans, root, inode);
				343
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	344	out_end_trans:
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	345	btrfs_end_transaction(trans);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	346	out_unlock:
				347	inode_unlock(inode);
				348	mnt_drop_write_file(file);
				349	return ret;
				350	}
				351
				352	/*
				353	* Translate btrfs internal inode flags to xflags as expected by the
				354	* FS_IOC_FSGETXATT ioctl. Filter only the supported ones, unknown flags are
				355	* silently dropped.
				356	*/
				357	static unsigned int btrfs_inode_flags_to_xflags(unsigned int flags)
				358	{
				359	unsigned int xflags = 0;
				360
				361	if (flags & BTRFS_INODE_APPEND)
				362	xflags \|= FS_XFLAG_APPEND;
				363	if (flags & BTRFS_INODE_IMMUTABLE)
				364	xflags \|= FS_XFLAG_IMMUTABLE;
				365	if (flags & BTRFS_INODE_NOATIME)
				366	xflags \|= FS_XFLAG_NOATIME;
				367	if (flags & BTRFS_INODE_NODUMP)
				368	xflags \|= FS_XFLAG_NODUMP;
				369	if (flags & BTRFS_INODE_SYNC)
				370	xflags \|= FS_XFLAG_SYNC;
				371
				372	return xflags;
				373	}
				374
				375	/* Check if @flags are a supported and valid set of FS_XFLAGS_* flags */
				376	static int check_xflags(unsigned int flags)
				377	{
				378	if (flags & ~(FS_XFLAG_APPEND \| FS_XFLAG_IMMUTABLE \| FS_XFLAG_NOATIME \|
				379	FS_XFLAG_NODUMP \| FS_XFLAG_SYNC))
				380	return -EOPNOTSUPP;
				381	return 0;
				382	}
				383
				384	/*
				385	* Set the xflags from the internal inode flags. The remaining items of fsxattr
				386	* are zeroed.
				387	*/
				388	static int btrfs_ioctl_fsgetxattr(struct file file, void __user arg)
				389	{
				390	struct btrfs_inode *binode = BTRFS_I(file_inode(file));
				391	struct fsxattr fa;
				392
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	393	simple_fill_fsxattr(&fa, btrfs_inode_flags_to_xflags(binode->flags));
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	394	if (copy_to_user(arg, &fa, sizeof(fa)))
				395	return -EFAULT;
				396
				397	return 0;
				398	}
				399
				400	static int btrfs_ioctl_fssetxattr(struct file file, void __user arg)
				401	{
				402	struct inode *inode = file_inode(file);
				403	struct btrfs_inode *binode = BTRFS_I(inode);
				404	struct btrfs_root *root = binode->root;
				405	struct btrfs_trans_handle *trans;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	406	struct fsxattr fa, old_fa;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	407	unsigned old_flags;
				408	unsigned old_i_flags;
				409	int ret = 0;
				410
				411	if (!inode_owner_or_capable(inode))
				412	return -EPERM;
				413
				414	if (btrfs_root_readonly(root))
				415	return -EROFS;
				416
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	417	if (copy_from_user(&fa, arg, sizeof(fa)))
				418	return -EFAULT;
				419
				420	ret = check_xflags(fa.fsx_xflags);
				421	if (ret)
				422	return ret;
				423
				424	if (fa.fsx_extsize != 0 \|\| fa.fsx_projid != 0 \|\| fa.fsx_cowextsize != 0)
				425	return -EOPNOTSUPP;
				426
				427	ret = mnt_want_write_file(file);
				428	if (ret)
				429	return ret;
				430
				431	inode_lock(inode);
				432
				433	old_flags = binode->flags;
				434	old_i_flags = inode->i_flags;
				435
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	436	simple_fill_fsxattr(&old_fa,
				437	btrfs_inode_flags_to_xflags(binode->flags));
				438	ret = vfs_ioc_fssetxattr_check(inode, &old_fa, &fa);
				439	if (ret)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	440	goto out_unlock;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	441
				442	if (fa.fsx_xflags & FS_XFLAG_SYNC)
				443	binode->flags \|= BTRFS_INODE_SYNC;
				444	else
				445	binode->flags &= ~BTRFS_INODE_SYNC;
				446	if (fa.fsx_xflags & FS_XFLAG_IMMUTABLE)
				447	binode->flags \|= BTRFS_INODE_IMMUTABLE;
				448	else
				449	binode->flags &= ~BTRFS_INODE_IMMUTABLE;
				450	if (fa.fsx_xflags & FS_XFLAG_APPEND)
				451	binode->flags \|= BTRFS_INODE_APPEND;
				452	else
				453	binode->flags &= ~BTRFS_INODE_APPEND;
				454	if (fa.fsx_xflags & FS_XFLAG_NODUMP)
				455	binode->flags \|= BTRFS_INODE_NODUMP;
				456	else
				457	binode->flags &= ~BTRFS_INODE_NODUMP;
				458	if (fa.fsx_xflags & FS_XFLAG_NOATIME)
				459	binode->flags \|= BTRFS_INODE_NOATIME;
				460	else
				461	binode->flags &= ~BTRFS_INODE_NOATIME;
				462
				463	/* 1 item for the inode */
				464	trans = btrfs_start_transaction(root, 1);
				465	if (IS_ERR(trans)) {
				466	ret = PTR_ERR(trans);
				467	goto out_unlock;
				468	}
				469
				470	btrfs_sync_inode_flags_to_i_flags(inode);
				471	inode_inc_iversion(inode);
				472	inode->i_ctime = current_time(inode);
				473	ret = btrfs_update_inode(trans, root, inode);
				474
				475	btrfs_end_transaction(trans);
				476
				477	out_unlock:
				478	if (ret) {
				479	binode->flags = old_flags;
				480	inode->i_flags = old_i_flags;
				481	}
				482
				483	inode_unlock(inode);
				484	mnt_drop_write_file(file);
				485
				486	return ret;
				487	}
				488
				489	static int btrfs_ioctl_getversion(struct file file, int __user arg)
				490	{
				491	struct inode *inode = file_inode(file);
				492
				493	return put_user(inode->i_generation, arg);
				494	}
				495
				496	static noinline int btrfs_ioctl_fitrim(struct file file, void __user arg)
				497	{
				498	struct inode *inode = file_inode(file);
				499	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
				500	struct btrfs_device *device;
				501	struct request_queue *q;
				502	struct fstrim_range range;
				503	u64 minlen = ULLONG_MAX;
				504	u64 num_devices = 0;
				505	int ret;
				506
				507	if (!capable(CAP_SYS_ADMIN))
				508	return -EPERM;
				509
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	510	/*
				511	* If the fs is mounted with nologreplay, which requires it to be
				512	* mounted in RO mode as well, we can not allow discard on free space
				513	* inside block groups, because log trees refer to extents that are not
				514	* pinned in a block group's free space cache (pinning the extents is
				515	* precisely the first phase of replaying a log tree).
				516	*/
				517	if (btrfs_test_opt(fs_info, NOLOGREPLAY))
				518	return -EROFS;
				519
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	520	rcu_read_lock();
				521	list_for_each_entry_rcu(device, &fs_info->fs_devices->devices,
				522	dev_list) {
				523	if (!device->bdev)
				524	continue;
				525	q = bdev_get_queue(device->bdev);
				526	if (blk_queue_discard(q)) {
				527	num_devices++;
				528	minlen = min_t(u64, q->limits.discard_granularity,
				529	minlen);
				530	}
				531	}
				532	rcu_read_unlock();
				533
				534	if (!num_devices)
				535	return -EOPNOTSUPP;
				536	if (copy_from_user(&range, arg, sizeof(range)))
				537	return -EFAULT;
				538
				539	/*
				540	* NOTE: Don't truncate the range using super->total_bytes. Bytenr of
				541	* block group is in the logical address space, which can be any
				542	* sectorsize aligned bytenr in the range [0, U64_MAX].
				543	*/
				544	if (range.len < fs_info->sb->s_blocksize)
				545	return -EINVAL;
				546
				547	range.minlen = max(range.minlen, minlen);
				548	ret = btrfs_trim_fs(fs_info, &range);
				549	if (ret < 0)
				550	return ret;
				551
				552	if (copy_to_user(arg, &range, sizeof(range)))
				553	return -EFAULT;
				554
				555	return 0;
				556	}
				557
				558	int btrfs_is_empty_uuid(u8 *uuid)
				559	{
				560	int i;
				561
				562	for (i = 0; i < BTRFS_UUID_SIZE; i++) {
				563	if (uuid[i])
				564	return 0;
				565	}
				566	return 1;
				567	}
				568
				569	static noinline int create_subvol(struct inode *dir,
				570	struct dentry *dentry,
				571	const char *name, int namelen,
				572	u64 *async_transid,
				573	struct btrfs_qgroup_inherit *inherit)
				574	{
				575	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
				576	struct btrfs_trans_handle *trans;
				577	struct btrfs_key key;
				578	struct btrfs_root_item *root_item;
				579	struct btrfs_inode_item *inode_item;
				580	struct extent_buffer *leaf;
				581	struct btrfs_root *root = BTRFS_I(dir)->root;
				582	struct btrfs_root *new_root;
				583	struct btrfs_block_rsv block_rsv;
				584	struct timespec64 cur_time = current_time(dir);
				585	struct inode *inode;
				586	int ret;
				587	int err;
				588	u64 objectid;
				589	u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
				590	u64 index = 0;
				591	uuid_le new_uuid;
				592
				593	root_item = kzalloc(sizeof(*root_item), GFP_KERNEL);
				594	if (!root_item)
				595	return -ENOMEM;
				596
				597	ret = btrfs_find_free_objectid(fs_info->tree_root, &objectid);
				598	if (ret)
				599	goto fail_free;
				600
				601	/*
				602	* Don't create subvolume whose level is not zero. Or qgroup will be
				603	* screwed up since it assumes subvolume qgroup's level to be 0.
				604	*/
				605	if (btrfs_qgroup_level(objectid)) {
				606	ret = -ENOSPC;
				607	goto fail_free;
				608	}
				609
				610	btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
				611	/*
				612	* The same as the snapshot creation, please see the comment
				613	* of create_snapshot().
				614	*/
				615	ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 8, false);
				616	if (ret)
				617	goto fail_free;
				618
				619	trans = btrfs_start_transaction(root, 0);
				620	if (IS_ERR(trans)) {
				621	ret = PTR_ERR(trans);
				622	btrfs_subvolume_release_metadata(fs_info, &block_rsv);
				623	goto fail_free;
				624	}
				625	trans->block_rsv = &block_rsv;
				626	trans->bytes_reserved = block_rsv.size;
				627
				628	ret = btrfs_qgroup_inherit(trans, 0, objectid, inherit);
				629	if (ret)
				630	goto fail;
				631
				632	leaf = btrfs_alloc_tree_block(trans, root, 0, objectid, NULL, 0, 0, 0);
				633	if (IS_ERR(leaf)) {
				634	ret = PTR_ERR(leaf);
				635	goto fail;
				636	}
				637
				638	btrfs_mark_buffer_dirty(leaf);
				639
				640	inode_item = &root_item->inode;
				641	btrfs_set_stack_inode_generation(inode_item, 1);
				642	btrfs_set_stack_inode_size(inode_item, 3);
				643	btrfs_set_stack_inode_nlink(inode_item, 1);
				644	btrfs_set_stack_inode_nbytes(inode_item,
				645	fs_info->nodesize);
				646	btrfs_set_stack_inode_mode(inode_item, S_IFDIR \| 0755);
				647
				648	btrfs_set_root_flags(root_item, 0);
				649	btrfs_set_root_limit(root_item, 0);
				650	btrfs_set_stack_inode_flags(inode_item, BTRFS_INODE_ROOT_ITEM_INIT);
				651
				652	btrfs_set_root_bytenr(root_item, leaf->start);
				653	btrfs_set_root_generation(root_item, trans->transid);
				654	btrfs_set_root_level(root_item, 0);
				655	btrfs_set_root_refs(root_item, 1);
				656	btrfs_set_root_used(root_item, leaf->len);
				657	btrfs_set_root_last_snapshot(root_item, 0);
				658
				659	btrfs_set_root_generation_v2(root_item,
				660	btrfs_root_generation(root_item));
				661	uuid_le_gen(&new_uuid);
				662	memcpy(root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE);
				663	btrfs_set_stack_timespec_sec(&root_item->otime, cur_time.tv_sec);
				664	btrfs_set_stack_timespec_nsec(&root_item->otime, cur_time.tv_nsec);
				665	root_item->ctime = root_item->otime;
				666	btrfs_set_root_ctransid(root_item, trans->transid);
				667	btrfs_set_root_otransid(root_item, trans->transid);
				668
				669	btrfs_tree_unlock(leaf);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	670
				671	btrfs_set_root_dirid(root_item, new_dirid);
				672
				673	key.objectid = objectid;
				674	key.offset = 0;
				675	key.type = BTRFS_ROOT_ITEM_KEY;
				676	ret = btrfs_insert_root(trans, fs_info->tree_root, &key,
				677	root_item);
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	678	if (ret) {
				679	/*
				680	* Since we don't abort the transaction in this case, free the
				681	* tree block so that we don't leak space and leave the
				682	* filesystem in an inconsistent state (an extent item in the
				683	* extent tree without backreferences). Also no need to have
				684	* the tree block locked since it is not in any tree at this
				685	* point, so no other task can find it and use it.
				686	*/
				687	btrfs_free_tree_block(trans, root, leaf, 0, 1);
				688	free_extent_buffer(leaf);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	689	goto fail;
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	690	}
				691
				692	free_extent_buffer(leaf);
				693	leaf = NULL;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	694
				695	key.offset = (u64)-1;
				696	new_root = btrfs_read_fs_root_no_name(fs_info, &key);
				697	if (IS_ERR(new_root)) {
				698	ret = PTR_ERR(new_root);
				699	btrfs_abort_transaction(trans, ret);
				700	goto fail;
				701	}
				702
				703	btrfs_record_root_in_trans(trans, new_root);
				704
				705	ret = btrfs_create_subvol_root(trans, new_root, root, new_dirid);
				706	if (ret) {
				707	/* We potentially lose an unused inode item here */
				708	btrfs_abort_transaction(trans, ret);
				709	goto fail;
				710	}
				711
				712	mutex_lock(&new_root->objectid_mutex);
				713	new_root->highest_objectid = new_dirid;
				714	mutex_unlock(&new_root->objectid_mutex);
				715
				716	/*
				717	* insert the directory item
				718	*/
				719	ret = btrfs_set_inode_index(BTRFS_I(dir), &index);
				720	if (ret) {
				721	btrfs_abort_transaction(trans, ret);
				722	goto fail;
				723	}
				724
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	725	ret = btrfs_insert_dir_item(trans, name, namelen, BTRFS_I(dir), &key,
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	726	BTRFS_FT_DIR, index);
				727	if (ret) {
				728	btrfs_abort_transaction(trans, ret);
				729	goto fail;
				730	}
				731
				732	btrfs_i_size_write(BTRFS_I(dir), dir->i_size + namelen * 2);
				733	ret = btrfs_update_inode(trans, root, dir);
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	734	if (ret) {
				735	btrfs_abort_transaction(trans, ret);
				736	goto fail;
				737	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	738
				739	ret = btrfs_add_root_ref(trans, objectid, root->root_key.objectid,
				740	btrfs_ino(BTRFS_I(dir)), index, name, namelen);
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	741	if (ret) {
				742	btrfs_abort_transaction(trans, ret);
				743	goto fail;
				744	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	745
				746	ret = btrfs_uuid_tree_add(trans, root_item->uuid,
				747	BTRFS_UUID_KEY_SUBVOL, objectid);
				748	if (ret)
				749	btrfs_abort_transaction(trans, ret);
				750
				751	fail:
				752	kfree(root_item);
				753	trans->block_rsv = NULL;
				754	trans->bytes_reserved = 0;
				755	btrfs_subvolume_release_metadata(fs_info, &block_rsv);
				756
				757	if (async_transid) {
				758	*async_transid = trans->transid;
				759	err = btrfs_commit_transaction_async(trans, 1);
				760	if (err)
				761	err = btrfs_commit_transaction(trans);
				762	} else {
				763	err = btrfs_commit_transaction(trans);
				764	}
				765	if (err && !ret)
				766	ret = err;
				767
				768	if (!ret) {
				769	inode = btrfs_lookup_dentry(dir, dentry);
				770	if (IS_ERR(inode))
				771	return PTR_ERR(inode);
				772	d_instantiate(dentry, inode);
				773	}
				774	return ret;
				775
				776	fail_free:
				777	kfree(root_item);
				778	return ret;
				779	}
				780
				781	static int create_snapshot(struct btrfs_root root, struct inode dir,
				782	struct dentry *dentry,
				783	u64 *async_transid, bool readonly,
				784	struct btrfs_qgroup_inherit *inherit)
				785	{
				786	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
				787	struct inode *inode;
				788	struct btrfs_pending_snapshot *pending_snapshot;
				789	struct btrfs_trans_handle *trans;
				790	int ret;
				791	bool snapshot_force_cow = false;
				792
				793	if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
				794	return -EINVAL;
				795
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	796	if (atomic_read(&root->nr_swapfiles)) {
				797	btrfs_warn(fs_info,
				798	"cannot snapshot subvolume with active swapfile");
				799	return -ETXTBSY;
				800	}
				801
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	802	pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_KERNEL);
				803	if (!pending_snapshot)
				804	return -ENOMEM;
				805
				806	pending_snapshot->root_item = kzalloc(sizeof(struct btrfs_root_item),
				807	GFP_KERNEL);
				808	pending_snapshot->path = btrfs_alloc_path();
				809	if (!pending_snapshot->root_item \|\| !pending_snapshot->path) {
				810	ret = -ENOMEM;
				811	goto free_pending;
				812	}
				813
				814	/*
				815	* Force new buffered writes to reserve space even when NOCOW is
				816	* possible. This is to avoid later writeback (running dealloc) to
				817	* fallback to COW mode and unexpectedly fail with ENOSPC.
				818	*/
				819	atomic_inc(&root->will_be_snapshotted);
				820	smp_mb__after_atomic();
				821	/* wait for no snapshot writes */
				822	wait_event(root->subv_writers->wait,
				823	percpu_counter_sum(&root->subv_writers->counter) == 0);
				824
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	825	ret = btrfs_start_delalloc_snapshot(root);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	826	if (ret)
				827	goto dec_and_free;
				828
				829	/*
				830	* All previous writes have started writeback in NOCOW mode, so now
				831	* we force future writes to fallback to COW mode during snapshot
				832	* creation.
				833	*/
				834	atomic_inc(&root->snapshot_force_cow);
				835	snapshot_force_cow = true;
				836
				837	btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
				838
				839	btrfs_init_block_rsv(&pending_snapshot->block_rsv,
				840	BTRFS_BLOCK_RSV_TEMP);
				841	/*
				842	* 1 - parent dir inode
				843	* 2 - dir entries
				844	* 1 - root item
				845	* 2 - root ref/backref
				846	* 1 - root of snapshot
				847	* 1 - UUID item
				848	*/
				849	ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root,
				850	&pending_snapshot->block_rsv, 8,
				851	false);
				852	if (ret)
				853	goto dec_and_free;
				854
				855	pending_snapshot->dentry = dentry;
				856	pending_snapshot->root = root;
				857	pending_snapshot->readonly = readonly;
				858	pending_snapshot->dir = dir;
				859	pending_snapshot->inherit = inherit;
				860
				861	trans = btrfs_start_transaction(root, 0);
				862	if (IS_ERR(trans)) {
				863	ret = PTR_ERR(trans);
				864	goto fail;
				865	}
				866
				867	spin_lock(&fs_info->trans_lock);
				868	list_add(&pending_snapshot->list,
				869	&trans->transaction->pending_snapshots);
				870	spin_unlock(&fs_info->trans_lock);
				871	if (async_transid) {
				872	*async_transid = trans->transid;
				873	ret = btrfs_commit_transaction_async(trans, 1);
				874	if (ret)
				875	ret = btrfs_commit_transaction(trans);
				876	} else {
				877	ret = btrfs_commit_transaction(trans);
				878	}
				879	if (ret)
				880	goto fail;
				881
				882	ret = pending_snapshot->error;
				883	if (ret)
				884	goto fail;
				885
				886	ret = btrfs_orphan_cleanup(pending_snapshot->snap);
				887	if (ret)
				888	goto fail;
				889
				890	inode = btrfs_lookup_dentry(d_inode(dentry->d_parent), dentry);
				891	if (IS_ERR(inode)) {
				892	ret = PTR_ERR(inode);
				893	goto fail;
				894	}
				895
				896	d_instantiate(dentry, inode);
				897	ret = 0;
				898	fail:
				899	btrfs_subvolume_release_metadata(fs_info, &pending_snapshot->block_rsv);
				900	dec_and_free:
				901	if (snapshot_force_cow)
				902	atomic_dec(&root->snapshot_force_cow);
				903	if (atomic_dec_and_test(&root->will_be_snapshotted))
				904	wake_up_var(&root->will_be_snapshotted);
				905	free_pending:
				906	kfree(pending_snapshot->root_item);
				907	btrfs_free_path(pending_snapshot->path);
				908	kfree(pending_snapshot);
				909
				910	return ret;
				911	}
				912
				913	/* copy of may_delete in fs/namei.c()
				914	* Check whether we can remove a link victim from directory dir, check
				915	* whether the type of victim is right.
				916	* 1. We can't do it if dir is read-only (done in permission())
				917	* 2. We should have write and exec permissions on dir
				918	* 3. We can't remove anything from append-only dir
				919	* 4. We can't do anything with immutable dir (done in permission())
				920	* 5. If the sticky bit on dir is set we should either
				921	* a. be owner of dir, or
				922	* b. be owner of victim, or
				923	* c. have CAP_FOWNER capability
				924	* 6. If the victim is append-only or immutable we can't do anything with
				925	* links pointing to it.
				926	* 7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
				927	* 8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
				928	* 9. We can't remove a root or mountpoint.
				929	* 10. We don't allow removal of NFS sillyrenamed files; it's handled by
				930	* nfs_async_unlink().
				931	*/
				932
				933	static int btrfs_may_delete(struct inode dir, struct dentry victim, int isdir)
				934	{
				935	int error;
				936
				937	if (d_really_is_negative(victim))
				938	return -ENOENT;
				939
				940	BUG_ON(d_inode(victim->d_parent) != dir);
				941	audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
				942
				943	error = inode_permission(dir, MAY_WRITE \| MAY_EXEC);
				944	if (error)
				945	return error;
				946	if (IS_APPEND(dir))
				947	return -EPERM;
				948	if (check_sticky(dir, d_inode(victim)) \|\| IS_APPEND(d_inode(victim)) \|\|
				949	IS_IMMUTABLE(d_inode(victim)) \|\| IS_SWAPFILE(d_inode(victim)))
				950	return -EPERM;
				951	if (isdir) {
				952	if (!d_is_dir(victim))
				953	return -ENOTDIR;
				954	if (IS_ROOT(victim))
				955	return -EBUSY;
				956	} else if (d_is_dir(victim))
				957	return -EISDIR;
				958	if (IS_DEADDIR(dir))
				959	return -ENOENT;
				960	if (victim->d_flags & DCACHE_NFSFS_RENAMED)
				961	return -EBUSY;
				962	return 0;
				963	}
				964
				965	/* copy of may_create in fs/namei.c() */
				966	static inline int btrfs_may_create(struct inode dir, struct dentry child)
				967	{
				968	if (d_really_is_positive(child))
				969	return -EEXIST;
				970	if (IS_DEADDIR(dir))
				971	return -ENOENT;
				972	return inode_permission(dir, MAY_WRITE \| MAY_EXEC);
				973	}
				974
				975	/*
				976	* Create a new subvolume below @parent. This is largely modeled after
				977	* sys_mkdirat and vfs_mkdir, but we only do a single component lookup
				978	* inside this filesystem so it's quite a bit simpler.
				979	*/
				980	static noinline int btrfs_mksubvol(const struct path *parent,
				981	const char *name, int namelen,
				982	struct btrfs_root *snap_src,
				983	u64 *async_transid, bool readonly,
				984	struct btrfs_qgroup_inherit *inherit)
				985	{
				986	struct inode *dir = d_inode(parent->dentry);
				987	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
				988	struct dentry *dentry;
				989	int error;
				990
				991	error = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
				992	if (error == -EINTR)
				993	return error;
				994
				995	dentry = lookup_one_len(name, parent->dentry, namelen);
				996	error = PTR_ERR(dentry);
				997	if (IS_ERR(dentry))
				998	goto out_unlock;
				999
				1000	error = btrfs_may_create(dir, dentry);
				1001	if (error)
				1002	goto out_dput;
				1003
				1004	/*
				1005	* even if this name doesn't exist, we may get hash collisions.
				1006	* check for them now when we can safely fail
				1007	*/
				1008	error = btrfs_check_dir_item_collision(BTRFS_I(dir)->root,
				1009	dir->i_ino, name,
				1010	namelen);
				1011	if (error)
				1012	goto out_dput;
				1013
				1014	down_read(&fs_info->subvol_sem);
				1015
				1016	if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0)
				1017	goto out_up_read;
				1018
				1019	if (snap_src) {
				1020	error = create_snapshot(snap_src, dir, dentry,
				1021	async_transid, readonly, inherit);
				1022	} else {
				1023	error = create_subvol(dir, dentry, name, namelen,
				1024	async_transid, inherit);
				1025	}
				1026	if (!error)
				1027	fsnotify_mkdir(dir, dentry);
				1028	out_up_read:
				1029	up_read(&fs_info->subvol_sem);
				1030	out_dput:
				1031	dput(dentry);
				1032	out_unlock:
				1033	inode_unlock(dir);
				1034	return error;
				1035	}
				1036
				1037	/*
				1038	* When we're defragging a range, we don't want to kick it off again
				1039	* if it is really just waiting for delalloc to send it down.
				1040	* If we find a nice big extent or delalloc range for the bytes in the
				1041	* file you want to defrag, we return 0 to let you know to skip this
				1042	* part of the file
				1043	*/
				1044	static int check_defrag_in_cache(struct inode *inode, u64 offset, u32 thresh)
				1045	{
				1046	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
				1047	struct extent_map *em = NULL;
				1048	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
				1049	u64 end;
				1050
				1051	read_lock(&em_tree->lock);
				1052	em = lookup_extent_mapping(em_tree, offset, PAGE_SIZE);
				1053	read_unlock(&em_tree->lock);
				1054
				1055	if (em) {
				1056	end = extent_map_end(em);
				1057	free_extent_map(em);
				1058	if (end - offset > thresh)
				1059	return 0;
				1060	}
				1061	/* if we already have a nice delalloc here, just stop */
				1062	thresh /= 2;
				1063	end = count_range_bits(io_tree, &offset, offset + thresh,
				1064	thresh, EXTENT_DELALLOC, 1);
				1065	if (end >= thresh)
				1066	return 0;
				1067	return 1;
				1068	}
				1069
				1070	/*
				1071	* helper function to walk through a file and find extents
				1072	* newer than a specific transid, and smaller than thresh.
				1073	*
				1074	* This is used by the defragging code to find new and small
				1075	* extents
				1076	*/
				1077	static int find_new_extents(struct btrfs_root *root,
				1078	struct inode *inode, u64 newer_than,
				1079	u64 *off, u32 thresh)
				1080	{
				1081	struct btrfs_path *path;
				1082	struct btrfs_key min_key;
				1083	struct extent_buffer *leaf;
				1084	struct btrfs_file_extent_item *extent;
				1085	int type;
				1086	int ret;
				1087	u64 ino = btrfs_ino(BTRFS_I(inode));
				1088
				1089	path = btrfs_alloc_path();
				1090	if (!path)
				1091	return -ENOMEM;
				1092
				1093	min_key.objectid = ino;
				1094	min_key.type = BTRFS_EXTENT_DATA_KEY;
				1095	min_key.offset = *off;
				1096
				1097	while (1) {
				1098	ret = btrfs_search_forward(root, &min_key, path, newer_than);
				1099	if (ret != 0)
				1100	goto none;
				1101	process_slot:
				1102	if (min_key.objectid != ino)
				1103	goto none;
				1104	if (min_key.type != BTRFS_EXTENT_DATA_KEY)
				1105	goto none;
				1106
				1107	leaf = path->nodes[0];
				1108	extent = btrfs_item_ptr(leaf, path->slots[0],
				1109	struct btrfs_file_extent_item);
				1110
				1111	type = btrfs_file_extent_type(leaf, extent);
				1112	if (type == BTRFS_FILE_EXTENT_REG &&
				1113	btrfs_file_extent_num_bytes(leaf, extent) < thresh &&
				1114	check_defrag_in_cache(inode, min_key.offset, thresh)) {
				1115	*off = min_key.offset;
				1116	btrfs_free_path(path);
				1117	return 0;
				1118	}
				1119
				1120	path->slots[0]++;
				1121	if (path->slots[0] < btrfs_header_nritems(leaf)) {
				1122	btrfs_item_key_to_cpu(leaf, &min_key, path->slots[0]);
				1123	goto process_slot;
				1124	}
				1125
				1126	if (min_key.offset == (u64)-1)
				1127	goto none;
				1128
				1129	min_key.offset++;
				1130	btrfs_release_path(path);
				1131	}
				1132	none:
				1133	btrfs_free_path(path);
				1134	return -ENOENT;
				1135	}
				1136
				1137	static struct extent_map defrag_lookup_extent(struct inode inode, u64 start)
				1138	{
				1139	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
				1140	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
				1141	struct extent_map *em;
				1142	u64 len = PAGE_SIZE;
				1143
				1144	/*
				1145	* hopefully we have this extent in the tree already, try without
				1146	* the full extent lock
				1147	*/
				1148	read_lock(&em_tree->lock);
				1149	em = lookup_extent_mapping(em_tree, start, len);
				1150	read_unlock(&em_tree->lock);
				1151
				1152	if (!em) {
				1153	struct extent_state *cached = NULL;
				1154	u64 end = start + len - 1;
				1155
				1156	/* get the big lock and read metadata off disk */
				1157	lock_extent_bits(io_tree, start, end, &cached);
				1158	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0);
				1159	unlock_extent_cached(io_tree, start, end, &cached);
				1160
				1161	if (IS_ERR(em))
				1162	return NULL;
				1163	}
				1164
				1165	return em;
				1166	}
				1167
				1168	static bool defrag_check_next_extent(struct inode inode, struct extent_map em)
				1169	{
				1170	struct extent_map *next;
				1171	bool ret = true;
				1172
				1173	/* this is the last extent */
				1174	if (em->start + em->len >= i_size_read(inode))
				1175	return false;
				1176
				1177	next = defrag_lookup_extent(inode, em->start + em->len);
				1178	if (!next \|\| next->block_start >= EXTENT_MAP_LAST_BYTE)
				1179	ret = false;
				1180	else if ((em->block_start + em->block_len == next->block_start) &&
				1181	(em->block_len > SZ_128K && next->block_len > SZ_128K))
				1182	ret = false;
				1183
				1184	free_extent_map(next);
				1185	return ret;
				1186	}
				1187
				1188	static int should_defrag_range(struct inode *inode, u64 start, u32 thresh,
				1189	u64 last_len, u64 skip, u64 *defrag_end,
				1190	int compress)
				1191	{
				1192	struct extent_map *em;
				1193	int ret = 1;
				1194	bool next_mergeable = true;
				1195	bool prev_mergeable = true;
				1196
				1197	/*
				1198	* make sure that once we start defragging an extent, we keep on
				1199	* defragging it
				1200	*/
				1201	if (start < *defrag_end)
				1202	return 1;
				1203
				1204	*skip = 0;
				1205
				1206	em = defrag_lookup_extent(inode, start);
				1207	if (!em)
				1208	return 0;
				1209
				1210	/* this will cover holes, and inline extents */
				1211	if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
				1212	ret = 0;
				1213	goto out;
				1214	}
				1215
				1216	if (!*defrag_end)
				1217	prev_mergeable = false;
				1218
				1219	next_mergeable = defrag_check_next_extent(inode, em);
				1220	/*
				1221	* we hit a real extent, if it is big or the next extent is not a
				1222	* real extent, don't bother defragging it
				1223	*/
				1224	if (!compress && (last_len == 0 \|\| last_len >= thresh) &&
				1225	(em->len >= thresh \|\| (!next_mergeable && !prev_mergeable)))
				1226	ret = 0;
				1227	out:
				1228	/*
				1229	* last_len ends up being a counter of how many bytes we've defragged.
				1230	* every time we choose not to defrag an extent, we reset *last_len
				1231	* so that the next tiny extent will force a defrag.
				1232	*
				1233	* The end result of this is that tiny extents before a single big
				1234	* extent will force at least part of that big extent to be defragged.
				1235	*/
				1236	if (ret) {
				1237	*defrag_end = extent_map_end(em);
				1238	} else {
				1239	*last_len = 0;
				1240	*skip = extent_map_end(em);
				1241	*defrag_end = 0;
				1242	}
				1243
				1244	free_extent_map(em);
				1245	return ret;
				1246	}
				1247
				1248	/*
				1249	* it doesn't do much good to defrag one or two pages
				1250	* at a time. This pulls in a nice chunk of pages
				1251	* to COW and defrag.
				1252	*
				1253	* It also makes sure the delalloc code has enough
				1254	* dirty data to avoid making new small extents as part
				1255	* of the defrag
				1256	*
				1257	* It's a good idea to start RA on this range
				1258	* before calling this.
				1259	*/
				1260	static int cluster_pages_for_defrag(struct inode *inode,
				1261	struct page **pages,
				1262	unsigned long start_index,
				1263	unsigned long num_pages)
				1264	{
				1265	unsigned long file_end;
				1266	u64 isize = i_size_read(inode);
				1267	u64 page_start;
				1268	u64 page_end;
				1269	u64 page_cnt;
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	1270	u64 start = (u64)start_index << PAGE_SHIFT;
				1271	u64 search_start;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1272	int ret;
				1273	int i;
				1274	int i_done;
				1275	struct btrfs_ordered_extent *ordered;
				1276	struct extent_state *cached_state = NULL;
				1277	struct extent_io_tree *tree;
				1278	struct extent_changeset *data_reserved = NULL;
				1279	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
				1280
				1281	file_end = (isize - 1) >> PAGE_SHIFT;
				1282	if (!isize \|\| start_index > file_end)
				1283	return 0;
				1284
				1285	page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1);
				1286
				1287	ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	1288	start, page_cnt << PAGE_SHIFT);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1289	if (ret)
				1290	return ret;
				1291	i_done = 0;
				1292	tree = &BTRFS_I(inode)->io_tree;
				1293
				1294	/* step one, lock all the pages */
				1295	for (i = 0; i < page_cnt; i++) {
				1296	struct page *page;
				1297	again:
				1298	page = find_or_create_page(inode->i_mapping,
				1299	start_index + i, mask);
				1300	if (!page)
				1301	break;
				1302
				1303	page_start = page_offset(page);
				1304	page_end = page_start + PAGE_SIZE - 1;
				1305	while (1) {
				1306	lock_extent_bits(tree, page_start, page_end,
				1307	&cached_state);
				1308	ordered = btrfs_lookup_ordered_extent(inode,
				1309	page_start);
				1310	unlock_extent_cached(tree, page_start, page_end,
				1311	&cached_state);
				1312	if (!ordered)
				1313	break;
				1314
				1315	unlock_page(page);
				1316	btrfs_start_ordered_extent(inode, ordered, 1);
				1317	btrfs_put_ordered_extent(ordered);
				1318	lock_page(page);
				1319	/*
				1320	* we unlocked the page above, so we need check if
				1321	* it was released or not.
				1322	*/
				1323	if (page->mapping != inode->i_mapping) {
				1324	unlock_page(page);
				1325	put_page(page);
				1326	goto again;
				1327	}
				1328	}
				1329
				1330	if (!PageUptodate(page)) {
				1331	btrfs_readpage(NULL, page);
				1332	lock_page(page);
				1333	if (!PageUptodate(page)) {
				1334	unlock_page(page);
				1335	put_page(page);
				1336	ret = -EIO;
				1337	break;
				1338	}
				1339	}
				1340
				1341	if (page->mapping != inode->i_mapping) {
				1342	unlock_page(page);
				1343	put_page(page);
				1344	goto again;
				1345	}
				1346
				1347	pages[i] = page;
				1348	i_done++;
				1349	}
				1350	if (!i_done \|\| ret)
				1351	goto out;
				1352
				1353	if (!(inode->i_sb->s_flags & SB_ACTIVE))
				1354	goto out;
				1355
				1356	/*
				1357	* so now we have a nice long stream of locked
				1358	* and up to date pages, lets wait on them
				1359	*/
				1360	for (i = 0; i < i_done; i++)
				1361	wait_on_page_writeback(pages[i]);
				1362
				1363	page_start = page_offset(pages[0]);
				1364	page_end = page_offset(pages[i_done - 1]) + PAGE_SIZE;
				1365
				1366	lock_extent_bits(&BTRFS_I(inode)->io_tree,
				1367	page_start, page_end - 1, &cached_state);
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	1368
				1369	/*
				1370	* When defragmenting we skip ranges that have holes or inline extents,
				1371	* (check should_defrag_range()), to avoid unnecessary IO and wasting
				1372	* space. At btrfs_defrag_file(), we check if a range should be defragged
				1373	* before locking the inode and then, if it should, we trigger a sync
				1374	* page cache readahead - we lock the inode only after that to avoid
				1375	* blocking for too long other tasks that possibly want to operate on
				1376	* other file ranges. But before we were able to get the inode lock,
				1377	* some other task may have punched a hole in the range, or we may have
				1378	* now an inline extent, in which case we should not defrag. So check
				1379	* for that here, where we have the inode and the range locked, and bail
				1380	* out if that happened.
				1381	*/
				1382	search_start = page_start;
				1383	while (search_start < page_end) {
				1384	struct extent_map *em;
				1385
				1386	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, search_start,
				1387	page_end - search_start, 0);
				1388	if (IS_ERR(em)) {
				1389	ret = PTR_ERR(em);
				1390	goto out_unlock_range;
				1391	}
				1392	if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
				1393	free_extent_map(em);
				1394	/* Ok, 0 means we did not defrag anything */
				1395	ret = 0;
				1396	goto out_unlock_range;
				1397	}
				1398	search_start = extent_map_end(em);
				1399	free_extent_map(em);
				1400	}
				1401
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1402	clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1403	page_end - 1, EXTENT_DELALLOC \| EXTENT_DO_ACCOUNTING \|
				1404	EXTENT_DEFRAG, 0, 0, &cached_state);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1405
				1406	if (i_done != page_cnt) {
				1407	spin_lock(&BTRFS_I(inode)->lock);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1408	btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1409	spin_unlock(&BTRFS_I(inode)->lock);
				1410	btrfs_delalloc_release_space(inode, data_reserved,
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	1411	start, (page_cnt - i_done) << PAGE_SHIFT, true);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1412	}
				1413
				1414
				1415	set_extent_defrag(&BTRFS_I(inode)->io_tree, page_start, page_end - 1,
				1416	&cached_state);
				1417
				1418	unlock_extent_cached(&BTRFS_I(inode)->io_tree,
				1419	page_start, page_end - 1, &cached_state);
				1420
				1421	for (i = 0; i < i_done; i++) {
				1422	clear_page_dirty_for_io(pages[i]);
				1423	ClearPageChecked(pages[i]);
				1424	set_page_extent_mapped(pages[i]);
				1425	set_page_dirty(pages[i]);
				1426	unlock_page(pages[i]);
				1427	put_page(pages[i]);
				1428	}
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1429	btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1430	extent_changeset_free(data_reserved);
				1431	return i_done;
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	1432
				1433	out_unlock_range:
				1434	unlock_extent_cached(&BTRFS_I(inode)->io_tree,
				1435	page_start, page_end - 1, &cached_state);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1436	out:
				1437	for (i = 0; i < i_done; i++) {
				1438	unlock_page(pages[i]);
				1439	put_page(pages[i]);
				1440	}
				1441	btrfs_delalloc_release_space(inode, data_reserved,
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	1442	start, page_cnt << PAGE_SHIFT, true);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1443	btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1444	extent_changeset_free(data_reserved);
				1445	return ret;
				1446
				1447	}
				1448
				1449	int btrfs_defrag_file(struct inode inode, struct file file,
				1450	struct btrfs_ioctl_defrag_range_args *range,
				1451	u64 newer_than, unsigned long max_to_defrag)
				1452	{
				1453	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
				1454	struct btrfs_root *root = BTRFS_I(inode)->root;
				1455	struct file_ra_state *ra = NULL;
				1456	unsigned long last_index;
				1457	u64 isize = i_size_read(inode);
				1458	u64 last_len = 0;
				1459	u64 skip = 0;
				1460	u64 defrag_end = 0;
				1461	u64 newer_off = range->start;
				1462	unsigned long i;
				1463	unsigned long ra_index = 0;
				1464	int ret;
				1465	int defrag_count = 0;
				1466	int compress_type = BTRFS_COMPRESS_ZLIB;
				1467	u32 extent_thresh = range->extent_thresh;
				1468	unsigned long max_cluster = SZ_256K >> PAGE_SHIFT;
				1469	unsigned long cluster = max_cluster;
				1470	u64 new_align = ~((u64)SZ_128K - 1);
				1471	struct page **pages = NULL;
				1472	bool do_compress = range->flags & BTRFS_DEFRAG_RANGE_COMPRESS;
				1473
				1474	if (isize == 0)
				1475	return 0;
				1476
				1477	if (range->start >= isize)
				1478	return -EINVAL;
				1479
				1480	if (do_compress) {
				1481	if (range->compress_type > BTRFS_COMPRESS_TYPES)
				1482	return -EINVAL;
				1483	if (range->compress_type)
				1484	compress_type = range->compress_type;
				1485	}
				1486
				1487	if (extent_thresh == 0)
				1488	extent_thresh = SZ_256K;
				1489
				1490	/*
				1491	* If we were not given a file, allocate a readahead context. As
				1492	* readahead is just an optimization, defrag will work without it so
				1493	* we don't error out.
				1494	*/
				1495	if (!file) {
				1496	ra = kzalloc(sizeof(*ra), GFP_KERNEL);
				1497	if (ra)
				1498	file_ra_state_init(ra, inode->i_mapping);
				1499	} else {
				1500	ra = &file->f_ra;
				1501	}
				1502
				1503	pages = kmalloc_array(max_cluster, sizeof(struct page *), GFP_KERNEL);
				1504	if (!pages) {
				1505	ret = -ENOMEM;
				1506	goto out_ra;
				1507	}
				1508
				1509	/* find the last page to defrag */
				1510	if (range->start + range->len > range->start) {
				1511	last_index = min_t(u64, isize - 1,
				1512	range->start + range->len - 1) >> PAGE_SHIFT;
				1513	} else {
				1514	last_index = (isize - 1) >> PAGE_SHIFT;
				1515	}
				1516
				1517	if (newer_than) {
				1518	ret = find_new_extents(root, inode, newer_than,
				1519	&newer_off, SZ_64K);
				1520	if (!ret) {
				1521	range->start = newer_off;
				1522	/*
				1523	* we always align our defrag to help keep
				1524	* the extents in the file evenly spaced
				1525	*/
				1526	i = (newer_off & new_align) >> PAGE_SHIFT;
				1527	} else
				1528	goto out_ra;
				1529	} else {
				1530	i = range->start >> PAGE_SHIFT;
				1531	}
				1532	if (!max_to_defrag)
				1533	max_to_defrag = last_index - i + 1;
				1534
				1535	/*
				1536	* make writeback starts from i, so the defrag range can be
				1537	* written sequentially.
				1538	*/
				1539	if (i < inode->i_mapping->writeback_index)
				1540	inode->i_mapping->writeback_index = i;
				1541
				1542	while (i <= last_index && defrag_count < max_to_defrag &&
				1543	(i < DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE))) {
				1544	/*
				1545	* make sure we stop running if someone unmounts
				1546	* the FS
				1547	*/
				1548	if (!(inode->i_sb->s_flags & SB_ACTIVE))
				1549	break;
				1550
				1551	if (btrfs_defrag_cancelled(fs_info)) {
				1552	btrfs_debug(fs_info, "defrag_file cancelled");
				1553	ret = -EAGAIN;
				1554	break;
				1555	}
				1556
				1557	if (!should_defrag_range(inode, (u64)i << PAGE_SHIFT,
				1558	extent_thresh, &last_len, &skip,
				1559	&defrag_end, do_compress)){
				1560	unsigned long next;
				1561	/*
				1562	* the should_defrag function tells us how much to skip
				1563	* bump our counter by the suggested amount
				1564	*/
				1565	next = DIV_ROUND_UP(skip, PAGE_SIZE);
				1566	i = max(i + 1, next);
				1567	continue;
				1568	}
				1569
				1570	if (!newer_than) {
				1571	cluster = (PAGE_ALIGN(defrag_end) >>
				1572	PAGE_SHIFT) - i;
				1573	cluster = min(cluster, max_cluster);
				1574	} else {
				1575	cluster = max_cluster;
				1576	}
				1577
				1578	if (i + cluster > ra_index) {
				1579	ra_index = max(i, ra_index);
				1580	if (ra)
				1581	page_cache_sync_readahead(inode->i_mapping, ra,
				1582	file, ra_index, cluster);
				1583	ra_index += cluster;
				1584	}
				1585
				1586	inode_lock(inode);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1587	if (IS_SWAPFILE(inode)) {
				1588	ret = -ETXTBSY;
				1589	} else {
				1590	if (do_compress)
				1591	BTRFS_I(inode)->defrag_compress = compress_type;
				1592	ret = cluster_pages_for_defrag(inode, pages, i, cluster);
				1593	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1594	if (ret < 0) {
				1595	inode_unlock(inode);
				1596	goto out_ra;
				1597	}
				1598
				1599	defrag_count += ret;
				1600	balance_dirty_pages_ratelimited(inode->i_mapping);
				1601	inode_unlock(inode);
				1602
				1603	if (newer_than) {
				1604	if (newer_off == (u64)-1)
				1605	break;
				1606
				1607	if (ret > 0)
				1608	i += ret;
				1609
				1610	newer_off = max(newer_off + 1,
				1611	(u64)i << PAGE_SHIFT);
				1612
				1613	ret = find_new_extents(root, inode, newer_than,
				1614	&newer_off, SZ_64K);
				1615	if (!ret) {
				1616	range->start = newer_off;
				1617	i = (newer_off & new_align) >> PAGE_SHIFT;
				1618	} else {
				1619	break;
				1620	}
				1621	} else {
				1622	if (ret > 0) {
				1623	i += ret;
				1624	last_len += ret << PAGE_SHIFT;
				1625	} else {
				1626	i++;
				1627	last_len = 0;
				1628	}
				1629	}
				1630	}
				1631
				1632	if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) {
				1633	filemap_flush(inode->i_mapping);
				1634	if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
				1635	&BTRFS_I(inode)->runtime_flags))
				1636	filemap_flush(inode->i_mapping);
				1637	}
				1638
				1639	if (range->compress_type == BTRFS_COMPRESS_LZO) {
				1640	btrfs_set_fs_incompat(fs_info, COMPRESS_LZO);
				1641	} else if (range->compress_type == BTRFS_COMPRESS_ZSTD) {
				1642	btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD);
				1643	}
				1644
				1645	ret = defrag_count;
				1646
				1647	out_ra:
				1648	if (do_compress) {
				1649	inode_lock(inode);
				1650	BTRFS_I(inode)->defrag_compress = BTRFS_COMPRESS_NONE;
				1651	inode_unlock(inode);
				1652	}
				1653	if (!file)
				1654	kfree(ra);
				1655	kfree(pages);
				1656	return ret;
				1657	}
				1658
				1659	static noinline int btrfs_ioctl_resize(struct file *file,
				1660	void __user *arg)
				1661	{
				1662	struct inode *inode = file_inode(file);
				1663	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
				1664	u64 new_size;
				1665	u64 old_size;
				1666	u64 devid = 1;
				1667	struct btrfs_root *root = BTRFS_I(inode)->root;
				1668	struct btrfs_ioctl_vol_args *vol_args;
				1669	struct btrfs_trans_handle *trans;
				1670	struct btrfs_device *device = NULL;
				1671	char *sizestr;
				1672	char *retptr;
				1673	char *devstr = NULL;
				1674	int ret = 0;
				1675	int mod = 0;
				1676
				1677	if (!capable(CAP_SYS_ADMIN))
				1678	return -EPERM;
				1679
				1680	ret = mnt_want_write_file(file);
				1681	if (ret)
				1682	return ret;
				1683
				1684	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
				1685	mnt_drop_write_file(file);
				1686	return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
				1687	}
				1688
				1689	vol_args = memdup_user(arg, sizeof(*vol_args));
				1690	if (IS_ERR(vol_args)) {
				1691	ret = PTR_ERR(vol_args);
				1692	goto out;
				1693	}
				1694
				1695	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
				1696
				1697	sizestr = vol_args->name;
				1698	devstr = strchr(sizestr, ':');
				1699	if (devstr) {
				1700	sizestr = devstr + 1;
				1701	*devstr = '\0';
				1702	devstr = vol_args->name;
				1703	ret = kstrtoull(devstr, 10, &devid);
				1704	if (ret)
				1705	goto out_free;
				1706	if (!devid) {
				1707	ret = -EINVAL;
				1708	goto out_free;
				1709	}
				1710	btrfs_info(fs_info, "resizing devid %llu", devid);
				1711	}
				1712
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1713	device = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1714	if (!device) {
				1715	btrfs_info(fs_info, "resizer unable to find device %llu",
				1716	devid);
				1717	ret = -ENODEV;
				1718	goto out_free;
				1719	}
				1720
				1721	if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
				1722	btrfs_info(fs_info,
				1723	"resizer unable to apply on readonly device %llu",
				1724	devid);
				1725	ret = -EPERM;
				1726	goto out_free;
				1727	}
				1728
				1729	if (!strcmp(sizestr, "max"))
				1730	new_size = device->bdev->bd_inode->i_size;
				1731	else {
				1732	if (sizestr[0] == '-') {
				1733	mod = -1;
				1734	sizestr++;
				1735	} else if (sizestr[0] == '+') {
				1736	mod = 1;
				1737	sizestr++;
				1738	}
				1739	new_size = memparse(sizestr, &retptr);
				1740	if (*retptr != '\0' \|\| new_size == 0) {
				1741	ret = -EINVAL;
				1742	goto out_free;
				1743	}
				1744	}
				1745
				1746	if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
				1747	ret = -EPERM;
				1748	goto out_free;
				1749	}
				1750
				1751	old_size = btrfs_device_get_total_bytes(device);
				1752
				1753	if (mod < 0) {
				1754	if (new_size > old_size) {
				1755	ret = -EINVAL;
				1756	goto out_free;
				1757	}
				1758	new_size = old_size - new_size;
				1759	} else if (mod > 0) {
				1760	if (new_size > ULLONG_MAX - old_size) {
				1761	ret = -ERANGE;
				1762	goto out_free;
				1763	}
				1764	new_size = old_size + new_size;
				1765	}
				1766
				1767	if (new_size < SZ_256M) {
				1768	ret = -EINVAL;
				1769	goto out_free;
				1770	}
				1771	if (new_size > device->bdev->bd_inode->i_size) {
				1772	ret = -EFBIG;
				1773	goto out_free;
				1774	}
				1775
				1776	new_size = round_down(new_size, fs_info->sectorsize);
				1777
				1778	btrfs_info_in_rcu(fs_info, "new size for %s is %llu",
				1779	rcu_str_deref(device->name), new_size);
				1780
				1781	if (new_size > old_size) {
				1782	trans = btrfs_start_transaction(root, 0);
				1783	if (IS_ERR(trans)) {
				1784	ret = PTR_ERR(trans);
				1785	goto out_free;
				1786	}
				1787	ret = btrfs_grow_device(trans, device, new_size);
				1788	btrfs_commit_transaction(trans);
				1789	} else if (new_size < old_size) {
				1790	ret = btrfs_shrink_device(device, new_size);
				1791	} /* equal, nothing need to do */
				1792
				1793	out_free:
				1794	kfree(vol_args);
				1795	out:
				1796	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
				1797	mnt_drop_write_file(file);
				1798	return ret;
				1799	}
				1800
				1801	static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
				1802	const char *name, unsigned long fd, int subvol,
				1803	u64 *transid, bool readonly,
				1804	struct btrfs_qgroup_inherit *inherit)
				1805	{
				1806	int namelen;
				1807	int ret = 0;
				1808
				1809	if (!S_ISDIR(file_inode(file)->i_mode))
				1810	return -ENOTDIR;
				1811
				1812	ret = mnt_want_write_file(file);
				1813	if (ret)
				1814	goto out;
				1815
				1816	namelen = strlen(name);
				1817	if (strchr(name, '/')) {
				1818	ret = -EINVAL;
				1819	goto out_drop_write;
				1820	}
				1821
				1822	if (name[0] == '.' &&
				1823	(namelen == 1 \|\| (name[1] == '.' && namelen == 2))) {
				1824	ret = -EEXIST;
				1825	goto out_drop_write;
				1826	}
				1827
				1828	if (subvol) {
				1829	ret = btrfs_mksubvol(&file->f_path, name, namelen,
				1830	NULL, transid, readonly, inherit);
				1831	} else {
				1832	struct fd src = fdget(fd);
				1833	struct inode *src_inode;
				1834	if (!src.file) {
				1835	ret = -EINVAL;
				1836	goto out_drop_write;
				1837	}
				1838
				1839	src_inode = file_inode(src.file);
				1840	if (src_inode->i_sb != file_inode(file)->i_sb) {
				1841	btrfs_info(BTRFS_I(file_inode(file))->root->fs_info,
				1842	"Snapshot src from another FS");
				1843	ret = -EXDEV;
				1844	} else if (!inode_owner_or_capable(src_inode)) {
				1845	/*
				1846	* Subvolume creation is not restricted, but snapshots
				1847	* are limited to own subvolumes only
				1848	*/
				1849	ret = -EPERM;
				1850	} else {
				1851	ret = btrfs_mksubvol(&file->f_path, name, namelen,
				1852	BTRFS_I(src_inode)->root,
				1853	transid, readonly, inherit);
				1854	}
				1855	fdput(src);
				1856	}
				1857	out_drop_write:
				1858	mnt_drop_write_file(file);
				1859	out:
				1860	return ret;
				1861	}
				1862
				1863	static noinline int btrfs_ioctl_snap_create(struct file *file,
				1864	void __user *arg, int subvol)
				1865	{
				1866	struct btrfs_ioctl_vol_args *vol_args;
				1867	int ret;
				1868
				1869	if (!S_ISDIR(file_inode(file)->i_mode))
				1870	return -ENOTDIR;
				1871
				1872	vol_args = memdup_user(arg, sizeof(*vol_args));
				1873	if (IS_ERR(vol_args))
				1874	return PTR_ERR(vol_args);
				1875	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
				1876
				1877	ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
				1878	vol_args->fd, subvol,
				1879	NULL, false, NULL);
				1880
				1881	kfree(vol_args);
				1882	return ret;
				1883	}
				1884
				1885	static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
				1886	void __user *arg, int subvol)
				1887	{
				1888	struct btrfs_ioctl_vol_args_v2 *vol_args;
				1889	int ret;
				1890	u64 transid = 0;
				1891	u64 *ptr = NULL;
				1892	bool readonly = false;
				1893	struct btrfs_qgroup_inherit *inherit = NULL;
				1894
				1895	if (!S_ISDIR(file_inode(file)->i_mode))
				1896	return -ENOTDIR;
				1897
				1898	vol_args = memdup_user(arg, sizeof(*vol_args));
				1899	if (IS_ERR(vol_args))
				1900	return PTR_ERR(vol_args);
				1901	vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
				1902
				1903	if (vol_args->flags &
				1904	~(BTRFS_SUBVOL_CREATE_ASYNC \| BTRFS_SUBVOL_RDONLY \|
				1905	BTRFS_SUBVOL_QGROUP_INHERIT)) {
				1906	ret = -EOPNOTSUPP;
				1907	goto free_args;
				1908	}
				1909
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1910	if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC) {
				1911	struct inode *inode = file_inode(file);
				1912	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
				1913
				1914	btrfs_warn(fs_info,
				1915	"SNAP_CREATE_V2 ioctl with CREATE_ASYNC is deprecated and will be removed in kernel 5.7");
				1916
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1917	ptr = &transid;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1918	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1919	if (vol_args->flags & BTRFS_SUBVOL_RDONLY)
				1920	readonly = true;
				1921	if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) {
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	1922	u64 nums;
				1923
				1924	if (vol_args->size < sizeof(*inherit) \|\|
				1925	vol_args->size > PAGE_SIZE) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1926	ret = -EINVAL;
				1927	goto free_args;
				1928	}
				1929	inherit = memdup_user(vol_args->qgroup_inherit, vol_args->size);
				1930	if (IS_ERR(inherit)) {
				1931	ret = PTR_ERR(inherit);
				1932	goto free_args;
				1933	}
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	1934
				1935	if (inherit->num_qgroups > PAGE_SIZE \|\|
				1936	inherit->num_ref_copies > PAGE_SIZE \|\|
				1937	inherit->num_excl_copies > PAGE_SIZE) {
				1938	ret = -EINVAL;
				1939	goto free_inherit;
				1940	}
				1941
				1942	nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
				1943	2 * inherit->num_excl_copies;
				1944	if (vol_args->size != struct_size(inherit, qgroups, nums)) {
				1945	ret = -EINVAL;
				1946	goto free_inherit;
				1947	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1948	}
				1949
				1950	ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
				1951	vol_args->fd, subvol, ptr,
				1952	readonly, inherit);
				1953	if (ret)
				1954	goto free_inherit;
				1955
				1956	if (ptr && copy_to_user(arg +
				1957	offsetof(struct btrfs_ioctl_vol_args_v2,
				1958	transid),
				1959	ptr, sizeof(*ptr)))
				1960	ret = -EFAULT;
				1961
				1962	free_inherit:
				1963	kfree(inherit);
				1964	free_args:
				1965	kfree(vol_args);
				1966	return ret;
				1967	}
				1968
				1969	static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
				1970	void __user *arg)
				1971	{
				1972	struct inode *inode = file_inode(file);
				1973	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
				1974	struct btrfs_root *root = BTRFS_I(inode)->root;
				1975	int ret = 0;
				1976	u64 flags = 0;
				1977
				1978	if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID)
				1979	return -EINVAL;
				1980
				1981	down_read(&fs_info->subvol_sem);
				1982	if (btrfs_root_readonly(root))
				1983	flags \|= BTRFS_SUBVOL_RDONLY;
				1984	up_read(&fs_info->subvol_sem);
				1985
				1986	if (copy_to_user(arg, &flags, sizeof(flags)))
				1987	ret = -EFAULT;
				1988
				1989	return ret;
				1990	}
				1991
				1992	static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
				1993	void __user *arg)
				1994	{
				1995	struct inode *inode = file_inode(file);
				1996	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
				1997	struct btrfs_root *root = BTRFS_I(inode)->root;
				1998	struct btrfs_trans_handle *trans;
				1999	u64 root_flags;
				2000	u64 flags;
				2001	int ret = 0;
				2002
				2003	if (!inode_owner_or_capable(inode))
				2004	return -EPERM;
				2005
				2006	ret = mnt_want_write_file(file);
				2007	if (ret)
				2008	goto out;
				2009
				2010	if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
				2011	ret = -EINVAL;
				2012	goto out_drop_write;
				2013	}
				2014
				2015	if (copy_from_user(&flags, arg, sizeof(flags))) {
				2016	ret = -EFAULT;
				2017	goto out_drop_write;
				2018	}
				2019
				2020	if (flags & BTRFS_SUBVOL_CREATE_ASYNC) {
				2021	ret = -EINVAL;
				2022	goto out_drop_write;
				2023	}
				2024
				2025	if (flags & ~BTRFS_SUBVOL_RDONLY) {
				2026	ret = -EOPNOTSUPP;
				2027	goto out_drop_write;
				2028	}
				2029
				2030	down_write(&fs_info->subvol_sem);
				2031
				2032	/* nothing to do */
				2033	if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root))
				2034	goto out_drop_sem;
				2035
				2036	root_flags = btrfs_root_flags(&root->root_item);
				2037	if (flags & BTRFS_SUBVOL_RDONLY) {
				2038	btrfs_set_root_flags(&root->root_item,
				2039	root_flags \| BTRFS_ROOT_SUBVOL_RDONLY);
				2040	} else {
				2041	/*
				2042	* Block RO -> RW transition if this subvolume is involved in
				2043	* send
				2044	*/
				2045	spin_lock(&root->root_item_lock);
				2046	if (root->send_in_progress == 0) {
				2047	btrfs_set_root_flags(&root->root_item,
				2048	root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY);
				2049	spin_unlock(&root->root_item_lock);
				2050	} else {
				2051	spin_unlock(&root->root_item_lock);
				2052	btrfs_warn(fs_info,
				2053	"Attempt to set subvolume %llu read-write during send",
				2054	root->root_key.objectid);
				2055	ret = -EPERM;
				2056	goto out_drop_sem;
				2057	}
				2058	}
				2059
				2060	trans = btrfs_start_transaction(root, 1);
				2061	if (IS_ERR(trans)) {
				2062	ret = PTR_ERR(trans);
				2063	goto out_reset;
				2064	}
				2065
				2066	ret = btrfs_update_root(trans, fs_info->tree_root,
				2067	&root->root_key, &root->root_item);
				2068	if (ret < 0) {
				2069	btrfs_end_transaction(trans);
				2070	goto out_reset;
				2071	}
				2072
				2073	ret = btrfs_commit_transaction(trans);
				2074
				2075	out_reset:
				2076	if (ret)
				2077	btrfs_set_root_flags(&root->root_item, root_flags);
				2078	out_drop_sem:
				2079	up_write(&fs_info->subvol_sem);
				2080	out_drop_write:
				2081	mnt_drop_write_file(file);
				2082	out:
				2083	return ret;
				2084	}
				2085
				2086	static noinline int key_in_sk(struct btrfs_key *key,
				2087	struct btrfs_ioctl_search_key *sk)
				2088	{
				2089	struct btrfs_key test;
				2090	int ret;
				2091
				2092	test.objectid = sk->min_objectid;
				2093	test.type = sk->min_type;
				2094	test.offset = sk->min_offset;
				2095
				2096	ret = btrfs_comp_cpu_keys(key, &test);
				2097	if (ret < 0)
				2098	return 0;
				2099
				2100	test.objectid = sk->max_objectid;
				2101	test.type = sk->max_type;
				2102	test.offset = sk->max_offset;
				2103
				2104	ret = btrfs_comp_cpu_keys(key, &test);
				2105	if (ret > 0)
				2106	return 0;
				2107	return 1;
				2108	}
				2109
				2110	static noinline int copy_to_sk(struct btrfs_path *path,
				2111	struct btrfs_key *key,
				2112	struct btrfs_ioctl_search_key *sk,
				2113	size_t *buf_size,
				2114	char __user *ubuf,
				2115	unsigned long *sk_offset,
				2116	int *num_found)
				2117	{
				2118	u64 found_transid;
				2119	struct extent_buffer *leaf;
				2120	struct btrfs_ioctl_search_header sh;
				2121	struct btrfs_key test;
				2122	unsigned long item_off;
				2123	unsigned long item_len;
				2124	int nritems;
				2125	int i;
				2126	int slot;
				2127	int ret = 0;
				2128
				2129	leaf = path->nodes[0];
				2130	slot = path->slots[0];
				2131	nritems = btrfs_header_nritems(leaf);
				2132
				2133	if (btrfs_header_generation(leaf) > sk->max_transid) {
				2134	i = nritems;
				2135	goto advance_key;
				2136	}
				2137	found_transid = btrfs_header_generation(leaf);
				2138
				2139	for (i = slot; i < nritems; i++) {
				2140	item_off = btrfs_item_ptr_offset(leaf, i);
				2141	item_len = btrfs_item_size_nr(leaf, i);
				2142
				2143	btrfs_item_key_to_cpu(leaf, key, i);
				2144	if (!key_in_sk(key, sk))
				2145	continue;
				2146
				2147	if (sizeof(sh) + item_len > *buf_size) {
				2148	if (*num_found) {
				2149	ret = 1;
				2150	goto out;
				2151	}
				2152
				2153	/*
				2154	* return one empty item back for v1, which does not
				2155	* handle -EOVERFLOW
				2156	*/
				2157
				2158	*buf_size = sizeof(sh) + item_len;
				2159	item_len = 0;
				2160	ret = -EOVERFLOW;
				2161	}
				2162
				2163	if (sizeof(sh) + item_len + sk_offset > buf_size) {
				2164	ret = 1;
				2165	goto out;
				2166	}
				2167
				2168	sh.objectid = key->objectid;
				2169	sh.offset = key->offset;
				2170	sh.type = key->type;
				2171	sh.len = item_len;
				2172	sh.transid = found_transid;
				2173
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	2174	/*
				2175	* Copy search result header. If we fault then loop again so we
				2176	* can fault in the pages and -EFAULT there if there's a
				2177	* problem. Otherwise we'll fault and then copy the buffer in
				2178	* properly this next time through
				2179	*/
				2180	if (probe_user_write(ubuf + *sk_offset, &sh, sizeof(sh))) {
				2181	ret = 0;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2182	goto out;
				2183	}
				2184
				2185	*sk_offset += sizeof(sh);
				2186
				2187	if (item_len) {
				2188	char __user up = ubuf + sk_offset;
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	2189	/*
				2190	* Copy the item, same behavior as above, but reset the
				2191	* * sk_offset so we copy the full thing again.
				2192	*/
				2193	if (read_extent_buffer_to_user_nofault(leaf, up,
				2194	item_off, item_len)) {
				2195	ret = 0;
				2196	*sk_offset -= sizeof(sh);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2197	goto out;
				2198	}
				2199
				2200	*sk_offset += item_len;
				2201	}
				2202	(*num_found)++;
				2203
				2204	if (ret) /* -EOVERFLOW from above */
				2205	goto out;
				2206
				2207	if (*num_found >= sk->nr_items) {
				2208	ret = 1;
				2209	goto out;
				2210	}
				2211	}
				2212	advance_key:
				2213	ret = 0;
				2214	test.objectid = sk->max_objectid;
				2215	test.type = sk->max_type;
				2216	test.offset = sk->max_offset;
				2217	if (btrfs_comp_cpu_keys(key, &test) >= 0)
				2218	ret = 1;
				2219	else if (key->offset < (u64)-1)
				2220	key->offset++;
				2221	else if (key->type < (u8)-1) {
				2222	key->offset = 0;
				2223	key->type++;
				2224	} else if (key->objectid < (u64)-1) {
				2225	key->offset = 0;
				2226	key->type = 0;
				2227	key->objectid++;
				2228	} else
				2229	ret = 1;
				2230	out:
				2231	/*
				2232	* 0: all items from this leaf copied, continue with next
				2233	* 1: * more items can be copied, but unused buffer is too small
				2234	* * all items were found
				2235	* Either way, it will stops the loop which iterates to the next
				2236	* leaf
				2237	* -EOVERFLOW: item was to large for buffer
				2238	* -EFAULT: could not copy extent buffer back to userspace
				2239	*/
				2240	return ret;
				2241	}
				2242
				2243	static noinline int search_ioctl(struct inode *inode,
				2244	struct btrfs_ioctl_search_key *sk,
				2245	size_t *buf_size,
				2246	char __user *ubuf)
				2247	{
				2248	struct btrfs_fs_info *info = btrfs_sb(inode->i_sb);
				2249	struct btrfs_root *root;
				2250	struct btrfs_key key;
				2251	struct btrfs_path *path;
				2252	int ret;
				2253	int num_found = 0;
				2254	unsigned long sk_offset = 0;
				2255
				2256	if (*buf_size < sizeof(struct btrfs_ioctl_search_header)) {
				2257	*buf_size = sizeof(struct btrfs_ioctl_search_header);
				2258	return -EOVERFLOW;
				2259	}
				2260
				2261	path = btrfs_alloc_path();
				2262	if (!path)
				2263	return -ENOMEM;
				2264
				2265	if (sk->tree_id == 0) {
				2266	/* search the root of the inode that was passed */
				2267	root = BTRFS_I(inode)->root;
				2268	} else {
				2269	key.objectid = sk->tree_id;
				2270	key.type = BTRFS_ROOT_ITEM_KEY;
				2271	key.offset = (u64)-1;
				2272	root = btrfs_read_fs_root_no_name(info, &key);
				2273	if (IS_ERR(root)) {
				2274	btrfs_free_path(path);
				2275	return PTR_ERR(root);
				2276	}
				2277	}
				2278
				2279	key.objectid = sk->min_objectid;
				2280	key.type = sk->min_type;
				2281	key.offset = sk->min_offset;
				2282
				2283	while (1) {
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	2284	ret = fault_in_pages_writeable(ubuf + sk_offset,
				2285	*buf_size - sk_offset);
				2286	if (ret)
				2287	break;
				2288
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2289	ret = btrfs_search_forward(root, &key, path, sk->min_transid);
				2290	if (ret != 0) {
				2291	if (ret > 0)
				2292	ret = 0;
				2293	goto err;
				2294	}
				2295	ret = copy_to_sk(path, &key, sk, buf_size, ubuf,
				2296	&sk_offset, &num_found);
				2297	btrfs_release_path(path);
				2298	if (ret)
				2299	break;
				2300
				2301	}
				2302	if (ret > 0)
				2303	ret = 0;
				2304	err:
				2305	sk->nr_items = num_found;
				2306	btrfs_free_path(path);
				2307	return ret;
				2308	}
				2309
				2310	static noinline int btrfs_ioctl_tree_search(struct file *file,
				2311	void __user *argp)
				2312	{
				2313	struct btrfs_ioctl_search_args __user *uargs;
				2314	struct btrfs_ioctl_search_key sk;
				2315	struct inode *inode;
				2316	int ret;
				2317	size_t buf_size;
				2318
				2319	if (!capable(CAP_SYS_ADMIN))
				2320	return -EPERM;
				2321
				2322	uargs = (struct btrfs_ioctl_search_args __user *)argp;
				2323
				2324	if (copy_from_user(&sk, &uargs->key, sizeof(sk)))
				2325	return -EFAULT;
				2326
				2327	buf_size = sizeof(uargs->buf);
				2328
				2329	inode = file_inode(file);
				2330	ret = search_ioctl(inode, &sk, &buf_size, uargs->buf);
				2331
				2332	/*
				2333	* In the origin implementation an overflow is handled by returning a
				2334	* search header with a len of zero, so reset ret.
				2335	*/
				2336	if (ret == -EOVERFLOW)
				2337	ret = 0;
				2338
				2339	if (ret == 0 && copy_to_user(&uargs->key, &sk, sizeof(sk)))
				2340	ret = -EFAULT;
				2341	return ret;
				2342	}
				2343
				2344	static noinline int btrfs_ioctl_tree_search_v2(struct file *file,
				2345	void __user *argp)
				2346	{
				2347	struct btrfs_ioctl_search_args_v2 __user *uarg;
				2348	struct btrfs_ioctl_search_args_v2 args;
				2349	struct inode *inode;
				2350	int ret;
				2351	size_t buf_size;
				2352	const size_t buf_limit = SZ_16M;
				2353
				2354	if (!capable(CAP_SYS_ADMIN))
				2355	return -EPERM;
				2356
				2357	/* copy search header and buffer size */
				2358	uarg = (struct btrfs_ioctl_search_args_v2 __user *)argp;
				2359	if (copy_from_user(&args, uarg, sizeof(args)))
				2360	return -EFAULT;
				2361
				2362	buf_size = args.buf_size;
				2363
				2364	/* limit result size to 16MB */
				2365	if (buf_size > buf_limit)
				2366	buf_size = buf_limit;
				2367
				2368	inode = file_inode(file);
				2369	ret = search_ioctl(inode, &args.key, &buf_size,
				2370	(char __user *)(&uarg->buf[0]));
				2371	if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key)))
				2372	ret = -EFAULT;
				2373	else if (ret == -EOVERFLOW &&
				2374	copy_to_user(&uarg->buf_size, &buf_size, sizeof(buf_size)))
				2375	ret = -EFAULT;
				2376
				2377	return ret;
				2378	}
				2379
				2380	/*
				2381	* Search INODE_REFs to identify path name of 'dirid' directory
				2382	* in a 'tree_id' tree. and sets path name to 'name'.
				2383	*/
				2384	static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
				2385	u64 tree_id, u64 dirid, char *name)
				2386	{
				2387	struct btrfs_root *root;
				2388	struct btrfs_key key;
				2389	char *ptr;
				2390	int ret = -1;
				2391	int slot;
				2392	int len;
				2393	int total_len = 0;
				2394	struct btrfs_inode_ref *iref;
				2395	struct extent_buffer *l;
				2396	struct btrfs_path *path;
				2397
				2398	if (dirid == BTRFS_FIRST_FREE_OBJECTID) {
				2399	name[0]='\0';
				2400	return 0;
				2401	}
				2402
				2403	path = btrfs_alloc_path();
				2404	if (!path)
				2405	return -ENOMEM;
				2406
				2407	ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX - 1];
				2408
				2409	key.objectid = tree_id;
				2410	key.type = BTRFS_ROOT_ITEM_KEY;
				2411	key.offset = (u64)-1;
				2412	root = btrfs_read_fs_root_no_name(info, &key);
				2413	if (IS_ERR(root)) {
				2414	ret = PTR_ERR(root);
				2415	goto out;
				2416	}
				2417
				2418	key.objectid = dirid;
				2419	key.type = BTRFS_INODE_REF_KEY;
				2420	key.offset = (u64)-1;
				2421
				2422	while (1) {
				2423	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
				2424	if (ret < 0)
				2425	goto out;
				2426	else if (ret > 0) {
				2427	ret = btrfs_previous_item(root, path, dirid,
				2428	BTRFS_INODE_REF_KEY);
				2429	if (ret < 0)
				2430	goto out;
				2431	else if (ret > 0) {
				2432	ret = -ENOENT;
				2433	goto out;
				2434	}
				2435	}
				2436
				2437	l = path->nodes[0];
				2438	slot = path->slots[0];
				2439	btrfs_item_key_to_cpu(l, &key, slot);
				2440
				2441	iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref);
				2442	len = btrfs_inode_ref_name_len(l, iref);
				2443	ptr -= len + 1;
				2444	total_len += len + 1;
				2445	if (ptr < name) {
				2446	ret = -ENAMETOOLONG;
				2447	goto out;
				2448	}
				2449
				2450	*(ptr + len) = '/';
				2451	read_extent_buffer(l, ptr, (unsigned long)(iref + 1), len);
				2452
				2453	if (key.offset == BTRFS_FIRST_FREE_OBJECTID)
				2454	break;
				2455
				2456	btrfs_release_path(path);
				2457	key.objectid = key.offset;
				2458	key.offset = (u64)-1;
				2459	dirid = key.objectid;
				2460	}
				2461	memmove(name, ptr, total_len);
				2462	name[total_len] = '\0';
				2463	ret = 0;
				2464	out:
				2465	btrfs_free_path(path);
				2466	return ret;
				2467	}
				2468
				2469	static int btrfs_search_path_in_tree_user(struct inode *inode,
				2470	struct btrfs_ioctl_ino_lookup_user_args *args)
				2471	{
				2472	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
				2473	struct super_block *sb = inode->i_sb;
				2474	struct btrfs_key upper_limit = BTRFS_I(inode)->location;
				2475	u64 treeid = BTRFS_I(inode)->root->root_key.objectid;
				2476	u64 dirid = args->dirid;
				2477	unsigned long item_off;
				2478	unsigned long item_len;
				2479	struct btrfs_inode_ref *iref;
				2480	struct btrfs_root_ref *rref;
				2481	struct btrfs_root *root;
				2482	struct btrfs_path *path;
				2483	struct btrfs_key key, key2;
				2484	struct extent_buffer *leaf;
				2485	struct inode *temp_inode;
				2486	char *ptr;
				2487	int slot;
				2488	int len;
				2489	int total_len = 0;
				2490	int ret;
				2491
				2492	path = btrfs_alloc_path();
				2493	if (!path)
				2494	return -ENOMEM;
				2495
				2496	/*
				2497	* If the bottom subvolume does not exist directly under upper_limit,
				2498	* construct the path in from the bottom up.
				2499	*/
				2500	if (dirid != upper_limit.objectid) {
				2501	ptr = &args->path[BTRFS_INO_LOOKUP_USER_PATH_MAX - 1];
				2502
				2503	key.objectid = treeid;
				2504	key.type = BTRFS_ROOT_ITEM_KEY;
				2505	key.offset = (u64)-1;
				2506	root = btrfs_read_fs_root_no_name(fs_info, &key);
				2507	if (IS_ERR(root)) {
				2508	ret = PTR_ERR(root);
				2509	goto out;
				2510	}
				2511
				2512	key.objectid = dirid;
				2513	key.type = BTRFS_INODE_REF_KEY;
				2514	key.offset = (u64)-1;
				2515	while (1) {
				2516	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
				2517	if (ret < 0) {
				2518	goto out;
				2519	} else if (ret > 0) {
				2520	ret = btrfs_previous_item(root, path, dirid,
				2521	BTRFS_INODE_REF_KEY);
				2522	if (ret < 0) {
				2523	goto out;
				2524	} else if (ret > 0) {
				2525	ret = -ENOENT;
				2526	goto out;
				2527	}
				2528	}
				2529
				2530	leaf = path->nodes[0];
				2531	slot = path->slots[0];
				2532	btrfs_item_key_to_cpu(leaf, &key, slot);
				2533
				2534	iref = btrfs_item_ptr(leaf, slot, struct btrfs_inode_ref);
				2535	len = btrfs_inode_ref_name_len(leaf, iref);
				2536	ptr -= len + 1;
				2537	total_len += len + 1;
				2538	if (ptr < args->path) {
				2539	ret = -ENAMETOOLONG;
				2540	goto out;
				2541	}
				2542
				2543	*(ptr + len) = '/';
				2544	read_extent_buffer(leaf, ptr,
				2545	(unsigned long)(iref + 1), len);
				2546
				2547	/* Check the read+exec permission of this directory */
				2548	ret = btrfs_previous_item(root, path, dirid,
				2549	BTRFS_INODE_ITEM_KEY);
				2550	if (ret < 0) {
				2551	goto out;
				2552	} else if (ret > 0) {
				2553	ret = -ENOENT;
				2554	goto out;
				2555	}
				2556
				2557	leaf = path->nodes[0];
				2558	slot = path->slots[0];
				2559	btrfs_item_key_to_cpu(leaf, &key2, slot);
				2560	if (key2.objectid != dirid) {
				2561	ret = -ENOENT;
				2562	goto out;
				2563	}
				2564
				2565	temp_inode = btrfs_iget(sb, &key2, root, NULL);
				2566	if (IS_ERR(temp_inode)) {
				2567	ret = PTR_ERR(temp_inode);
				2568	goto out;
				2569	}
				2570	ret = inode_permission(temp_inode, MAY_READ \| MAY_EXEC);
				2571	iput(temp_inode);
				2572	if (ret) {
				2573	ret = -EACCES;
				2574	goto out;
				2575	}
				2576
				2577	if (key.offset == upper_limit.objectid)
				2578	break;
				2579	if (key.objectid == BTRFS_FIRST_FREE_OBJECTID) {
				2580	ret = -EACCES;
				2581	goto out;
				2582	}
				2583
				2584	btrfs_release_path(path);
				2585	key.objectid = key.offset;
				2586	key.offset = (u64)-1;
				2587	dirid = key.objectid;
				2588	}
				2589
				2590	memmove(args->path, ptr, total_len);
				2591	args->path[total_len] = '\0';
				2592	btrfs_release_path(path);
				2593	}
				2594
				2595	/* Get the bottom subvolume's name from ROOT_REF */
				2596	root = fs_info->tree_root;
				2597	key.objectid = treeid;
				2598	key.type = BTRFS_ROOT_REF_KEY;
				2599	key.offset = args->treeid;
				2600	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
				2601	if (ret < 0) {
				2602	goto out;
				2603	} else if (ret > 0) {
				2604	ret = -ENOENT;
				2605	goto out;
				2606	}
				2607
				2608	leaf = path->nodes[0];
				2609	slot = path->slots[0];
				2610	btrfs_item_key_to_cpu(leaf, &key, slot);
				2611
				2612	item_off = btrfs_item_ptr_offset(leaf, slot);
				2613	item_len = btrfs_item_size_nr(leaf, slot);
				2614	/* Check if dirid in ROOT_REF corresponds to passed dirid */
				2615	rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref);
				2616	if (args->dirid != btrfs_root_ref_dirid(leaf, rref)) {
				2617	ret = -EINVAL;
				2618	goto out;
				2619	}
				2620
				2621	/* Copy subvolume's name */
				2622	item_off += sizeof(struct btrfs_root_ref);
				2623	item_len -= sizeof(struct btrfs_root_ref);
				2624	read_extent_buffer(leaf, args->name, item_off, item_len);
				2625	args->name[item_len] = 0;
				2626
				2627	out:
				2628	btrfs_free_path(path);
				2629	return ret;
				2630	}
				2631
				2632	static noinline int btrfs_ioctl_ino_lookup(struct file *file,
				2633	void __user *argp)
				2634	{
				2635	struct btrfs_ioctl_ino_lookup_args *args;
				2636	struct inode *inode;
				2637	int ret = 0;
				2638
				2639	args = memdup_user(argp, sizeof(*args));
				2640	if (IS_ERR(args))
				2641	return PTR_ERR(args);
				2642
				2643	inode = file_inode(file);
				2644
				2645	/*
				2646	* Unprivileged query to obtain the containing subvolume root id. The
				2647	* path is reset so it's consistent with btrfs_search_path_in_tree.
				2648	*/
				2649	if (args->treeid == 0)
				2650	args->treeid = BTRFS_I(inode)->root->root_key.objectid;
				2651
				2652	if (args->objectid == BTRFS_FIRST_FREE_OBJECTID) {
				2653	args->name[0] = 0;
				2654	goto out;
				2655	}
				2656
				2657	if (!capable(CAP_SYS_ADMIN)) {
				2658	ret = -EPERM;
				2659	goto out;
				2660	}
				2661
				2662	ret = btrfs_search_path_in_tree(BTRFS_I(inode)->root->fs_info,
				2663	args->treeid, args->objectid,
				2664	args->name);
				2665
				2666	out:
				2667	if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
				2668	ret = -EFAULT;
				2669
				2670	kfree(args);
				2671	return ret;
				2672	}
				2673
				2674	/*
				2675	* Version of ino_lookup ioctl (unprivileged)
				2676	*
				2677	* The main differences from ino_lookup ioctl are:
				2678	*
				2679	* 1. Read + Exec permission will be checked using inode_permission() during
				2680	* path construction. -EACCES will be returned in case of failure.
				2681	* 2. Path construction will be stopped at the inode number which corresponds
				2682	* to the fd with which this ioctl is called. If constructed path does not
				2683	* exist under fd's inode, -EACCES will be returned.
				2684	* 3. The name of bottom subvolume is also searched and filled.
				2685	*/
				2686	static int btrfs_ioctl_ino_lookup_user(struct file file, void __user argp)
				2687	{
				2688	struct btrfs_ioctl_ino_lookup_user_args *args;
				2689	struct inode *inode;
				2690	int ret;
				2691
				2692	args = memdup_user(argp, sizeof(*args));
				2693	if (IS_ERR(args))
				2694	return PTR_ERR(args);
				2695
				2696	inode = file_inode(file);
				2697
				2698	if (args->dirid == BTRFS_FIRST_FREE_OBJECTID &&
				2699	BTRFS_I(inode)->location.objectid != BTRFS_FIRST_FREE_OBJECTID) {
				2700	/*
				2701	* The subvolume does not exist under fd with which this is
				2702	* called
				2703	*/
				2704	kfree(args);
				2705	return -EACCES;
				2706	}
				2707
				2708	ret = btrfs_search_path_in_tree_user(inode, args);
				2709
				2710	if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
				2711	ret = -EFAULT;
				2712
				2713	kfree(args);
				2714	return ret;
				2715	}
				2716
				2717	/* Get the subvolume information in BTRFS_ROOT_ITEM and BTRFS_ROOT_BACKREF */
				2718	static int btrfs_ioctl_get_subvol_info(struct file file, void __user argp)
				2719	{
				2720	struct btrfs_ioctl_get_subvol_info_args *subvol_info;
				2721	struct btrfs_fs_info *fs_info;
				2722	struct btrfs_root *root;
				2723	struct btrfs_path *path;
				2724	struct btrfs_key key;
				2725	struct btrfs_root_item *root_item;
				2726	struct btrfs_root_ref *rref;
				2727	struct extent_buffer *leaf;
				2728	unsigned long item_off;
				2729	unsigned long item_len;
				2730	struct inode *inode;
				2731	int slot;
				2732	int ret = 0;
				2733
				2734	path = btrfs_alloc_path();
				2735	if (!path)
				2736	return -ENOMEM;
				2737
				2738	subvol_info = kzalloc(sizeof(*subvol_info), GFP_KERNEL);
				2739	if (!subvol_info) {
				2740	btrfs_free_path(path);
				2741	return -ENOMEM;
				2742	}
				2743
				2744	inode = file_inode(file);
				2745	fs_info = BTRFS_I(inode)->root->fs_info;
				2746
				2747	/* Get root_item of inode's subvolume */
				2748	key.objectid = BTRFS_I(inode)->root->root_key.objectid;
				2749	key.type = BTRFS_ROOT_ITEM_KEY;
				2750	key.offset = (u64)-1;
				2751	root = btrfs_read_fs_root_no_name(fs_info, &key);
				2752	if (IS_ERR(root)) {
				2753	ret = PTR_ERR(root);
				2754	goto out;
				2755	}
				2756	root_item = &root->root_item;
				2757
				2758	subvol_info->treeid = key.objectid;
				2759
				2760	subvol_info->generation = btrfs_root_generation(root_item);
				2761	subvol_info->flags = btrfs_root_flags(root_item);
				2762
				2763	memcpy(subvol_info->uuid, root_item->uuid, BTRFS_UUID_SIZE);
				2764	memcpy(subvol_info->parent_uuid, root_item->parent_uuid,
				2765	BTRFS_UUID_SIZE);
				2766	memcpy(subvol_info->received_uuid, root_item->received_uuid,
				2767	BTRFS_UUID_SIZE);
				2768
				2769	subvol_info->ctransid = btrfs_root_ctransid(root_item);
				2770	subvol_info->ctime.sec = btrfs_stack_timespec_sec(&root_item->ctime);
				2771	subvol_info->ctime.nsec = btrfs_stack_timespec_nsec(&root_item->ctime);
				2772
				2773	subvol_info->otransid = btrfs_root_otransid(root_item);
				2774	subvol_info->otime.sec = btrfs_stack_timespec_sec(&root_item->otime);
				2775	subvol_info->otime.nsec = btrfs_stack_timespec_nsec(&root_item->otime);
				2776
				2777	subvol_info->stransid = btrfs_root_stransid(root_item);
				2778	subvol_info->stime.sec = btrfs_stack_timespec_sec(&root_item->stime);
				2779	subvol_info->stime.nsec = btrfs_stack_timespec_nsec(&root_item->stime);
				2780
				2781	subvol_info->rtransid = btrfs_root_rtransid(root_item);
				2782	subvol_info->rtime.sec = btrfs_stack_timespec_sec(&root_item->rtime);
				2783	subvol_info->rtime.nsec = btrfs_stack_timespec_nsec(&root_item->rtime);
				2784
				2785	if (key.objectid != BTRFS_FS_TREE_OBJECTID) {
				2786	/* Search root tree for ROOT_BACKREF of this subvolume */
				2787	root = fs_info->tree_root;
				2788
				2789	key.type = BTRFS_ROOT_BACKREF_KEY;
				2790	key.offset = 0;
				2791	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
				2792	if (ret < 0) {
				2793	goto out;
				2794	} else if (path->slots[0] >=
				2795	btrfs_header_nritems(path->nodes[0])) {
				2796	ret = btrfs_next_leaf(root, path);
				2797	if (ret < 0) {
				2798	goto out;
				2799	} else if (ret > 0) {
				2800	ret = -EUCLEAN;
				2801	goto out;
				2802	}
				2803	}
				2804
				2805	leaf = path->nodes[0];
				2806	slot = path->slots[0];
				2807	btrfs_item_key_to_cpu(leaf, &key, slot);
				2808	if (key.objectid == subvol_info->treeid &&
				2809	key.type == BTRFS_ROOT_BACKREF_KEY) {
				2810	subvol_info->parent_id = key.offset;
				2811
				2812	rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref);
				2813	subvol_info->dirid = btrfs_root_ref_dirid(leaf, rref);
				2814
				2815	item_off = btrfs_item_ptr_offset(leaf, slot)
				2816	+ sizeof(struct btrfs_root_ref);
				2817	item_len = btrfs_item_size_nr(leaf, slot)
				2818	- sizeof(struct btrfs_root_ref);
				2819	read_extent_buffer(leaf, subvol_info->name,
				2820	item_off, item_len);
				2821	} else {
				2822	ret = -ENOENT;
				2823	goto out;
				2824	}
				2825	}
				2826
				2827	if (copy_to_user(argp, subvol_info, sizeof(*subvol_info)))
				2828	ret = -EFAULT;
				2829
				2830	out:
				2831	btrfs_free_path(path);
				2832	kzfree(subvol_info);
				2833	return ret;
				2834	}
				2835
				2836	/*
				2837	* Return ROOT_REF information of the subvolume containing this inode
				2838	* except the subvolume name.
				2839	*/
				2840	static int btrfs_ioctl_get_subvol_rootref(struct file file, void __user argp)
				2841	{
				2842	struct btrfs_ioctl_get_subvol_rootref_args *rootrefs;
				2843	struct btrfs_root_ref *rref;
				2844	struct btrfs_root *root;
				2845	struct btrfs_path *path;
				2846	struct btrfs_key key;
				2847	struct extent_buffer *leaf;
				2848	struct inode *inode;
				2849	u64 objectid;
				2850	int slot;
				2851	int ret;
				2852	u8 found;
				2853
				2854	path = btrfs_alloc_path();
				2855	if (!path)
				2856	return -ENOMEM;
				2857
				2858	rootrefs = memdup_user(argp, sizeof(*rootrefs));
				2859	if (IS_ERR(rootrefs)) {
				2860	btrfs_free_path(path);
				2861	return PTR_ERR(rootrefs);
				2862	}
				2863
				2864	inode = file_inode(file);
				2865	root = BTRFS_I(inode)->root->fs_info->tree_root;
				2866	objectid = BTRFS_I(inode)->root->root_key.objectid;
				2867
				2868	key.objectid = objectid;
				2869	key.type = BTRFS_ROOT_REF_KEY;
				2870	key.offset = rootrefs->min_treeid;
				2871	found = 0;
				2872
				2873	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
				2874	if (ret < 0) {
				2875	goto out;
				2876	} else if (path->slots[0] >=
				2877	btrfs_header_nritems(path->nodes[0])) {
				2878	ret = btrfs_next_leaf(root, path);
				2879	if (ret < 0) {
				2880	goto out;
				2881	} else if (ret > 0) {
				2882	ret = -EUCLEAN;
				2883	goto out;
				2884	}
				2885	}
				2886	while (1) {
				2887	leaf = path->nodes[0];
				2888	slot = path->slots[0];
				2889
				2890	btrfs_item_key_to_cpu(leaf, &key, slot);
				2891	if (key.objectid != objectid \|\| key.type != BTRFS_ROOT_REF_KEY) {
				2892	ret = 0;
				2893	goto out;
				2894	}
				2895
				2896	if (found == BTRFS_MAX_ROOTREF_BUFFER_NUM) {
				2897	ret = -EOVERFLOW;
				2898	goto out;
				2899	}
				2900
				2901	rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref);
				2902	rootrefs->rootref[found].treeid = key.offset;
				2903	rootrefs->rootref[found].dirid =
				2904	btrfs_root_ref_dirid(leaf, rref);
				2905	found++;
				2906
				2907	ret = btrfs_next_item(root, path);
				2908	if (ret < 0) {
				2909	goto out;
				2910	} else if (ret > 0) {
				2911	ret = -EUCLEAN;
				2912	goto out;
				2913	}
				2914	}
				2915
				2916	out:
				2917	if (!ret \|\| ret == -EOVERFLOW) {
				2918	rootrefs->num_items = found;
				2919	/* update min_treeid for next search */
				2920	if (found)
				2921	rootrefs->min_treeid =
				2922	rootrefs->rootref[found - 1].treeid + 1;
				2923	if (copy_to_user(argp, rootrefs, sizeof(*rootrefs)))
				2924	ret = -EFAULT;
				2925	}
				2926
				2927	kfree(rootrefs);
				2928	btrfs_free_path(path);
				2929
				2930	return ret;
				2931	}
				2932
				2933	static noinline int btrfs_ioctl_snap_destroy(struct file *file,
				2934	void __user *arg)
				2935	{
				2936	struct dentry *parent = file->f_path.dentry;
				2937	struct btrfs_fs_info *fs_info = btrfs_sb(parent->d_sb);
				2938	struct dentry *dentry;
				2939	struct inode *dir = d_inode(parent);
				2940	struct inode *inode;
				2941	struct btrfs_root *root = BTRFS_I(dir)->root;
				2942	struct btrfs_root *dest = NULL;
				2943	struct btrfs_ioctl_vol_args *vol_args;
				2944	int namelen;
				2945	int err = 0;
				2946
				2947	if (!S_ISDIR(dir->i_mode))
				2948	return -ENOTDIR;
				2949
				2950	vol_args = memdup_user(arg, sizeof(*vol_args));
				2951	if (IS_ERR(vol_args))
				2952	return PTR_ERR(vol_args);
				2953
				2954	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
				2955	namelen = strlen(vol_args->name);
				2956	if (strchr(vol_args->name, '/') \|\|
				2957	strncmp(vol_args->name, "..", namelen) == 0) {
				2958	err = -EINVAL;
				2959	goto out;
				2960	}
				2961
				2962	err = mnt_want_write_file(file);
				2963	if (err)
				2964	goto out;
				2965
				2966
				2967	err = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
				2968	if (err == -EINTR)
				2969	goto out_drop_write;
				2970	dentry = lookup_one_len(vol_args->name, parent, namelen);
				2971	if (IS_ERR(dentry)) {
				2972	err = PTR_ERR(dentry);
				2973	goto out_unlock_dir;
				2974	}
				2975
				2976	if (d_really_is_negative(dentry)) {
				2977	err = -ENOENT;
				2978	goto out_dput;
				2979	}
				2980
				2981	inode = d_inode(dentry);
				2982	dest = BTRFS_I(inode)->root;
				2983	if (!capable(CAP_SYS_ADMIN)) {
				2984	/*
				2985	* Regular user. Only allow this with a special mount
				2986	* option, when the user has write+exec access to the
				2987	* subvol root, and when rmdir(2) would have been
				2988	* allowed.
				2989	*
				2990	* Note that this is _not_ check that the subvol is
				2991	* empty or doesn't contain data that we wouldn't
				2992	* otherwise be able to delete.
				2993	*
				2994	* Users who want to delete empty subvols should try
				2995	* rmdir(2).
				2996	*/
				2997	err = -EPERM;
				2998	if (!btrfs_test_opt(fs_info, USER_SUBVOL_RM_ALLOWED))
				2999	goto out_dput;
				3000
				3001	/*
				3002	* Do not allow deletion if the parent dir is the same
				3003	* as the dir to be deleted. That means the ioctl
				3004	* must be called on the dentry referencing the root
				3005	* of the subvol, not a random directory contained
				3006	* within it.
				3007	*/
				3008	err = -EINVAL;
				3009	if (root == dest)
				3010	goto out_dput;
				3011
				3012	err = inode_permission(inode, MAY_WRITE \| MAY_EXEC);
				3013	if (err)
				3014	goto out_dput;
				3015	}
				3016
				3017	/* check if subvolume may be deleted by a user */
				3018	err = btrfs_may_delete(dir, dentry, 1);
				3019	if (err)
				3020	goto out_dput;
				3021
				3022	if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
				3023	err = -EINVAL;
				3024	goto out_dput;
				3025	}
				3026
				3027	inode_lock(inode);
				3028	err = btrfs_delete_subvolume(dir, dentry);
				3029	inode_unlock(inode);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3030	if (!err) {
				3031	fsnotify_rmdir(dir, dentry);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3032	d_delete(dentry);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3033	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3034
				3035	out_dput:
				3036	dput(dentry);
				3037	out_unlock_dir:
				3038	inode_unlock(dir);
				3039	out_drop_write:
				3040	mnt_drop_write_file(file);
				3041	out:
				3042	kfree(vol_args);
				3043	return err;
				3044	}
				3045
				3046	static int btrfs_ioctl_defrag(struct file file, void __user argp)
				3047	{
				3048	struct inode *inode = file_inode(file);
				3049	struct btrfs_root *root = BTRFS_I(inode)->root;
				3050	struct btrfs_ioctl_defrag_range_args *range;
				3051	int ret;
				3052
				3053	ret = mnt_want_write_file(file);
				3054	if (ret)
				3055	return ret;
				3056
				3057	if (btrfs_root_readonly(root)) {
				3058	ret = -EROFS;
				3059	goto out;
				3060	}
				3061
				3062	switch (inode->i_mode & S_IFMT) {
				3063	case S_IFDIR:
				3064	if (!capable(CAP_SYS_ADMIN)) {
				3065	ret = -EPERM;
				3066	goto out;
				3067	}
				3068	ret = btrfs_defrag_root(root);
				3069	break;
				3070	case S_IFREG:
				3071	/*
				3072	* Note that this does not check the file descriptor for write
				3073	* access. This prevents defragmenting executables that are
				3074	* running and allows defrag on files open in read-only mode.
				3075	*/
				3076	if (!capable(CAP_SYS_ADMIN) &&
				3077	inode_permission(inode, MAY_WRITE)) {
				3078	ret = -EPERM;
				3079	goto out;
				3080	}
				3081
				3082	range = kzalloc(sizeof(*range), GFP_KERNEL);
				3083	if (!range) {
				3084	ret = -ENOMEM;
				3085	goto out;
				3086	}
				3087
				3088	if (argp) {
				3089	if (copy_from_user(range, argp,
				3090	sizeof(*range))) {
				3091	ret = -EFAULT;
				3092	kfree(range);
				3093	goto out;
				3094	}
				3095	/* compression requires us to start the IO */
				3096	if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
				3097	range->flags \|= BTRFS_DEFRAG_RANGE_START_IO;
				3098	range->extent_thresh = (u32)-1;
				3099	}
				3100	} else {
				3101	/* the rest are all set to zero by kzalloc */
				3102	range->len = (u64)-1;
				3103	}
				3104	ret = btrfs_defrag_file(file_inode(file), file,
				3105	range, BTRFS_OLDEST_GENERATION, 0);
				3106	if (ret > 0)
				3107	ret = 0;
				3108	kfree(range);
				3109	break;
				3110	default:
				3111	ret = -EINVAL;
				3112	}
				3113	out:
				3114	mnt_drop_write_file(file);
				3115	return ret;
				3116	}
				3117
				3118	static long btrfs_ioctl_add_dev(struct btrfs_fs_info fs_info, void __user arg)
				3119	{
				3120	struct btrfs_ioctl_vol_args *vol_args;
				3121	int ret;
				3122
				3123	if (!capable(CAP_SYS_ADMIN))
				3124	return -EPERM;
				3125
				3126	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags))
				3127	return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
				3128
				3129	vol_args = memdup_user(arg, sizeof(*vol_args));
				3130	if (IS_ERR(vol_args)) {
				3131	ret = PTR_ERR(vol_args);
				3132	goto out;
				3133	}
				3134
				3135	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
				3136	ret = btrfs_init_new_device(fs_info, vol_args->name);
				3137
				3138	if (!ret)
				3139	btrfs_info(fs_info, "disk added %s", vol_args->name);
				3140
				3141	kfree(vol_args);
				3142	out:
				3143	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
				3144	return ret;
				3145	}
				3146
				3147	static long btrfs_ioctl_rm_dev_v2(struct file file, void __user arg)
				3148	{
				3149	struct inode *inode = file_inode(file);
				3150	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
				3151	struct btrfs_ioctl_vol_args_v2 *vol_args;
				3152	int ret;
				3153
				3154	if (!capable(CAP_SYS_ADMIN))
				3155	return -EPERM;
				3156
				3157	ret = mnt_want_write_file(file);
				3158	if (ret)
				3159	return ret;
				3160
				3161	vol_args = memdup_user(arg, sizeof(*vol_args));
				3162	if (IS_ERR(vol_args)) {
				3163	ret = PTR_ERR(vol_args);
				3164	goto err_drop;
				3165	}
				3166
				3167	/* Check for compatibility reject unknown flags */
				3168	if (vol_args->flags & ~BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED) {
				3169	ret = -EOPNOTSUPP;
				3170	goto out;
				3171	}
				3172
				3173	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
				3174	ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
				3175	goto out;
				3176	}
				3177
				3178	if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) {
				3179	ret = btrfs_rm_device(fs_info, NULL, vol_args->devid);
				3180	} else {
				3181	vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
				3182	ret = btrfs_rm_device(fs_info, vol_args->name, 0);
				3183	}
				3184	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
				3185
				3186	if (!ret) {
				3187	if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID)
				3188	btrfs_info(fs_info, "device deleted: id %llu",
				3189	vol_args->devid);
				3190	else
				3191	btrfs_info(fs_info, "device deleted: %s",
				3192	vol_args->name);
				3193	}
				3194	out:
				3195	kfree(vol_args);
				3196	err_drop:
				3197	mnt_drop_write_file(file);
				3198	return ret;
				3199	}
				3200
				3201	static long btrfs_ioctl_rm_dev(struct file file, void __user arg)
				3202	{
				3203	struct inode *inode = file_inode(file);
				3204	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
				3205	struct btrfs_ioctl_vol_args *vol_args;
				3206	int ret;
				3207
				3208	if (!capable(CAP_SYS_ADMIN))
				3209	return -EPERM;
				3210
				3211	ret = mnt_want_write_file(file);
				3212	if (ret)
				3213	return ret;
				3214
				3215	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
				3216	ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
				3217	goto out_drop_write;
				3218	}
				3219
				3220	vol_args = memdup_user(arg, sizeof(*vol_args));
				3221	if (IS_ERR(vol_args)) {
				3222	ret = PTR_ERR(vol_args);
				3223	goto out;
				3224	}
				3225
				3226	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
				3227	ret = btrfs_rm_device(fs_info, vol_args->name, 0);
				3228
				3229	if (!ret)
				3230	btrfs_info(fs_info, "disk deleted %s", vol_args->name);
				3231	kfree(vol_args);
				3232	out:
				3233	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
				3234	out_drop_write:
				3235	mnt_drop_write_file(file);
				3236
				3237	return ret;
				3238	}
				3239
				3240	static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info,
				3241	void __user *arg)
				3242	{
				3243	struct btrfs_ioctl_fs_info_args *fi_args;
				3244	struct btrfs_device *device;
				3245	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
				3246	int ret = 0;
				3247
				3248	fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL);
				3249	if (!fi_args)
				3250	return -ENOMEM;
				3251
				3252	rcu_read_lock();
				3253	fi_args->num_devices = fs_devices->num_devices;
				3254
				3255	list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
				3256	if (device->devid > fi_args->max_id)
				3257	fi_args->max_id = device->devid;
				3258	}
				3259	rcu_read_unlock();
				3260
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3261	memcpy(&fi_args->fsid, fs_devices->fsid, sizeof(fi_args->fsid));
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3262	fi_args->nodesize = fs_info->nodesize;
				3263	fi_args->sectorsize = fs_info->sectorsize;
				3264	fi_args->clone_alignment = fs_info->sectorsize;
				3265
				3266	if (copy_to_user(arg, fi_args, sizeof(*fi_args)))
				3267	ret = -EFAULT;
				3268
				3269	kfree(fi_args);
				3270	return ret;
				3271	}
				3272
				3273	static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info,
				3274	void __user *arg)
				3275	{
				3276	struct btrfs_ioctl_dev_info_args *di_args;
				3277	struct btrfs_device *dev;
				3278	int ret = 0;
				3279	char *s_uuid = NULL;
				3280
				3281	di_args = memdup_user(arg, sizeof(*di_args));
				3282	if (IS_ERR(di_args))
				3283	return PTR_ERR(di_args);
				3284
				3285	if (!btrfs_is_empty_uuid(di_args->uuid))
				3286	s_uuid = di_args->uuid;
				3287
				3288	rcu_read_lock();
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3289	dev = btrfs_find_device(fs_info->fs_devices, di_args->devid, s_uuid,
				3290	NULL, true);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3291
				3292	if (!dev) {
				3293	ret = -ENODEV;
				3294	goto out;
				3295	}
				3296
				3297	di_args->devid = dev->devid;
				3298	di_args->bytes_used = btrfs_device_get_bytes_used(dev);
				3299	di_args->total_bytes = btrfs_device_get_total_bytes(dev);
				3300	memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
				3301	if (dev->name) {
				3302	strncpy(di_args->path, rcu_str_deref(dev->name),
				3303	sizeof(di_args->path) - 1);
				3304	di_args->path[sizeof(di_args->path) - 1] = 0;
				3305	} else {
				3306	di_args->path[0] = '\0';
				3307	}
				3308
				3309	out:
				3310	rcu_read_unlock();
				3311	if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args)))
				3312	ret = -EFAULT;
				3313
				3314	kfree(di_args);
				3315	return ret;
				3316	}
				3317
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3318	static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3319	struct inode *inode2, u64 loff2, u64 len)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3320	{
				3321	unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
				3322	unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
				3323	}
				3324
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3325	static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
				3326	struct inode *inode2, u64 loff2, u64 len)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3327	{
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3328	if (inode1 < inode2) {
				3329	swap(inode1, inode2);
				3330	swap(loff1, loff2);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3331	} else if (inode1 == inode2 && loff2 < loff1) {
				3332	swap(loff1, loff2);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3333	}
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3334	lock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
				3335	lock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3336	}
				3337
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3338	static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 len,
				3339	struct inode *dst, u64 dst_loff)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3340	{
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	3341	const u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3342	int ret;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3343
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3344	/*
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3345	* Lock destination range to serialize with concurrent readpages() and
				3346	* source range to serialize with relocation.
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3347	*/
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3348	btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	3349	ret = btrfs_clone(src, dst, loff, len, ALIGN(len, bs), dst_loff, 1);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3350	btrfs_double_extent_unlock(src, loff, dst, dst_loff, len);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3351
				3352	return ret;
				3353	}
				3354
				3355	#define BTRFS_MAX_DEDUPE_LEN SZ_16M
				3356
				3357	static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
				3358	struct inode *dst, u64 dst_loff)
				3359	{
				3360	int ret;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3361	u64 i, tail_len, chunk_count;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3362	struct btrfs_root *root_dst = BTRFS_I(dst)->root;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3363
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3364	spin_lock(&root_dst->root_item_lock);
				3365	if (root_dst->send_in_progress) {
				3366	btrfs_warn_rl(root_dst->fs_info,
				3367	"cannot deduplicate to root %llu while send operations are using it (%d in progress)",
				3368	root_dst->root_key.objectid,
				3369	root_dst->send_in_progress);
				3370	spin_unlock(&root_dst->root_item_lock);
				3371	return -EAGAIN;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3372	}
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3373	root_dst->dedupe_in_progress++;
				3374	spin_unlock(&root_dst->root_item_lock);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3375
				3376	tail_len = olen % BTRFS_MAX_DEDUPE_LEN;
				3377	chunk_count = div_u64(olen, BTRFS_MAX_DEDUPE_LEN);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3378
				3379	for (i = 0; i < chunk_count; i++) {
				3380	ret = btrfs_extent_same_range(src, loff, BTRFS_MAX_DEDUPE_LEN,
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3381	dst, dst_loff);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3382	if (ret)
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3383	goto out;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3384
				3385	loff += BTRFS_MAX_DEDUPE_LEN;
				3386	dst_loff += BTRFS_MAX_DEDUPE_LEN;
				3387	}
				3388
				3389	if (tail_len > 0)
				3390	ret = btrfs_extent_same_range(src, loff, tail_len, dst,
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3391	dst_loff);
				3392	out:
				3393	spin_lock(&root_dst->root_item_lock);
				3394	root_dst->dedupe_in_progress--;
				3395	spin_unlock(&root_dst->root_item_lock);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3396
				3397	return ret;
				3398	}
				3399
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3400	static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
				3401	struct inode *inode,
				3402	u64 endoff,
				3403	const u64 destoff,
				3404	const u64 olen,
				3405	int no_time_update)
				3406	{
				3407	struct btrfs_root *root = BTRFS_I(inode)->root;
				3408	int ret;
				3409
				3410	inode_inc_iversion(inode);
				3411	if (!no_time_update)
				3412	inode->i_mtime = inode->i_ctime = current_time(inode);
				3413	/*
				3414	* We round up to the block size at eof when determining which
				3415	* extents to clone above, but shouldn't round up the file size.
				3416	*/
				3417	if (endoff > destoff + olen)
				3418	endoff = destoff + olen;
				3419	if (endoff > inode->i_size)
				3420	btrfs_i_size_write(BTRFS_I(inode), endoff);
				3421
				3422	ret = btrfs_update_inode(trans, root, inode);
				3423	if (ret) {
				3424	btrfs_abort_transaction(trans, ret);
				3425	btrfs_end_transaction(trans);
				3426	goto out;
				3427	}
				3428	ret = btrfs_end_transaction(trans);
				3429	out:
				3430	return ret;
				3431	}
				3432
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3433	/*
				3434	* Make sure we do not end up inserting an inline extent into a file that has
				3435	* already other (non-inline) extents. If a file has an inline extent it can
				3436	* not have any other extents and the (single) inline extent must start at the
				3437	* file offset 0. Failing to respect these rules will lead to file corruption,
				3438	* resulting in EIO errors on read/write operations, hitting BUG_ON's in mm, etc
				3439	*
				3440	* We can have extents that have been already written to disk or we can have
				3441	* dirty ranges still in delalloc, in which case the extent maps and items are
				3442	* created only when we run delalloc, and the delalloc ranges might fall outside
				3443	* the range we are currently locking in the inode's io tree. So we check the
				3444	* inode's i_size because of that (i_size updates are done while holding the
				3445	* i_mutex, which we are holding here).
				3446	* We also check to see if the inode has a size not greater than "datal" but has
				3447	* extents beyond it, due to an fallocate with FALLOC_FL_KEEP_SIZE (and we are
				3448	* protected against such concurrent fallocate calls by the i_mutex).
				3449	*
				3450	* If the file has no extents but a size greater than datal, do not allow the
				3451	* copy because we would need turn the inline extent into a non-inline one (even
				3452	* with NO_HOLES enabled). If we find our destination inode only has one inline
				3453	* extent, just overwrite it with the source inline extent if its size is less
				3454	* than the source extent's size, or we could copy the source inline extent's
				3455	* data into the destination inode's inline extent if the later is greater then
				3456	* the former.
				3457	*/
				3458	static int clone_copy_inline_extent(struct inode *dst,
				3459	struct btrfs_trans_handle *trans,
				3460	struct btrfs_path *path,
				3461	struct btrfs_key *new_key,
				3462	const u64 drop_start,
				3463	const u64 datal,
				3464	const u64 skip,
				3465	const u64 size,
				3466	char *inline_data)
				3467	{
				3468	struct btrfs_fs_info *fs_info = btrfs_sb(dst->i_sb);
				3469	struct btrfs_root *root = BTRFS_I(dst)->root;
				3470	const u64 aligned_end = ALIGN(new_key->offset + datal,
				3471	fs_info->sectorsize);
				3472	int ret;
				3473	struct btrfs_key key;
				3474
				3475	if (new_key->offset > 0)
				3476	return -EOPNOTSUPP;
				3477
				3478	key.objectid = btrfs_ino(BTRFS_I(dst));
				3479	key.type = BTRFS_EXTENT_DATA_KEY;
				3480	key.offset = 0;
				3481	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
				3482	if (ret < 0) {
				3483	return ret;
				3484	} else if (ret > 0) {
				3485	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
				3486	ret = btrfs_next_leaf(root, path);
				3487	if (ret < 0)
				3488	return ret;
				3489	else if (ret > 0)
				3490	goto copy_inline_extent;
				3491	}
				3492	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
				3493	if (key.objectid == btrfs_ino(BTRFS_I(dst)) &&
				3494	key.type == BTRFS_EXTENT_DATA_KEY) {
				3495	ASSERT(key.offset > 0);
				3496	return -EOPNOTSUPP;
				3497	}
				3498	} else if (i_size_read(dst) <= datal) {
				3499	struct btrfs_file_extent_item *ei;
				3500	u64 ext_len;
				3501
				3502	/*
				3503	* If the file size is <= datal, make sure there are no other
				3504	* extents following (can happen do to an fallocate call with
				3505	* the flag FALLOC_FL_KEEP_SIZE).
				3506	*/
				3507	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
				3508	struct btrfs_file_extent_item);
				3509	/*
				3510	* If it's an inline extent, it can not have other extents
				3511	* following it.
				3512	*/
				3513	if (btrfs_file_extent_type(path->nodes[0], ei) ==
				3514	BTRFS_FILE_EXTENT_INLINE)
				3515	goto copy_inline_extent;
				3516
				3517	ext_len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
				3518	if (ext_len > aligned_end)
				3519	return -EOPNOTSUPP;
				3520
				3521	ret = btrfs_next_item(root, path);
				3522	if (ret < 0) {
				3523	return ret;
				3524	} else if (ret == 0) {
				3525	btrfs_item_key_to_cpu(path->nodes[0], &key,
				3526	path->slots[0]);
				3527	if (key.objectid == btrfs_ino(BTRFS_I(dst)) &&
				3528	key.type == BTRFS_EXTENT_DATA_KEY)
				3529	return -EOPNOTSUPP;
				3530	}
				3531	}
				3532
				3533	copy_inline_extent:
				3534	/*
				3535	* We have no extent items, or we have an extent at offset 0 which may
				3536	* or may not be inlined. All these cases are dealt the same way.
				3537	*/
				3538	if (i_size_read(dst) > datal) {
				3539	/*
				3540	* If the destination inode has an inline extent...
				3541	* This would require copying the data from the source inline
				3542	* extent into the beginning of the destination's inline extent.
				3543	* But this is really complex, both extents can be compressed
				3544	* or just one of them, which would require decompressing and
				3545	* re-compressing data (which could increase the new compressed
				3546	* size, not allowing the compressed data to fit anymore in an
				3547	* inline extent).
				3548	* So just don't support this case for now (it should be rare,
				3549	* we are not really saving space when cloning inline extents).
				3550	*/
				3551	return -EOPNOTSUPP;
				3552	}
				3553
				3554	btrfs_release_path(path);
				3555	ret = btrfs_drop_extents(trans, root, dst, drop_start, aligned_end, 1);
				3556	if (ret)
				3557	return ret;
				3558	ret = btrfs_insert_empty_item(trans, root, path, new_key, size);
				3559	if (ret)
				3560	return ret;
				3561
				3562	if (skip) {
				3563	const u32 start = btrfs_file_extent_calc_inline_size(0);
				3564
				3565	memmove(inline_data + start, inline_data + start + skip, datal);
				3566	}
				3567
				3568	write_extent_buffer(path->nodes[0], inline_data,
				3569	btrfs_item_ptr_offset(path->nodes[0],
				3570	path->slots[0]),
				3571	size);
				3572	inode_add_bytes(dst, datal);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3573	set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(dst)->runtime_flags);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3574
				3575	return 0;
				3576	}
				3577
				3578	/**
				3579	* btrfs_clone() - clone a range from inode file to another
				3580	*
				3581	* @src: Inode to clone from
				3582	* @inode: Inode to clone to
				3583	* @off: Offset within source to start clone from
				3584	* @olen: Original length, passed by user, of range to clone
				3585	* @olen_aligned: Block-aligned value of olen
				3586	* @destoff: Offset within @inode to start clone
				3587	* @no_time_update: Whether to update mtime/ctime on the target inode
				3588	*/
				3589	static int btrfs_clone(struct inode src, struct inode inode,
				3590	const u64 off, const u64 olen, const u64 olen_aligned,
				3591	const u64 destoff, int no_time_update)
				3592	{
				3593	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
				3594	struct btrfs_root *root = BTRFS_I(inode)->root;
				3595	struct btrfs_path *path = NULL;
				3596	struct extent_buffer *leaf;
				3597	struct btrfs_trans_handle *trans;
				3598	char *buf = NULL;
				3599	struct btrfs_key key;
				3600	u32 nritems;
				3601	int slot;
				3602	int ret;
				3603	const u64 len = olen_aligned;
				3604	u64 last_dest_end = destoff;
				3605
				3606	ret = -ENOMEM;
				3607	buf = kvmalloc(fs_info->nodesize, GFP_KERNEL);
				3608	if (!buf)
				3609	return ret;
				3610
				3611	path = btrfs_alloc_path();
				3612	if (!path) {
				3613	kvfree(buf);
				3614	return ret;
				3615	}
				3616
				3617	path->reada = READA_FORWARD;
				3618	/* clone data */
				3619	key.objectid = btrfs_ino(BTRFS_I(src));
				3620	key.type = BTRFS_EXTENT_DATA_KEY;
				3621	key.offset = off;
				3622
				3623	while (1) {
				3624	u64 next_key_min_offset = key.offset + 1;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3625	struct btrfs_file_extent_item *extent;
				3626	int type;
				3627	u32 size;
				3628	struct btrfs_key new_key;
				3629	u64 disko = 0, diskl = 0;
				3630	u64 datao = 0, datal = 0;
				3631	u8 comp;
				3632	u64 drop_start;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3633
				3634	/*
				3635	* note the key will change type as we walk through the
				3636	* tree.
				3637	*/
				3638	path->leave_spinning = 1;
				3639	ret = btrfs_search_slot(NULL, BTRFS_I(src)->root, &key, path,
				3640	0, 0);
				3641	if (ret < 0)
				3642	goto out;
				3643	/*
				3644	* First search, if no extent item that starts at offset off was
				3645	* found but the previous item is an extent item, it's possible
				3646	* it might overlap our target range, therefore process it.
				3647	*/
				3648	if (key.offset == off && ret > 0 && path->slots[0] > 0) {
				3649	btrfs_item_key_to_cpu(path->nodes[0], &key,
				3650	path->slots[0] - 1);
				3651	if (key.type == BTRFS_EXTENT_DATA_KEY)
				3652	path->slots[0]--;
				3653	}
				3654
				3655	nritems = btrfs_header_nritems(path->nodes[0]);
				3656	process_slot:
				3657	if (path->slots[0] >= nritems) {
				3658	ret = btrfs_next_leaf(BTRFS_I(src)->root, path);
				3659	if (ret < 0)
				3660	goto out;
				3661	if (ret > 0)
				3662	break;
				3663	nritems = btrfs_header_nritems(path->nodes[0]);
				3664	}
				3665	leaf = path->nodes[0];
				3666	slot = path->slots[0];
				3667
				3668	btrfs_item_key_to_cpu(leaf, &key, slot);
				3669	if (key.type > BTRFS_EXTENT_DATA_KEY \|\|
				3670	key.objectid != btrfs_ino(BTRFS_I(src)))
				3671	break;
				3672
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3673	ASSERT(key.type == BTRFS_EXTENT_DATA_KEY);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3674
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3675	extent = btrfs_item_ptr(leaf, slot,
				3676	struct btrfs_file_extent_item);
				3677	comp = btrfs_file_extent_compression(leaf, extent);
				3678	type = btrfs_file_extent_type(leaf, extent);
				3679	if (type == BTRFS_FILE_EXTENT_REG \|\|
				3680	type == BTRFS_FILE_EXTENT_PREALLOC) {
				3681	disko = btrfs_file_extent_disk_bytenr(leaf, extent);
				3682	diskl = btrfs_file_extent_disk_num_bytes(leaf, extent);
				3683	datao = btrfs_file_extent_offset(leaf, extent);
				3684	datal = btrfs_file_extent_num_bytes(leaf, extent);
				3685	} else if (type == BTRFS_FILE_EXTENT_INLINE) {
				3686	/* Take upper bound, may be compressed */
				3687	datal = btrfs_file_extent_ram_bytes(leaf, extent);
				3688	}
				3689
				3690	/*
				3691	* The first search might have left us at an extent item that
				3692	* ends before our target range's start, can happen if we have
				3693	* holes and NO_HOLES feature enabled.
				3694	*/
				3695	if (key.offset + datal <= off) {
				3696	path->slots[0]++;
				3697	goto process_slot;
				3698	} else if (key.offset >= off + len) {
				3699	break;
				3700	}
				3701	next_key_min_offset = key.offset + datal;
				3702	size = btrfs_item_size_nr(leaf, slot);
				3703	read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, slot),
				3704	size);
				3705
				3706	btrfs_release_path(path);
				3707	path->leave_spinning = 0;
				3708
				3709	memcpy(&new_key, &key, sizeof(new_key));
				3710	new_key.objectid = btrfs_ino(BTRFS_I(inode));
				3711	if (off <= key.offset)
				3712	new_key.offset = key.offset + destoff - off;
				3713	else
				3714	new_key.offset = destoff;
				3715
				3716	/*
				3717	* Deal with a hole that doesn't have an extent item that
				3718	* represents it (NO_HOLES feature enabled).
				3719	* This hole is either in the middle of the cloning range or at
				3720	* the beginning (fully overlaps it or partially overlaps it).
				3721	*/
				3722	if (new_key.offset != last_dest_end)
				3723	drop_start = last_dest_end;
				3724	else
				3725	drop_start = new_key.offset;
				3726
				3727	if (type == BTRFS_FILE_EXTENT_REG \|\|
				3728	type == BTRFS_FILE_EXTENT_PREALLOC) {
				3729	struct btrfs_clone_extent_info clone_info;
				3730
				3731	/*
				3732	* a \| --- range to clone ---\| b
				3733	* \| ------------- extent ------------- \|
				3734	*/
				3735
				3736	/* Subtract range b */
				3737	if (key.offset + datal > off + len)
				3738	datal = off + len - key.offset;
				3739
				3740	/* Subtract range a */
				3741	if (off > key.offset) {
				3742	datao += off - key.offset;
				3743	datal -= off - key.offset;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3744	}
				3745
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3746	clone_info.disk_offset = disko;
				3747	clone_info.disk_len = diskl;
				3748	clone_info.data_offset = datao;
				3749	clone_info.data_len = datal;
				3750	clone_info.file_offset = new_key.offset;
				3751	clone_info.extent_buf = buf;
				3752	clone_info.item_size = size;
				3753	ret = btrfs_punch_hole_range(inode, path,
				3754	drop_start,
				3755	new_key.offset + datal - 1,
				3756	&clone_info, &trans);
				3757	if (ret)
				3758	goto out;
				3759	} else if (type == BTRFS_FILE_EXTENT_INLINE) {
				3760	u64 skip = 0;
				3761	u64 trim = 0;
				3762
				3763	if (off > key.offset) {
				3764	skip = off - key.offset;
				3765	new_key.offset += skip;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3766	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3767
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3768	if (key.offset + datal > off + len)
				3769	trim = key.offset + datal - (off + len);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3770
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3771	if (comp && (skip \|\| trim)) {
				3772	ret = -EINVAL;
				3773	goto out;
				3774	}
				3775	size -= skip + trim;
				3776	datal -= skip + trim;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3777
				3778	/*
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3779	* If our extent is inline, we know we will drop or
				3780	* adjust at most 1 extent item in the destination root.
				3781	*
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3782	* 1 - adjusting old extent (we may have to split it)
				3783	* 1 - add new extent
				3784	* 1 - inode update
				3785	*/
				3786	trans = btrfs_start_transaction(root, 3);
				3787	if (IS_ERR(trans)) {
				3788	ret = PTR_ERR(trans);
				3789	goto out;
				3790	}
				3791
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3792	ret = clone_copy_inline_extent(inode, trans, path,
				3793	&new_key, drop_start,
				3794	datal, skip, size, buf);
				3795	if (ret) {
				3796	if (ret != -EOPNOTSUPP)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3797	btrfs_abort_transaction(trans, ret);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3798	btrfs_end_transaction(trans);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3799	goto out;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3800	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3801	}
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3802
				3803	btrfs_release_path(path);
				3804
				3805	last_dest_end = ALIGN(new_key.offset + datal,
				3806	fs_info->sectorsize);
				3807	ret = clone_finish_inode_update(trans, inode, last_dest_end,
				3808	destoff, olen, no_time_update);
				3809	if (ret)
				3810	goto out;
				3811	if (new_key.offset + datal >= destoff + len)
				3812	break;
				3813
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3814	btrfs_release_path(path);
				3815	key.offset = next_key_min_offset;
				3816
				3817	if (fatal_signal_pending(current)) {
				3818	ret = -EINTR;
				3819	goto out;
				3820	}
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	3821
				3822	cond_resched();
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3823	}
				3824	ret = 0;
				3825
				3826	if (last_dest_end < destoff + len) {
				3827	/*
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	3828	* We have an implicit hole that fully or partially overlaps our
				3829	* cloning range at its end. This means that we either have the
				3830	* NO_HOLES feature enabled or the implicit hole happened due to
				3831	* mixing buffered and direct IO writes against this file.
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3832	*/
				3833	btrfs_release_path(path);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3834	path->leave_spinning = 0;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3835
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3836	ret = btrfs_punch_hole_range(inode, path,
				3837	last_dest_end, destoff + len - 1,
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	3838	NULL, &trans);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3839	if (ret)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3840	goto out;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3841
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3842	ret = clone_finish_inode_update(trans, inode, destoff + len,
				3843	destoff, olen, no_time_update);
				3844	}
				3845
				3846	out:
				3847	btrfs_free_path(path);
				3848	kvfree(buf);
				3849	return ret;
				3850	}
				3851
				3852	static noinline int btrfs_clone_files(struct file file, struct file file_src,
				3853	u64 off, u64 olen, u64 destoff)
				3854	{
				3855	struct inode *inode = file_inode(file);
				3856	struct inode *src = file_inode(file_src);
				3857	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3858	int ret;
				3859	u64 len = olen;
				3860	u64 bs = fs_info->sb->s_blocksize;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3861
				3862	/*
				3863	* TODO:
				3864	* - split compressed inline extents. annoying: we need to
				3865	* decompress into destination's address_space (the file offset
				3866	* may change, so source mapping won't do), then recompress (or
				3867	* otherwise reinsert) a subrange.
				3868	*
				3869	* - split destination inode's inline extents. The inline extents can
				3870	* be either compressed or non-compressed.
				3871	*/
				3872
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3873	/*
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3874	* VFS's generic_remap_file_range_prep() protects us from cloning the
				3875	* eof block into the middle of a file, which would result in corruption
				3876	* if the file size is not blocksize aligned. So we don't need to check
				3877	* for that case here.
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3878	*/
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3879	if (off + len == src->i_size)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3880	len = ALIGN(src->i_size, bs) - off;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3881
				3882	if (destoff > inode->i_size) {
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3883	const u64 wb_start = ALIGN_DOWN(inode->i_size, bs);
				3884
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3885	ret = btrfs_cont_expand(inode, inode->i_size, destoff);
				3886	if (ret)
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3887	return ret;
				3888	/*
				3889	* We may have truncated the last block if the inode's size is
				3890	* not sector size aligned, so we need to wait for writeback to
				3891	* complete before proceeding further, otherwise we can race
				3892	* with cloning and attempt to increment a reference to an
				3893	* extent that no longer exists (writeback completed right after
				3894	* we found the previous extent covering eof and before we
				3895	* attempted to increment its reference count).
				3896	*/
				3897	ret = btrfs_wait_ordered_range(inode, wb_start,
				3898	destoff - wb_start);
				3899	if (ret)
				3900	return ret;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3901	}
				3902
				3903	/*
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3904	* Lock destination range to serialize with concurrent readpages() and
				3905	* source range to serialize with relocation.
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3906	*/
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3907	btrfs_double_extent_lock(src, off, inode, destoff, len);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3908	ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3909	btrfs_double_extent_unlock(src, off, inode, destoff, len);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3910	/*
				3911	* Truncate page cache pages so that future reads will see the cloned
				3912	* data immediately and not the previous data.
				3913	*/
				3914	truncate_inode_pages_range(&inode->i_data,
				3915	round_down(destoff, PAGE_SIZE),
				3916	round_up(destoff + len, PAGE_SIZE) - 1);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3917
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3918	return ret;
				3919	}
				3920
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3921	static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in,
				3922	struct file *file_out, loff_t pos_out,
				3923	loff_t *len, unsigned int remap_flags)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3924	{
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3925	struct inode *inode_in = file_inode(file_in);
				3926	struct inode *inode_out = file_inode(file_out);
				3927	u64 bs = BTRFS_I(inode_out)->root->fs_info->sb->s_blocksize;
				3928	bool same_inode = inode_out == inode_in;
				3929	u64 wb_len;
				3930	int ret;
				3931
				3932	if (!(remap_flags & REMAP_FILE_DEDUP)) {
				3933	struct btrfs_root *root_out = BTRFS_I(inode_out)->root;
				3934
				3935	if (btrfs_root_readonly(root_out))
				3936	return -EROFS;
				3937
				3938	if (file_in->f_path.mnt != file_out->f_path.mnt \|\|
				3939	inode_in->i_sb != inode_out->i_sb)
				3940	return -EXDEV;
				3941	}
				3942
				3943	/* don't make the dst file partly checksummed */
				3944	if ((BTRFS_I(inode_in)->flags & BTRFS_INODE_NODATASUM) !=
				3945	(BTRFS_I(inode_out)->flags & BTRFS_INODE_NODATASUM)) {
				3946	return -EINVAL;
				3947	}
				3948
				3949	/*
				3950	* Now that the inodes are locked, we need to start writeback ourselves
				3951	* and can not rely on the writeback from the VFS's generic helper
				3952	* generic_remap_file_range_prep() because:
				3953	*
				3954	* 1) For compression we must call filemap_fdatawrite_range() range
				3955	* twice (btrfs_fdatawrite_range() does it for us), and the generic
				3956	* helper only calls it once;
				3957	*
				3958	* 2) filemap_fdatawrite_range(), called by the generic helper only
				3959	* waits for the writeback to complete, i.e. for IO to be done, and
				3960	* not for the ordered extents to complete. We need to wait for them
				3961	* to complete so that new file extent items are in the fs tree.
				3962	*/
				3963	if (*len == 0 && !(remap_flags & REMAP_FILE_DEDUP))
				3964	wb_len = ALIGN(inode_in->i_size, bs) - ALIGN_DOWN(pos_in, bs);
				3965	else
				3966	wb_len = ALIGN(*len, bs);
				3967
				3968	/*
				3969	* Since we don't lock ranges, wait for ongoing lockless dio writes (as
				3970	* any in progress could create its ordered extents after we wait for
				3971	* existing ordered extents below).
				3972	*/
				3973	inode_dio_wait(inode_in);
				3974	if (!same_inode)
				3975	inode_dio_wait(inode_out);
				3976
				3977	/*
				3978	* Workaround to make sure NOCOW buffered write reach disk as NOCOW.
				3979	*
				3980	* Btrfs' back references do not have a block level granularity, they
				3981	* work at the whole extent level.
				3982	* NOCOW buffered write without data space reserved may not be able
				3983	* to fall back to CoW due to lack of data space, thus could cause
				3984	* data loss.
				3985	*
				3986	* Here we take a shortcut by flushing the whole inode, so that all
				3987	* nocow write should reach disk as nocow before we increase the
				3988	* reference of the extent. We could do better by only flushing NOCOW
				3989	* data, but that needs extra accounting.
				3990	*
				3991	* Also we don't need to check ASYNC_EXTENT, as async extent will be
				3992	* CoWed anyway, not affecting nocow part.
				3993	*/
				3994	ret = filemap_flush(inode_in->i_mapping);
				3995	if (ret < 0)
				3996	return ret;
				3997
				3998	ret = btrfs_wait_ordered_range(inode_in, ALIGN_DOWN(pos_in, bs),
				3999	wb_len);
				4000	if (ret < 0)
				4001	return ret;
				4002	ret = btrfs_wait_ordered_range(inode_out, ALIGN_DOWN(pos_out, bs),
				4003	wb_len);
				4004	if (ret < 0)
				4005	return ret;
				4006
				4007	return generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
				4008	len, remap_flags);
				4009	}
				4010
				4011	loff_t btrfs_remap_file_range(struct file *src_file, loff_t off,
				4012	struct file *dst_file, loff_t destoff, loff_t len,
				4013	unsigned int remap_flags)
				4014	{
				4015	struct inode *src_inode = file_inode(src_file);
				4016	struct inode *dst_inode = file_inode(dst_file);
				4017	bool same_inode = dst_inode == src_inode;
				4018	int ret;
				4019
				4020	if (remap_flags & ~(REMAP_FILE_DEDUP \| REMAP_FILE_ADVISORY))
				4021	return -EINVAL;
				4022
				4023	if (same_inode)
				4024	inode_lock(src_inode);
				4025	else
				4026	lock_two_nondirectories(src_inode, dst_inode);
				4027
				4028	ret = btrfs_remap_file_range_prep(src_file, off, dst_file, destoff,
				4029	&len, remap_flags);
				4030	if (ret < 0 \|\| len == 0)
				4031	goto out_unlock;
				4032
				4033	if (remap_flags & REMAP_FILE_DEDUP)
				4034	ret = btrfs_extent_same(src_inode, off, len, dst_inode, destoff);
				4035	else
				4036	ret = btrfs_clone_files(dst_file, src_file, off, len, destoff);
				4037
				4038	out_unlock:
				4039	if (same_inode)
				4040	inode_unlock(src_inode);
				4041	else
				4042	unlock_two_nondirectories(src_inode, dst_inode);
				4043
				4044	return ret < 0 ? ret : len;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4045	}
				4046
				4047	static long btrfs_ioctl_default_subvol(struct file file, void __user argp)
				4048	{
				4049	struct inode *inode = file_inode(file);
				4050	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
				4051	struct btrfs_root *root = BTRFS_I(inode)->root;
				4052	struct btrfs_root *new_root;
				4053	struct btrfs_dir_item *di;
				4054	struct btrfs_trans_handle *trans;
				4055	struct btrfs_path *path;
				4056	struct btrfs_key location;
				4057	struct btrfs_disk_key disk_key;
				4058	u64 objectid = 0;
				4059	u64 dir_id;
				4060	int ret;
				4061
				4062	if (!capable(CAP_SYS_ADMIN))
				4063	return -EPERM;
				4064
				4065	ret = mnt_want_write_file(file);
				4066	if (ret)
				4067	return ret;
				4068
				4069	if (copy_from_user(&objectid, argp, sizeof(objectid))) {
				4070	ret = -EFAULT;
				4071	goto out;
				4072	}
				4073
				4074	if (!objectid)
				4075	objectid = BTRFS_FS_TREE_OBJECTID;
				4076
				4077	location.objectid = objectid;
				4078	location.type = BTRFS_ROOT_ITEM_KEY;
				4079	location.offset = (u64)-1;
				4080
				4081	new_root = btrfs_read_fs_root_no_name(fs_info, &location);
				4082	if (IS_ERR(new_root)) {
				4083	ret = PTR_ERR(new_root);
				4084	goto out;
				4085	}
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	4086	if (!is_fstree(new_root->root_key.objectid)) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4087	ret = -ENOENT;
				4088	goto out;
				4089	}
				4090
				4091	path = btrfs_alloc_path();
				4092	if (!path) {
				4093	ret = -ENOMEM;
				4094	goto out;
				4095	}
				4096	path->leave_spinning = 1;
				4097
				4098	trans = btrfs_start_transaction(root, 1);
				4099	if (IS_ERR(trans)) {
				4100	btrfs_free_path(path);
				4101	ret = PTR_ERR(trans);
				4102	goto out;
				4103	}
				4104
				4105	dir_id = btrfs_super_root_dir(fs_info->super_copy);
				4106	di = btrfs_lookup_dir_item(trans, fs_info->tree_root, path,
				4107	dir_id, "default", 7, 1);
				4108	if (IS_ERR_OR_NULL(di)) {
				4109	btrfs_free_path(path);
				4110	btrfs_end_transaction(trans);
				4111	btrfs_err(fs_info,
				4112	"Umm, you don't have the default diritem, this isn't going to work");
				4113	ret = -ENOENT;
				4114	goto out;
				4115	}
				4116
				4117	btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key);
				4118	btrfs_set_dir_item_key(path->nodes[0], di, &disk_key);
				4119	btrfs_mark_buffer_dirty(path->nodes[0]);
				4120	btrfs_free_path(path);
				4121
				4122	btrfs_set_fs_incompat(fs_info, DEFAULT_SUBVOL);
				4123	btrfs_end_transaction(trans);
				4124	out:
				4125	mnt_drop_write_file(file);
				4126	return ret;
				4127	}
				4128
				4129	static void get_block_group_info(struct list_head *groups_list,
				4130	struct btrfs_ioctl_space_info *space)
				4131	{
				4132	struct btrfs_block_group_cache *block_group;
				4133
				4134	space->total_bytes = 0;
				4135	space->used_bytes = 0;
				4136	space->flags = 0;
				4137	list_for_each_entry(block_group, groups_list, list) {
				4138	space->flags = block_group->flags;
				4139	space->total_bytes += block_group->key.offset;
				4140	space->used_bytes +=
				4141	btrfs_block_group_used(&block_group->item);
				4142	}
				4143	}
				4144
				4145	static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
				4146	void __user *arg)
				4147	{
				4148	struct btrfs_ioctl_space_args space_args;
				4149	struct btrfs_ioctl_space_info space;
				4150	struct btrfs_ioctl_space_info *dest;
				4151	struct btrfs_ioctl_space_info *dest_orig;
				4152	struct btrfs_ioctl_space_info __user *user_dest;
				4153	struct btrfs_space_info *info;
				4154	static const u64 types[] = {
				4155	BTRFS_BLOCK_GROUP_DATA,
				4156	BTRFS_BLOCK_GROUP_SYSTEM,
				4157	BTRFS_BLOCK_GROUP_METADATA,
				4158	BTRFS_BLOCK_GROUP_DATA \| BTRFS_BLOCK_GROUP_METADATA
				4159	};
				4160	int num_types = 4;
				4161	int alloc_size;
				4162	int ret = 0;
				4163	u64 slot_count = 0;
				4164	int i, c;
				4165
				4166	if (copy_from_user(&space_args,
				4167	(struct btrfs_ioctl_space_args __user *)arg,
				4168	sizeof(space_args)))
				4169	return -EFAULT;
				4170
				4171	for (i = 0; i < num_types; i++) {
				4172	struct btrfs_space_info *tmp;
				4173
				4174	info = NULL;
				4175	rcu_read_lock();
				4176	list_for_each_entry_rcu(tmp, &fs_info->space_info,
				4177	list) {
				4178	if (tmp->flags == types[i]) {
				4179	info = tmp;
				4180	break;
				4181	}
				4182	}
				4183	rcu_read_unlock();
				4184
				4185	if (!info)
				4186	continue;
				4187
				4188	down_read(&info->groups_sem);
				4189	for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
				4190	if (!list_empty(&info->block_groups[c]))
				4191	slot_count++;
				4192	}
				4193	up_read(&info->groups_sem);
				4194	}
				4195
				4196	/*
				4197	* Global block reserve, exported as a space_info
				4198	*/
				4199	slot_count++;
				4200
				4201	/* space_slots == 0 means they are asking for a count */
				4202	if (space_args.space_slots == 0) {
				4203	space_args.total_spaces = slot_count;
				4204	goto out;
				4205	}
				4206
				4207	slot_count = min_t(u64, space_args.space_slots, slot_count);
				4208
				4209	alloc_size = sizeof(dest) slot_count;
				4210
				4211	/* we generally have at most 6 or so space infos, one for each raid
				4212	* level. So, a whole page should be more than enough for everyone
				4213	*/
				4214	if (alloc_size > PAGE_SIZE)
				4215	return -ENOMEM;
				4216
				4217	space_args.total_spaces = 0;
				4218	dest = kmalloc(alloc_size, GFP_KERNEL);
				4219	if (!dest)
				4220	return -ENOMEM;
				4221	dest_orig = dest;
				4222
				4223	/* now we have a buffer to copy into */
				4224	for (i = 0; i < num_types; i++) {
				4225	struct btrfs_space_info *tmp;
				4226
				4227	if (!slot_count)
				4228	break;
				4229
				4230	info = NULL;
				4231	rcu_read_lock();
				4232	list_for_each_entry_rcu(tmp, &fs_info->space_info,
				4233	list) {
				4234	if (tmp->flags == types[i]) {
				4235	info = tmp;
				4236	break;
				4237	}
				4238	}
				4239	rcu_read_unlock();
				4240
				4241	if (!info)
				4242	continue;
				4243	down_read(&info->groups_sem);
				4244	for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
				4245	if (!list_empty(&info->block_groups[c])) {
				4246	get_block_group_info(&info->block_groups[c],
				4247	&space);
				4248	memcpy(dest, &space, sizeof(space));
				4249	dest++;
				4250	space_args.total_spaces++;
				4251	slot_count--;
				4252	}
				4253	if (!slot_count)
				4254	break;
				4255	}
				4256	up_read(&info->groups_sem);
				4257	}
				4258
				4259	/*
				4260	* Add global block reserve
				4261	*/
				4262	if (slot_count) {
				4263	struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
				4264
				4265	spin_lock(&block_rsv->lock);
				4266	space.total_bytes = block_rsv->size;
				4267	space.used_bytes = block_rsv->size - block_rsv->reserved;
				4268	spin_unlock(&block_rsv->lock);
				4269	space.flags = BTRFS_SPACE_INFO_GLOBAL_RSV;
				4270	memcpy(dest, &space, sizeof(space));
				4271	space_args.total_spaces++;
				4272	}
				4273
				4274	user_dest = (struct btrfs_ioctl_space_info __user *)
				4275	(arg + sizeof(struct btrfs_ioctl_space_args));
				4276
				4277	if (copy_to_user(user_dest, dest_orig, alloc_size))
				4278	ret = -EFAULT;
				4279
				4280	kfree(dest_orig);
				4281	out:
				4282	if (ret == 0 && copy_to_user(arg, &space_args, sizeof(space_args)))
				4283	ret = -EFAULT;
				4284
				4285	return ret;
				4286	}
				4287
				4288	static noinline long btrfs_ioctl_start_sync(struct btrfs_root *root,
				4289	void __user *argp)
				4290	{
				4291	struct btrfs_trans_handle *trans;
				4292	u64 transid;
				4293	int ret;
				4294
				4295	trans = btrfs_attach_transaction_barrier(root);
				4296	if (IS_ERR(trans)) {
				4297	if (PTR_ERR(trans) != -ENOENT)
				4298	return PTR_ERR(trans);
				4299
				4300	/* No running transaction, don't bother */
				4301	transid = root->fs_info->last_trans_committed;
				4302	goto out;
				4303	}
				4304	transid = trans->transid;
				4305	ret = btrfs_commit_transaction_async(trans, 0);
				4306	if (ret) {
				4307	btrfs_end_transaction(trans);
				4308	return ret;
				4309	}
				4310	out:
				4311	if (argp)
				4312	if (copy_to_user(argp, &transid, sizeof(transid)))
				4313	return -EFAULT;
				4314	return 0;
				4315	}
				4316
				4317	static noinline long btrfs_ioctl_wait_sync(struct btrfs_fs_info *fs_info,
				4318	void __user *argp)
				4319	{
				4320	u64 transid;
				4321
				4322	if (argp) {
				4323	if (copy_from_user(&transid, argp, sizeof(transid)))
				4324	return -EFAULT;
				4325	} else {
				4326	transid = 0; /* current trans */
				4327	}
				4328	return btrfs_wait_for_commit(fs_info, transid);
				4329	}
				4330
				4331	static long btrfs_ioctl_scrub(struct file file, void __user arg)
				4332	{
				4333	struct btrfs_fs_info *fs_info = btrfs_sb(file_inode(file)->i_sb);
				4334	struct btrfs_ioctl_scrub_args *sa;
				4335	int ret;
				4336
				4337	if (!capable(CAP_SYS_ADMIN))
				4338	return -EPERM;
				4339
				4340	sa = memdup_user(arg, sizeof(*sa));
				4341	if (IS_ERR(sa))
				4342	return PTR_ERR(sa);
				4343
				4344	if (!(sa->flags & BTRFS_SCRUB_READONLY)) {
				4345	ret = mnt_want_write_file(file);
				4346	if (ret)
				4347	goto out;
				4348	}
				4349
				4350	ret = btrfs_scrub_dev(fs_info, sa->devid, sa->start, sa->end,
				4351	&sa->progress, sa->flags & BTRFS_SCRUB_READONLY,
				4352	0);
				4353
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame^]	4354	/*
				4355	* Copy scrub args to user space even if btrfs_scrub_dev() returned an
				4356	* error. This is important as it allows user space to know how much
				4357	* progress scrub has done. For example, if scrub is canceled we get
				4358	* -ECANCELED from btrfs_scrub_dev() and return that error back to user
				4359	* space. Later user space can inspect the progress from the structure
				4360	* btrfs_ioctl_scrub_args and resume scrub from where it left off
				4361	* previously (btrfs-progs does this).
				4362	* If we fail to copy the btrfs_ioctl_scrub_args structure to user space
				4363	* then return -EFAULT to signal the structure was not copied or it may
				4364	* be corrupt and unreliable due to a partial copy.
				4365	*/
				4366	if (copy_to_user(arg, sa, sizeof(*sa)))
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4367	ret = -EFAULT;
				4368
				4369	if (!(sa->flags & BTRFS_SCRUB_READONLY))
				4370	mnt_drop_write_file(file);
				4371	out:
				4372	kfree(sa);
				4373	return ret;
				4374	}
				4375
				4376	static long btrfs_ioctl_scrub_cancel(struct btrfs_fs_info *fs_info)
				4377	{
				4378	if (!capable(CAP_SYS_ADMIN))
				4379	return -EPERM;
				4380
				4381	return btrfs_scrub_cancel(fs_info);
				4382	}
				4383
				4384	static long btrfs_ioctl_scrub_progress(struct btrfs_fs_info *fs_info,
				4385	void __user *arg)
				4386	{
				4387	struct btrfs_ioctl_scrub_args *sa;
				4388	int ret;
				4389
				4390	if (!capable(CAP_SYS_ADMIN))
				4391	return -EPERM;
				4392
				4393	sa = memdup_user(arg, sizeof(*sa));
				4394	if (IS_ERR(sa))
				4395	return PTR_ERR(sa);
				4396
				4397	ret = btrfs_scrub_progress(fs_info, sa->devid, &sa->progress);
				4398
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	4399	if (ret == 0 && copy_to_user(arg, sa, sizeof(*sa)))
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4400	ret = -EFAULT;
				4401
				4402	kfree(sa);
				4403	return ret;
				4404	}
				4405
				4406	static long btrfs_ioctl_get_dev_stats(struct btrfs_fs_info *fs_info,
				4407	void __user *arg)
				4408	{
				4409	struct btrfs_ioctl_get_dev_stats *sa;
				4410	int ret;
				4411
				4412	sa = memdup_user(arg, sizeof(*sa));
				4413	if (IS_ERR(sa))
				4414	return PTR_ERR(sa);
				4415
				4416	if ((sa->flags & BTRFS_DEV_STATS_RESET) && !capable(CAP_SYS_ADMIN)) {
				4417	kfree(sa);
				4418	return -EPERM;
				4419	}
				4420
				4421	ret = btrfs_get_dev_stats(fs_info, sa);
				4422
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	4423	if (ret == 0 && copy_to_user(arg, sa, sizeof(*sa)))
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4424	ret = -EFAULT;
				4425
				4426	kfree(sa);
				4427	return ret;
				4428	}
				4429
				4430	static long btrfs_ioctl_dev_replace(struct btrfs_fs_info *fs_info,
				4431	void __user *arg)
				4432	{
				4433	struct btrfs_ioctl_dev_replace_args *p;
				4434	int ret;
				4435
				4436	if (!capable(CAP_SYS_ADMIN))
				4437	return -EPERM;
				4438
				4439	p = memdup_user(arg, sizeof(*p));
				4440	if (IS_ERR(p))
				4441	return PTR_ERR(p);
				4442
				4443	switch (p->cmd) {
				4444	case BTRFS_IOCTL_DEV_REPLACE_CMD_START:
				4445	if (sb_rdonly(fs_info->sb)) {
				4446	ret = -EROFS;
				4447	goto out;
				4448	}
				4449	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
				4450	ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
				4451	} else {
				4452	ret = btrfs_dev_replace_by_ioctl(fs_info, p);
				4453	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
				4454	}
				4455	break;
				4456	case BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS:
				4457	btrfs_dev_replace_status(fs_info, p);
				4458	ret = 0;
				4459	break;
				4460	case BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL:
				4461	p->result = btrfs_dev_replace_cancel(fs_info);
				4462	ret = 0;
				4463	break;
				4464	default:
				4465	ret = -EINVAL;
				4466	break;
				4467	}
				4468
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	4469	if ((ret == 0 \|\| ret == -ECANCELED) && copy_to_user(arg, p, sizeof(*p)))
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4470	ret = -EFAULT;
				4471	out:
				4472	kfree(p);
				4473	return ret;
				4474	}
				4475
				4476	static long btrfs_ioctl_ino_to_path(struct btrfs_root root, void __user arg)
				4477	{
				4478	int ret = 0;
				4479	int i;
				4480	u64 rel_ptr;
				4481	int size;
				4482	struct btrfs_ioctl_ino_path_args *ipa = NULL;
				4483	struct inode_fs_paths *ipath = NULL;
				4484	struct btrfs_path *path;
				4485
				4486	if (!capable(CAP_DAC_READ_SEARCH))
				4487	return -EPERM;
				4488
				4489	path = btrfs_alloc_path();
				4490	if (!path) {
				4491	ret = -ENOMEM;
				4492	goto out;
				4493	}
				4494
				4495	ipa = memdup_user(arg, sizeof(*ipa));
				4496	if (IS_ERR(ipa)) {
				4497	ret = PTR_ERR(ipa);
				4498	ipa = NULL;
				4499	goto out;
				4500	}
				4501
				4502	size = min_t(u32, ipa->size, 4096);
				4503	ipath = init_ipath(size, root, path);
				4504	if (IS_ERR(ipath)) {
				4505	ret = PTR_ERR(ipath);
				4506	ipath = NULL;
				4507	goto out;
				4508	}
				4509
				4510	ret = paths_from_inode(ipa->inum, ipath);
				4511	if (ret < 0)
				4512	goto out;
				4513
				4514	for (i = 0; i < ipath->fspath->elem_cnt; ++i) {
				4515	rel_ptr = ipath->fspath->val[i] -
				4516	(u64)(unsigned long)ipath->fspath->val;
				4517	ipath->fspath->val[i] = rel_ptr;
				4518	}
				4519
				4520	ret = copy_to_user((void __user *)(unsigned long)ipa->fspath,
				4521	ipath->fspath, size);
				4522	if (ret) {
				4523	ret = -EFAULT;
				4524	goto out;
				4525	}
				4526
				4527	out:
				4528	btrfs_free_path(path);
				4529	free_ipath(ipath);
				4530	kfree(ipa);
				4531
				4532	return ret;
				4533	}
				4534
				4535	static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx)
				4536	{
				4537	struct btrfs_data_container *inodes = ctx;
				4538	const size_t c = 3 * sizeof(u64);
				4539
				4540	if (inodes->bytes_left >= c) {
				4541	inodes->bytes_left -= c;
				4542	inodes->val[inodes->elem_cnt] = inum;
				4543	inodes->val[inodes->elem_cnt + 1] = offset;
				4544	inodes->val[inodes->elem_cnt + 2] = root;
				4545	inodes->elem_cnt += 3;
				4546	} else {
				4547	inodes->bytes_missing += c - inodes->bytes_left;
				4548	inodes->bytes_left = 0;
				4549	inodes->elem_missed += 3;
				4550	}
				4551
				4552	return 0;
				4553	}
				4554
				4555	static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
				4556	void __user *arg, int version)
				4557	{
				4558	int ret = 0;
				4559	int size;
				4560	struct btrfs_ioctl_logical_ino_args *loi;
				4561	struct btrfs_data_container *inodes = NULL;
				4562	struct btrfs_path *path = NULL;
				4563	bool ignore_offset;
				4564
				4565	if (!capable(CAP_SYS_ADMIN))
				4566	return -EPERM;
				4567
				4568	loi = memdup_user(arg, sizeof(*loi));
				4569	if (IS_ERR(loi))
				4570	return PTR_ERR(loi);
				4571
				4572	if (version == 1) {
				4573	ignore_offset = false;
				4574	size = min_t(u32, loi->size, SZ_64K);
				4575	} else {
				4576	/* All reserved bits must be 0 for now */
				4577	if (memchr_inv(loi->reserved, 0, sizeof(loi->reserved))) {
				4578	ret = -EINVAL;
				4579	goto out_loi;
				4580	}
				4581	/* Only accept flags we have defined so far */
				4582	if (loi->flags & ~(BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET)) {
				4583	ret = -EINVAL;
				4584	goto out_loi;
				4585	}
				4586	ignore_offset = loi->flags & BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET;
				4587	size = min_t(u32, loi->size, SZ_16M);
				4588	}
				4589
				4590	path = btrfs_alloc_path();
				4591	if (!path) {
				4592	ret = -ENOMEM;
				4593	goto out;
				4594	}
				4595
				4596	inodes = init_data_container(size);
				4597	if (IS_ERR(inodes)) {
				4598	ret = PTR_ERR(inodes);
				4599	inodes = NULL;
				4600	goto out;
				4601	}
				4602
				4603	ret = iterate_inodes_from_logical(loi->logical, fs_info, path,
				4604	build_ino_list, inodes, ignore_offset);
				4605	if (ret == -EINVAL)
				4606	ret = -ENOENT;
				4607	if (ret < 0)
				4608	goto out;
				4609
				4610	ret = copy_to_user((void __user *)(unsigned long)loi->inodes, inodes,
				4611	size);
				4612	if (ret)
				4613	ret = -EFAULT;
				4614
				4615	out:
				4616	btrfs_free_path(path);
				4617	kvfree(inodes);
				4618	out_loi:
				4619	kfree(loi);
				4620
				4621	return ret;
				4622	}
				4623
				4624	void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
				4625	struct btrfs_ioctl_balance_args *bargs)
				4626	{
				4627	struct btrfs_balance_control *bctl = fs_info->balance_ctl;
				4628
				4629	bargs->flags = bctl->flags;
				4630
				4631	if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags))
				4632	bargs->state \|= BTRFS_BALANCE_STATE_RUNNING;
				4633	if (atomic_read(&fs_info->balance_pause_req))
				4634	bargs->state \|= BTRFS_BALANCE_STATE_PAUSE_REQ;
				4635	if (atomic_read(&fs_info->balance_cancel_req))
				4636	bargs->state \|= BTRFS_BALANCE_STATE_CANCEL_REQ;
				4637
				4638	memcpy(&bargs->data, &bctl->data, sizeof(bargs->data));
				4639	memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta));
				4640	memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys));
				4641
				4642	spin_lock(&fs_info->balance_lock);
				4643	memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
				4644	spin_unlock(&fs_info->balance_lock);
				4645	}
				4646
				4647	static long btrfs_ioctl_balance(struct file file, void __user arg)
				4648	{
				4649	struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
				4650	struct btrfs_fs_info *fs_info = root->fs_info;
				4651	struct btrfs_ioctl_balance_args *bargs;
				4652	struct btrfs_balance_control *bctl;
				4653	bool need_unlock; /* for mut. excl. ops lock */
				4654	int ret;
				4655
				4656	if (!capable(CAP_SYS_ADMIN))
				4657	return -EPERM;
				4658
				4659	ret = mnt_want_write_file(file);
				4660	if (ret)
				4661	return ret;
				4662
				4663	again:
				4664	if (!test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
				4665	mutex_lock(&fs_info->balance_mutex);
				4666	need_unlock = true;
				4667	goto locked;
				4668	}
				4669
				4670	/*
				4671	* mut. excl. ops lock is locked. Three possibilities:
				4672	* (1) some other op is running
				4673	* (2) balance is running
				4674	* (3) balance is paused -- special case (think resume)
				4675	*/
				4676	mutex_lock(&fs_info->balance_mutex);
				4677	if (fs_info->balance_ctl) {
				4678	/* this is either (2) or (3) */
				4679	if (!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
				4680	mutex_unlock(&fs_info->balance_mutex);
				4681	/*
				4682	* Lock released to allow other waiters to continue,
				4683	* we'll reexamine the status again.
				4684	*/
				4685	mutex_lock(&fs_info->balance_mutex);
				4686
				4687	if (fs_info->balance_ctl &&
				4688	!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
				4689	/* this is (3) */
				4690	need_unlock = false;
				4691	goto locked;
				4692	}
				4693
				4694	mutex_unlock(&fs_info->balance_mutex);
				4695	goto again;
				4696	} else {
				4697	/* this is (2) */
				4698	mutex_unlock(&fs_info->balance_mutex);
				4699	ret = -EINPROGRESS;
				4700	goto out;
				4701	}
				4702	} else {
				4703	/* this is (1) */
				4704	mutex_unlock(&fs_info->balance_mutex);
				4705	ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
				4706	goto out;
				4707	}
				4708
				4709	locked:
				4710	BUG_ON(!test_bit(BTRFS_FS_EXCL_OP, &fs_info->flags));
				4711
				4712	if (arg) {
				4713	bargs = memdup_user(arg, sizeof(*bargs));
				4714	if (IS_ERR(bargs)) {
				4715	ret = PTR_ERR(bargs);
				4716	goto out_unlock;
				4717	}
				4718
				4719	if (bargs->flags & BTRFS_BALANCE_RESUME) {
				4720	if (!fs_info->balance_ctl) {
				4721	ret = -ENOTCONN;
				4722	goto out_bargs;
				4723	}
				4724
				4725	bctl = fs_info->balance_ctl;
				4726	spin_lock(&fs_info->balance_lock);
				4727	bctl->flags \|= BTRFS_BALANCE_RESUME;
				4728	spin_unlock(&fs_info->balance_lock);
				4729
				4730	goto do_balance;
				4731	}
				4732	} else {
				4733	bargs = NULL;
				4734	}
				4735
				4736	if (fs_info->balance_ctl) {
				4737	ret = -EINPROGRESS;
				4738	goto out_bargs;
				4739	}
				4740
				4741	bctl = kzalloc(sizeof(*bctl), GFP_KERNEL);
				4742	if (!bctl) {
				4743	ret = -ENOMEM;
				4744	goto out_bargs;
				4745	}
				4746
				4747	if (arg) {
				4748	memcpy(&bctl->data, &bargs->data, sizeof(bctl->data));
				4749	memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta));
				4750	memcpy(&bctl->sys, &bargs->sys, sizeof(bctl->sys));
				4751
				4752	bctl->flags = bargs->flags;
				4753	} else {
				4754	/* balance everything - no filters */
				4755	bctl->flags \|= BTRFS_BALANCE_TYPE_MASK;
				4756	}
				4757
				4758	if (bctl->flags & ~(BTRFS_BALANCE_ARGS_MASK \| BTRFS_BALANCE_TYPE_MASK)) {
				4759	ret = -EINVAL;
				4760	goto out_bctl;
				4761	}
				4762
				4763	do_balance:
				4764	/*
				4765	* Ownership of bctl and filesystem flag BTRFS_FS_EXCL_OP goes to
				4766	* btrfs_balance. bctl is freed in reset_balance_state, or, if
				4767	* restriper was paused all the way until unmount, in free_fs_info.
				4768	* The flag should be cleared after reset_balance_state.
				4769	*/
				4770	need_unlock = false;
				4771
				4772	ret = btrfs_balance(fs_info, bctl, bargs);
				4773	bctl = NULL;
				4774
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	4775	if ((ret == 0 \|\| ret == -ECANCELED) && arg) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4776	if (copy_to_user(arg, bargs, sizeof(*bargs)))
				4777	ret = -EFAULT;
				4778	}
				4779
				4780	out_bctl:
				4781	kfree(bctl);
				4782	out_bargs:
				4783	kfree(bargs);
				4784	out_unlock:
				4785	mutex_unlock(&fs_info->balance_mutex);
				4786	if (need_unlock)
				4787	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
				4788	out:
				4789	mnt_drop_write_file(file);
				4790	return ret;
				4791	}
				4792
				4793	static long btrfs_ioctl_balance_ctl(struct btrfs_fs_info *fs_info, int cmd)
				4794	{
				4795	if (!capable(CAP_SYS_ADMIN))
				4796	return -EPERM;
				4797
				4798	switch (cmd) {
				4799	case BTRFS_BALANCE_CTL_PAUSE:
				4800	return btrfs_pause_balance(fs_info);
				4801	case BTRFS_BALANCE_CTL_CANCEL:
				4802	return btrfs_cancel_balance(fs_info);
				4803	}
				4804
				4805	return -EINVAL;
				4806	}
				4807
				4808	static long btrfs_ioctl_balance_progress(struct btrfs_fs_info *fs_info,
				4809	void __user *arg)
				4810	{
				4811	struct btrfs_ioctl_balance_args *bargs;
				4812	int ret = 0;
				4813
				4814	if (!capable(CAP_SYS_ADMIN))
				4815	return -EPERM;
				4816
				4817	mutex_lock(&fs_info->balance_mutex);
				4818	if (!fs_info->balance_ctl) {
				4819	ret = -ENOTCONN;
				4820	goto out;
				4821	}
				4822
				4823	bargs = kzalloc(sizeof(*bargs), GFP_KERNEL);
				4824	if (!bargs) {
				4825	ret = -ENOMEM;
				4826	goto out;
				4827	}
				4828
				4829	btrfs_update_ioctl_balance_args(fs_info, bargs);
				4830
				4831	if (copy_to_user(arg, bargs, sizeof(*bargs)))
				4832	ret = -EFAULT;
				4833
				4834	kfree(bargs);
				4835	out:
				4836	mutex_unlock(&fs_info->balance_mutex);
				4837	return ret;
				4838	}
				4839
				4840	static long btrfs_ioctl_quota_ctl(struct file file, void __user arg)
				4841	{
				4842	struct inode *inode = file_inode(file);
				4843	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
				4844	struct btrfs_ioctl_quota_ctl_args *sa;
				4845	int ret;
				4846
				4847	if (!capable(CAP_SYS_ADMIN))
				4848	return -EPERM;
				4849
				4850	ret = mnt_want_write_file(file);
				4851	if (ret)
				4852	return ret;
				4853
				4854	sa = memdup_user(arg, sizeof(*sa));
				4855	if (IS_ERR(sa)) {
				4856	ret = PTR_ERR(sa);
				4857	goto drop_write;
				4858	}
				4859
				4860	down_write(&fs_info->subvol_sem);
				4861
				4862	switch (sa->cmd) {
				4863	case BTRFS_QUOTA_CTL_ENABLE:
				4864	ret = btrfs_quota_enable(fs_info);
				4865	break;
				4866	case BTRFS_QUOTA_CTL_DISABLE:
				4867	ret = btrfs_quota_disable(fs_info);
				4868	break;
				4869	default:
				4870	ret = -EINVAL;
				4871	break;
				4872	}
				4873
				4874	kfree(sa);
				4875	up_write(&fs_info->subvol_sem);
				4876	drop_write:
				4877	mnt_drop_write_file(file);
				4878	return ret;
				4879	}
				4880
				4881	static long btrfs_ioctl_qgroup_assign(struct file file, void __user arg)
				4882	{
				4883	struct inode *inode = file_inode(file);
				4884	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
				4885	struct btrfs_root *root = BTRFS_I(inode)->root;
				4886	struct btrfs_ioctl_qgroup_assign_args *sa;
				4887	struct btrfs_trans_handle *trans;
				4888	int ret;
				4889	int err;
				4890
				4891	if (!capable(CAP_SYS_ADMIN))
				4892	return -EPERM;
				4893
				4894	ret = mnt_want_write_file(file);
				4895	if (ret)
				4896	return ret;
				4897
				4898	sa = memdup_user(arg, sizeof(*sa));
				4899	if (IS_ERR(sa)) {
				4900	ret = PTR_ERR(sa);
				4901	goto drop_write;
				4902	}
				4903
				4904	trans = btrfs_join_transaction(root);
				4905	if (IS_ERR(trans)) {
				4906	ret = PTR_ERR(trans);
				4907	goto out;
				4908	}
				4909
				4910	if (sa->assign) {
				4911	ret = btrfs_add_qgroup_relation(trans, sa->src, sa->dst);
				4912	} else {
				4913	ret = btrfs_del_qgroup_relation(trans, sa->src, sa->dst);
				4914	}
				4915
				4916	/* update qgroup status and info */
				4917	err = btrfs_run_qgroups(trans);
				4918	if (err < 0)
				4919	btrfs_handle_fs_error(fs_info, err,
				4920	"failed to update qgroup status and info");
				4921	err = btrfs_end_transaction(trans);
				4922	if (err && !ret)
				4923	ret = err;
				4924
				4925	out:
				4926	kfree(sa);
				4927	drop_write:
				4928	mnt_drop_write_file(file);
				4929	return ret;
				4930	}
				4931
				4932	static long btrfs_ioctl_qgroup_create(struct file file, void __user arg)
				4933	{
				4934	struct inode *inode = file_inode(file);
				4935	struct btrfs_root *root = BTRFS_I(inode)->root;
				4936	struct btrfs_ioctl_qgroup_create_args *sa;
				4937	struct btrfs_trans_handle *trans;
				4938	int ret;
				4939	int err;
				4940
				4941	if (!capable(CAP_SYS_ADMIN))
				4942	return -EPERM;
				4943
				4944	ret = mnt_want_write_file(file);
				4945	if (ret)
				4946	return ret;
				4947
				4948	sa = memdup_user(arg, sizeof(*sa));
				4949	if (IS_ERR(sa)) {
				4950	ret = PTR_ERR(sa);
				4951	goto drop_write;
				4952	}
				4953
				4954	if (!sa->qgroupid) {
				4955	ret = -EINVAL;
				4956	goto out;
				4957	}
				4958
				4959	trans = btrfs_join_transaction(root);
				4960	if (IS_ERR(trans)) {
				4961	ret = PTR_ERR(trans);
				4962	goto out;
				4963	}
				4964
				4965	if (sa->create) {
				4966	ret = btrfs_create_qgroup(trans, sa->qgroupid);
				4967	} else {
				4968	ret = btrfs_remove_qgroup(trans, sa->qgroupid);
				4969	}
				4970
				4971	err = btrfs_end_transaction(trans);
				4972	if (err && !ret)
				4973	ret = err;
				4974
				4975	out:
				4976	kfree(sa);
				4977	drop_write:
				4978	mnt_drop_write_file(file);
				4979	return ret;
				4980	}
				4981
				4982	static long btrfs_ioctl_qgroup_limit(struct file file, void __user arg)
				4983	{
				4984	struct inode *inode = file_inode(file);
				4985	struct btrfs_root *root = BTRFS_I(inode)->root;
				4986	struct btrfs_ioctl_qgroup_limit_args *sa;
				4987	struct btrfs_trans_handle *trans;
				4988	int ret;
				4989	int err;
				4990	u64 qgroupid;
				4991
				4992	if (!capable(CAP_SYS_ADMIN))
				4993	return -EPERM;
				4994
				4995	ret = mnt_want_write_file(file);
				4996	if (ret)
				4997	return ret;
				4998
				4999	sa = memdup_user(arg, sizeof(*sa));
				5000	if (IS_ERR(sa)) {
				5001	ret = PTR_ERR(sa);
				5002	goto drop_write;
				5003	}
				5004
				5005	trans = btrfs_join_transaction(root);
				5006	if (IS_ERR(trans)) {
				5007	ret = PTR_ERR(trans);
				5008	goto out;
				5009	}
				5010
				5011	qgroupid = sa->qgroupid;
				5012	if (!qgroupid) {
				5013	/* take the current subvol as qgroup */
				5014	qgroupid = root->root_key.objectid;
				5015	}
				5016
				5017	ret = btrfs_limit_qgroup(trans, qgroupid, &sa->lim);
				5018
				5019	err = btrfs_end_transaction(trans);
				5020	if (err && !ret)
				5021	ret = err;
				5022
				5023	out:
				5024	kfree(sa);
				5025	drop_write:
				5026	mnt_drop_write_file(file);
				5027	return ret;
				5028	}
				5029
				5030	static long btrfs_ioctl_quota_rescan(struct file file, void __user arg)
				5031	{
				5032	struct inode *inode = file_inode(file);
				5033	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
				5034	struct btrfs_ioctl_quota_rescan_args *qsa;
				5035	int ret;
				5036
				5037	if (!capable(CAP_SYS_ADMIN))
				5038	return -EPERM;
				5039
				5040	ret = mnt_want_write_file(file);
				5041	if (ret)
				5042	return ret;
				5043
				5044	qsa = memdup_user(arg, sizeof(*qsa));
				5045	if (IS_ERR(qsa)) {
				5046	ret = PTR_ERR(qsa);
				5047	goto drop_write;
				5048	}
				5049
				5050	if (qsa->flags) {
				5051	ret = -EINVAL;
				5052	goto out;
				5053	}
				5054
				5055	ret = btrfs_qgroup_rescan(fs_info);
				5056
				5057	out:
				5058	kfree(qsa);
				5059	drop_write:
				5060	mnt_drop_write_file(file);
				5061	return ret;
				5062	}
				5063
				5064	static long btrfs_ioctl_quota_rescan_status(struct file file, void __user arg)
				5065	{
				5066	struct inode *inode = file_inode(file);
				5067	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
				5068	struct btrfs_ioctl_quota_rescan_args *qsa;
				5069	int ret = 0;
				5070
				5071	if (!capable(CAP_SYS_ADMIN))
				5072	return -EPERM;
				5073
				5074	qsa = kzalloc(sizeof(*qsa), GFP_KERNEL);
				5075	if (!qsa)
				5076	return -ENOMEM;
				5077
				5078	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
				5079	qsa->flags = 1;
				5080	qsa->progress = fs_info->qgroup_rescan_progress.objectid;
				5081	}
				5082
				5083	if (copy_to_user(arg, qsa, sizeof(*qsa)))
				5084	ret = -EFAULT;
				5085
				5086	kfree(qsa);
				5087	return ret;
				5088	}
				5089
				5090	static long btrfs_ioctl_quota_rescan_wait(struct file file, void __user arg)
				5091	{
				5092	struct inode *inode = file_inode(file);
				5093	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
				5094
				5095	if (!capable(CAP_SYS_ADMIN))
				5096	return -EPERM;
				5097
				5098	return btrfs_qgroup_wait_for_completion(fs_info, true);
				5099	}
				5100
				5101	static long _btrfs_ioctl_set_received_subvol(struct file *file,
				5102	struct btrfs_ioctl_received_subvol_args *sa)
				5103	{
				5104	struct inode *inode = file_inode(file);
				5105	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
				5106	struct btrfs_root *root = BTRFS_I(inode)->root;
				5107	struct btrfs_root_item *root_item = &root->root_item;
				5108	struct btrfs_trans_handle *trans;
				5109	struct timespec64 ct = current_time(inode);
				5110	int ret = 0;
				5111	int received_uuid_changed;
				5112
				5113	if (!inode_owner_or_capable(inode))
				5114	return -EPERM;
				5115
				5116	ret = mnt_want_write_file(file);
				5117	if (ret < 0)
				5118	return ret;
				5119
				5120	down_write(&fs_info->subvol_sem);
				5121
				5122	if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
				5123	ret = -EINVAL;
				5124	goto out;
				5125	}
				5126
				5127	if (btrfs_root_readonly(root)) {
				5128	ret = -EROFS;
				5129	goto out;
				5130	}
				5131
				5132	/*
				5133	* 1 - root item
				5134	* 2 - uuid items (received uuid + subvol uuid)
				5135	*/
				5136	trans = btrfs_start_transaction(root, 3);
				5137	if (IS_ERR(trans)) {
				5138	ret = PTR_ERR(trans);
				5139	trans = NULL;
				5140	goto out;
				5141	}
				5142
				5143	sa->rtransid = trans->transid;
				5144	sa->rtime.sec = ct.tv_sec;
				5145	sa->rtime.nsec = ct.tv_nsec;
				5146
				5147	received_uuid_changed = memcmp(root_item->received_uuid, sa->uuid,
				5148	BTRFS_UUID_SIZE);
				5149	if (received_uuid_changed &&
				5150	!btrfs_is_empty_uuid(root_item->received_uuid)) {
				5151	ret = btrfs_uuid_tree_remove(trans, root_item->received_uuid,
				5152	BTRFS_UUID_KEY_RECEIVED_SUBVOL,
				5153	root->root_key.objectid);
				5154	if (ret && ret != -ENOENT) {
				5155	btrfs_abort_transaction(trans, ret);
				5156	btrfs_end_transaction(trans);
				5157	goto out;
				5158	}
				5159	}
				5160	memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE);
				5161	btrfs_set_root_stransid(root_item, sa->stransid);
				5162	btrfs_set_root_rtransid(root_item, sa->rtransid);
				5163	btrfs_set_stack_timespec_sec(&root_item->stime, sa->stime.sec);
				5164	btrfs_set_stack_timespec_nsec(&root_item->stime, sa->stime.nsec);
				5165	btrfs_set_stack_timespec_sec(&root_item->rtime, sa->rtime.sec);
				5166	btrfs_set_stack_timespec_nsec(&root_item->rtime, sa->rtime.nsec);
				5167
				5168	ret = btrfs_update_root(trans, fs_info->tree_root,
				5169	&root->root_key, &root->root_item);
				5170	if (ret < 0) {
				5171	btrfs_end_transaction(trans);
				5172	goto out;
				5173	}
				5174	if (received_uuid_changed && !btrfs_is_empty_uuid(sa->uuid)) {
				5175	ret = btrfs_uuid_tree_add(trans, sa->uuid,
				5176	BTRFS_UUID_KEY_RECEIVED_SUBVOL,
				5177	root->root_key.objectid);
				5178	if (ret < 0 && ret != -EEXIST) {
				5179	btrfs_abort_transaction(trans, ret);
				5180	btrfs_end_transaction(trans);
				5181	goto out;
				5182	}
				5183	}
				5184	ret = btrfs_commit_transaction(trans);
				5185	out:
				5186	up_write(&fs_info->subvol_sem);
				5187	mnt_drop_write_file(file);
				5188	return ret;
				5189	}
				5190
				5191	#ifdef CONFIG_64BIT
				5192	static long btrfs_ioctl_set_received_subvol_32(struct file *file,
				5193	void __user *arg)
				5194	{
				5195	struct btrfs_ioctl_received_subvol_args_32 *args32 = NULL;
				5196	struct btrfs_ioctl_received_subvol_args *args64 = NULL;
				5197	int ret = 0;
				5198
				5199	args32 = memdup_user(arg, sizeof(*args32));
				5200	if (IS_ERR(args32))
				5201	return PTR_ERR(args32);
				5202
				5203	args64 = kmalloc(sizeof(*args64), GFP_KERNEL);
				5204	if (!args64) {
				5205	ret = -ENOMEM;
				5206	goto out;
				5207	}
				5208
				5209	memcpy(args64->uuid, args32->uuid, BTRFS_UUID_SIZE);
				5210	args64->stransid = args32->stransid;
				5211	args64->rtransid = args32->rtransid;
				5212	args64->stime.sec = args32->stime.sec;
				5213	args64->stime.nsec = args32->stime.nsec;
				5214	args64->rtime.sec = args32->rtime.sec;
				5215	args64->rtime.nsec = args32->rtime.nsec;
				5216	args64->flags = args32->flags;
				5217
				5218	ret = _btrfs_ioctl_set_received_subvol(file, args64);
				5219	if (ret)
				5220	goto out;
				5221
				5222	memcpy(args32->uuid, args64->uuid, BTRFS_UUID_SIZE);
				5223	args32->stransid = args64->stransid;
				5224	args32->rtransid = args64->rtransid;
				5225	args32->stime.sec = args64->stime.sec;
				5226	args32->stime.nsec = args64->stime.nsec;
				5227	args32->rtime.sec = args64->rtime.sec;
				5228	args32->rtime.nsec = args64->rtime.nsec;
				5229	args32->flags = args64->flags;
				5230
				5231	ret = copy_to_user(arg, args32, sizeof(*args32));
				5232	if (ret)
				5233	ret = -EFAULT;
				5234
				5235	out:
				5236	kfree(args32);
				5237	kfree(args64);
				5238	return ret;
				5239	}
				5240	#endif
				5241
				5242	static long btrfs_ioctl_set_received_subvol(struct file *file,
				5243	void __user *arg)
				5244	{
				5245	struct btrfs_ioctl_received_subvol_args *sa = NULL;
				5246	int ret = 0;
				5247
				5248	sa = memdup_user(arg, sizeof(*sa));
				5249	if (IS_ERR(sa))
				5250	return PTR_ERR(sa);
				5251
				5252	ret = _btrfs_ioctl_set_received_subvol(file, sa);
				5253
				5254	if (ret)
				5255	goto out;
				5256
				5257	ret = copy_to_user(arg, sa, sizeof(*sa));
				5258	if (ret)
				5259	ret = -EFAULT;
				5260
				5261	out:
				5262	kfree(sa);
				5263	return ret;
				5264	}
				5265
				5266	static int btrfs_ioctl_get_fslabel(struct file file, void __user arg)
				5267	{
				5268	struct inode *inode = file_inode(file);
				5269	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
				5270	size_t len;
				5271	int ret;
				5272	char label[BTRFS_LABEL_SIZE];
				5273
				5274	spin_lock(&fs_info->super_lock);
				5275	memcpy(label, fs_info->super_copy->label, BTRFS_LABEL_SIZE);
				5276	spin_unlock(&fs_info->super_lock);
				5277
				5278	len = strnlen(label, BTRFS_LABEL_SIZE);
				5279
				5280	if (len == BTRFS_LABEL_SIZE) {
				5281	btrfs_warn(fs_info,
				5282	"label is too long, return the first %zu bytes",
				5283	--len);
				5284	}
				5285
				5286	ret = copy_to_user(arg, label, len);
				5287
				5288	return ret ? -EFAULT : 0;
				5289	}
				5290
				5291	static int btrfs_ioctl_set_fslabel(struct file file, void __user arg)
				5292	{
				5293	struct inode *inode = file_inode(file);
				5294	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
				5295	struct btrfs_root *root = BTRFS_I(inode)->root;
				5296	struct btrfs_super_block *super_block = fs_info->super_copy;
				5297	struct btrfs_trans_handle *trans;
				5298	char label[BTRFS_LABEL_SIZE];
				5299	int ret;
				5300
				5301	if (!capable(CAP_SYS_ADMIN))
				5302	return -EPERM;
				5303
				5304	if (copy_from_user(label, arg, sizeof(label)))
				5305	return -EFAULT;
				5306
				5307	if (strnlen(label, BTRFS_LABEL_SIZE) == BTRFS_LABEL_SIZE) {
				5308	btrfs_err(fs_info,
				5309	"unable to set label with more than %d bytes",
				5310	BTRFS_LABEL_SIZE - 1);
				5311	return -EINVAL;
				5312	}
				5313
				5314	ret = mnt_want_write_file(file);
				5315	if (ret)
				5316	return ret;
				5317
				5318	trans = btrfs_start_transaction(root, 0);
				5319	if (IS_ERR(trans)) {
				5320	ret = PTR_ERR(trans);
				5321	goto out_unlock;
				5322	}
				5323
				5324	spin_lock(&fs_info->super_lock);
				5325	strcpy(super_block->label, label);
				5326	spin_unlock(&fs_info->super_lock);
				5327	ret = btrfs_commit_transaction(trans);
				5328
				5329	out_unlock:
				5330	mnt_drop_write_file(file);
				5331	return ret;
				5332	}
				5333
				5334	#define INIT_FEATURE_FLAGS(suffix) \
				5335	{ .compat_flags = BTRFS_FEATURE_COMPAT_##suffix, \
				5336	.compat_ro_flags = BTRFS_FEATURE_COMPAT_RO_##suffix, \
				5337	.incompat_flags = BTRFS_FEATURE_INCOMPAT_##suffix }
				5338
				5339	int btrfs_ioctl_get_supported_features(void __user *arg)
				5340	{
				5341	static const struct btrfs_ioctl_feature_flags features[3] = {
				5342	INIT_FEATURE_FLAGS(SUPP),
				5343	INIT_FEATURE_FLAGS(SAFE_SET),
				5344	INIT_FEATURE_FLAGS(SAFE_CLEAR)
				5345	};
				5346
				5347	if (copy_to_user(arg, &features, sizeof(features)))
				5348	return -EFAULT;
				5349
				5350	return 0;
				5351	}
				5352
				5353	static int btrfs_ioctl_get_features(struct file file, void __user arg)
				5354	{
				5355	struct inode *inode = file_inode(file);
				5356	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
				5357	struct btrfs_super_block *super_block = fs_info->super_copy;
				5358	struct btrfs_ioctl_feature_flags features;
				5359
				5360	features.compat_flags = btrfs_super_compat_flags(super_block);
				5361	features.compat_ro_flags = btrfs_super_compat_ro_flags(super_block);
				5362	features.incompat_flags = btrfs_super_incompat_flags(super_block);
				5363
				5364	if (copy_to_user(arg, &features, sizeof(features)))
				5365	return -EFAULT;
				5366
				5367	return 0;
				5368	}
				5369
				5370	static int check_feature_bits(struct btrfs_fs_info *fs_info,
				5371	enum btrfs_feature_set set,
				5372	u64 change_mask, u64 flags, u64 supported_flags,
				5373	u64 safe_set, u64 safe_clear)
				5374	{
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	5375	const char *type = btrfs_feature_set_name(set);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5376	char *names;
				5377	u64 disallowed, unsupported;
				5378	u64 set_mask = flags & change_mask;
				5379	u64 clear_mask = ~flags & change_mask;
				5380
				5381	unsupported = set_mask & ~supported_flags;
				5382	if (unsupported) {
				5383	names = btrfs_printable_features(set, unsupported);
				5384	if (names) {
				5385	btrfs_warn(fs_info,
				5386	"this kernel does not support the %s feature bit%s",
				5387	names, strchr(names, ',') ? "s" : "");
				5388	kfree(names);
				5389	} else
				5390	btrfs_warn(fs_info,
				5391	"this kernel does not support %s bits 0x%llx",
				5392	type, unsupported);
				5393	return -EOPNOTSUPP;
				5394	}
				5395
				5396	disallowed = set_mask & ~safe_set;
				5397	if (disallowed) {
				5398	names = btrfs_printable_features(set, disallowed);
				5399	if (names) {
				5400	btrfs_warn(fs_info,
				5401	"can't set the %s feature bit%s while mounted",
				5402	names, strchr(names, ',') ? "s" : "");
				5403	kfree(names);
				5404	} else
				5405	btrfs_warn(fs_info,
				5406	"can't set %s bits 0x%llx while mounted",
				5407	type, disallowed);
				5408	return -EPERM;
				5409	}
				5410
				5411	disallowed = clear_mask & ~safe_clear;
				5412	if (disallowed) {
				5413	names = btrfs_printable_features(set, disallowed);
				5414	if (names) {
				5415	btrfs_warn(fs_info,
				5416	"can't clear the %s feature bit%s while mounted",
				5417	names, strchr(names, ',') ? "s" : "");
				5418	kfree(names);
				5419	} else
				5420	btrfs_warn(fs_info,
				5421	"can't clear %s bits 0x%llx while mounted",
				5422	type, disallowed);
				5423	return -EPERM;
				5424	}
				5425
				5426	return 0;
				5427	}
				5428
				5429	#define check_feature(fs_info, change_mask, flags, mask_base) \
				5430	check_feature_bits(fs_info, FEAT_##mask_base, change_mask, flags, \
				5431	BTRFS_FEATURE_ ## mask_base ## _SUPP, \
				5432	BTRFS_FEATURE_ ## mask_base ## _SAFE_SET, \
				5433	BTRFS_FEATURE_ ## mask_base ## _SAFE_CLEAR)
				5434
				5435	static int btrfs_ioctl_set_features(struct file file, void __user arg)
				5436	{
				5437	struct inode *inode = file_inode(file);
				5438	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
				5439	struct btrfs_root *root = BTRFS_I(inode)->root;
				5440	struct btrfs_super_block *super_block = fs_info->super_copy;
				5441	struct btrfs_ioctl_feature_flags flags[2];
				5442	struct btrfs_trans_handle *trans;
				5443	u64 newflags;
				5444	int ret;
				5445
				5446	if (!capable(CAP_SYS_ADMIN))
				5447	return -EPERM;
				5448
				5449	if (copy_from_user(flags, arg, sizeof(flags)))
				5450	return -EFAULT;
				5451
				5452	/* Nothing to do */
				5453	if (!flags[0].compat_flags && !flags[0].compat_ro_flags &&
				5454	!flags[0].incompat_flags)
				5455	return 0;
				5456
				5457	ret = check_feature(fs_info, flags[0].compat_flags,
				5458	flags[1].compat_flags, COMPAT);
				5459	if (ret)
				5460	return ret;
				5461
				5462	ret = check_feature(fs_info, flags[0].compat_ro_flags,
				5463	flags[1].compat_ro_flags, COMPAT_RO);
				5464	if (ret)
				5465	return ret;
				5466
				5467	ret = check_feature(fs_info, flags[0].incompat_flags,
				5468	flags[1].incompat_flags, INCOMPAT);
				5469	if (ret)
				5470	return ret;
				5471
				5472	ret = mnt_want_write_file(file);
				5473	if (ret)
				5474	return ret;
				5475
				5476	trans = btrfs_start_transaction(root, 0);
				5477	if (IS_ERR(trans)) {
				5478	ret = PTR_ERR(trans);
				5479	goto out_drop_write;
				5480	}
				5481
				5482	spin_lock(&fs_info->super_lock);
				5483	newflags = btrfs_super_compat_flags(super_block);
				5484	newflags \|= flags[0].compat_flags & flags[1].compat_flags;
				5485	newflags &= ~(flags[0].compat_flags & ~flags[1].compat_flags);
				5486	btrfs_set_super_compat_flags(super_block, newflags);
				5487
				5488	newflags = btrfs_super_compat_ro_flags(super_block);
				5489	newflags \|= flags[0].compat_ro_flags & flags[1].compat_ro_flags;
				5490	newflags &= ~(flags[0].compat_ro_flags & ~flags[1].compat_ro_flags);
				5491	btrfs_set_super_compat_ro_flags(super_block, newflags);
				5492
				5493	newflags = btrfs_super_incompat_flags(super_block);
				5494	newflags \|= flags[0].incompat_flags & flags[1].incompat_flags;
				5495	newflags &= ~(flags[0].incompat_flags & ~flags[1].incompat_flags);
				5496	btrfs_set_super_incompat_flags(super_block, newflags);
				5497	spin_unlock(&fs_info->super_lock);
				5498
				5499	ret = btrfs_commit_transaction(trans);
				5500	out_drop_write:
				5501	mnt_drop_write_file(file);
				5502
				5503	return ret;
				5504	}
				5505
				5506	static int _btrfs_ioctl_send(struct file file, void __user argp, bool compat)
				5507	{
				5508	struct btrfs_ioctl_send_args *arg;
				5509	int ret;
				5510
				5511	if (compat) {
				5512	#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
				5513	struct btrfs_ioctl_send_args_32 args32;
				5514
				5515	ret = copy_from_user(&args32, argp, sizeof(args32));
				5516	if (ret)
				5517	return -EFAULT;
				5518	arg = kzalloc(sizeof(*arg), GFP_KERNEL);
				5519	if (!arg)
				5520	return -ENOMEM;
				5521	arg->send_fd = args32.send_fd;
				5522	arg->clone_sources_count = args32.clone_sources_count;
				5523	arg->clone_sources = compat_ptr(args32.clone_sources);
				5524	arg->parent_root = args32.parent_root;
				5525	arg->flags = args32.flags;
				5526	memcpy(arg->reserved, args32.reserved,
				5527	sizeof(args32.reserved));
				5528	#else
				5529	return -ENOTTY;
				5530	#endif
				5531	} else {
				5532	arg = memdup_user(argp, sizeof(*arg));
				5533	if (IS_ERR(arg))
				5534	return PTR_ERR(arg);
				5535	}
				5536	ret = btrfs_ioctl_send(file, arg);
				5537	kfree(arg);
				5538	return ret;
				5539	}
				5540
				5541	long btrfs_ioctl(struct file *file, unsigned int
				5542	cmd, unsigned long arg)
				5543	{
				5544	struct inode *inode = file_inode(file);
				5545	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
				5546	struct btrfs_root *root = BTRFS_I(inode)->root;
				5547	void __user argp = (void __user )arg;
				5548
				5549	switch (cmd) {
				5550	case FS_IOC_GETFLAGS:
				5551	return btrfs_ioctl_getflags(file, argp);
				5552	case FS_IOC_SETFLAGS:
				5553	return btrfs_ioctl_setflags(file, argp);
				5554	case FS_IOC_GETVERSION:
				5555	return btrfs_ioctl_getversion(file, argp);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	5556	case FS_IOC_GETFSLABEL:
				5557	return btrfs_ioctl_get_fslabel(file, argp);
				5558	case FS_IOC_SETFSLABEL:
				5559	return btrfs_ioctl_set_fslabel(file, argp);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5560	case FITRIM:
				5561	return btrfs_ioctl_fitrim(file, argp);
				5562	case BTRFS_IOC_SNAP_CREATE:
				5563	return btrfs_ioctl_snap_create(file, argp, 0);
				5564	case BTRFS_IOC_SNAP_CREATE_V2:
				5565	return btrfs_ioctl_snap_create_v2(file, argp, 0);
				5566	case BTRFS_IOC_SUBVOL_CREATE:
				5567	return btrfs_ioctl_snap_create(file, argp, 1);
				5568	case BTRFS_IOC_SUBVOL_CREATE_V2:
				5569	return btrfs_ioctl_snap_create_v2(file, argp, 1);
				5570	case BTRFS_IOC_SNAP_DESTROY:
				5571	return btrfs_ioctl_snap_destroy(file, argp);
				5572	case BTRFS_IOC_SUBVOL_GETFLAGS:
				5573	return btrfs_ioctl_subvol_getflags(file, argp);
				5574	case BTRFS_IOC_SUBVOL_SETFLAGS:
				5575	return btrfs_ioctl_subvol_setflags(file, argp);
				5576	case BTRFS_IOC_DEFAULT_SUBVOL:
				5577	return btrfs_ioctl_default_subvol(file, argp);
				5578	case BTRFS_IOC_DEFRAG:
				5579	return btrfs_ioctl_defrag(file, NULL);
				5580	case BTRFS_IOC_DEFRAG_RANGE:
				5581	return btrfs_ioctl_defrag(file, argp);
				5582	case BTRFS_IOC_RESIZE:
				5583	return btrfs_ioctl_resize(file, argp);
				5584	case BTRFS_IOC_ADD_DEV:
				5585	return btrfs_ioctl_add_dev(fs_info, argp);
				5586	case BTRFS_IOC_RM_DEV:
				5587	return btrfs_ioctl_rm_dev(file, argp);
				5588	case BTRFS_IOC_RM_DEV_V2:
				5589	return btrfs_ioctl_rm_dev_v2(file, argp);
				5590	case BTRFS_IOC_FS_INFO:
				5591	return btrfs_ioctl_fs_info(fs_info, argp);
				5592	case BTRFS_IOC_DEV_INFO:
				5593	return btrfs_ioctl_dev_info(fs_info, argp);
				5594	case BTRFS_IOC_BALANCE:
				5595	return btrfs_ioctl_balance(file, NULL);
				5596	case BTRFS_IOC_TREE_SEARCH:
				5597	return btrfs_ioctl_tree_search(file, argp);
				5598	case BTRFS_IOC_TREE_SEARCH_V2:
				5599	return btrfs_ioctl_tree_search_v2(file, argp);
				5600	case BTRFS_IOC_INO_LOOKUP:
				5601	return btrfs_ioctl_ino_lookup(file, argp);
				5602	case BTRFS_IOC_INO_PATHS:
				5603	return btrfs_ioctl_ino_to_path(root, argp);
				5604	case BTRFS_IOC_LOGICAL_INO:
				5605	return btrfs_ioctl_logical_to_ino(fs_info, argp, 1);
				5606	case BTRFS_IOC_LOGICAL_INO_V2:
				5607	return btrfs_ioctl_logical_to_ino(fs_info, argp, 2);
				5608	case BTRFS_IOC_SPACE_INFO:
				5609	return btrfs_ioctl_space_info(fs_info, argp);
				5610	case BTRFS_IOC_SYNC: {
				5611	int ret;
				5612
				5613	ret = btrfs_start_delalloc_roots(fs_info, -1);
				5614	if (ret)
				5615	return ret;
				5616	ret = btrfs_sync_fs(inode->i_sb, 1);
				5617	/*
				5618	* The transaction thread may want to do more work,
				5619	* namely it pokes the cleaner kthread that will start
				5620	* processing uncleaned subvols.
				5621	*/
				5622	wake_up_process(fs_info->transaction_kthread);
				5623	return ret;
				5624	}
				5625	case BTRFS_IOC_START_SYNC:
				5626	return btrfs_ioctl_start_sync(root, argp);
				5627	case BTRFS_IOC_WAIT_SYNC:
				5628	return btrfs_ioctl_wait_sync(fs_info, argp);
				5629	case BTRFS_IOC_SCRUB:
				5630	return btrfs_ioctl_scrub(file, argp);
				5631	case BTRFS_IOC_SCRUB_CANCEL:
				5632	return btrfs_ioctl_scrub_cancel(fs_info);
				5633	case BTRFS_IOC_SCRUB_PROGRESS:
				5634	return btrfs_ioctl_scrub_progress(fs_info, argp);
				5635	case BTRFS_IOC_BALANCE_V2:
				5636	return btrfs_ioctl_balance(file, argp);
				5637	case BTRFS_IOC_BALANCE_CTL:
				5638	return btrfs_ioctl_balance_ctl(fs_info, arg);
				5639	case BTRFS_IOC_BALANCE_PROGRESS:
				5640	return btrfs_ioctl_balance_progress(fs_info, argp);
				5641	case BTRFS_IOC_SET_RECEIVED_SUBVOL:
				5642	return btrfs_ioctl_set_received_subvol(file, argp);
				5643	#ifdef CONFIG_64BIT
				5644	case BTRFS_IOC_SET_RECEIVED_SUBVOL_32:
				5645	return btrfs_ioctl_set_received_subvol_32(file, argp);
				5646	#endif
				5647	case BTRFS_IOC_SEND:
				5648	return _btrfs_ioctl_send(file, argp, false);
				5649	#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
				5650	case BTRFS_IOC_SEND_32:
				5651	return _btrfs_ioctl_send(file, argp, true);
				5652	#endif
				5653	case BTRFS_IOC_GET_DEV_STATS:
				5654	return btrfs_ioctl_get_dev_stats(fs_info, argp);
				5655	case BTRFS_IOC_QUOTA_CTL:
				5656	return btrfs_ioctl_quota_ctl(file, argp);
				5657	case BTRFS_IOC_QGROUP_ASSIGN:
				5658	return btrfs_ioctl_qgroup_assign(file, argp);
				5659	case BTRFS_IOC_QGROUP_CREATE:
				5660	return btrfs_ioctl_qgroup_create(file, argp);
				5661	case BTRFS_IOC_QGROUP_LIMIT:
				5662	return btrfs_ioctl_qgroup_limit(file, argp);
				5663	case BTRFS_IOC_QUOTA_RESCAN:
				5664	return btrfs_ioctl_quota_rescan(file, argp);
				5665	case BTRFS_IOC_QUOTA_RESCAN_STATUS:
				5666	return btrfs_ioctl_quota_rescan_status(file, argp);
				5667	case BTRFS_IOC_QUOTA_RESCAN_WAIT:
				5668	return btrfs_ioctl_quota_rescan_wait(file, argp);
				5669	case BTRFS_IOC_DEV_REPLACE:
				5670	return btrfs_ioctl_dev_replace(fs_info, argp);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5671	case BTRFS_IOC_GET_SUPPORTED_FEATURES:
				5672	return btrfs_ioctl_get_supported_features(argp);
				5673	case BTRFS_IOC_GET_FEATURES:
				5674	return btrfs_ioctl_get_features(file, argp);
				5675	case BTRFS_IOC_SET_FEATURES:
				5676	return btrfs_ioctl_set_features(file, argp);
				5677	case FS_IOC_FSGETXATTR:
				5678	return btrfs_ioctl_fsgetxattr(file, argp);
				5679	case FS_IOC_FSSETXATTR:
				5680	return btrfs_ioctl_fssetxattr(file, argp);
				5681	case BTRFS_IOC_GET_SUBVOL_INFO:
				5682	return btrfs_ioctl_get_subvol_info(file, argp);
				5683	case BTRFS_IOC_GET_SUBVOL_ROOTREF:
				5684	return btrfs_ioctl_get_subvol_rootref(file, argp);
				5685	case BTRFS_IOC_INO_LOOKUP_USER:
				5686	return btrfs_ioctl_ino_lookup_user(file, argp);
				5687	}
				5688
				5689	return -ENOTTY;
				5690	}
				5691
				5692	#ifdef CONFIG_COMPAT
				5693	long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
				5694	{
				5695	/*
				5696	* These all access 32-bit values anyway so no further
				5697	* handling is necessary.
				5698	*/
				5699	switch (cmd) {
				5700	case FS_IOC32_GETFLAGS:
				5701	cmd = FS_IOC_GETFLAGS;
				5702	break;
				5703	case FS_IOC32_SETFLAGS:
				5704	cmd = FS_IOC_SETFLAGS;
				5705	break;
				5706	case FS_IOC32_GETVERSION:
				5707	cmd = FS_IOC_GETVERSION;
				5708	break;
				5709	}
				5710
				5711	return btrfs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
				5712	}
				5713	#endif