Blame - fs/namei.c - hafnium/third_party/linux

blob: 72f354b62dd5da19104293c90b24f37f41050b18 [file] [log] [blame]

Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
				2	/*
				3	* linux/fs/namei.c
				4	*
				5	* Copyright (C) 1991, 1992 Linus Torvalds
				6	*/
				7
				8	/*
				9	* Some corrections by tytso.
				10	*/
				11
				12	/* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
				13	* lookup logic.
				14	*/
				15	/* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture.
				16	*/
				17
				18	#include <linux/init.h>
				19	#include <linux/export.h>
				20	#include <linux/kernel.h>
				21	#include <linux/slab.h>
				22	#include <linux/fs.h>
				23	#include <linux/namei.h>
				24	#include <linux/pagemap.h>
				25	#include <linux/fsnotify.h>
				26	#include <linux/personality.h>
				27	#include <linux/security.h>
				28	#include <linux/ima.h>
				29	#include <linux/syscalls.h>
				30	#include <linux/mount.h>
				31	#include <linux/audit.h>
				32	#include <linux/capability.h>
				33	#include <linux/file.h>
				34	#include <linux/fcntl.h>
				35	#include <linux/device_cgroup.h>
				36	#include <linux/fs_struct.h>
				37	#include <linux/posix_acl.h>
				38	#include <linux/hash.h>
				39	#include <linux/bitops.h>
				40	#include <linux/init_task.h>
				41	#include <linux/uaccess.h>
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	42
				43	#include "internal.h"
				44	#include "mount.h"
				45
				46	/* [Feb-1997 T. Schoebel-Theuer]
				47	* Fundamental changes in the pathname lookup mechanisms (namei)
				48	* were necessary because of omirr. The reason is that omirr needs
				49	* to know the _real_ pathname, not the user-supplied one, in case
				50	* of symlinks (and also when transname replacements occur).
				51	*
				52	* The new code replaces the old recursive symlink resolution with
				53	* an iterative one (in case of non-nested symlink chains). It does
				54	* this with calls to <fs>_follow_link().
				55	* As a side effect, dir_namei(), _namei() and follow_link() are now
				56	* replaced with a single function lookup_dentry() that can handle all
				57	* the special cases of the former code.
				58	*
				59	* With the new dcache, the pathname is stored at each inode, at least as
				60	* long as the refcount of the inode is positive. As a side effect, the
				61	* size of the dcache depends on the inode cache and thus is dynamic.
				62	*
				63	* [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
				64	* resolution to correspond with current state of the code.
				65	*
				66	* Note that the symlink resolution is not completely iterative.
				67	* There is still a significant amount of tail- and mid- recursion in
				68	* the algorithm. Also, note that <fs>_readlink() is not used in
				69	* lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
				70	* may return different results than <fs>_follow_link(). Many virtual
				71	* filesystems (including /proc) exhibit this behavior.
				72	*/
				73
				74	/* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
				75	* New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
				76	* and the name already exists in form of a symlink, try to create the new
				77	* name indicated by the symlink. The old code always complained that the
				78	* name already exists, due to not following the symlink even if its target
				79	* is nonexistent. The new semantics affects also mknod() and link() when
				80	* the name is a symlink pointing to a non-existent name.
				81	*
				82	* I don't know which semantics is the right one, since I have no access
				83	* to standards. But I found by trial that HP-UX 9.0 has the full "new"
				84	* semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
				85	* "old" one. Personally, I think the new semantics is much more logical.
				86	* Note that "ln old new" where "new" is a symlink pointing to a non-existing
				87	* file does succeed in both HP-UX and SunOs, but not in Solaris
				88	* and in the old Linux semantics.
				89	*/
				90
				91	/* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
				92	* semantics. See the comments in "open_namei" and "do_link" below.
				93	*
				94	* [10-Sep-98 Alan Modra] Another symlink change.
				95	*/
				96
				97	/* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
				98	* inside the path - always follow.
				99	* in the last component in creation/removal/renaming - never follow.
				100	* if LOOKUP_FOLLOW passed - follow.
				101	* if the pathname has trailing slashes - follow.
				102	* otherwise - don't follow.
				103	* (applied in that order).
				104	*
				105	* [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT
				106	* restored for 2.4. This is the last surviving part of old 4.2BSD bug.
				107	* During the 2.4 we need to fix the userland stuff depending on it -
				108	* hopefully we will be able to get rid of that wart in 2.5. So far only
				109	* XEmacs seems to be relying on it...
				110	*/
				111	/*
				112	* [Sep 2001 AV] Single-semaphore locking scheme (kudos to David Holland)
				113	* implemented. Let's see if raised priority of ->s_vfs_rename_mutex gives
				114	* any extra contention...
				115	*/
				116
				117	/* In order to reduce some races, while at the same time doing additional
				118	* checking and hopefully speeding things up, we copy filenames to the
				119	* kernel data space before using them..
				120	*
				121	* POSIX.1 2.4: an empty pathname is invalid (ENOENT).
				122	* PATH_MAX includes the nul terminator --RR.
				123	*/
				124
				125	#define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname))
				126
				127	struct filename *
				128	getname_flags(const char __user filename, int flags, int empty)
				129	{
				130	struct filename *result;
				131	char *kname;
				132	int len;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	133
				134	result = audit_reusename(filename);
				135	if (result)
				136	return result;
				137
				138	result = __getname();
				139	if (unlikely(!result))
				140	return ERR_PTR(-ENOMEM);
				141
				142	/*
				143	* First, try to embed the struct filename inside the names_cache
				144	* allocation
				145	*/
				146	kname = (char *)result->iname;
				147	result->name = kname;
				148
				149	len = strncpy_from_user(kname, filename, EMBEDDED_NAME_MAX);
				150	if (unlikely(len < 0)) {
				151	__putname(result);
				152	return ERR_PTR(len);
				153	}
				154
				155	/*
				156	* Uh-oh. We have a name that's approaching PATH_MAX. Allocate a
				157	* separate struct filename so we can dedicate the entire
				158	* names_cache allocation for the pathname, and re-do the copy from
				159	* userland.
				160	*/
				161	if (unlikely(len == EMBEDDED_NAME_MAX)) {
				162	const size_t size = offsetof(struct filename, iname[1]);
				163	kname = (char *)result;
				164
				165	/*
				166	* size is chosen that way we to guarantee that
				167	* result->iname[0] is within the same object and that
				168	* kname can't be equal to result->iname, no matter what.
				169	*/
				170	result = kzalloc(size, GFP_KERNEL);
				171	if (unlikely(!result)) {
				172	__putname(kname);
				173	return ERR_PTR(-ENOMEM);
				174	}
				175	result->name = kname;
				176	len = strncpy_from_user(kname, filename, PATH_MAX);
				177	if (unlikely(len < 0)) {
				178	__putname(kname);
				179	kfree(result);
				180	return ERR_PTR(len);
				181	}
				182	if (unlikely(len == PATH_MAX)) {
				183	__putname(kname);
				184	kfree(result);
				185	return ERR_PTR(-ENAMETOOLONG);
				186	}
				187	}
				188
				189	result->refcnt = 1;
				190	/* The empty path is special. */
				191	if (unlikely(!len)) {
				192	if (empty)
				193	*empty = 1;
				194	if (!(flags & LOOKUP_EMPTY)) {
				195	putname(result);
				196	return ERR_PTR(-ENOENT);
				197	}
				198	}
				199
				200	result->uptr = filename;
				201	result->aname = NULL;
				202	audit_getname(result);
				203	return result;
				204	}
				205
				206	struct filename *
				207	getname(const char __user * filename)
				208	{
				209	return getname_flags(filename, 0, NULL);
				210	}
				211
				212	struct filename *
				213	getname_kernel(const char * filename)
				214	{
				215	struct filename *result;
				216	int len = strlen(filename) + 1;
				217
				218	result = __getname();
				219	if (unlikely(!result))
				220	return ERR_PTR(-ENOMEM);
				221
				222	if (len <= EMBEDDED_NAME_MAX) {
				223	result->name = (char *)result->iname;
				224	} else if (len <= PATH_MAX) {
				225	const size_t size = offsetof(struct filename, iname[1]);
				226	struct filename *tmp;
				227
				228	tmp = kmalloc(size, GFP_KERNEL);
				229	if (unlikely(!tmp)) {
				230	__putname(result);
				231	return ERR_PTR(-ENOMEM);
				232	}
				233	tmp->name = (char *)result;
				234	result = tmp;
				235	} else {
				236	__putname(result);
				237	return ERR_PTR(-ENAMETOOLONG);
				238	}
				239	memcpy((char *)result->name, filename, len);
				240	result->uptr = NULL;
				241	result->aname = NULL;
				242	result->refcnt = 1;
				243	audit_getname(result);
				244
				245	return result;
				246	}
				247
				248	void putname(struct filename *name)
				249	{
				250	BUG_ON(name->refcnt <= 0);
				251
				252	if (--name->refcnt > 0)
				253	return;
				254
				255	if (name->name != name->iname) {
				256	__putname(name->name);
				257	kfree(name);
				258	} else
				259	__putname(name);
				260	}
				261
				262	static int check_acl(struct inode *inode, int mask)
				263	{
				264	#ifdef CONFIG_FS_POSIX_ACL
				265	struct posix_acl *acl;
				266
				267	if (mask & MAY_NOT_BLOCK) {
				268	acl = get_cached_acl_rcu(inode, ACL_TYPE_ACCESS);
				269	if (!acl)
				270	return -EAGAIN;
				271	/* no ->get_acl() calls in RCU mode... */
				272	if (is_uncached_acl(acl))
				273	return -ECHILD;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	274	return posix_acl_permission(inode, acl, mask);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	275	}
				276
				277	acl = get_acl(inode, ACL_TYPE_ACCESS);
				278	if (IS_ERR(acl))
				279	return PTR_ERR(acl);
				280	if (acl) {
				281	int error = posix_acl_permission(inode, acl, mask);
				282	posix_acl_release(acl);
				283	return error;
				284	}
				285	#endif
				286
				287	return -EAGAIN;
				288	}
				289
				290	/*
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	291	* This does the basic UNIX permission checking.
				292	*
				293	* Note that the POSIX ACL check cares about the MAY_NOT_BLOCK bit,
				294	* for RCU walking.
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	295	*/
				296	static int acl_permission_check(struct inode *inode, int mask)
				297	{
				298	unsigned int mode = inode->i_mode;
				299
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	300	/* Are we the owner? If so, ACL's don't matter */
				301	if (likely(uid_eq(current_fsuid(), inode->i_uid))) {
				302	mask &= 7;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	303	mode >>= 6;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	304	return (mask & ~mode) ? -EACCES : 0;
				305	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	306
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	307	/* Do we have ACL's? */
				308	if (IS_POSIXACL(inode) && (mode & S_IRWXG)) {
				309	int error = check_acl(inode, mask);
				310	if (error != -EAGAIN)
				311	return error;
				312	}
				313
				314	/* Only RWX matters for group/other mode bits */
				315	mask &= 7;
				316
				317	/*
				318	* Are the group permissions different from
				319	* the other permissions in the bits we care
				320	* about? Need to check group ownership if so.
				321	*/
				322	if (mask & (mode ^ (mode >> 3))) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	323	if (in_group_p(inode->i_gid))
				324	mode >>= 3;
				325	}
				326
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	327	/* Bits in 'mode' clear that we require? */
				328	return (mask & ~mode) ? -EACCES : 0;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	329	}
				330
				331	/**
				332	* generic_permission - check for access rights on a Posix-like filesystem
				333	* @inode: inode to check access rights for
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	334	* @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC,
				335	* %MAY_NOT_BLOCK ...)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	336	*
				337	* Used to check for read/write/execute permissions on a file.
				338	* We use "fsuid" for this, letting us set arbitrary permissions
				339	* for filesystem access without changing the "normal" uids which
				340	* are used for other things.
				341	*
				342	* generic_permission is rcu-walk aware. It returns -ECHILD in case an rcu-walk
				343	* request cannot be satisfied (eg. requires blocking or too much complexity).
				344	* It would then be called again in ref-walk mode.
				345	*/
				346	int generic_permission(struct inode *inode, int mask)
				347	{
				348	int ret;
				349
				350	/*
				351	* Do the basic permission checks.
				352	*/
				353	ret = acl_permission_check(inode, mask);
				354	if (ret != -EACCES)
				355	return ret;
				356
				357	if (S_ISDIR(inode->i_mode)) {
				358	/* DACs are overridable for directories */
				359	if (!(mask & MAY_WRITE))
				360	if (capable_wrt_inode_uidgid(inode,
				361	CAP_DAC_READ_SEARCH))
				362	return 0;
				363	if (capable_wrt_inode_uidgid(inode, CAP_DAC_OVERRIDE))
				364	return 0;
				365	return -EACCES;
				366	}
				367
				368	/*
				369	* Searching includes executable on directories, else just read.
				370	*/
				371	mask &= MAY_READ \| MAY_WRITE \| MAY_EXEC;
				372	if (mask == MAY_READ)
				373	if (capable_wrt_inode_uidgid(inode, CAP_DAC_READ_SEARCH))
				374	return 0;
				375	/*
				376	* Read/write DACs are always overridable.
				377	* Executable DACs are overridable when there is
				378	* at least one exec bit set.
				379	*/
				380	if (!(mask & MAY_EXEC) \|\| (inode->i_mode & S_IXUGO))
				381	if (capable_wrt_inode_uidgid(inode, CAP_DAC_OVERRIDE))
				382	return 0;
				383
				384	return -EACCES;
				385	}
				386	EXPORT_SYMBOL(generic_permission);
				387
				388	/*
				389	* We _really_ want to just do "generic_permission()" without
				390	* even looking at the inode->i_op values. So we keep a cache
				391	* flag in inode->i_opflags, that says "this has not special
				392	* permission function, use the fast case".
				393	*/
				394	static inline int do_inode_permission(struct inode *inode, int mask)
				395	{
				396	if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) {
				397	if (likely(inode->i_op->permission))
				398	return inode->i_op->permission(inode, mask);
				399
				400	/* This gets set once for the inode lifetime */
				401	spin_lock(&inode->i_lock);
				402	inode->i_opflags \|= IOP_FASTPERM;
				403	spin_unlock(&inode->i_lock);
				404	}
				405	return generic_permission(inode, mask);
				406	}
				407
				408	/**
				409	* sb_permission - Check superblock-level permissions
				410	* @sb: Superblock of inode to check permission on
				411	* @inode: Inode to check permission on
				412	* @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
				413	*
				414	* Separate out file-system wide checks from inode-specific permission checks.
				415	*/
				416	static int sb_permission(struct super_block sb, struct inode inode, int mask)
				417	{
				418	if (unlikely(mask & MAY_WRITE)) {
				419	umode_t mode = inode->i_mode;
				420
				421	/* Nobody gets write access to a read-only fs. */
				422	if (sb_rdonly(sb) && (S_ISREG(mode) \|\| S_ISDIR(mode) \|\| S_ISLNK(mode)))
				423	return -EROFS;
				424	}
				425	return 0;
				426	}
				427
				428	/**
				429	* inode_permission - Check for access rights to a given inode
				430	* @inode: Inode to check permission on
				431	* @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
				432	*
				433	* Check for read/write/execute permissions on an inode. We use fs[ug]id for
				434	* this, letting us set arbitrary permissions for filesystem access without
				435	* changing the "normal" UIDs which are used for other things.
				436	*
				437	* When checking for MAY_APPEND, MAY_WRITE must also be set in @mask.
				438	*/
				439	int inode_permission(struct inode *inode, int mask)
				440	{
				441	int retval;
				442
				443	retval = sb_permission(inode->i_sb, inode, mask);
				444	if (retval)
				445	return retval;
				446
				447	if (unlikely(mask & MAY_WRITE)) {
				448	/*
				449	* Nobody gets write access to an immutable file.
				450	*/
				451	if (IS_IMMUTABLE(inode))
				452	return -EPERM;
				453
				454	/*
				455	* Updating mtime will likely cause i_uid and i_gid to be
				456	* written back improperly if their true value is unknown
				457	* to the vfs.
				458	*/
				459	if (HAS_UNMAPPED_ID(inode))
				460	return -EACCES;
				461	}
				462
				463	retval = do_inode_permission(inode, mask);
				464	if (retval)
				465	return retval;
				466
				467	retval = devcgroup_inode_permission(inode, mask);
				468	if (retval)
				469	return retval;
				470
				471	return security_inode_permission(inode, mask);
				472	}
				473	EXPORT_SYMBOL(inode_permission);
				474
				475	/**
				476	* path_get - get a reference to a path
				477	* @path: path to get the reference to
				478	*
				479	* Given a path increment the reference count to the dentry and the vfsmount.
				480	*/
				481	void path_get(const struct path *path)
				482	{
				483	mntget(path->mnt);
				484	dget(path->dentry);
				485	}
				486	EXPORT_SYMBOL(path_get);
				487
				488	/**
				489	* path_put - put a reference to a path
				490	* @path: path to put the reference to
				491	*
				492	* Given a path decrement the reference count to the dentry and the vfsmount.
				493	*/
				494	void path_put(const struct path *path)
				495	{
				496	dput(path->dentry);
				497	mntput(path->mnt);
				498	}
				499	EXPORT_SYMBOL(path_put);
				500
				501	#define EMBEDDED_LEVELS 2
				502	struct nameidata {
				503	struct path path;
				504	struct qstr last;
				505	struct path root;
				506	struct inode inode; / path.dentry.d_inode */
				507	unsigned int flags;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	508	unsigned seq, m_seq, r_seq;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	509	int last_type;
				510	unsigned depth;
				511	int total_link_count;
				512	struct saved {
				513	struct path link;
				514	struct delayed_call done;
				515	const char *name;
				516	unsigned seq;
				517	} *stack, internal[EMBEDDED_LEVELS];
				518	struct filename *name;
				519	struct nameidata *saved;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	520	unsigned root_seq;
				521	int dfd;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	522	kuid_t dir_uid;
				523	umode_t dir_mode;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	524	} __randomize_layout;
				525
				526	static void set_nameidata(struct nameidata p, int dfd, struct filename name)
				527	{
				528	struct nameidata *old = current->nameidata;
				529	p->stack = p->internal;
				530	p->dfd = dfd;
				531	p->name = name;
				532	p->total_link_count = old ? old->total_link_count : 0;
				533	p->saved = old;
				534	current->nameidata = p;
				535	}
				536
				537	static void restore_nameidata(void)
				538	{
				539	struct nameidata now = current->nameidata, old = now->saved;
				540
				541	current->nameidata = old;
				542	if (old)
				543	old->total_link_count = now->total_link_count;
				544	if (now->stack != now->internal)
				545	kfree(now->stack);
				546	}
				547
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	548	static bool nd_alloc_stack(struct nameidata *nd)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	549	{
				550	struct saved *p;
				551
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	552	p= kmalloc_array(MAXSYMLINKS, sizeof(struct saved),
				553	nd->flags & LOOKUP_RCU ? GFP_ATOMIC : GFP_KERNEL);
				554	if (unlikely(!p))
				555	return false;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	556	memcpy(p, nd->internal, sizeof(nd->internal));
				557	nd->stack = p;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	558	return true;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	559	}
				560
				561	/**
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	562	* path_connected - Verify that a dentry is below mnt.mnt_root
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	563	*
				564	* Rename can sometimes move a file or directory outside of a bind
				565	* mount, path_connected allows those cases to be detected.
				566	*/
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	567	static bool path_connected(struct vfsmount mnt, struct dentry dentry)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	568	{
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	569	struct super_block *sb = mnt->mnt_sb;
				570
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	571	/* Bind mounts can have disconnected paths */
				572	if (mnt->mnt_root == sb->s_root)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	573	return true;
				574
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	575	return is_subdir(dentry, mnt->mnt_root);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	576	}
				577
				578	static void drop_links(struct nameidata *nd)
				579	{
				580	int i = nd->depth;
				581	while (i--) {
				582	struct saved *last = nd->stack + i;
				583	do_delayed_call(&last->done);
				584	clear_delayed_call(&last->done);
				585	}
				586	}
				587
				588	static void terminate_walk(struct nameidata *nd)
				589	{
				590	drop_links(nd);
				591	if (!(nd->flags & LOOKUP_RCU)) {
				592	int i;
				593	path_put(&nd->path);
				594	for (i = 0; i < nd->depth; i++)
				595	path_put(&nd->stack[i].link);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	596	if (nd->flags & LOOKUP_ROOT_GRABBED) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	597	path_put(&nd->root);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	598	nd->flags &= ~LOOKUP_ROOT_GRABBED;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	599	}
				600	} else {
				601	nd->flags &= ~LOOKUP_RCU;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	602	rcu_read_unlock();
				603	}
				604	nd->depth = 0;
				605	}
				606
				607	/* path_put is needed afterwards regardless of success or failure */
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	608	static bool __legitimize_path(struct path *path, unsigned seq, unsigned mseq)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	609	{
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	610	int res = __legitimize_mnt(path->mnt, mseq);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	611	if (unlikely(res)) {
				612	if (res > 0)
				613	path->mnt = NULL;
				614	path->dentry = NULL;
				615	return false;
				616	}
				617	if (unlikely(!lockref_get_not_dead(&path->dentry->d_lockref))) {
				618	path->dentry = NULL;
				619	return false;
				620	}
				621	return !read_seqcount_retry(&path->dentry->d_seq, seq);
				622	}
				623
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	624	static inline bool legitimize_path(struct nameidata *nd,
				625	struct path *path, unsigned seq)
				626	{
				627	return __legitimize_path(path, seq, nd->m_seq);
				628	}
				629
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	630	static bool legitimize_links(struct nameidata *nd)
				631	{
				632	int i;
				633	for (i = 0; i < nd->depth; i++) {
				634	struct saved *last = nd->stack + i;
				635	if (unlikely(!legitimize_path(nd, &last->link, last->seq))) {
				636	drop_links(nd);
				637	nd->depth = i + 1;
				638	return false;
				639	}
				640	}
				641	return true;
				642	}
				643
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	644	static bool legitimize_root(struct nameidata *nd)
				645	{
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	646	/*
				647	* For scoped-lookups (where nd->root has been zeroed), we need to
				648	* restart the whole lookup from scratch -- because set_root() is wrong
				649	* for these lookups (nd->dfd is the root, not the filesystem root).
				650	*/
				651	if (!nd->root.mnt && (nd->flags & LOOKUP_IS_SCOPED))
				652	return false;
				653	/* Nothing to do if nd->root is zero or is managed by the VFS user. */
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	654	if (!nd->root.mnt \|\| (nd->flags & LOOKUP_ROOT))
				655	return true;
				656	nd->flags \|= LOOKUP_ROOT_GRABBED;
				657	return legitimize_path(nd, &nd->root, nd->root_seq);
				658	}
				659
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	660	/*
				661	* Path walking has 2 modes, rcu-walk and ref-walk (see
				662	* Documentation/filesystems/path-lookup.txt). In situations when we can't
				663	* continue in RCU mode, we attempt to drop out of rcu-walk mode and grab
				664	* normal reference counts on dentries and vfsmounts to transition to ref-walk
				665	* mode. Refcounts are grabbed at the last known good point before rcu-walk
				666	* got stuck, so ref-walk may continue from there. If this is not successful
				667	* (eg. a seqcount has changed), then failure is returned and it's up to caller
				668	* to restart the path walk from the beginning in ref-walk mode.
				669	*/
				670
				671	/**
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	672	* try_to_unlazy - try to switch to ref-walk mode.
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	673	* @nd: nameidata pathwalk data
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	674	* Returns: true on success, false on failure
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	675	*
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	676	* try_to_unlazy attempts to legitimize the current nd->path and nd->root
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	677	* for ref-walk mode.
				678	* Must be called from rcu-walk context.
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	679	* Nothing should touch nameidata between try_to_unlazy() failure and
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	680	* terminate_walk().
				681	*/
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	682	static bool try_to_unlazy(struct nameidata *nd)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	683	{
				684	struct dentry *parent = nd->path.dentry;
				685
				686	BUG_ON(!(nd->flags & LOOKUP_RCU));
				687
				688	nd->flags &= ~LOOKUP_RCU;
				689	if (unlikely(!legitimize_links(nd)))
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	690	goto out1;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	691	if (unlikely(!legitimize_path(nd, &nd->path, nd->seq)))
				692	goto out;
				693	if (unlikely(!legitimize_root(nd)))
				694	goto out;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	695	rcu_read_unlock();
				696	BUG_ON(nd->inode != parent->d_inode);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	697	return true;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	698
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	699	out1:
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	700	nd->path.mnt = NULL;
				701	nd->path.dentry = NULL;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	702	out:
				703	rcu_read_unlock();
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	704	return false;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	705	}
				706
				707	/**
				708	* unlazy_child - try to switch to ref-walk mode.
				709	* @nd: nameidata pathwalk data
				710	* @dentry: child of nd->path.dentry
				711	* @seq: seq number to check dentry against
				712	* Returns: 0 on success, -ECHILD on failure
				713	*
				714	* unlazy_child attempts to legitimize the current nd->path, nd->root and dentry
				715	* for ref-walk mode. @dentry must be a path found by a do_lookup call on
				716	* @nd. Must be called from rcu-walk context.
				717	* Nothing should touch nameidata between unlazy_child() failure and
				718	* terminate_walk().
				719	*/
				720	static int unlazy_child(struct nameidata nd, struct dentry dentry, unsigned seq)
				721	{
				722	BUG_ON(!(nd->flags & LOOKUP_RCU));
				723
				724	nd->flags &= ~LOOKUP_RCU;
				725	if (unlikely(!legitimize_links(nd)))
				726	goto out2;
				727	if (unlikely(!legitimize_mnt(nd->path.mnt, nd->m_seq)))
				728	goto out2;
				729	if (unlikely(!lockref_get_not_dead(&nd->path.dentry->d_lockref)))
				730	goto out1;
				731
				732	/*
				733	* We need to move both the parent and the dentry from the RCU domain
				734	* to be properly refcounted. And the sequence number in the dentry
				735	* validates both dentry counters, since we checked the sequence
				736	* number of the parent after we got the child sequence number. So we
				737	* know the parent must still be valid if the child sequence number is
				738	*/
				739	if (unlikely(!lockref_get_not_dead(&dentry->d_lockref)))
				740	goto out;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	741	if (unlikely(read_seqcount_retry(&dentry->d_seq, seq)))
				742	goto out_dput;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	743	/*
				744	* Sequence counts matched. Now make sure that the root is
				745	* still valid and get it if required.
				746	*/
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	747	if (unlikely(!legitimize_root(nd)))
				748	goto out_dput;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	749	rcu_read_unlock();
				750	return 0;
				751
				752	out2:
				753	nd->path.mnt = NULL;
				754	out1:
				755	nd->path.dentry = NULL;
				756	out:
				757	rcu_read_unlock();
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	758	return -ECHILD;
				759	out_dput:
				760	rcu_read_unlock();
				761	dput(dentry);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	762	return -ECHILD;
				763	}
				764
				765	static inline int d_revalidate(struct dentry *dentry, unsigned int flags)
				766	{
				767	if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE))
				768	return dentry->d_op->d_revalidate(dentry, flags);
				769	else
				770	return 1;
				771	}
				772
				773	/**
				774	* complete_walk - successful completion of path walk
				775	* @nd: pointer nameidata
				776	*
				777	* If we had been in RCU mode, drop out of it and legitimize nd->path.
				778	* Revalidate the final result, unless we'd already done that during
				779	* the path walk or the filesystem doesn't ask for it. Return 0 on
				780	* success, -error on failure. In case of failure caller does not
				781	* need to drop nd->path.
				782	*/
				783	static int complete_walk(struct nameidata *nd)
				784	{
				785	struct dentry *dentry = nd->path.dentry;
				786	int status;
				787
				788	if (nd->flags & LOOKUP_RCU) {
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	789	/*
				790	* We don't want to zero nd->root for scoped-lookups or
				791	* externally-managed nd->root.
				792	*/
				793	if (!(nd->flags & (LOOKUP_ROOT \| LOOKUP_IS_SCOPED)))
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	794	nd->root.mnt = NULL;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	795	if (!try_to_unlazy(nd))
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	796	return -ECHILD;
				797	}
				798
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	799	if (unlikely(nd->flags & LOOKUP_IS_SCOPED)) {
				800	/*
				801	* While the guarantee of LOOKUP_IS_SCOPED is (roughly) "don't
				802	* ever step outside the root during lookup" and should already
				803	* be guaranteed by the rest of namei, we want to avoid a namei
				804	* BUG resulting in userspace being given a path that was not
				805	* scoped within the root at some point during the lookup.
				806	*
				807	* So, do a final sanity-check to make sure that in the
				808	* worst-case scenario (a complete bypass of LOOKUP_IS_SCOPED)
				809	* we won't silently return an fd completely outside of the
				810	* requested root to userspace.
				811	*
				812	* Userspace could move the path outside the root after this
				813	* check, but as discussed elsewhere this is not a concern (the
				814	* resolved file was inside the root at some point).
				815	*/
				816	if (!path_is_under(&nd->path, &nd->root))
				817	return -EXDEV;
				818	}
				819
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	820	if (likely(!(nd->flags & LOOKUP_JUMPED)))
				821	return 0;
				822
				823	if (likely(!(dentry->d_flags & DCACHE_OP_WEAK_REVALIDATE)))
				824	return 0;
				825
				826	status = dentry->d_op->d_weak_revalidate(dentry, nd->flags);
				827	if (status > 0)
				828	return 0;
				829
				830	if (!status)
				831	status = -ESTALE;
				832
				833	return status;
				834	}
				835
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	836	static int set_root(struct nameidata *nd)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	837	{
				838	struct fs_struct *fs = current->fs;
				839
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	840	/*
				841	* Jumping to the real root in a scoped-lookup is a BUG in namei, but we
				842	* still have to ensure it doesn't happen because it will cause a breakout
				843	* from the dirfd.
				844	*/
				845	if (WARN_ON(nd->flags & LOOKUP_IS_SCOPED))
				846	return -ENOTRECOVERABLE;
				847
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	848	if (nd->flags & LOOKUP_RCU) {
				849	unsigned seq;
				850
				851	do {
				852	seq = read_seqcount_begin(&fs->seq);
				853	nd->root = fs->root;
				854	nd->root_seq = __read_seqcount_begin(&nd->root.dentry->d_seq);
				855	} while (read_seqcount_retry(&fs->seq, seq));
				856	} else {
				857	get_fs_root(fs, &nd->root);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	858	nd->flags \|= LOOKUP_ROOT_GRABBED;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	859	}
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	860	return 0;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	861	}
				862
				863	static int nd_jump_root(struct nameidata *nd)
				864	{
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	865	if (unlikely(nd->flags & LOOKUP_BENEATH))
				866	return -EXDEV;
				867	if (unlikely(nd->flags & LOOKUP_NO_XDEV)) {
				868	/* Absolute path arguments to path_init() are allowed. */
				869	if (nd->path.mnt != NULL && nd->path.mnt != nd->root.mnt)
				870	return -EXDEV;
				871	}
				872	if (!nd->root.mnt) {
				873	int error = set_root(nd);
				874	if (error)
				875	return error;
				876	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	877	if (nd->flags & LOOKUP_RCU) {
				878	struct dentry *d;
				879	nd->path = nd->root;
				880	d = nd->path.dentry;
				881	nd->inode = d->d_inode;
				882	nd->seq = nd->root_seq;
				883	if (unlikely(read_seqcount_retry(&d->d_seq, nd->seq)))
				884	return -ECHILD;
				885	} else {
				886	path_put(&nd->path);
				887	nd->path = nd->root;
				888	path_get(&nd->path);
				889	nd->inode = nd->path.dentry->d_inode;
				890	}
				891	nd->flags \|= LOOKUP_JUMPED;
				892	return 0;
				893	}
				894
				895	/*
				896	* Helper to directly jump to a known parsed path from ->get_link,
				897	* caller must have taken a reference to path beforehand.
				898	*/
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	899	int nd_jump_link(struct path *path)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	900	{
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	901	int error = -ELOOP;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	902	struct nameidata *nd = current->nameidata;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	903
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	904	if (unlikely(nd->flags & LOOKUP_NO_MAGICLINKS))
				905	goto err;
				906
				907	error = -EXDEV;
				908	if (unlikely(nd->flags & LOOKUP_NO_XDEV)) {
				909	if (nd->path.mnt != path->mnt)
				910	goto err;
				911	}
				912	/* Not currently safe for scoped-lookups. */
				913	if (unlikely(nd->flags & LOOKUP_IS_SCOPED))
				914	goto err;
				915
				916	path_put(&nd->path);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	917	nd->path = *path;
				918	nd->inode = nd->path.dentry->d_inode;
				919	nd->flags \|= LOOKUP_JUMPED;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	920	return 0;
				921
				922	err:
				923	path_put(path);
				924	return error;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	925	}
				926
				927	static inline void put_link(struct nameidata *nd)
				928	{
				929	struct saved *last = nd->stack + --nd->depth;
				930	do_delayed_call(&last->done);
				931	if (!(nd->flags & LOOKUP_RCU))
				932	path_put(&last->link);
				933	}
				934
				935	int sysctl_protected_symlinks __read_mostly = 0;
				936	int sysctl_protected_hardlinks __read_mostly = 0;
				937	int sysctl_protected_fifos __read_mostly;
				938	int sysctl_protected_regular __read_mostly;
				939
				940	/**
				941	* may_follow_link - Check symlink following for unsafe situations
				942	* @nd: nameidata pathwalk data
				943	*
				944	* In the case of the sysctl_protected_symlinks sysctl being enabled,
				945	* CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is
				946	* in a sticky world-writable directory. This is to protect privileged
				947	* processes from failing races against path names that may change out
				948	* from under them by way of other users creating malicious symlinks.
				949	* It will permit symlinks to be followed only when outside a sticky
				950	* world-writable directory, or when the uid of the symlink and follower
				951	* match, or when the directory owner matches the symlink's owner.
				952	*
				953	* Returns 0 if following the symlink is allowed, -ve on error.
				954	*/
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	955	static inline int may_follow_link(struct nameidata nd, const struct inode inode)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	956	{
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	957	if (!sysctl_protected_symlinks)
				958	return 0;
				959
				960	/* Allowed if owner and follower match. */
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	961	if (uid_eq(current_cred()->fsuid, inode->i_uid))
				962	return 0;
				963
				964	/* Allowed if parent directory not sticky and world-writable. */
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	965	if ((nd->dir_mode & (S_ISVTX\|S_IWOTH)) != (S_ISVTX\|S_IWOTH))
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	966	return 0;
				967
				968	/* Allowed if parent directory and link owner match. */
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	969	if (uid_valid(nd->dir_uid) && uid_eq(nd->dir_uid, inode->i_uid))
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	970	return 0;
				971
				972	if (nd->flags & LOOKUP_RCU)
				973	return -ECHILD;
				974
				975	audit_inode(nd->name, nd->stack[0].link.dentry, 0);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	976	audit_log_path_denied(AUDIT_ANOM_LINK, "follow_link");
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	977	return -EACCES;
				978	}
				979
				980	/**
				981	* safe_hardlink_source - Check for safe hardlink conditions
				982	* @inode: the source inode to hardlink from
				983	*
				984	* Return false if at least one of the following conditions:
				985	* - inode is not a regular file
				986	* - inode is setuid
				987	* - inode is setgid and group-exec
				988	* - access failure for read and write
				989	*
				990	* Otherwise returns true.
				991	*/
				992	static bool safe_hardlink_source(struct inode *inode)
				993	{
				994	umode_t mode = inode->i_mode;
				995
				996	/* Special files should not get pinned to the filesystem. */
				997	if (!S_ISREG(mode))
				998	return false;
				999
				1000	/* Setuid files should not get pinned to the filesystem. */
				1001	if (mode & S_ISUID)
				1002	return false;
				1003
				1004	/* Executable setgid files should not get pinned to the filesystem. */
				1005	if ((mode & (S_ISGID \| S_IXGRP)) == (S_ISGID \| S_IXGRP))
				1006	return false;
				1007
				1008	/* Hardlinking to unreadable or unwritable sources is dangerous. */
				1009	if (inode_permission(inode, MAY_READ \| MAY_WRITE))
				1010	return false;
				1011
				1012	return true;
				1013	}
				1014
				1015	/**
				1016	* may_linkat - Check permissions for creating a hardlink
				1017	* @link: the source to hardlink from
				1018	*
				1019	* Block hardlink when all of:
				1020	* - sysctl_protected_hardlinks enabled
				1021	* - fsuid does not match inode
				1022	* - hardlink source is unsafe (see safe_hardlink_source() above)
				1023	* - not CAP_FOWNER in a namespace with the inode owner uid mapped
				1024	*
				1025	* Returns 0 if successful, -ve on error.
				1026	*/
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1027	int may_linkat(struct path *link)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1028	{
				1029	struct inode *inode = link->dentry->d_inode;
				1030
				1031	/* Inode writeback is not safe when the uid or gid are invalid. */
				1032	if (!uid_valid(inode->i_uid) \|\| !gid_valid(inode->i_gid))
				1033	return -EOVERFLOW;
				1034
				1035	if (!sysctl_protected_hardlinks)
				1036	return 0;
				1037
				1038	/* Source inode owner (or CAP_FOWNER) can hardlink all they like,
				1039	* otherwise, it must be a safe source.
				1040	*/
				1041	if (safe_hardlink_source(inode) \|\| inode_owner_or_capable(inode))
				1042	return 0;
				1043
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1044	audit_log_path_denied(AUDIT_ANOM_LINK, "linkat");
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1045	return -EPERM;
				1046	}
				1047
				1048	/**
				1049	* may_create_in_sticky - Check whether an O_CREAT open in a sticky directory
				1050	* should be allowed, or not, on files that already
				1051	* exist.
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	1052	* @dir_mode: mode bits of directory
				1053	* @dir_uid: owner of directory
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1054	* @inode: the inode of the file to open
				1055	*
				1056	* Block an O_CREAT open of a FIFO (or a regular file) when:
				1057	* - sysctl_protected_fifos (or sysctl_protected_regular) is enabled
				1058	* - the file already exists
				1059	* - we are in a sticky directory
				1060	* - we don't own the file
				1061	* - the owner of the directory doesn't own the file
				1062	* - the directory is world writable
				1063	* If the sysctl_protected_fifos (or sysctl_protected_regular) is set to 2
				1064	* the directory doesn't have to be world writable: being group writable will
				1065	* be enough.
				1066	*
				1067	* Returns 0 if the open is allowed, -ve on error.
				1068	*/
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	1069	static int may_create_in_sticky(umode_t dir_mode, kuid_t dir_uid,
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1070	struct inode * const inode)
				1071	{
				1072	if ((!sysctl_protected_fifos && S_ISFIFO(inode->i_mode)) \|\|
				1073	(!sysctl_protected_regular && S_ISREG(inode->i_mode)) \|\|
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	1074	likely(!(dir_mode & S_ISVTX)) \|\|
				1075	uid_eq(inode->i_uid, dir_uid) \|\|
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1076	uid_eq(current_fsuid(), inode->i_uid))
				1077	return 0;
				1078
Olivier Deprez	0e64123	2021-09-23 10:07:05 +0200	[diff] [blame]	1079	if (likely(dir_mode & 0002) \|\|
				1080	(dir_mode & 0020 &&
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1081	((sysctl_protected_fifos >= 2 && S_ISFIFO(inode->i_mode)) \|\|
				1082	(sysctl_protected_regular >= 2 && S_ISREG(inode->i_mode))))) {
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1083	const char *operation = S_ISFIFO(inode->i_mode) ?
				1084	"sticky_create_fifo" :
				1085	"sticky_create_regular";
				1086	audit_log_path_denied(AUDIT_ANOM_CREAT, operation);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1087	return -EACCES;
				1088	}
				1089	return 0;
				1090	}
				1091
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1092	/*
				1093	* follow_up - Find the mountpoint of path's vfsmount
				1094	*
				1095	* Given a path, find the mountpoint of its source file system.
				1096	* Replace @path with the path of the mountpoint in the parent mount.
				1097	* Up is towards /.
				1098	*
				1099	* Return 1 if we went up a level and 0 if we were already at the
				1100	* root.
				1101	*/
				1102	int follow_up(struct path *path)
				1103	{
				1104	struct mount *mnt = real_mount(path->mnt);
				1105	struct mount *parent;
				1106	struct dentry *mountpoint;
				1107
				1108	read_seqlock_excl(&mount_lock);
				1109	parent = mnt->mnt_parent;
				1110	if (parent == mnt) {
				1111	read_sequnlock_excl(&mount_lock);
				1112	return 0;
				1113	}
				1114	mntget(&parent->mnt);
				1115	mountpoint = dget(mnt->mnt_mountpoint);
				1116	read_sequnlock_excl(&mount_lock);
				1117	dput(path->dentry);
				1118	path->dentry = mountpoint;
				1119	mntput(path->mnt);
				1120	path->mnt = &parent->mnt;
				1121	return 1;
				1122	}
				1123	EXPORT_SYMBOL(follow_up);
				1124
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1125	static bool choose_mountpoint_rcu(struct mount m, const struct path root,
				1126	struct path path, unsigned seqp)
				1127	{
				1128	while (mnt_has_parent(m)) {
				1129	struct dentry *mountpoint = m->mnt_mountpoint;
				1130
				1131	m = m->mnt_parent;
				1132	if (unlikely(root->dentry == mountpoint &&
				1133	root->mnt == &m->mnt))
				1134	break;
				1135	if (mountpoint != m->mnt.mnt_root) {
				1136	path->mnt = &m->mnt;
				1137	path->dentry = mountpoint;
				1138	*seqp = read_seqcount_begin(&mountpoint->d_seq);
				1139	return true;
				1140	}
				1141	}
				1142	return false;
				1143	}
				1144
				1145	static bool choose_mountpoint(struct mount m, const struct path root,
				1146	struct path *path)
				1147	{
				1148	bool found;
				1149
				1150	rcu_read_lock();
				1151	while (1) {
				1152	unsigned seq, mseq = read_seqbegin(&mount_lock);
				1153
				1154	found = choose_mountpoint_rcu(m, root, path, &seq);
				1155	if (unlikely(!found)) {
				1156	if (!read_seqretry(&mount_lock, mseq))
				1157	break;
				1158	} else {
				1159	if (likely(__legitimize_path(path, seq, mseq)))
				1160	break;
				1161	rcu_read_unlock();
				1162	path_put(path);
				1163	rcu_read_lock();
				1164	}
				1165	}
				1166	rcu_read_unlock();
				1167	return found;
				1168	}
				1169
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1170	/*
				1171	* Perform an automount
				1172	* - return -EISDIR to tell follow_managed() to stop and return the path we
				1173	* were called with.
				1174	*/
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1175	static int follow_automount(struct path path, int count, unsigned lookup_flags)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1176	{
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1177	struct dentry *dentry = path->dentry;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1178
				1179	/* We don't want to mount if someone's just doing a stat -
				1180	* unless they're stat'ing a directory and appended a '/' to
				1181	* the name.
				1182	*
				1183	* We do, however, want to mount if someone wants to open or
				1184	* create a file of any type under the mountpoint, wants to
				1185	* traverse through the mountpoint or wants to open the
				1186	* mounted directory. Also, autofs may mark negative dentries
				1187	* as being automount points. These will need the attentions
				1188	* of the daemon to instantiate them before they can be used.
				1189	*/
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1190	if (!(lookup_flags & (LOOKUP_PARENT \| LOOKUP_DIRECTORY \|
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1191	LOOKUP_OPEN \| LOOKUP_CREATE \| LOOKUP_AUTOMOUNT)) &&
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1192	dentry->d_inode)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1193	return -EISDIR;
				1194
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1195	if (count && (*count)++ >= MAXSYMLINKS)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1196	return -ELOOP;
				1197
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1198	return finish_automount(dentry->d_op->d_automount(path), path);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1199	}
				1200
				1201	/*
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1202	* mount traversal - out-of-line part. One note on ->d_flags accesses -
				1203	* dentries are pinned but not locked here, so negative dentry can go
				1204	* positive right under us. Use of smp_load_acquire() provides a barrier
				1205	* sufficient for ->d_inode and ->d_flags consistency.
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1206	*/
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1207	static int __traverse_mounts(struct path path, unsigned flags, bool jumped,
				1208	int *count, unsigned lookup_flags)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1209	{
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1210	struct vfsmount *mnt = path->mnt;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1211	bool need_mntput = false;
				1212	int ret = 0;
				1213
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1214	while (flags & DCACHE_MANAGED_DENTRY) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1215	/* Allow the filesystem to manage the transit without i_mutex
				1216	* being held. */
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1217	if (flags & DCACHE_MANAGE_TRANSIT) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1218	ret = path->dentry->d_op->d_manage(path, false);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1219	flags = smp_load_acquire(&path->dentry->d_flags);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1220	if (ret < 0)
				1221	break;
				1222	}
				1223
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1224	if (flags & DCACHE_MOUNTED) { // something's mounted on it..
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1225	struct vfsmount *mounted = lookup_mnt(path);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1226	if (mounted) { // ... in our namespace
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1227	dput(path->dentry);
				1228	if (need_mntput)
				1229	mntput(path->mnt);
				1230	path->mnt = mounted;
				1231	path->dentry = dget(mounted->mnt_root);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1232	// here we know it's positive
				1233	flags = path->dentry->d_flags;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1234	need_mntput = true;
				1235	continue;
				1236	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1237	}
				1238
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1239	if (!(flags & DCACHE_NEED_AUTOMOUNT))
				1240	break;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1241
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1242	// uncovered automount point
				1243	ret = follow_automount(path, count, lookup_flags);
				1244	flags = smp_load_acquire(&path->dentry->d_flags);
				1245	if (ret < 0)
				1246	break;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1247	}
				1248
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1249	if (ret == -EISDIR)
				1250	ret = 0;
				1251	// possible if you race with several mount --move
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1252	if (need_mntput && path->mnt == mnt)
				1253	mntput(path->mnt);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1254	if (!ret && unlikely(d_flags_negative(flags)))
				1255	ret = -ENOENT;
				1256	*jumped = need_mntput;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1257	return ret;
				1258	}
				1259
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1260	static inline int traverse_mounts(struct path path, bool jumped,
				1261	int *count, unsigned lookup_flags)
				1262	{
				1263	unsigned flags = smp_load_acquire(&path->dentry->d_flags);
				1264
				1265	/* fastpath */
				1266	if (likely(!(flags & DCACHE_MANAGED_DENTRY))) {
				1267	*jumped = false;
				1268	if (unlikely(d_flags_negative(flags)))
				1269	return -ENOENT;
				1270	return 0;
				1271	}
				1272	return __traverse_mounts(path, flags, jumped, count, lookup_flags);
				1273	}
				1274
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1275	int follow_down_one(struct path *path)
				1276	{
				1277	struct vfsmount *mounted;
				1278
				1279	mounted = lookup_mnt(path);
				1280	if (mounted) {
				1281	dput(path->dentry);
				1282	mntput(path->mnt);
				1283	path->mnt = mounted;
				1284	path->dentry = dget(mounted->mnt_root);
				1285	return 1;
				1286	}
				1287	return 0;
				1288	}
				1289	EXPORT_SYMBOL(follow_down_one);
				1290
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1291	/*
				1292	* Follow down to the covering mount currently visible to userspace. At each
				1293	* point, the filesystem owning that dentry may be queried as to whether the
				1294	* caller is permitted to proceed or not.
				1295	*/
				1296	int follow_down(struct path *path)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1297	{
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1298	struct vfsmount *mnt = path->mnt;
				1299	bool jumped;
				1300	int ret = traverse_mounts(path, &jumped, NULL, 0);
				1301
				1302	if (path->mnt != mnt)
				1303	mntput(mnt);
				1304	return ret;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1305	}
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1306	EXPORT_SYMBOL(follow_down);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1307
				1308	/*
				1309	* Try to skip to top of mountpoint pile in rcuwalk mode. Fail if
				1310	* we meet a managed dentry that would need blocking.
				1311	*/
				1312	static bool __follow_mount_rcu(struct nameidata nd, struct path path,
				1313	struct inode *inode, unsigned seqp)
				1314	{
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1315	struct dentry *dentry = path->dentry;
				1316	unsigned int flags = dentry->d_flags;
				1317
				1318	if (likely(!(flags & DCACHE_MANAGED_DENTRY)))
				1319	return true;
				1320
				1321	if (unlikely(nd->flags & LOOKUP_NO_XDEV))
				1322	return false;
				1323
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1324	for (;;) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1325	/*
				1326	* Don't forget we might have a non-mountpoint managed dentry
				1327	* that wants to block transit.
				1328	*/
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1329	if (unlikely(flags & DCACHE_MANAGE_TRANSIT)) {
				1330	int res = dentry->d_op->d_manage(path, true);
				1331	if (res)
				1332	return res == -EISDIR;
				1333	flags = dentry->d_flags;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1334	}
				1335
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1336	if (flags & DCACHE_MOUNTED) {
				1337	struct mount *mounted = __lookup_mnt(path->mnt, dentry);
				1338	if (mounted) {
				1339	path->mnt = &mounted->mnt;
				1340	dentry = path->dentry = mounted->mnt.mnt_root;
				1341	nd->flags \|= LOOKUP_JUMPED;
				1342	*seqp = read_seqcount_begin(&dentry->d_seq);
				1343	*inode = dentry->d_inode;
				1344	/*
				1345	* We don't need to re-check ->d_seq after this
				1346	* ->d_inode read - there will be an RCU delay
				1347	* between mount hash removal and ->mnt_root
				1348	* becoming unpinned.
				1349	*/
				1350	flags = dentry->d_flags;
				1351	continue;
				1352	}
				1353	if (read_seqretry(&mount_lock, nd->m_seq))
				1354	return false;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1355	}
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1356	return !(flags & DCACHE_NEED_AUTOMOUNT);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1357	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1358	}
				1359
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1360	static inline int handle_mounts(struct nameidata nd, struct dentry dentry,
				1361	struct path path, struct inode *inode,
				1362	unsigned int *seqp)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1363	{
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1364	bool jumped;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1365	int ret;
				1366
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1367	path->mnt = nd->path.mnt;
				1368	path->dentry = dentry;
				1369	if (nd->flags & LOOKUP_RCU) {
				1370	unsigned int seq = *seqp;
				1371	if (unlikely(!*inode))
				1372	return -ENOENT;
				1373	if (likely(__follow_mount_rcu(nd, path, inode, seqp)))
				1374	return 0;
				1375	if (unlazy_child(nd, dentry, seq))
				1376	return -ECHILD;
				1377	// *path might've been clobbered by __follow_mount_rcu()
				1378	path->mnt = nd->path.mnt;
				1379	path->dentry = dentry;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1380	}
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1381	ret = traverse_mounts(path, &jumped, &nd->total_link_count, nd->flags);
				1382	if (jumped) {
				1383	if (unlikely(nd->flags & LOOKUP_NO_XDEV))
				1384	ret = -EXDEV;
				1385	else
				1386	nd->flags \|= LOOKUP_JUMPED;
				1387	}
				1388	if (unlikely(ret)) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1389	dput(path->dentry);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1390	if (path->mnt != nd->path.mnt)
				1391	mntput(path->mnt);
				1392	} else {
				1393	*inode = d_backing_inode(path->dentry);
				1394	seqp = 0; / out of RCU mode, so the value doesn't matter */
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1395	}
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1396	return ret;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1397	}
				1398
				1399	/*
				1400	* This looks up the name in dcache and possibly revalidates the found dentry.
				1401	* NULL is returned if the dentry does not exist in the cache.
				1402	*/
				1403	static struct dentry lookup_dcache(const struct qstr name,
				1404	struct dentry *dir,
				1405	unsigned int flags)
				1406	{
				1407	struct dentry *dentry = d_lookup(dir, name);
				1408	if (dentry) {
				1409	int error = d_revalidate(dentry, flags);
				1410	if (unlikely(error <= 0)) {
				1411	if (!error)
				1412	d_invalidate(dentry);
				1413	dput(dentry);
				1414	return ERR_PTR(error);
				1415	}
				1416	}
				1417	return dentry;
				1418	}
				1419
				1420	/*
				1421	* Parent directory has inode locked exclusive. This is one
				1422	* and only case when ->lookup() gets called on non in-lookup
				1423	* dentries - as the matter of fact, this only gets called
				1424	* when directory is guaranteed to have no in-lookup children
				1425	* at all.
				1426	*/
				1427	static struct dentry __lookup_hash(const struct qstr name,
				1428	struct dentry *base, unsigned int flags)
				1429	{
				1430	struct dentry *dentry = lookup_dcache(name, base, flags);
				1431	struct dentry *old;
				1432	struct inode *dir = base->d_inode;
				1433
				1434	if (dentry)
				1435	return dentry;
				1436
				1437	/* Don't create child dentry for a dead directory. */
				1438	if (unlikely(IS_DEADDIR(dir)))
				1439	return ERR_PTR(-ENOENT);
				1440
				1441	dentry = d_alloc(base, name);
				1442	if (unlikely(!dentry))
				1443	return ERR_PTR(-ENOMEM);
				1444
				1445	old = dir->i_op->lookup(dir, dentry, flags);
				1446	if (unlikely(old)) {
				1447	dput(dentry);
				1448	dentry = old;
				1449	}
				1450	return dentry;
				1451	}
				1452
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1453	static struct dentry lookup_fast(struct nameidata nd,
				1454	struct inode **inode,
				1455	unsigned *seqp)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1456	{
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1457	struct dentry dentry, parent = nd->path.dentry;
				1458	int status = 1;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1459
				1460	/*
				1461	* Rename seqlock is not required here because in the off chance
				1462	* of a false negative due to a concurrent rename, the caller is
				1463	* going to fall back to non-racy lookup.
				1464	*/
				1465	if (nd->flags & LOOKUP_RCU) {
				1466	unsigned seq;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1467	dentry = __d_lookup_rcu(parent, &nd->last, &seq);
				1468	if (unlikely(!dentry)) {
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1469	if (!try_to_unlazy(nd))
				1470	return ERR_PTR(-ECHILD);
				1471	return NULL;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1472	}
				1473
				1474	/*
				1475	* This sequence count validates that the inode matches
				1476	* the dentry name information from lookup.
				1477	*/
				1478	*inode = d_backing_inode(dentry);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1479	if (unlikely(read_seqcount_retry(&dentry->d_seq, seq)))
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1480	return ERR_PTR(-ECHILD);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1481
				1482	/*
				1483	* This sequence count validates that the parent had no
				1484	* changes while we did the lookup of the dentry above.
				1485	*
				1486	* The memory barrier in read_seqcount_begin of child is
				1487	* enough, we can use __read_seqcount_retry here.
				1488	*/
				1489	if (unlikely(__read_seqcount_retry(&parent->d_seq, nd->seq)))
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1490	return ERR_PTR(-ECHILD);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1491
				1492	*seqp = seq;
				1493	status = d_revalidate(dentry, nd->flags);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1494	if (likely(status > 0))
				1495	return dentry;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1496	if (unlazy_child(nd, dentry, seq))
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1497	return ERR_PTR(-ECHILD);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1498	if (unlikely(status == -ECHILD))
				1499	/* we'd been told to redo it in non-rcu mode */
				1500	status = d_revalidate(dentry, nd->flags);
				1501	} else {
				1502	dentry = __d_lookup(parent, &nd->last);
				1503	if (unlikely(!dentry))
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1504	return NULL;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1505	status = d_revalidate(dentry, nd->flags);
				1506	}
				1507	if (unlikely(status <= 0)) {
				1508	if (!status)
				1509	d_invalidate(dentry);
				1510	dput(dentry);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1511	return ERR_PTR(status);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1512	}
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1513	return dentry;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1514	}
				1515
				1516	/* Fast lookup failed, do it the slow way */
				1517	static struct dentry __lookup_slow(const struct qstr name,
				1518	struct dentry *dir,
				1519	unsigned int flags)
				1520	{
				1521	struct dentry dentry, old;
				1522	struct inode *inode = dir->d_inode;
				1523	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
				1524
				1525	/* Don't go there if it's already dead */
				1526	if (unlikely(IS_DEADDIR(inode)))
				1527	return ERR_PTR(-ENOENT);
				1528	again:
				1529	dentry = d_alloc_parallel(dir, name, &wq);
				1530	if (IS_ERR(dentry))
				1531	return dentry;
				1532	if (unlikely(!d_in_lookup(dentry))) {
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1533	int error = d_revalidate(dentry, flags);
				1534	if (unlikely(error <= 0)) {
				1535	if (!error) {
				1536	d_invalidate(dentry);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1537	dput(dentry);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1538	goto again;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1539	}
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1540	dput(dentry);
				1541	dentry = ERR_PTR(error);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1542	}
				1543	} else {
				1544	old = inode->i_op->lookup(inode, dentry, flags);
				1545	d_lookup_done(dentry);
				1546	if (unlikely(old)) {
				1547	dput(dentry);
				1548	dentry = old;
				1549	}
				1550	}
				1551	return dentry;
				1552	}
				1553
				1554	static struct dentry lookup_slow(const struct qstr name,
				1555	struct dentry *dir,
				1556	unsigned int flags)
				1557	{
				1558	struct inode *inode = dir->d_inode;
				1559	struct dentry *res;
				1560	inode_lock_shared(inode);
				1561	res = __lookup_slow(name, dir, flags);
				1562	inode_unlock_shared(inode);
				1563	return res;
				1564	}
				1565
				1566	static inline int may_lookup(struct nameidata *nd)
				1567	{
				1568	if (nd->flags & LOOKUP_RCU) {
				1569	int err = inode_permission(nd->inode, MAY_EXEC\|MAY_NOT_BLOCK);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1570	if (err != -ECHILD \|\| !try_to_unlazy(nd))
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1571	return err;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1572	}
				1573	return inode_permission(nd->inode, MAY_EXEC);
				1574	}
				1575
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1576	static int reserve_stack(struct nameidata nd, struct path link, unsigned seq)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1577	{
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1578	if (unlikely(nd->total_link_count++ >= MAXSYMLINKS))
				1579	return -ELOOP;
				1580
				1581	if (likely(nd->depth != EMBEDDED_LEVELS))
				1582	return 0;
				1583	if (likely(nd->stack != nd->internal))
				1584	return 0;
				1585	if (likely(nd_alloc_stack(nd)))
				1586	return 0;
				1587
				1588	if (nd->flags & LOOKUP_RCU) {
				1589	// we need to grab link before we do unlazy. And we can't skip
				1590	// unlazy even if we fail to grab the link - cleanup needs it
				1591	bool grabbed_link = legitimize_path(nd, link, seq);
				1592
				1593	if (!try_to_unlazy(nd) != 0 \|\| !grabbed_link)
				1594	return -ECHILD;
				1595
				1596	if (nd_alloc_stack(nd))
				1597	return 0;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1598	}
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1599	return -ENOMEM;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1600	}
				1601
/*
 * Flags passed to walk_component() / step_into() / pick_link():
 *
 *  WALK_TRAILING - used by lookup_last(); a symlink is then followed only
 *                  when LOOKUP_FOLLOW is set in nd->flags, and pick_link()
 *                  additionally calls may_follow_link().
 *  WALK_MORE     - used by link_path_walk() for components that are not the
 *                  last one; walk_component() then does not put_link().
 *  WALK_NOFOLLOW - step_into() does not follow a symlink.
 */
enum {WALK_TRAILING = 1, WALK_MORE = 2, WALK_NOFOLLOW = 4};
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1603
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1604	static const char pick_link(struct nameidata nd, struct path *link,
				1605	struct inode *inode, unsigned seq, int flags)
				1606	{
				1607	struct saved *last;
				1608	const char *res;
				1609	int error = reserve_stack(nd, link, seq);
				1610
				1611	if (unlikely(error)) {
				1612	if (!(nd->flags & LOOKUP_RCU))
				1613	path_put(link);
				1614	return ERR_PTR(error);
				1615	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1616	last = nd->stack + nd->depth++;
				1617	last->link = *link;
				1618	clear_delayed_call(&last->done);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1619	last->seq = seq;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1620
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1621	if (flags & WALK_TRAILING) {
				1622	error = may_follow_link(nd, inode);
				1623	if (unlikely(error))
				1624	return ERR_PTR(error);
				1625	}
				1626
				1627	if (unlikely(nd->flags & LOOKUP_NO_SYMLINKS) \|\|
				1628	unlikely(link->mnt->mnt_flags & MNT_NOSYMFOLLOW))
				1629	return ERR_PTR(-ELOOP);
				1630
				1631	if (!(nd->flags & LOOKUP_RCU)) {
				1632	touch_atime(&last->link);
				1633	cond_resched();
				1634	} else if (atime_needs_update(&last->link, inode)) {
				1635	if (!try_to_unlazy(nd))
				1636	return ERR_PTR(-ECHILD);
				1637	touch_atime(&last->link);
				1638	}
				1639
				1640	error = security_inode_follow_link(link->dentry, inode,
				1641	nd->flags & LOOKUP_RCU);
				1642	if (unlikely(error))
				1643	return ERR_PTR(error);
				1644
				1645	res = READ_ONCE(inode->i_link);
				1646	if (!res) {
				1647	const char * (get)(struct dentry , struct inode *,
				1648	struct delayed_call *);
				1649	get = inode->i_op->get_link;
				1650	if (nd->flags & LOOKUP_RCU) {
				1651	res = get(NULL, inode, &last->done);
				1652	if (res == ERR_PTR(-ECHILD) && try_to_unlazy(nd))
				1653	res = get(link->dentry, inode, &last->done);
				1654	} else {
				1655	res = get(link->dentry, inode, &last->done);
				1656	}
				1657	if (!res)
				1658	goto all_done;
				1659	if (IS_ERR(res))
				1660	return res;
				1661	}
				1662	if (*res == '/') {
				1663	error = nd_jump_root(nd);
				1664	if (unlikely(error))
				1665	return ERR_PTR(error);
				1666	while (unlikely(*++res == '/'))
				1667	;
				1668	}
				1669	if (*res)
				1670	return res;
				1671	all_done: // pure jump
				1672	put_link(nd);
				1673	return NULL;
				1674	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1675
				1676	/*
				1677	* Do we need to follow links? We _really_ want to be able
				1678	* to do this check without having to look at inode->i_op,
				1679	* so we keep a cache of "no, this doesn't need follow_link"
				1680	* for the common case.
				1681	*/
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1682	static const char step_into(struct nameidata nd, int flags,
				1683	struct dentry dentry, struct inode inode, unsigned seq)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1684	{
				1685	struct path path;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1686	int err = handle_mounts(nd, dentry, &path, &inode, &seq);
				1687
				1688	if (err < 0)
				1689	return ERR_PTR(err);
				1690	if (likely(!d_is_symlink(path.dentry)) \|\|
				1691	((flags & WALK_TRAILING) && !(nd->flags & LOOKUP_FOLLOW)) \|\|
				1692	(flags & WALK_NOFOLLOW)) {
				1693	/* not a symlink or should not follow */
				1694	if (!(nd->flags & LOOKUP_RCU)) {
				1695	dput(nd->path.dentry);
				1696	if (nd->path.mnt != path.mnt)
				1697	mntput(nd->path.mnt);
				1698	}
				1699	nd->path = path;
				1700	nd->inode = inode;
				1701	nd->seq = seq;
				1702	return NULL;
				1703	}
				1704	if (nd->flags & LOOKUP_RCU) {
				1705	/* make sure that d_is_symlink above matches inode */
				1706	if (read_seqcount_retry(&path.dentry->d_seq, seq))
				1707	return ERR_PTR(-ECHILD);
				1708	} else {
				1709	if (path.mnt == nd->path.mnt)
				1710	mntget(path.mnt);
				1711	}
				1712	return pick_link(nd, &path, inode, seq, flags);
				1713	}
				1714
				1715	static struct dentry follow_dotdot_rcu(struct nameidata nd,
				1716	struct inode **inodep,
				1717	unsigned *seqp)
				1718	{
				1719	struct dentry parent, old;
				1720
				1721	if (path_equal(&nd->path, &nd->root))
				1722	goto in_root;
				1723	if (unlikely(nd->path.dentry == nd->path.mnt->mnt_root)) {
				1724	struct path path;
				1725	unsigned seq;
				1726	if (!choose_mountpoint_rcu(real_mount(nd->path.mnt),
				1727	&nd->root, &path, &seq))
				1728	goto in_root;
				1729	if (unlikely(nd->flags & LOOKUP_NO_XDEV))
				1730	return ERR_PTR(-ECHILD);
				1731	nd->path = path;
				1732	nd->inode = path.dentry->d_inode;
				1733	nd->seq = seq;
				1734	if (unlikely(read_seqretry(&mount_lock, nd->m_seq)))
				1735	return ERR_PTR(-ECHILD);
				1736	/* we know that mountpoint was pinned */
				1737	}
				1738	old = nd->path.dentry;
				1739	parent = old->d_parent;
				1740	*inodep = parent->d_inode;
				1741	*seqp = read_seqcount_begin(&parent->d_seq);
				1742	if (unlikely(read_seqcount_retry(&old->d_seq, nd->seq)))
				1743	return ERR_PTR(-ECHILD);
				1744	if (unlikely(!path_connected(nd->path.mnt, parent)))
				1745	return ERR_PTR(-ECHILD);
				1746	return parent;
				1747	in_root:
				1748	if (unlikely(read_seqretry(&mount_lock, nd->m_seq)))
				1749	return ERR_PTR(-ECHILD);
				1750	if (unlikely(nd->flags & LOOKUP_BENEATH))
				1751	return ERR_PTR(-ECHILD);
				1752	return NULL;
				1753	}
				1754
				1755	static struct dentry follow_dotdot(struct nameidata nd,
				1756	struct inode **inodep,
				1757	unsigned *seqp)
				1758	{
				1759	struct dentry *parent;
				1760
				1761	if (path_equal(&nd->path, &nd->root))
				1762	goto in_root;
				1763	if (unlikely(nd->path.dentry == nd->path.mnt->mnt_root)) {
				1764	struct path path;
				1765
				1766	if (!choose_mountpoint(real_mount(nd->path.mnt),
				1767	&nd->root, &path))
				1768	goto in_root;
				1769	path_put(&nd->path);
				1770	nd->path = path;
				1771	nd->inode = path.dentry->d_inode;
				1772	if (unlikely(nd->flags & LOOKUP_NO_XDEV))
				1773	return ERR_PTR(-EXDEV);
				1774	}
				1775	/* rare case of legitimate dget_parent()... */
				1776	parent = dget_parent(nd->path.dentry);
				1777	if (unlikely(!path_connected(nd->path.mnt, parent))) {
				1778	dput(parent);
				1779	return ERR_PTR(-ENOENT);
				1780	}
				1781	*seqp = 0;
				1782	*inodep = parent->d_inode;
				1783	return parent;
				1784
				1785	in_root:
				1786	if (unlikely(nd->flags & LOOKUP_BENEATH))
				1787	return ERR_PTR(-EXDEV);
				1788	dget(nd->path.dentry);
				1789	return NULL;
				1790	}
				1791
				1792	static const char handle_dots(struct nameidata nd, int type)
				1793	{
				1794	if (type == LAST_DOTDOT) {
				1795	const char *error = NULL;
				1796	struct dentry *parent;
				1797	struct inode *inode;
				1798	unsigned seq;
				1799
				1800	if (!nd->root.mnt) {
				1801	error = ERR_PTR(set_root(nd));
				1802	if (error)
				1803	return error;
				1804	}
				1805	if (nd->flags & LOOKUP_RCU)
				1806	parent = follow_dotdot_rcu(nd, &inode, &seq);
				1807	else
				1808	parent = follow_dotdot(nd, &inode, &seq);
				1809	if (IS_ERR(parent))
				1810	return ERR_CAST(parent);
				1811	if (unlikely(!parent))
				1812	error = step_into(nd, WALK_NOFOLLOW,
				1813	nd->path.dentry, nd->inode, nd->seq);
				1814	else
				1815	error = step_into(nd, WALK_NOFOLLOW,
				1816	parent, inode, seq);
				1817	if (unlikely(error))
				1818	return error;
				1819
				1820	if (unlikely(nd->flags & LOOKUP_IS_SCOPED)) {
				1821	/*
				1822	* If there was a racing rename or mount along our
				1823	* path, then we can't be sure that ".." hasn't jumped
				1824	* above nd->root (and so userspace should retry or use
				1825	* some fallback).
				1826	*/
				1827	smp_rmb();
				1828	if (unlikely(__read_seqcount_retry(&mount_lock.seqcount, nd->m_seq)))
				1829	return ERR_PTR(-EAGAIN);
				1830	if (unlikely(__read_seqcount_retry(&rename_lock.seqcount, nd->r_seq)))
				1831	return ERR_PTR(-EAGAIN);
				1832	}
				1833	}
				1834	return NULL;
				1835	}
				1836
				1837	static const char walk_component(struct nameidata nd, int flags)
				1838	{
				1839	struct dentry *dentry;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1840	struct inode *inode;
				1841	unsigned seq;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1842	/*
				1843	* "." and ".." are special - ".." especially so because it has
				1844	* to be able to know about the current root directory and
				1845	* parent relationships.
				1846	*/
				1847	if (unlikely(nd->last_type != LAST_NORM)) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1848	if (!(flags & WALK_MORE) && nd->depth)
				1849	put_link(nd);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1850	return handle_dots(nd, nd->last_type);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1851	}
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1852	dentry = lookup_fast(nd, &inode, &seq);
				1853	if (IS_ERR(dentry))
				1854	return ERR_CAST(dentry);
				1855	if (unlikely(!dentry)) {
				1856	dentry = lookup_slow(&nd->last, nd->path.dentry, nd->flags);
				1857	if (IS_ERR(dentry))
				1858	return ERR_CAST(dentry);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1859	}
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	1860	if (!(flags & WALK_MORE) && nd->depth)
				1861	put_link(nd);
				1862	return step_into(nd, flags, dentry, inode, seq);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1863	}
				1864
/*
 * We can do the critical dentry name comparison and hashing
 * operations one word at a time, but we are limited to:
 *
 * - Architectures with fast unaligned word accesses. We could
 *   do a "get_unaligned()" if this helps and is sufficiently
 *   fast.
 *
 * - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we
 *   do not trap on the (extremely unlikely) case of a page
 *   crossing operation).
 *
 * - Furthermore, we need an efficient 64-bit compile for the
 *   64-bit case in order to generate the "number of bytes in
 *   the final mask". Again, that could be replaced with an
 *   efficient population count instruction or similar.
 */
				1882	#ifdef CONFIG_DCACHE_WORD_ACCESS
				1883
				1884	#include <asm/word-at-a-time.h>
				1885
				1886	#ifdef HASH_MIX
				1887
				1888	/* Architecture provides HASH_MIX and fold_hash() in <asm/hash.h> */
				1889
				1890	#elif defined(CONFIG_64BIT)
/*
 * Register pressure in the mixing function is an issue, particularly
 * on 32-bit x86, but almost any function requires one state value and
 * one temporary.  Instead, use a function designed for two state values
 * and no temporaries.
 *
 * This function cannot create a collision in only two iterations, so
 * we have two iterations to achieve avalanche.  In those two iterations,
 * we have six layers of mixing, which is enough to spread one bit's
 * influence out to 2^6 = 64 state bits.
 *
 * Rotate constants are scored by considering either 64 one-bit input
 * deltas or 64*63/2 = 2016 two-bit input deltas, and finding the
 * probability of that delta causing a change to each of the 128 output
 * bits, using a sample of random initial states.
 *
 * The Shannon entropy of the computed probabilities is then summed
 * to produce a score.  Ideally, any input change has a 50% chance of
 * toggling any given output bit.
 *
 * Mixing scores (in bits) for (12,45):
 * Input delta: 1-bit      2-bit
 * 1 round:     713.3    42542.6
 * 2 rounds:   2753.7   140389.8
 * 3 rounds:   5954.1   233458.2
 * 4 rounds:   7862.6   256672.2
 * Perfect:    8192     258048
 *            (64*128) (64*63/2 * 128)
 *
 * HASH_MIX(x, y, a) folds the word @a into the two state words @x and @y,
 * which are modified in place.
 */
#define HASH_MIX(x, y, a)	\
	(	x ^= (a),	\
	y ^= x,	x = rol64(x,12),\
	x += y,	y = rol64(y,45),\
	y *= 9			)
				1925
				1926	/*
				1927	* Fold two longs into one 32-bit hash value. This must be fast, but
				1928	* latency isn't quite as critical, as there is a fair bit of additional
				1929	* work done before the hash value is used.
				1930	*/
				1931	static inline unsigned int fold_hash(unsigned long x, unsigned long y)
				1932	{
				1933	y ^= x * GOLDEN_RATIO_64;
				1934	y *= GOLDEN_RATIO_64;
				1935	return y >> 32;
				1936	}
				1937
				1938	#else /* 32-bit case */
				1939
/*
 * Mixing scores (in bits) for (7,20):
 * Input delta: 1-bit      2-bit
 * 1 round:     330.3     9201.6
 * 2 rounds:   1246.4    25475.4
 * 3 rounds:   1907.1    31295.1
 * 4 rounds:   2042.3    31718.6
 * Perfect:    2048      31744
 *            (32*64)   (32*31/2 * 64)
 *
 * HASH_MIX(x, y, a) folds the word @a into the two state words @x and @y,
 * which are modified in place.
 */
#define HASH_MIX(x, y, a)	\
	(	x ^= (a),	\
	y ^= x,	x = rol32(x, 7),\
	x += y,	y = rol32(y,20),\
	y *= 9			)
				1955
				1956	static inline unsigned int fold_hash(unsigned long x, unsigned long y)
				1957	{
				1958	/* Use arch-optimized multiply if one exists */
				1959	return __hash_32(y ^ __hash_32(x));
				1960	}
				1961
				1962	#endif
				1963
				1964	/*
				1965	* Return the hash of a string of known length. This is carfully
				1966	* designed to match hash_name(), which is the more critical function.
				1967	* In particular, we must end by hashing a final word containing 0..7
				1968	* payload bytes, to match the way that hash_name() iterates until it
				1969	* finds the delimiter after the name.
				1970	*/
				1971	unsigned int full_name_hash(const void salt, const char name, unsigned int len)
				1972	{
				1973	unsigned long a, x = 0, y = (unsigned long)salt;
				1974
				1975	for (;;) {
				1976	if (!len)
				1977	goto done;
				1978	a = load_unaligned_zeropad(name);
				1979	if (len < sizeof(unsigned long))
				1980	break;
				1981	HASH_MIX(x, y, a);
				1982	name += sizeof(unsigned long);
				1983	len -= sizeof(unsigned long);
				1984	}
				1985	x ^= a & bytemask_from_count(len);
				1986	done:
				1987	return fold_hash(x, y);
				1988	}
				1989	EXPORT_SYMBOL(full_name_hash);
				1990
				1991	/* Return the "hash_len" (hash and length) of a null-terminated string */
				1992	u64 hashlen_string(const void salt, const char name)
				1993	{
				1994	unsigned long a = 0, x = 0, y = (unsigned long)salt;
				1995	unsigned long adata, mask, len;
				1996	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
				1997
				1998	len = 0;
				1999	goto inside;
				2000
				2001	do {
				2002	HASH_MIX(x, y, a);
				2003	len += sizeof(unsigned long);
				2004	inside:
				2005	a = load_unaligned_zeropad(name+len);
				2006	} while (!has_zero(a, &adata, &constants));
				2007
				2008	adata = prep_zero_mask(a, adata, &constants);
				2009	mask = create_zero_mask(adata);
				2010	x ^= a & zero_bytemask(mask);
				2011
				2012	return hashlen_create(fold_hash(x, y), len + find_zero(mask));
				2013	}
				2014	EXPORT_SYMBOL(hashlen_string);
				2015
				2016	/*
				2017	* Calculate the length and hash of the path component, and
				2018	* return the "hash_len" as the result.
				2019	*/
				2020	static inline u64 hash_name(const void salt, const char name)
				2021	{
				2022	unsigned long a = 0, b, x = 0, y = (unsigned long)salt;
				2023	unsigned long adata, bdata, mask, len;
				2024	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
				2025
				2026	len = 0;
				2027	goto inside;
				2028
				2029	do {
				2030	HASH_MIX(x, y, a);
				2031	len += sizeof(unsigned long);
				2032	inside:
				2033	a = load_unaligned_zeropad(name+len);
				2034	b = a ^ REPEAT_BYTE('/');
				2035	} while (!(has_zero(a, &adata, &constants) \| has_zero(b, &bdata, &constants)));
				2036
				2037	adata = prep_zero_mask(a, adata, &constants);
				2038	bdata = prep_zero_mask(b, bdata, &constants);
				2039	mask = create_zero_mask(adata \| bdata);
				2040	x ^= a & zero_bytemask(mask);
				2041
				2042	return hashlen_create(fold_hash(x, y), len + find_zero(mask));
				2043	}
				2044
				2045	#else /* !CONFIG_DCACHE_WORD_ACCESS: Slow, byte-at-a-time version */
				2046
				2047	/* Return the hash of a string of known length */
				2048	unsigned int full_name_hash(const void salt, const char name, unsigned int len)
				2049	{
				2050	unsigned long hash = init_name_hash(salt);
				2051	while (len--)
				2052	hash = partial_name_hash((unsigned char)*name++, hash);
				2053	return end_name_hash(hash);
				2054	}
				2055	EXPORT_SYMBOL(full_name_hash);
				2056
				2057	/* Return the "hash_len" (hash and length) of a null-terminated string */
				2058	u64 hashlen_string(const void salt, const char name)
				2059	{
				2060	unsigned long hash = init_name_hash(salt);
				2061	unsigned long len = 0, c;
				2062
				2063	c = (unsigned char)*name;
				2064	while (c) {
				2065	len++;
				2066	hash = partial_name_hash(c, hash);
				2067	c = (unsigned char)name[len];
				2068	}
				2069	return hashlen_create(end_name_hash(hash), len);
				2070	}
				2071	EXPORT_SYMBOL(hashlen_string);
				2072
				2073	/*
				2074	* We know there's a real path component here of at least
				2075	* one character.
				2076	*/
				2077	static inline u64 hash_name(const void salt, const char name)
				2078	{
				2079	unsigned long hash = init_name_hash(salt);
				2080	unsigned long len = 0, c;
				2081
				2082	c = (unsigned char)*name;
				2083	do {
				2084	len++;
				2085	hash = partial_name_hash(c, hash);
				2086	c = (unsigned char)name[len];
				2087	} while (c && c != '/');
				2088	return hashlen_create(end_name_hash(hash), len);
				2089	}
				2090
				2091	#endif
				2092
				2093	/*
				2094	* Name resolution.
				2095	* This is the basic name resolution function, turning a pathname into
				2096	* the final dentry. We expect 'base' to be positive and a directory.
				2097	*
				2098	* Returns 0 and nd will have valid dentry and mnt on success.
				2099	* Returns error and drops reference to input namei data on failure.
				2100	*/
				2101	static int link_path_walk(const char name, struct nameidata nd)
				2102	{
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2103	int depth = 0; // depth <= nd->depth
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2104	int err;
				2105
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2106	nd->last_type = LAST_ROOT;
				2107	nd->flags \|= LOOKUP_PARENT;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2108	if (IS_ERR(name))
				2109	return PTR_ERR(name);
				2110	while (*name=='/')
				2111	name++;
				2112	if (!*name)
				2113	return 0;
				2114
				2115	/* At this point we know we have a real path component. */
				2116	for(;;) {
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2117	const char *link;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2118	u64 hash_len;
				2119	int type;
				2120
				2121	err = may_lookup(nd);
				2122	if (err)
				2123	return err;
				2124
				2125	hash_len = hash_name(nd->path.dentry, name);
				2126
				2127	type = LAST_NORM;
				2128	if (name[0] == '.') switch (hashlen_len(hash_len)) {
				2129	case 2:
				2130	if (name[1] == '.') {
				2131	type = LAST_DOTDOT;
				2132	nd->flags \|= LOOKUP_JUMPED;
				2133	}
				2134	break;
				2135	case 1:
				2136	type = LAST_DOT;
				2137	}
				2138	if (likely(type == LAST_NORM)) {
				2139	struct dentry *parent = nd->path.dentry;
				2140	nd->flags &= ~LOOKUP_JUMPED;
				2141	if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
				2142	struct qstr this = { { .hash_len = hash_len }, .name = name };
				2143	err = parent->d_op->d_hash(parent, &this);
				2144	if (err < 0)
				2145	return err;
				2146	hash_len = this.hash_len;
				2147	name = this.name;
				2148	}
				2149	}
				2150
				2151	nd->last.hash_len = hash_len;
				2152	nd->last.name = name;
				2153	nd->last_type = type;
				2154
				2155	name += hashlen_len(hash_len);
				2156	if (!*name)
				2157	goto OK;
				2158	/*
				2159	* If it wasn't NUL, we know it was '/'. Skip that
				2160	* slash, and continue until no more slashes.
				2161	*/
				2162	do {
				2163	name++;
				2164	} while (unlikely(*name == '/'));
				2165	if (unlikely(!*name)) {
				2166	OK:
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2167	/* pathname or trailing symlink, done */
				2168	if (!depth) {
				2169	nd->dir_uid = nd->inode->i_uid;
				2170	nd->dir_mode = nd->inode->i_mode;
				2171	nd->flags &= ~LOOKUP_PARENT;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2172	return 0;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2173	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2174	/* last component of nested symlink */
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2175	name = nd->stack[--depth].name;
				2176	link = walk_component(nd, 0);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2177	} else {
				2178	/* not the last component */
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2179	link = walk_component(nd, WALK_MORE);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2180	}
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2181	if (unlikely(link)) {
				2182	if (IS_ERR(link))
				2183	return PTR_ERR(link);
				2184	/* a symlink to follow */
				2185	nd->stack[depth++].name = name;
				2186	name = link;
				2187	continue;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2188	}
				2189	if (unlikely(!d_can_lookup(nd->path.dentry))) {
				2190	if (nd->flags & LOOKUP_RCU) {
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2191	if (!try_to_unlazy(nd))
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2192	return -ECHILD;
				2193	}
				2194	return -ENOTDIR;
				2195	}
				2196	}
				2197	}
				2198
				2199	/* must be paired with terminate_walk() */
				2200	static const char path_init(struct nameidata nd, unsigned flags)
				2201	{
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2202	int error;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2203	const char *s = nd->name->name;
				2204
				2205	if (!*s)
				2206	flags &= ~LOOKUP_RCU;
				2207	if (flags & LOOKUP_RCU)
				2208	rcu_read_lock();
				2209
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2210	nd->flags = flags \| LOOKUP_JUMPED;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2211	nd->depth = 0;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2212
				2213	nd->m_seq = __read_seqcount_begin(&mount_lock.seqcount);
				2214	nd->r_seq = __read_seqcount_begin(&rename_lock.seqcount);
				2215	smp_rmb();
				2216
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2217	if (flags & LOOKUP_ROOT) {
				2218	struct dentry *root = nd->root.dentry;
				2219	struct inode *inode = root->d_inode;
				2220	if (*s && unlikely(!d_can_lookup(root)))
				2221	return ERR_PTR(-ENOTDIR);
				2222	nd->path = nd->root;
				2223	nd->inode = inode;
				2224	if (flags & LOOKUP_RCU) {
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2225	nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2226	nd->root_seq = nd->seq;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2227	} else {
				2228	path_get(&nd->path);
				2229	}
				2230	return s;
				2231	}
				2232
				2233	nd->root.mnt = NULL;
				2234	nd->path.mnt = NULL;
				2235	nd->path.dentry = NULL;
				2236
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2237	/* Absolute pathname -- fetch the root (LOOKUP_IN_ROOT uses nd->dfd). */
				2238	if (*s == '/' && !(flags & LOOKUP_IN_ROOT)) {
				2239	error = nd_jump_root(nd);
				2240	if (unlikely(error))
				2241	return ERR_PTR(error);
				2242	return s;
				2243	}
				2244
				2245	/* Relative pathname -- get the starting-point it is relative to. */
				2246	if (nd->dfd == AT_FDCWD) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2247	if (flags & LOOKUP_RCU) {
				2248	struct fs_struct *fs = current->fs;
				2249	unsigned seq;
				2250
				2251	do {
				2252	seq = read_seqcount_begin(&fs->seq);
				2253	nd->path = fs->pwd;
				2254	nd->inode = nd->path.dentry->d_inode;
				2255	nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
				2256	} while (read_seqcount_retry(&fs->seq, seq));
				2257	} else {
				2258	get_fs_pwd(current->fs, &nd->path);
				2259	nd->inode = nd->path.dentry->d_inode;
				2260	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2261	} else {
				2262	/* Caller must check execute permissions on the starting path component */
				2263	struct fd f = fdget_raw(nd->dfd);
				2264	struct dentry *dentry;
				2265
				2266	if (!f.file)
				2267	return ERR_PTR(-EBADF);
				2268
				2269	dentry = f.file->f_path.dentry;
				2270
				2271	if (*s && unlikely(!d_can_lookup(dentry))) {
				2272	fdput(f);
				2273	return ERR_PTR(-ENOTDIR);
				2274	}
				2275
				2276	nd->path = f.file->f_path;
				2277	if (flags & LOOKUP_RCU) {
				2278	nd->inode = nd->path.dentry->d_inode;
				2279	nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
				2280	} else {
				2281	path_get(&nd->path);
				2282	nd->inode = nd->path.dentry->d_inode;
				2283	}
				2284	fdput(f);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2285	}
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2286
				2287	/* For scoped-lookups we need to set the root to the dirfd as well. */
				2288	if (flags & LOOKUP_IS_SCOPED) {
				2289	nd->root = nd->path;
				2290	if (flags & LOOKUP_RCU) {
				2291	nd->root_seq = nd->seq;
				2292	} else {
				2293	path_get(&nd->root);
				2294	nd->flags \|= LOOKUP_ROOT_GRABBED;
				2295	}
				2296	}
				2297	return s;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2298	}
				2299
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2300	static inline const char lookup_last(struct nameidata nd)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2301	{
				2302	if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len])
				2303	nd->flags \|= LOOKUP_FOLLOW \| LOOKUP_DIRECTORY;
				2304
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2305	return walk_component(nd, WALK_TRAILING);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2306	}
				2307
				2308	static int handle_lookup_down(struct nameidata *nd)
				2309	{
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2310	if (!(nd->flags & LOOKUP_RCU))
				2311	dget(nd->path.dentry);
				2312	return PTR_ERR(step_into(nd, WALK_NOFOLLOW,
				2313	nd->path.dentry, nd->inode, nd->seq));
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2314	}
				2315
				2316	/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
				2317	static int path_lookupat(struct nameidata nd, unsigned flags, struct path path)
				2318	{
				2319	const char *s = path_init(nd, flags);
				2320	int err;
				2321
				2322	if (unlikely(flags & LOOKUP_DOWN) && !IS_ERR(s)) {
				2323	err = handle_lookup_down(nd);
				2324	if (unlikely(err < 0))
				2325	s = ERR_PTR(err);
				2326	}
				2327
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2328	while (!(err = link_path_walk(s, nd)) &&
				2329	(s = lookup_last(nd)) != NULL)
				2330	;
				2331	if (!err && unlikely(nd->flags & LOOKUP_MOUNTPOINT)) {
				2332	err = handle_lookup_down(nd);
				2333	nd->flags &= ~LOOKUP_JUMPED; // no d_weak_revalidate(), please...
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2334	}
				2335	if (!err)
				2336	err = complete_walk(nd);
				2337
				2338	if (!err && nd->flags & LOOKUP_DIRECTORY)
				2339	if (!d_can_lookup(nd->path.dentry))
				2340	err = -ENOTDIR;
				2341	if (!err) {
				2342	*path = nd->path;
				2343	nd->path.mnt = NULL;
				2344	nd->path.dentry = NULL;
				2345	}
				2346	terminate_walk(nd);
				2347	return err;
				2348	}
				2349
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	2350	int filename_lookup(int dfd, struct filename *name, unsigned flags,
				2351	struct path path, struct path root)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2352	{
				2353	int retval;
				2354	struct nameidata nd;
				2355	if (IS_ERR(name))
				2356	return PTR_ERR(name);
				2357	if (unlikely(root)) {
				2358	nd.root = *root;
				2359	flags \|= LOOKUP_ROOT;
				2360	}
				2361	set_nameidata(&nd, dfd, name);
				2362	retval = path_lookupat(&nd, flags \| LOOKUP_RCU, path);
				2363	if (unlikely(retval == -ECHILD))
				2364	retval = path_lookupat(&nd, flags, path);
				2365	if (unlikely(retval == -ESTALE))
				2366	retval = path_lookupat(&nd, flags \| LOOKUP_REVAL, path);
				2367
				2368	if (likely(!retval))
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2369	audit_inode(name, path->dentry,
				2370	flags & LOOKUP_MOUNTPOINT ? AUDIT_INODE_NOEVAL : 0);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2371	restore_nameidata();
				2372	putname(name);
				2373	return retval;
				2374	}
				2375
				2376	/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
				2377	static int path_parentat(struct nameidata *nd, unsigned flags,
				2378	struct path *parent)
				2379	{
				2380	const char *s = path_init(nd, flags);
				2381	int err = link_path_walk(s, nd);
				2382	if (!err)
				2383	err = complete_walk(nd);
				2384	if (!err) {
				2385	*parent = nd->path;
				2386	nd->path.mnt = NULL;
				2387	nd->path.dentry = NULL;
				2388	}
				2389	terminate_walk(nd);
				2390	return err;
				2391	}
				2392
				2393	static struct filename filename_parentat(int dfd, struct filename name,
				2394	unsigned int flags, struct path *parent,
				2395	struct qstr last, int type)
				2396	{
				2397	int retval;
				2398	struct nameidata nd;
				2399
				2400	if (IS_ERR(name))
				2401	return name;
				2402	set_nameidata(&nd, dfd, name);
				2403	retval = path_parentat(&nd, flags \| LOOKUP_RCU, parent);
				2404	if (unlikely(retval == -ECHILD))
				2405	retval = path_parentat(&nd, flags, parent);
				2406	if (unlikely(retval == -ESTALE))
				2407	retval = path_parentat(&nd, flags \| LOOKUP_REVAL, parent);
				2408	if (likely(!retval)) {
				2409	*last = nd.last;
				2410	*type = nd.last_type;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	2411	audit_inode(name, parent->dentry, AUDIT_INODE_PARENT);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2412	} else {
				2413	putname(name);
				2414	name = ERR_PTR(retval);
				2415	}
				2416	restore_nameidata();
				2417	return name;
				2418	}
				2419
				2420	/* does lookup, returns the object with parent locked */
				2421	struct dentry kern_path_locked(const char name, struct path *path)
				2422	{
				2423	struct filename *filename;
				2424	struct dentry *d;
				2425	struct qstr last;
				2426	int type;
				2427
				2428	filename = filename_parentat(AT_FDCWD, getname_kernel(name), 0, path,
				2429	&last, &type);
				2430	if (IS_ERR(filename))
				2431	return ERR_CAST(filename);
				2432	if (unlikely(type != LAST_NORM)) {
				2433	path_put(path);
				2434	putname(filename);
				2435	return ERR_PTR(-EINVAL);
				2436	}
				2437	inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
				2438	d = __lookup_hash(&last, path->dentry, 0);
				2439	if (IS_ERR(d)) {
				2440	inode_unlock(path->dentry->d_inode);
				2441	path_put(path);
				2442	}
				2443	putname(filename);
				2444	return d;
				2445	}
				2446
				2447	int kern_path(const char name, unsigned int flags, struct path path)
				2448	{
				2449	return filename_lookup(AT_FDCWD, getname_kernel(name),
				2450	flags, path, NULL);
				2451	}
				2452	EXPORT_SYMBOL(kern_path);
				2453
				2454	/**
				2455	* vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair
				2456	* @dentry: pointer to dentry of the base directory
				2457	* @mnt: pointer to vfs mount of the base directory
				2458	* @name: pointer to file name
				2459	* @flags: lookup flags
				2460	* @path: pointer to struct path to fill
				2461	*/
				2462	int vfs_path_lookup(struct dentry dentry, struct vfsmount mnt,
				2463	const char *name, unsigned int flags,
				2464	struct path *path)
				2465	{
				2466	struct path root = {.mnt = mnt, .dentry = dentry};
				2467	/* the first argument of filename_lookup() is ignored with root */
				2468	return filename_lookup(AT_FDCWD, getname_kernel(name),
				2469	flags , path, &root);
				2470	}
				2471	EXPORT_SYMBOL(vfs_path_lookup);
				2472
				2473	static int lookup_one_len_common(const char name, struct dentry base,
				2474	int len, struct qstr *this)
				2475	{
				2476	this->name = name;
				2477	this->len = len;
				2478	this->hash = full_name_hash(base, name, len);
				2479	if (!len)
				2480	return -EACCES;
				2481
				2482	if (unlikely(name[0] == '.')) {
				2483	if (len < 2 \|\| (len == 2 && name[1] == '.'))
				2484	return -EACCES;
				2485	}
				2486
				2487	while (len--) {
				2488	unsigned int c = (const unsigned char )name++;
				2489	if (c == '/' \|\| c == '\0')
				2490	return -EACCES;
				2491	}
				2492	/*
				2493	* See if the low-level filesystem might want
				2494	* to use its own hash..
				2495	*/
				2496	if (base->d_flags & DCACHE_OP_HASH) {
				2497	int err = base->d_op->d_hash(base, this);
				2498	if (err < 0)
				2499	return err;
				2500	}
				2501
				2502	return inode_permission(base->d_inode, MAY_EXEC);
				2503	}
				2504
				2505	/**
				2506	* try_lookup_one_len - filesystem helper to lookup single pathname component
				2507	* @name: pathname component to lookup
				2508	* @base: base directory to lookup from
				2509	* @len: maximum length @len should be interpreted to
				2510	*
				2511	* Look up a dentry by name in the dcache, returning NULL if it does not
				2512	* currently exist. The function does not try to create a dentry.
				2513	*
				2514	* Note that this routine is purely a helper for filesystem usage and should
				2515	* not be called by generic code.
				2516	*
				2517	* The caller must hold base->i_mutex.
				2518	*/
				2519	struct dentry try_lookup_one_len(const char name, struct dentry *base, int len)
				2520	{
				2521	struct qstr this;
				2522	int err;
				2523
				2524	WARN_ON_ONCE(!inode_is_locked(base->d_inode));
				2525
				2526	err = lookup_one_len_common(name, base, len, &this);
				2527	if (err)
				2528	return ERR_PTR(err);
				2529
				2530	return lookup_dcache(&this, base, 0);
				2531	}
				2532	EXPORT_SYMBOL(try_lookup_one_len);
				2533
				2534	/**
				2535	* lookup_one_len - filesystem helper to lookup single pathname component
				2536	* @name: pathname component to lookup
				2537	* @base: base directory to lookup from
				2538	* @len: maximum length @len should be interpreted to
				2539	*
				2540	* Note that this routine is purely a helper for filesystem usage and should
				2541	* not be called by generic code.
				2542	*
				2543	* The caller must hold base->i_mutex.
				2544	*/
				2545	struct dentry lookup_one_len(const char name, struct dentry *base, int len)
				2546	{
				2547	struct dentry *dentry;
				2548	struct qstr this;
				2549	int err;
				2550
				2551	WARN_ON_ONCE(!inode_is_locked(base->d_inode));
				2552
				2553	err = lookup_one_len_common(name, base, len, &this);
				2554	if (err)
				2555	return ERR_PTR(err);
				2556
				2557	dentry = lookup_dcache(&this, base, 0);
				2558	return dentry ? dentry : __lookup_slow(&this, base, 0);
				2559	}
				2560	EXPORT_SYMBOL(lookup_one_len);
				2561
				2562	/**
				2563	* lookup_one_len_unlocked - filesystem helper to lookup single pathname component
				2564	* @name: pathname component to lookup
				2565	* @base: base directory to lookup from
				2566	* @len: maximum length @len should be interpreted to
				2567	*
				2568	* Note that this routine is purely a helper for filesystem usage and should
				2569	* not be called by generic code.
				2570	*
				2571	* Unlike lookup_one_len, it should be called without the parent
				2572	* i_mutex held, and will take the i_mutex itself if necessary.
				2573	*/
				2574	struct dentry lookup_one_len_unlocked(const char name,
				2575	struct dentry *base, int len)
				2576	{
				2577	struct qstr this;
				2578	int err;
				2579	struct dentry *ret;
				2580
				2581	err = lookup_one_len_common(name, base, len, &this);
				2582	if (err)
				2583	return ERR_PTR(err);
				2584
				2585	ret = lookup_dcache(&this, base, 0);
				2586	if (!ret)
				2587	ret = lookup_slow(&this, base, 0);
				2588	return ret;
				2589	}
				2590	EXPORT_SYMBOL(lookup_one_len_unlocked);
				2591
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2592	/*
				2593	* Like lookup_one_len_unlocked(), except that it yields ERR_PTR(-ENOENT)
				2594	* on negatives. Returns known positive or ERR_PTR(); that's what
				2595	* most of the users want. Note that pinned negative with unlocked parent
				2596	* _can_ become positive at any time, so callers of lookup_one_len_unlocked()
				2597	* need to be very careful; pinned positives have ->d_inode stable, so
				2598	* this one avoids such problems.
				2599	*/
				2600	struct dentry lookup_positive_unlocked(const char name,
				2601	struct dentry *base, int len)
				2602	{
				2603	struct dentry *ret = lookup_one_len_unlocked(name, base, len);
				2604	if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) {
				2605	dput(ret);
				2606	ret = ERR_PTR(-ENOENT);
				2607	}
				2608	return ret;
				2609	}
				2610	EXPORT_SYMBOL(lookup_positive_unlocked);
				2611
#ifdef CONFIG_UNIX98_PTYS
/*
 * Redirect @path to whatever is mounted on "pts" in the directory that
 * contains the input path.
 *
 * Returns 0 with @path updated on success.  Returns -ENOENT if the parent
 * is not connected to @path's mount (in which case @path is unchanged) or
 * if the parent has no "pts" entry in the dcache (in which case @path now
 * refers to the parent).
 */
int path_pts(struct path *path)
{
	/* Find something mounted on "pts" in the same directory as
	 * the input path.
	 */
	struct dentry *parent = dget_parent(path->dentry);
	struct dentry *child;
	struct qstr this = QSTR_INIT("pts", 3);

	if (unlikely(!path_connected(path->mnt, parent))) {
		dput(parent);
		return -ENOENT;
	}
	/* swap the reference held in @path from the original to its parent */
	dput(path->dentry);
	path->dentry = parent;
	child = d_hash_and_lookup(parent, &this);
	if (!child)
		return -ENOENT;

	path->dentry = child;
	dput(parent);
	follow_down(path);
	return 0;
}
#endif
				2638
				2639	int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
				2640	struct path path, int empty)
				2641	{
				2642	return filename_lookup(dfd, getname_flags(name, flags, empty),
				2643	flags, path, NULL);
				2644	}
				2645	EXPORT_SYMBOL(user_path_at_empty);
				2646
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2647	int __check_sticky(struct inode dir, struct inode inode)
				2648	{
				2649	kuid_t fsuid = current_fsuid();
				2650
				2651	if (uid_eq(inode->i_uid, fsuid))
				2652	return 0;
				2653	if (uid_eq(dir->i_uid, fsuid))
				2654	return 0;
				2655	return !capable_wrt_inode_uidgid(inode, CAP_FOWNER);
				2656	}
				2657	EXPORT_SYMBOL(__check_sticky);
				2658
				2659	/*
				2660	* Check whether we can remove a link victim from directory dir, check
				2661	* whether the type of victim is right.
				2662	* 1. We can't do it if dir is read-only (done in permission())
				2663	* 2. We should have write and exec permissions on dir
				2664	* 3. We can't remove anything from append-only dir
				2665	* 4. We can't do anything with immutable dir (done in permission())
				2666	* 5. If the sticky bit on dir is set we should either
				2667	* a. be owner of dir, or
				2668	* b. be owner of victim, or
				2669	* c. have CAP_FOWNER capability
				2670	* 6. If the victim is append-only or immutable we can't do antyhing with
				2671	* links pointing to it.
				2672	* 7. If the victim has an unknown uid or gid we can't change the inode.
				2673	* 8. If we were asked to remove a directory and victim isn't one - ENOTDIR.
				2674	* 9. If we were asked to remove a non-directory and victim isn't one - EISDIR.
				2675	* 10. We can't remove a root or mountpoint.
				2676	* 11. We don't allow removal of NFS sillyrenamed files; it's handled by
				2677	* nfs_async_unlink().
				2678	*/
				2679	static int may_delete(struct inode dir, struct dentry victim, bool isdir)
				2680	{
				2681	struct inode *inode = d_backing_inode(victim);
				2682	int error;
				2683
				2684	if (d_is_negative(victim))
				2685	return -ENOENT;
				2686	BUG_ON(!inode);
				2687
				2688	BUG_ON(victim->d_parent->d_inode != dir);
				2689
				2690	/* Inode writeback is not safe when the uid or gid are invalid. */
				2691	if (!uid_valid(inode->i_uid) \|\| !gid_valid(inode->i_gid))
				2692	return -EOVERFLOW;
				2693
				2694	audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
				2695
				2696	error = inode_permission(dir, MAY_WRITE \| MAY_EXEC);
				2697	if (error)
				2698	return error;
				2699	if (IS_APPEND(dir))
				2700	return -EPERM;
				2701
				2702	if (check_sticky(dir, inode) \|\| IS_APPEND(inode) \|\|
				2703	IS_IMMUTABLE(inode) \|\| IS_SWAPFILE(inode) \|\| HAS_UNMAPPED_ID(inode))
				2704	return -EPERM;
				2705	if (isdir) {
				2706	if (!d_is_dir(victim))
				2707	return -ENOTDIR;
				2708	if (IS_ROOT(victim))
				2709	return -EBUSY;
				2710	} else if (d_is_dir(victim))
				2711	return -EISDIR;
				2712	if (IS_DEADDIR(dir))
				2713	return -ENOENT;
				2714	if (victim->d_flags & DCACHE_NFSFS_RENAMED)
				2715	return -EBUSY;
				2716	return 0;
				2717	}
				2718
				2719	/* Check whether we can create an object with dentry child in directory
				2720	* dir.
				2721	* 1. We can't do it if child already exists (open has special treatment for
				2722	* this case, but since we are inlined it's OK)
				2723	* 2. We can't do it if dir is read-only (done in permission())
				2724	* 3. We can't do it if the fs can't represent the fsuid or fsgid.
				2725	* 4. We should have write and exec permissions on dir
				2726	* 5. We can't do it if dir is immutable (done in permission())
				2727	*/
				2728	static inline int may_create(struct inode dir, struct dentry child)
				2729	{
				2730	struct user_namespace *s_user_ns;
				2731	audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE);
				2732	if (child->d_inode)
				2733	return -EEXIST;
				2734	if (IS_DEADDIR(dir))
				2735	return -ENOENT;
				2736	s_user_ns = dir->i_sb->s_user_ns;
				2737	if (!kuid_has_mapping(s_user_ns, current_fsuid()) \|\|
				2738	!kgid_has_mapping(s_user_ns, current_fsgid()))
				2739	return -EOVERFLOW;
				2740	return inode_permission(dir, MAY_WRITE \| MAY_EXEC);
				2741	}
				2742
				2743	/*
				2744	* p1 and p2 should be directories on the same fs.
				2745	*/
				2746	struct dentry lock_rename(struct dentry p1, struct dentry *p2)
				2747	{
				2748	struct dentry *p;
				2749
				2750	if (p1 == p2) {
				2751	inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
				2752	return NULL;
				2753	}
				2754
				2755	mutex_lock(&p1->d_sb->s_vfs_rename_mutex);
				2756
				2757	p = d_ancestor(p2, p1);
				2758	if (p) {
				2759	inode_lock_nested(p2->d_inode, I_MUTEX_PARENT);
				2760	inode_lock_nested(p1->d_inode, I_MUTEX_CHILD);
				2761	return p;
				2762	}
				2763
				2764	p = d_ancestor(p1, p2);
				2765	if (p) {
				2766	inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
				2767	inode_lock_nested(p2->d_inode, I_MUTEX_CHILD);
				2768	return p;
				2769	}
				2770
				2771	inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
				2772	inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
				2773	return NULL;
				2774	}
				2775	EXPORT_SYMBOL(lock_rename);
				2776
				2777	void unlock_rename(struct dentry p1, struct dentry p2)
				2778	{
				2779	inode_unlock(p1->d_inode);
				2780	if (p1 != p2) {
				2781	inode_unlock(p2->d_inode);
				2782	mutex_unlock(&p1->d_sb->s_vfs_rename_mutex);
				2783	}
				2784	}
				2785	EXPORT_SYMBOL(unlock_rename);
				2786
				2787	int vfs_create(struct inode dir, struct dentry dentry, umode_t mode,
				2788	bool want_excl)
				2789	{
				2790	int error = may_create(dir, dentry);
				2791	if (error)
				2792	return error;
				2793
				2794	if (!dir->i_op->create)
				2795	return -EACCES; /* shouldn't it be ENOSYS? */
				2796	mode &= S_IALLUGO;
				2797	mode \|= S_IFREG;
				2798	error = security_inode_create(dir, dentry, mode);
				2799	if (error)
				2800	return error;
				2801	error = dir->i_op->create(dir, dentry, mode, want_excl);
				2802	if (!error)
				2803	fsnotify_create(dir, dentry);
				2804	return error;
				2805	}
				2806	EXPORT_SYMBOL(vfs_create);
				2807
				2808	int vfs_mkobj(struct dentry *dentry, umode_t mode,
				2809	int (f)(struct dentry , umode_t, void *),
				2810	void *arg)
				2811	{
				2812	struct inode *dir = dentry->d_parent->d_inode;
				2813	int error = may_create(dir, dentry);
				2814	if (error)
				2815	return error;
				2816
				2817	mode &= S_IALLUGO;
				2818	mode \|= S_IFREG;
				2819	error = security_inode_create(dir, dentry, mode);
				2820	if (error)
				2821	return error;
				2822	error = f(dentry, mode, arg);
				2823	if (!error)
				2824	fsnotify_create(dir, dentry);
				2825	return error;
				2826	}
				2827	EXPORT_SYMBOL(vfs_mkobj);
				2828
				2829	bool may_open_dev(const struct path *path)
				2830	{
				2831	return !(path->mnt->mnt_flags & MNT_NODEV) &&
				2832	!(path->mnt->mnt_sb->s_iflags & SB_I_NODEV);
				2833	}
				2834
				2835	static int may_open(const struct path *path, int acc_mode, int flag)
				2836	{
				2837	struct dentry *dentry = path->dentry;
				2838	struct inode *inode = dentry->d_inode;
				2839	int error;
				2840
				2841	if (!inode)
				2842	return -ENOENT;
				2843
				2844	switch (inode->i_mode & S_IFMT) {
				2845	case S_IFLNK:
				2846	return -ELOOP;
				2847	case S_IFDIR:
				2848	if (acc_mode & MAY_WRITE)
				2849	return -EISDIR;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2850	if (acc_mode & MAY_EXEC)
				2851	return -EACCES;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2852	break;
				2853	case S_IFBLK:
				2854	case S_IFCHR:
				2855	if (!may_open_dev(path))
				2856	return -EACCES;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2857	fallthrough;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2858	case S_IFIFO:
				2859	case S_IFSOCK:
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2860	if (acc_mode & MAY_EXEC)
				2861	return -EACCES;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2862	flag &= ~O_TRUNC;
				2863	break;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2864	case S_IFREG:
				2865	if ((acc_mode & MAY_EXEC) && path_noexec(path))
				2866	return -EACCES;
				2867	break;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2868	}
				2869
				2870	error = inode_permission(inode, MAY_OPEN \| acc_mode);
				2871	if (error)
				2872	return error;
				2873
				2874	/*
				2875	* An append-only file must be opened in append mode for writing.
				2876	*/
				2877	if (IS_APPEND(inode)) {
				2878	if ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND))
				2879	return -EPERM;
				2880	if (flag & O_TRUNC)
				2881	return -EPERM;
				2882	}
				2883
				2884	/* O_NOATIME can only be set by the owner or superuser */
				2885	if (flag & O_NOATIME && !inode_owner_or_capable(inode))
				2886	return -EPERM;
				2887
				2888	return 0;
				2889	}
				2890
				2891	static int handle_truncate(struct file *filp)
				2892	{
				2893	const struct path *path = &filp->f_path;
				2894	struct inode *inode = path->dentry->d_inode;
				2895	int error = get_write_access(inode);
				2896	if (error)
				2897	return error;
				2898	/*
				2899	* Refuse to truncate files with mandatory locks held on them.
				2900	*/
				2901	error = locks_verify_locked(filp);
				2902	if (!error)
				2903	error = security_path_truncate(path);
				2904	if (!error) {
				2905	error = do_truncate(path->dentry, 0,
				2906	ATTR_MTIME\|ATTR_CTIME\|ATTR_OPEN,
				2907	filp);
				2908	}
				2909	put_write_access(inode);
				2910	return error;
				2911	}
				2912
				2913	static inline int open_to_namei_flags(int flag)
				2914	{
				2915	if ((flag & O_ACCMODE) == 3)
				2916	flag--;
				2917	return flag;
				2918	}
				2919
				2920	static int may_o_create(const struct path dir, struct dentry dentry, umode_t mode)
				2921	{
				2922	struct user_namespace *s_user_ns;
				2923	int error = security_path_mknod(dir, dentry, mode, 0);
				2924	if (error)
				2925	return error;
				2926
				2927	s_user_ns = dir->dentry->d_sb->s_user_ns;
				2928	if (!kuid_has_mapping(s_user_ns, current_fsuid()) \|\|
				2929	!kgid_has_mapping(s_user_ns, current_fsgid()))
				2930	return -EOVERFLOW;
				2931
				2932	error = inode_permission(dir->dentry->d_inode, MAY_WRITE \| MAY_EXEC);
				2933	if (error)
				2934	return error;
				2935
				2936	return security_inode_create(dir->dentry->d_inode, dentry, mode);
				2937	}
				2938
				2939	/*
				2940	* Attempt to atomically look up, create and open a file from a negative
				2941	* dentry.
				2942	*
				2943	* Returns 0 if successful. The file will have been created and attached to
				2944	* @file by the filesystem calling finish_open().
				2945	*
				2946	* If the file was looked up only or didn't need creating, FMODE_OPENED won't
				2947	* be set. The caller will need to perform the open themselves. @path will
				2948	* have been updated to point to the new dentry. This may be negative.
				2949	*
				2950	* Returns an error code otherwise.
				2951	*/
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2952	static struct dentry atomic_open(struct nameidata nd, struct dentry *dentry,
				2953	struct file *file,
				2954	int open_flag, umode_t mode)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2955	{
				2956	struct dentry const DENTRY_NOT_SET = (void ) -1UL;
				2957	struct inode *dir = nd->path.dentry->d_inode;
				2958	int error;
				2959
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2960	if (nd->flags & LOOKUP_DIRECTORY)
				2961	open_flag \|= O_DIRECTORY;
				2962
				2963	file->f_path.dentry = DENTRY_NOT_SET;
				2964	file->f_path.mnt = nd->path.mnt;
				2965	error = dir->i_op->atomic_open(dir, dentry, file,
				2966	open_to_namei_flags(open_flag), mode);
				2967	d_lookup_done(dentry);
				2968	if (!error) {
				2969	if (file->f_mode & FMODE_OPENED) {
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2970	if (unlikely(dentry != file->f_path.dentry)) {
				2971	dput(dentry);
				2972	dentry = dget(file->f_path.dentry);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2973	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2974	} else if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) {
				2975	error = -EIO;
				2976	} else {
				2977	if (file->f_path.dentry) {
				2978	dput(dentry);
				2979	dentry = file->f_path.dentry;
				2980	}
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2981	if (unlikely(d_is_negative(dentry)))
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2982	error = -ENOENT;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2983	}
				2984	}
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	2985	if (error) {
				2986	dput(dentry);
				2987	dentry = ERR_PTR(error);
				2988	}
				2989	return dentry;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2990	}
				2991
				2992	/*
				2993	* Look up and maybe create and open the last component.
				2994	*
				2995	* Must be called with parent locked (exclusive in O_CREAT case).
				2996	*
				2997	* Returns 0 on success, that is, if
				2998	* the file was successfully atomically created (if necessary) and opened, or
				2999	* the file was not completely opened at this time, though lookups and
				3000	* creations were performed.
				3001	* These case are distinguished by presence of FMODE_OPENED on file->f_mode.
				3002	* In the latter case dentry returned in @path might be negative if O_CREAT
				3003	* hadn't been specified.
				3004	*
				3005	* An error code is returned on failure.
				3006	*/
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3007	static struct dentry lookup_open(struct nameidata nd, struct file *file,
				3008	const struct open_flags *op,
				3009	bool got_write)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3010	{
				3011	struct dentry *dir = nd->path.dentry;
				3012	struct inode *dir_inode = dir->d_inode;
				3013	int open_flag = op->open_flag;
				3014	struct dentry *dentry;
				3015	int error, create_error = 0;
				3016	umode_t mode = op->mode;
				3017	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
				3018
				3019	if (unlikely(IS_DEADDIR(dir_inode)))
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3020	return ERR_PTR(-ENOENT);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3021
				3022	file->f_mode &= ~FMODE_CREATED;
				3023	dentry = d_lookup(dir, &nd->last);
				3024	for (;;) {
				3025	if (!dentry) {
				3026	dentry = d_alloc_parallel(dir, &nd->last, &wq);
				3027	if (IS_ERR(dentry))
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3028	return dentry;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3029	}
				3030	if (d_in_lookup(dentry))
				3031	break;
				3032
				3033	error = d_revalidate(dentry, nd->flags);
				3034	if (likely(error > 0))
				3035	break;
				3036	if (error)
				3037	goto out_dput;
				3038	d_invalidate(dentry);
				3039	dput(dentry);
				3040	dentry = NULL;
				3041	}
				3042	if (dentry->d_inode) {
				3043	/* Cached positive dentry: will open in f_op->open */
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3044	return dentry;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3045	}
				3046
				3047	/*
				3048	* Checking write permission is tricky, bacuse we don't know if we are
				3049	* going to actually need it: O_CREAT opens should work as long as the
				3050	* file exists. But checking existence breaks atomicity. The trick is
				3051	* to check access and if not granted clear O_CREAT from the flags.
				3052	*
				3053	* Another problem is returing the "right" error value (e.g. for an
				3054	* O_EXCL open we want to return EEXIST not EROFS).
				3055	*/
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3056	if (unlikely(!got_write))
				3057	open_flag &= ~O_TRUNC;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3058	if (open_flag & O_CREAT) {
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3059	if (open_flag & O_EXCL)
				3060	open_flag &= ~O_TRUNC;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3061	if (!IS_POSIXACL(dir->d_inode))
				3062	mode &= ~current_umask();
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3063	if (likely(got_write))
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3064	create_error = may_o_create(&nd->path, dentry, mode);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3065	else
				3066	create_error = -EROFS;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3067	}
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3068	if (create_error)
				3069	open_flag &= ~O_CREAT;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3070	if (dir_inode->i_op->atomic_open) {
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3071	dentry = atomic_open(nd, dentry, file, open_flag, mode);
				3072	if (unlikely(create_error) && dentry == ERR_PTR(-ENOENT))
				3073	dentry = ERR_PTR(create_error);
				3074	return dentry;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3075	}
				3076
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3077	if (d_in_lookup(dentry)) {
				3078	struct dentry *res = dir_inode->i_op->lookup(dir_inode, dentry,
				3079	nd->flags);
				3080	d_lookup_done(dentry);
				3081	if (unlikely(res)) {
				3082	if (IS_ERR(res)) {
				3083	error = PTR_ERR(res);
				3084	goto out_dput;
				3085	}
				3086	dput(dentry);
				3087	dentry = res;
				3088	}
				3089	}
				3090
				3091	/* Negative dentry, just create the file */
				3092	if (!dentry->d_inode && (open_flag & O_CREAT)) {
				3093	file->f_mode \|= FMODE_CREATED;
				3094	audit_inode_child(dir_inode, dentry, AUDIT_TYPE_CHILD_CREATE);
				3095	if (!dir_inode->i_op->create) {
				3096	error = -EACCES;
				3097	goto out_dput;
				3098	}
				3099	error = dir_inode->i_op->create(dir_inode, dentry, mode,
				3100	open_flag & O_EXCL);
				3101	if (error)
				3102	goto out_dput;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3103	}
				3104	if (unlikely(create_error) && !dentry->d_inode) {
				3105	error = create_error;
				3106	goto out_dput;
				3107	}
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3108	return dentry;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3109
				3110	out_dput:
				3111	dput(dentry);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3112	return ERR_PTR(error);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3113	}
				3114
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3115	static const char open_last_lookups(struct nameidata nd,
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3116	struct file file, const struct open_flags op)
				3117	{
				3118	struct dentry *dir = nd->path.dentry;
				3119	int open_flag = op->open_flag;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3120	bool got_write = false;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3121	unsigned seq;
				3122	struct inode *inode;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3123	struct dentry *dentry;
				3124	const char *res;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3125
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3126	nd->flags \|= op->intent;
				3127
				3128	if (nd->last_type != LAST_NORM) {
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3129	if (nd->depth)
				3130	put_link(nd);
				3131	return handle_dots(nd, nd->last_type);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3132	}
				3133
				3134	if (!(open_flag & O_CREAT)) {
				3135	if (nd->last.name[nd->last.len])
				3136	nd->flags \|= LOOKUP_FOLLOW \| LOOKUP_DIRECTORY;
				3137	/* we _can_ be in RCU mode here */
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3138	dentry = lookup_fast(nd, &inode, &seq);
				3139	if (IS_ERR(dentry))
				3140	return ERR_CAST(dentry);
				3141	if (likely(dentry))
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3142	goto finish_lookup;
				3143
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3144	BUG_ON(nd->flags & LOOKUP_RCU);
				3145	} else {
				3146	/* create side of things */
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3147	if (nd->flags & LOOKUP_RCU) {
				3148	if (!try_to_unlazy(nd))
				3149	return ERR_PTR(-ECHILD);
				3150	}
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3151	audit_inode(nd->name, dir, AUDIT_INODE_PARENT);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3152	/* trailing slashes? */
				3153	if (unlikely(nd->last.name[nd->last.len]))
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3154	return ERR_PTR(-EISDIR);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3155	}
				3156
				3157	if (open_flag & (O_CREAT \| O_TRUNC \| O_WRONLY \| O_RDWR)) {
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3158	got_write = !mnt_want_write(nd->path.mnt);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3159	/*
				3160	* do _not_ fail yet - we might not need that or fail with
				3161	* a different error; let lookup_open() decide; we'll be
				3162	* dropping this one anyway.
				3163	*/
				3164	}
				3165	if (open_flag & O_CREAT)
				3166	inode_lock(dir->d_inode);
				3167	else
				3168	inode_lock_shared(dir->d_inode);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3169	dentry = lookup_open(nd, file, op, got_write);
				3170	if (!IS_ERR(dentry) && (file->f_mode & FMODE_CREATED))
				3171	fsnotify_create(dir->d_inode, dentry);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3172	if (open_flag & O_CREAT)
				3173	inode_unlock(dir->d_inode);
				3174	else
				3175	inode_unlock_shared(dir->d_inode);
				3176
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3177	if (got_write)
				3178	mnt_drop_write(nd->path.mnt);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3179
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3180	if (IS_ERR(dentry))
				3181	return ERR_CAST(dentry);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3182
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3183	if (file->f_mode & (FMODE_OPENED \| FMODE_CREATED)) {
				3184	dput(nd->path.dentry);
				3185	nd->path.dentry = dentry;
				3186	return NULL;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3187	}
				3188
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3189	finish_lookup:
				3190	if (nd->depth)
				3191	put_link(nd);
				3192	res = step_into(nd, WALK_TRAILING, dentry, inode, seq);
				3193	if (unlikely(res))
				3194	nd->flags &= ~(LOOKUP_OPEN\|LOOKUP_CREATE\|LOOKUP_EXCL);
				3195	return res;
				3196	}
				3197
				3198	/*
				3199	* Handle the last step of open()
				3200	*/
				3201	static int do_open(struct nameidata *nd,
				3202	struct file file, const struct open_flags op)
				3203	{
				3204	int open_flag = op->open_flag;
				3205	bool do_truncate;
				3206	int acc_mode;
				3207	int error;
				3208
				3209	if (!(file->f_mode & (FMODE_OPENED \| FMODE_CREATED))) {
				3210	error = complete_walk(nd);
				3211	if (error)
				3212	return error;
				3213	}
				3214	if (!(file->f_mode & FMODE_CREATED))
				3215	audit_inode(nd->name, nd->path.dentry, 0);
				3216	if (open_flag & O_CREAT) {
				3217	if ((open_flag & O_EXCL) && !(file->f_mode & FMODE_CREATED))
				3218	return -EEXIST;
				3219	if (d_is_dir(nd->path.dentry))
				3220	return -EISDIR;
				3221	error = may_create_in_sticky(nd->dir_mode, nd->dir_uid,
				3222	d_backing_inode(nd->path.dentry));
				3223	if (unlikely(error))
				3224	return error;
				3225	}
				3226	if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry))
				3227	return -ENOTDIR;
				3228
				3229	do_truncate = false;
				3230	acc_mode = op->acc_mode;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3231	if (file->f_mode & FMODE_CREATED) {
				3232	/* Don't check for write permission, don't truncate */
				3233	open_flag &= ~O_TRUNC;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3234	acc_mode = 0;
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3235	} else if (d_is_reg(nd->path.dentry) && open_flag & O_TRUNC) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3236	error = mnt_want_write(nd->path.mnt);
				3237	if (error)
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3238	return error;
				3239	do_truncate = true;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3240	}
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3241	error = may_open(&nd->path, acc_mode, open_flag);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3242	if (!error && !(file->f_mode & FMODE_OPENED))
				3243	error = vfs_open(&nd->path, file);
				3244	if (!error)
				3245	error = ima_file_check(file, op->acc_mode);
				3246	if (!error && do_truncate)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3247	error = handle_truncate(file);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3248	if (unlikely(error > 0)) {
				3249	WARN_ON(1);
				3250	error = -EINVAL;
				3251	}
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3252	if (do_truncate)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3253	mnt_drop_write(nd->path.mnt);
				3254	return error;
				3255	}
				3256
				3257	struct dentry vfs_tmpfile(struct dentry dentry, umode_t mode, int open_flag)
				3258	{
				3259	struct dentry *child = NULL;
				3260	struct inode *dir = dentry->d_inode;
				3261	struct inode *inode;
				3262	int error;
				3263
				3264	/* we want directory to be writable */
				3265	error = inode_permission(dir, MAY_WRITE \| MAY_EXEC);
				3266	if (error)
				3267	goto out_err;
				3268	error = -EOPNOTSUPP;
				3269	if (!dir->i_op->tmpfile)
				3270	goto out_err;
				3271	error = -ENOMEM;
				3272	child = d_alloc(dentry, &slash_name);
				3273	if (unlikely(!child))
				3274	goto out_err;
				3275	error = dir->i_op->tmpfile(dir, child, mode);
				3276	if (error)
				3277	goto out_err;
				3278	error = -ENOENT;
				3279	inode = child->d_inode;
				3280	if (unlikely(!inode))
				3281	goto out_err;
				3282	if (!(open_flag & O_EXCL)) {
				3283	spin_lock(&inode->i_lock);
				3284	inode->i_state \|= I_LINKABLE;
				3285	spin_unlock(&inode->i_lock);
				3286	}
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3287	ima_post_create_tmpfile(inode);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3288	return child;
				3289
				3290	out_err:
				3291	dput(child);
				3292	return ERR_PTR(error);
				3293	}
				3294	EXPORT_SYMBOL(vfs_tmpfile);
				3295
				3296	static int do_tmpfile(struct nameidata *nd, unsigned flags,
				3297	const struct open_flags *op,
				3298	struct file *file)
				3299	{
				3300	struct dentry *child;
				3301	struct path path;
				3302	int error = path_lookupat(nd, flags \| LOOKUP_DIRECTORY, &path);
				3303	if (unlikely(error))
				3304	return error;
				3305	error = mnt_want_write(path.mnt);
				3306	if (unlikely(error))
				3307	goto out;
				3308	child = vfs_tmpfile(path.dentry, op->mode, op->open_flag);
				3309	error = PTR_ERR(child);
				3310	if (IS_ERR(child))
				3311	goto out2;
				3312	dput(path.dentry);
				3313	path.dentry = child;
				3314	audit_inode(nd->name, child, 0);
				3315	/* Don't check for other permissions, the inode was just created */
				3316	error = may_open(&path, 0, op->open_flag);
				3317	if (error)
				3318	goto out2;
				3319	file->f_path.mnt = path.mnt;
				3320	error = finish_open(file, child, NULL);
				3321	out2:
				3322	mnt_drop_write(path.mnt);
				3323	out:
				3324	path_put(&path);
				3325	return error;
				3326	}
				3327
				3328	static int do_o_path(struct nameidata nd, unsigned flags, struct file file)
				3329	{
				3330	struct path path;
				3331	int error = path_lookupat(nd, flags, &path);
				3332	if (!error) {
				3333	audit_inode(nd->name, path.dentry, 0);
				3334	error = vfs_open(&path, file);
				3335	path_put(&path);
				3336	}
				3337	return error;
				3338	}
				3339
				3340	static struct file path_openat(struct nameidata nd,
				3341	const struct open_flags *op, unsigned flags)
				3342	{
				3343	struct file *file;
				3344	int error;
				3345
				3346	file = alloc_empty_file(op->open_flag, current_cred());
				3347	if (IS_ERR(file))
				3348	return file;
				3349
				3350	if (unlikely(file->f_flags & __O_TMPFILE)) {
				3351	error = do_tmpfile(nd, flags, op, file);
				3352	} else if (unlikely(file->f_flags & O_PATH)) {
				3353	error = do_o_path(nd, flags, file);
				3354	} else {
				3355	const char *s = path_init(nd, flags);
				3356	while (!(error = link_path_walk(s, nd)) &&
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3357	(s = open_last_lookups(nd, file, op)) != NULL)
				3358	;
				3359	if (!error)
				3360	error = do_open(nd, file, op);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3361	terminate_walk(nd);
				3362	}
				3363	if (likely(!error)) {
				3364	if (likely(file->f_mode & FMODE_OPENED))
				3365	return file;
				3366	WARN_ON(1);
				3367	error = -EINVAL;
				3368	}
				3369	fput(file);
				3370	if (error == -EOPENSTALE) {
				3371	if (flags & LOOKUP_RCU)
				3372	error = -ECHILD;
				3373	else
				3374	error = -ESTALE;
				3375	}
				3376	return ERR_PTR(error);
				3377	}
				3378
				3379	struct file do_filp_open(int dfd, struct filename pathname,
				3380	const struct open_flags *op)
				3381	{
				3382	struct nameidata nd;
				3383	int flags = op->lookup_flags;
				3384	struct file *filp;
				3385
				3386	set_nameidata(&nd, dfd, pathname);
				3387	filp = path_openat(&nd, op, flags \| LOOKUP_RCU);
				3388	if (unlikely(filp == ERR_PTR(-ECHILD)))
				3389	filp = path_openat(&nd, op, flags);
				3390	if (unlikely(filp == ERR_PTR(-ESTALE)))
				3391	filp = path_openat(&nd, op, flags \| LOOKUP_REVAL);
				3392	restore_nameidata();
				3393	return filp;
				3394	}
				3395
				3396	struct file do_file_open_root(struct dentry dentry, struct vfsmount *mnt,
				3397	const char name, const struct open_flags op)
				3398	{
				3399	struct nameidata nd;
				3400	struct file *file;
				3401	struct filename *filename;
				3402	int flags = op->lookup_flags \| LOOKUP_ROOT;
				3403
				3404	nd.root.mnt = mnt;
				3405	nd.root.dentry = dentry;
				3406
				3407	if (d_is_symlink(dentry) && op->intent & LOOKUP_OPEN)
				3408	return ERR_PTR(-ELOOP);
				3409
				3410	filename = getname_kernel(name);
				3411	if (IS_ERR(filename))
				3412	return ERR_CAST(filename);
				3413
				3414	set_nameidata(&nd, -1, filename);
				3415	file = path_openat(&nd, op, flags \| LOOKUP_RCU);
				3416	if (unlikely(file == ERR_PTR(-ECHILD)))
				3417	file = path_openat(&nd, op, flags);
				3418	if (unlikely(file == ERR_PTR(-ESTALE)))
				3419	file = path_openat(&nd, op, flags \| LOOKUP_REVAL);
				3420	restore_nameidata();
				3421	putname(filename);
				3422	return file;
				3423	}
				3424
				3425	static struct dentry filename_create(int dfd, struct filename name,
				3426	struct path *path, unsigned int lookup_flags)
				3427	{
				3428	struct dentry *dentry = ERR_PTR(-EEXIST);
				3429	struct qstr last;
				3430	int type;
				3431	int err2;
				3432	int error;
				3433	bool is_dir = (lookup_flags & LOOKUP_DIRECTORY);
				3434
				3435	/*
				3436	* Note that only LOOKUP_REVAL and LOOKUP_DIRECTORY matter here. Any
				3437	* other flags passed in are ignored!
				3438	*/
				3439	lookup_flags &= LOOKUP_REVAL;
				3440
				3441	name = filename_parentat(dfd, name, lookup_flags, path, &last, &type);
				3442	if (IS_ERR(name))
				3443	return ERR_CAST(name);
				3444
				3445	/*
				3446	* Yucky last component or no last component at all?
				3447	* (foo/., foo/.., /////)
				3448	*/
				3449	if (unlikely(type != LAST_NORM))
				3450	goto out;
				3451
				3452	/* don't fail immediately if it's r/o, at least try to report other errors */
				3453	err2 = mnt_want_write(path->mnt);
				3454	/*
				3455	* Do the final lookup.
				3456	*/
				3457	lookup_flags \|= LOOKUP_CREATE \| LOOKUP_EXCL;
				3458	inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
				3459	dentry = __lookup_hash(&last, path->dentry, lookup_flags);
				3460	if (IS_ERR(dentry))
				3461	goto unlock;
				3462
				3463	error = -EEXIST;
				3464	if (d_is_positive(dentry))
				3465	goto fail;
				3466
				3467	/*
				3468	* Special case - lookup gave negative, but... we had foo/bar/
				3469	* From the vfs_mknod() POV we just have a negative dentry -
				3470	* all is fine. Let's be bastards - you had / on the end, you've
				3471	* been asking for (non-existent) directory. -ENOENT for you.
				3472	*/
				3473	if (unlikely(!is_dir && last.name[last.len])) {
				3474	error = -ENOENT;
				3475	goto fail;
				3476	}
				3477	if (unlikely(err2)) {
				3478	error = err2;
				3479	goto fail;
				3480	}
				3481	putname(name);
				3482	return dentry;
				3483	fail:
				3484	dput(dentry);
				3485	dentry = ERR_PTR(error);
				3486	unlock:
				3487	inode_unlock(path->dentry->d_inode);
				3488	if (!err2)
				3489	mnt_drop_write(path->mnt);
				3490	out:
				3491	path_put(path);
				3492	putname(name);
				3493	return dentry;
				3494	}
				3495
				3496	struct dentry kern_path_create(int dfd, const char pathname,
				3497	struct path *path, unsigned int lookup_flags)
				3498	{
				3499	return filename_create(dfd, getname_kernel(pathname),
				3500	path, lookup_flags);
				3501	}
				3502	EXPORT_SYMBOL(kern_path_create);
				3503
				3504	void done_path_create(struct path path, struct dentry dentry)
				3505	{
				3506	dput(dentry);
				3507	inode_unlock(path->dentry->d_inode);
				3508	mnt_drop_write(path->mnt);
				3509	path_put(path);
				3510	}
				3511	EXPORT_SYMBOL(done_path_create);
				3512
				3513	inline struct dentry user_path_create(int dfd, const char __user pathname,
				3514	struct path *path, unsigned int lookup_flags)
				3515	{
				3516	return filename_create(dfd, getname(pathname), path, lookup_flags);
				3517	}
				3518	EXPORT_SYMBOL(user_path_create);
				3519
				3520	int vfs_mknod(struct inode dir, struct dentry dentry, umode_t mode, dev_t dev)
				3521	{
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3522	bool is_whiteout = S_ISCHR(mode) && dev == WHITEOUT_DEV;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3523	int error = may_create(dir, dentry);
				3524
				3525	if (error)
				3526	return error;
				3527
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3528	if ((S_ISCHR(mode) \|\| S_ISBLK(mode)) && !is_whiteout &&
				3529	!capable(CAP_MKNOD))
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3530	return -EPERM;
				3531
				3532	if (!dir->i_op->mknod)
				3533	return -EPERM;
				3534
				3535	error = devcgroup_inode_mknod(mode, dev);
				3536	if (error)
				3537	return error;
				3538
				3539	error = security_inode_mknod(dir, dentry, mode, dev);
				3540	if (error)
				3541	return error;
				3542
				3543	error = dir->i_op->mknod(dir, dentry, mode, dev);
				3544	if (!error)
				3545	fsnotify_create(dir, dentry);
				3546	return error;
				3547	}
				3548	EXPORT_SYMBOL(vfs_mknod);
				3549
				3550	static int may_mknod(umode_t mode)
				3551	{
				3552	switch (mode & S_IFMT) {
				3553	case S_IFREG:
				3554	case S_IFCHR:
				3555	case S_IFBLK:
				3556	case S_IFIFO:
				3557	case S_IFSOCK:
				3558	case 0: /* zero mode translates to S_IFREG */
				3559	return 0;
				3560	case S_IFDIR:
				3561	return -EPERM;
				3562	default:
				3563	return -EINVAL;
				3564	}
				3565	}
				3566
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3567	static long do_mknodat(int dfd, const char __user *filename, umode_t mode,
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3568	unsigned int dev)
				3569	{
				3570	struct dentry *dentry;
				3571	struct path path;
				3572	int error;
				3573	unsigned int lookup_flags = 0;
				3574
				3575	error = may_mknod(mode);
				3576	if (error)
				3577	return error;
				3578	retry:
				3579	dentry = user_path_create(dfd, filename, &path, lookup_flags);
				3580	if (IS_ERR(dentry))
				3581	return PTR_ERR(dentry);
				3582
				3583	if (!IS_POSIXACL(path.dentry->d_inode))
				3584	mode &= ~current_umask();
				3585	error = security_path_mknod(&path, dentry, mode, dev);
				3586	if (error)
				3587	goto out;
				3588	switch (mode & S_IFMT) {
				3589	case 0: case S_IFREG:
				3590	error = vfs_create(path.dentry->d_inode,dentry,mode,true);
				3591	if (!error)
				3592	ima_post_path_mknod(dentry);
				3593	break;
				3594	case S_IFCHR: case S_IFBLK:
				3595	error = vfs_mknod(path.dentry->d_inode,dentry,mode,
				3596	new_decode_dev(dev));
				3597	break;
				3598	case S_IFIFO: case S_IFSOCK:
				3599	error = vfs_mknod(path.dentry->d_inode,dentry,mode,0);
				3600	break;
				3601	}
				3602	out:
				3603	done_path_create(&path, dentry);
				3604	if (retry_estale(error, lookup_flags)) {
				3605	lookup_flags \|= LOOKUP_REVAL;
				3606	goto retry;
				3607	}
				3608	return error;
				3609	}
				3610
				3611	SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
				3612	unsigned int, dev)
				3613	{
				3614	return do_mknodat(dfd, filename, mode, dev);
				3615	}
				3616
				3617	SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev)
				3618	{
				3619	return do_mknodat(AT_FDCWD, filename, mode, dev);
				3620	}
				3621
				3622	int vfs_mkdir(struct inode dir, struct dentry dentry, umode_t mode)
				3623	{
				3624	int error = may_create(dir, dentry);
				3625	unsigned max_links = dir->i_sb->s_max_links;
				3626
				3627	if (error)
				3628	return error;
				3629
				3630	if (!dir->i_op->mkdir)
				3631	return -EPERM;
				3632
				3633	mode &= (S_IRWXUGO\|S_ISVTX);
				3634	error = security_inode_mkdir(dir, dentry, mode);
				3635	if (error)
				3636	return error;
				3637
				3638	if (max_links && dir->i_nlink >= max_links)
				3639	return -EMLINK;
				3640
				3641	error = dir->i_op->mkdir(dir, dentry, mode);
				3642	if (!error)
				3643	fsnotify_mkdir(dir, dentry);
				3644	return error;
				3645	}
				3646	EXPORT_SYMBOL(vfs_mkdir);
				3647
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3648	static long do_mkdirat(int dfd, const char __user *pathname, umode_t mode)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3649	{
				3650	struct dentry *dentry;
				3651	struct path path;
				3652	int error;
				3653	unsigned int lookup_flags = LOOKUP_DIRECTORY;
				3654
				3655	retry:
				3656	dentry = user_path_create(dfd, pathname, &path, lookup_flags);
				3657	if (IS_ERR(dentry))
				3658	return PTR_ERR(dentry);
				3659
				3660	if (!IS_POSIXACL(path.dentry->d_inode))
				3661	mode &= ~current_umask();
				3662	error = security_path_mkdir(&path, dentry, mode);
				3663	if (!error)
				3664	error = vfs_mkdir(path.dentry->d_inode, dentry, mode);
				3665	done_path_create(&path, dentry);
				3666	if (retry_estale(error, lookup_flags)) {
				3667	lookup_flags \|= LOOKUP_REVAL;
				3668	goto retry;
				3669	}
				3670	return error;
				3671	}
				3672
				3673	SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
				3674	{
				3675	return do_mkdirat(dfd, pathname, mode);
				3676	}
				3677
				3678	SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
				3679	{
				3680	return do_mkdirat(AT_FDCWD, pathname, mode);
				3681	}
				3682
				3683	int vfs_rmdir(struct inode dir, struct dentry dentry)
				3684	{
				3685	int error = may_delete(dir, dentry, 1);
				3686
				3687	if (error)
				3688	return error;
				3689
				3690	if (!dir->i_op->rmdir)
				3691	return -EPERM;
				3692
				3693	dget(dentry);
				3694	inode_lock(dentry->d_inode);
				3695
				3696	error = -EBUSY;
				3697	if (is_local_mountpoint(dentry))
				3698	goto out;
				3699
				3700	error = security_inode_rmdir(dir, dentry);
				3701	if (error)
				3702	goto out;
				3703
				3704	error = dir->i_op->rmdir(dir, dentry);
				3705	if (error)
				3706	goto out;
				3707
				3708	shrink_dcache_parent(dentry);
				3709	dentry->d_inode->i_flags \|= S_DEAD;
				3710	dont_mount(dentry);
				3711	detach_mounts(dentry);
				3712
				3713	out:
				3714	inode_unlock(dentry->d_inode);
				3715	dput(dentry);
				3716	if (!error)
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3717	d_delete_notify(dir, dentry);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3718	return error;
				3719	}
				3720	EXPORT_SYMBOL(vfs_rmdir);
				3721
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3722	long do_rmdir(int dfd, struct filename *name)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3723	{
				3724	int error = 0;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3725	struct dentry *dentry;
				3726	struct path path;
				3727	struct qstr last;
				3728	int type;
				3729	unsigned int lookup_flags = 0;
				3730	retry:
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3731	name = filename_parentat(dfd, name, lookup_flags,
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3732	&path, &last, &type);
				3733	if (IS_ERR(name))
				3734	return PTR_ERR(name);
				3735
				3736	switch (type) {
				3737	case LAST_DOTDOT:
				3738	error = -ENOTEMPTY;
				3739	goto exit1;
				3740	case LAST_DOT:
				3741	error = -EINVAL;
				3742	goto exit1;
				3743	case LAST_ROOT:
				3744	error = -EBUSY;
				3745	goto exit1;
				3746	}
				3747
				3748	error = mnt_want_write(path.mnt);
				3749	if (error)
				3750	goto exit1;
				3751
				3752	inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
				3753	dentry = __lookup_hash(&last, path.dentry, lookup_flags);
				3754	error = PTR_ERR(dentry);
				3755	if (IS_ERR(dentry))
				3756	goto exit2;
				3757	if (!dentry->d_inode) {
				3758	error = -ENOENT;
				3759	goto exit3;
				3760	}
				3761	error = security_path_rmdir(&path, dentry);
				3762	if (error)
				3763	goto exit3;
				3764	error = vfs_rmdir(path.dentry->d_inode, dentry);
				3765	exit3:
				3766	dput(dentry);
				3767	exit2:
				3768	inode_unlock(path.dentry->d_inode);
				3769	mnt_drop_write(path.mnt);
				3770	exit1:
				3771	path_put(&path);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3772	if (retry_estale(error, lookup_flags)) {
				3773	lookup_flags \|= LOOKUP_REVAL;
				3774	goto retry;
				3775	}
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3776	putname(name);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3777	return error;
				3778	}
				3779
				3780	SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
				3781	{
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3782	return do_rmdir(AT_FDCWD, getname(pathname));
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3783	}
				3784
				3785	/**
				3786	* vfs_unlink - unlink a filesystem object
				3787	* @dir: parent directory
				3788	* @dentry: victim
				3789	* @delegated_inode: returns victim inode, if the inode is delegated.
				3790	*
				3791	* The caller must hold dir->i_mutex.
				3792	*
				3793	* If vfs_unlink discovers a delegation, it will return -EWOULDBLOCK and
				3794	* return a reference to the inode in delegated_inode. The caller
				3795	* should then break the delegation on that inode and retry. Because
				3796	* breaking a delegation may take a long time, the caller should drop
				3797	* dir->i_mutex before doing so.
				3798	*
				3799	* Alternatively, a caller may pass NULL for delegated_inode. This may
				3800	* be appropriate for callers that expect the underlying filesystem not
				3801	* to be NFS exported.
				3802	*/
				3803	int vfs_unlink(struct inode dir, struct dentry dentry, struct inode **delegated_inode)
				3804	{
				3805	struct inode *target = dentry->d_inode;
				3806	int error = may_delete(dir, dentry, 0);
				3807
				3808	if (error)
				3809	return error;
				3810
				3811	if (!dir->i_op->unlink)
				3812	return -EPERM;
				3813
				3814	inode_lock(target);
				3815	if (is_local_mountpoint(dentry))
				3816	error = -EBUSY;
				3817	else {
				3818	error = security_inode_unlink(dir, dentry);
				3819	if (!error) {
				3820	error = try_break_deleg(target, delegated_inode);
				3821	if (error)
				3822	goto out;
				3823	error = dir->i_op->unlink(dir, dentry);
				3824	if (!error) {
				3825	dont_mount(dentry);
				3826	detach_mounts(dentry);
				3827	}
				3828	}
				3829	}
				3830	out:
				3831	inode_unlock(target);
				3832
				3833	/* We don't d_delete() NFS sillyrenamed files--they still exist. */
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3834	if (!error && dentry->d_flags & DCACHE_NFSFS_RENAMED) {
				3835	fsnotify_unlink(dir, dentry);
				3836	} else if (!error) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3837	fsnotify_link_count(target);
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3838	d_delete_notify(dir, dentry);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3839	}
				3840
				3841	return error;
				3842	}
				3843	EXPORT_SYMBOL(vfs_unlink);
				3844
				3845	/*
				3846	* Make sure that the actual truncation of the file will occur outside its
				3847	* directory's i_mutex. Truncate can take a long time if there is a lot of
				3848	* writeout happening, and we don't want to prevent access to the directory
				3849	* while waiting on the I/O.
				3850	*/
				3851	long do_unlinkat(int dfd, struct filename *name)
				3852	{
				3853	int error;
				3854	struct dentry *dentry;
				3855	struct path path;
				3856	struct qstr last;
				3857	int type;
				3858	struct inode *inode = NULL;
				3859	struct inode *delegated_inode = NULL;
				3860	unsigned int lookup_flags = 0;
				3861	retry:
				3862	name = filename_parentat(dfd, name, lookup_flags, &path, &last, &type);
				3863	if (IS_ERR(name))
				3864	return PTR_ERR(name);
				3865
				3866	error = -EISDIR;
				3867	if (type != LAST_NORM)
				3868	goto exit1;
				3869
				3870	error = mnt_want_write(path.mnt);
				3871	if (error)
				3872	goto exit1;
				3873	retry_deleg:
				3874	inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
				3875	dentry = __lookup_hash(&last, path.dentry, lookup_flags);
				3876	error = PTR_ERR(dentry);
				3877	if (!IS_ERR(dentry)) {
				3878	/* Why not before? Because we want correct error value */
				3879	if (last.name[last.len])
				3880	goto slashes;
				3881	inode = dentry->d_inode;
				3882	if (d_is_negative(dentry))
				3883	goto slashes;
				3884	ihold(inode);
				3885	error = security_path_unlink(&path, dentry);
				3886	if (error)
				3887	goto exit2;
				3888	error = vfs_unlink(path.dentry->d_inode, dentry, &delegated_inode);
				3889	exit2:
				3890	dput(dentry);
				3891	}
				3892	inode_unlock(path.dentry->d_inode);
				3893	if (inode)
				3894	iput(inode); /* truncate the inode here */
				3895	inode = NULL;
				3896	if (delegated_inode) {
				3897	error = break_deleg_wait(&delegated_inode);
				3898	if (!error)
				3899	goto retry_deleg;
				3900	}
				3901	mnt_drop_write(path.mnt);
				3902	exit1:
				3903	path_put(&path);
				3904	if (retry_estale(error, lookup_flags)) {
				3905	lookup_flags \|= LOOKUP_REVAL;
				3906	inode = NULL;
				3907	goto retry;
				3908	}
				3909	putname(name);
				3910	return error;
				3911
				3912	slashes:
				3913	if (d_is_negative(dentry))
				3914	error = -ENOENT;
				3915	else if (d_is_dir(dentry))
				3916	error = -EISDIR;
				3917	else
				3918	error = -ENOTDIR;
				3919	goto exit2;
				3920	}
				3921
				3922	SYSCALL_DEFINE3(unlinkat, int, dfd, const char __user *, pathname, int, flag)
				3923	{
				3924	if ((flag & ~AT_REMOVEDIR) != 0)
				3925	return -EINVAL;
				3926
				3927	if (flag & AT_REMOVEDIR)
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3928	return do_rmdir(dfd, getname(pathname));
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3929	return do_unlinkat(dfd, getname(pathname));
				3930	}
				3931
				3932	SYSCALL_DEFINE1(unlink, const char __user *, pathname)
				3933	{
				3934	return do_unlinkat(AT_FDCWD, getname(pathname));
				3935	}
				3936
				3937	int vfs_symlink(struct inode dir, struct dentry dentry, const char *oldname)
				3938	{
				3939	int error = may_create(dir, dentry);
				3940
				3941	if (error)
				3942	return error;
				3943
				3944	if (!dir->i_op->symlink)
				3945	return -EPERM;
				3946
				3947	error = security_inode_symlink(dir, dentry, oldname);
				3948	if (error)
				3949	return error;
				3950
				3951	error = dir->i_op->symlink(dir, dentry, oldname);
				3952	if (!error)
				3953	fsnotify_create(dir, dentry);
				3954	return error;
				3955	}
				3956	EXPORT_SYMBOL(vfs_symlink);
				3957
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	3958	static long do_symlinkat(const char __user *oldname, int newdfd,
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	3959	const char __user *newname)
				3960	{
				3961	int error;
				3962	struct filename *from;
				3963	struct dentry *dentry;
				3964	struct path path;
				3965	unsigned int lookup_flags = 0;
				3966
				3967	from = getname(oldname);
				3968	if (IS_ERR(from))
				3969	return PTR_ERR(from);
				3970	retry:
				3971	dentry = user_path_create(newdfd, newname, &path, lookup_flags);
				3972	error = PTR_ERR(dentry);
				3973	if (IS_ERR(dentry))
				3974	goto out_putname;
				3975
				3976	error = security_path_symlink(&path, dentry, from->name);
				3977	if (!error)
				3978	error = vfs_symlink(path.dentry->d_inode, dentry, from->name);
				3979	done_path_create(&path, dentry);
				3980	if (retry_estale(error, lookup_flags)) {
				3981	lookup_flags \|= LOOKUP_REVAL;
				3982	goto retry;
				3983	}
				3984	out_putname:
				3985	putname(from);
				3986	return error;
				3987	}
				3988
				3989	SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,
				3990	int, newdfd, const char __user *, newname)
				3991	{
				3992	return do_symlinkat(oldname, newdfd, newname);
				3993	}
				3994
				3995	SYSCALL_DEFINE2(symlink, const char __user , oldname, const char __user , newname)
				3996	{
				3997	return do_symlinkat(oldname, AT_FDCWD, newname);
				3998	}
				3999
				4000	/**
				4001	* vfs_link - create a new link
				4002	* @old_dentry: object to be linked
				4003	* @dir: new parent
				4004	* @new_dentry: where to create the new link
				4005	* @delegated_inode: returns inode needing a delegation break
				4006	*
				4007	* The caller must hold dir->i_mutex
				4008	*
				4009	* If vfs_link discovers a delegation on the to-be-linked file in need
				4010	* of breaking, it will return -EWOULDBLOCK and return a reference to the
				4011	* inode in delegated_inode. The caller should then break the delegation
				4012	* and retry. Because breaking a delegation may take a long time, the
				4013	* caller should drop the i_mutex before doing so.
				4014	*
				4015	* Alternatively, a caller may pass NULL for delegated_inode. This may
				4016	* be appropriate for callers that expect the underlying filesystem not
				4017	* to be NFS exported.
				4018	*/
				4019	int vfs_link(struct dentry old_dentry, struct inode dir, struct dentry new_dentry, struct inode *delegated_inode)
				4020	{
				4021	struct inode *inode = old_dentry->d_inode;
				4022	unsigned max_links = dir->i_sb->s_max_links;
				4023	int error;
				4024
				4025	if (!inode)
				4026	return -ENOENT;
				4027
				4028	error = may_create(dir, new_dentry);
				4029	if (error)
				4030	return error;
				4031
				4032	if (dir->i_sb != inode->i_sb)
				4033	return -EXDEV;
				4034
				4035	/*
				4036	* A link to an append-only or immutable file cannot be created.
				4037	*/
				4038	if (IS_APPEND(inode) \|\| IS_IMMUTABLE(inode))
				4039	return -EPERM;
				4040	/*
				4041	* Updating the link count will likely cause i_uid and i_gid to
				4042	* be writen back improperly if their true value is unknown to
				4043	* the vfs.
				4044	*/
				4045	if (HAS_UNMAPPED_ID(inode))
				4046	return -EPERM;
				4047	if (!dir->i_op->link)
				4048	return -EPERM;
				4049	if (S_ISDIR(inode->i_mode))
				4050	return -EPERM;
				4051
				4052	error = security_inode_link(old_dentry, dir, new_dentry);
				4053	if (error)
				4054	return error;
				4055
				4056	inode_lock(inode);
				4057	/* Make sure we don't allow creating hardlink to an unlinked file */
				4058	if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE))
				4059	error = -ENOENT;
				4060	else if (max_links && inode->i_nlink >= max_links)
				4061	error = -EMLINK;
				4062	else {
				4063	error = try_break_deleg(inode, delegated_inode);
				4064	if (!error)
				4065	error = dir->i_op->link(old_dentry, dir, new_dentry);
				4066	}
				4067
				4068	if (!error && (inode->i_state & I_LINKABLE)) {
				4069	spin_lock(&inode->i_lock);
				4070	inode->i_state &= ~I_LINKABLE;
				4071	spin_unlock(&inode->i_lock);
				4072	}
				4073	inode_unlock(inode);
				4074	if (!error)
				4075	fsnotify_link(dir, inode, new_dentry);
				4076	return error;
				4077	}
				4078	EXPORT_SYMBOL(vfs_link);
				4079
				4080	/*
				4081	* Hardlinks are often used in delicate situations. We avoid
				4082	* security-related surprises by not following symlinks on the
				4083	* newname. --KAB
				4084	*
				4085	* We don't follow them on the oldname either to be compatible
				4086	* with linux 2.0, and to avoid hard-linking to directories
				4087	* and other special files. --ADM
				4088	*/
Olivier Deprez	157378f	2022-04-04 15:47:50 +0200	[diff] [blame^]	4089	static int do_linkat(int olddfd, const char __user *oldname, int newdfd,
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4090	const char __user *newname, int flags)
				4091	{
				4092	struct dentry *new_dentry;
				4093	struct path old_path, new_path;
				4094	struct inode *delegated_inode = NULL;
				4095	int how = 0;
				4096	int error;
				4097
				4098	if ((flags & ~(AT_SYMLINK_FOLLOW \| AT_EMPTY_PATH)) != 0)
				4099	return -EINVAL;
				4100	/*
				4101	* To use null names we require CAP_DAC_READ_SEARCH
				4102	* This ensures that not everyone will be able to create
				4103	* handlink using the passed filedescriptor.
				4104	*/
				4105	if (flags & AT_EMPTY_PATH) {
				4106	if (!capable(CAP_DAC_READ_SEARCH))
				4107	return -ENOENT;
				4108	how = LOOKUP_EMPTY;
				4109	}
				4110
				4111	if (flags & AT_SYMLINK_FOLLOW)
				4112	how \|= LOOKUP_FOLLOW;
				4113	retry:
				4114	error = user_path_at(olddfd, oldname, how, &old_path);
				4115	if (error)
				4116	return error;
				4117
				4118	new_dentry = user_path_create(newdfd, newname, &new_path,
				4119	(how & LOOKUP_REVAL));
				4120	error = PTR_ERR(new_dentry);
				4121	if (IS_ERR(new_dentry))
				4122	goto out;
				4123
				4124	error = -EXDEV;
				4125	if (old_path.mnt != new_path.mnt)
				4126	goto out_dput;
				4127	error = may_linkat(&old_path);
				4128	if (unlikely(error))
				4129	goto out_dput;
				4130	error = security_path_link(old_path.dentry, &new_path, new_dentry);
				4131	if (error)
				4132	goto out_dput;
				4133	error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode);
				4134	out_dput:
				4135	done_path_create(&new_path, new_dentry);
				4136	if (delegated_inode) {
				4137	error = break_deleg_wait(&delegated_inode);
				4138	if (!error) {
				4139	path_put(&old_path);
				4140	goto retry;
				4141	}
				4142	}
				4143	if (retry_estale(error, how)) {
				4144	path_put(&old_path);
				4145	how \|= LOOKUP_REVAL;
				4146	goto retry;
				4147	}
				4148	out:
				4149	path_put(&old_path);
				4150
				4151	return error;
				4152	}
				4153
				4154	SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
				4155	int, newdfd, const char __user *, newname, int, flags)
				4156	{
				4157	return do_linkat(olddfd, oldname, newdfd, newname, flags);
				4158	}
				4159
				4160	SYSCALL_DEFINE2(link, const char __user , oldname, const char __user , newname)
				4161	{
				4162	return do_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
				4163	}
				4164
				4165	/**
				4166	* vfs_rename - rename a filesystem object
				4167	* @old_dir: parent of source
				4168	* @old_dentry: source
				4169	* @new_dir: parent of destination
				4170	* @new_dentry: destination
				4171	* @delegated_inode: returns an inode needing a delegation break
				4172	* @flags: rename flags
				4173	*
				4174	* The caller must hold multiple mutexes--see lock_rename()).
				4175	*
				4176	* If vfs_rename discovers a delegation in need of breaking at either
				4177	* the source or destination, it will return -EWOULDBLOCK and return a
				4178	* reference to the inode in delegated_inode. The caller should then
				4179	* break the delegation and retry. Because breaking a delegation may
				4180	* take a long time, the caller should drop all locks before doing
				4181	* so.
				4182	*
				4183	* Alternatively, a caller may pass NULL for delegated_inode. This may
				4184	* be appropriate for callers that expect the underlying filesystem not
				4185	* to be NFS exported.
				4186	*
				4187	* The worst of all namespace operations - renaming directory. "Perverted"
				4188	* doesn't even start to describe it. Somebody in UCB had a heck of a trip...
				4189	* Problems:
				4190	*
				4191	* a) we can get into loop creation.
				4192	* b) race potential - two innocent renames can create a loop together.
				4193	* That's where 4.4 screws up. Current fix: serialization on
				4194	* sb->s_vfs_rename_mutex. We might be more accurate, but that's another
				4195	* story.
				4196	* c) we have to lock _four_ objects - parents and victim (if it exists),
				4197	* and source (if it is not a directory).
				4198	* And that - after we got ->i_mutex on parents (until then we don't know
				4199	* whether the target exists). Solution: try to be smart with locking
				4200	* order for inodes. We rely on the fact that tree topology may change
				4201	* only under ->s_vfs_rename_mutex _and_ that parent of the object we
				4202	* move will be locked. Thus we can rank directories by the tree
				4203	* (ancestors first) and rank all non-directories after them.
				4204	* That works since everybody except rename does "lock parent, lookup,
				4205	* lock child" and rename is under ->s_vfs_rename_mutex.
				4206	* HOWEVER, it relies on the assumption that any object with ->lookup()
				4207	* has no more than 1 dentry. If "hybrid" objects will ever appear,
				4208	* we'd better make sure that there's no link(2) for them.
				4209	* d) conversion from fhandle to dentry may come in the wrong moment - when
				4210	* we are removing the target. Solution: we will have to grab ->i_mutex
				4211	* in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on
				4212	* ->i_mutex on parents, which works but leads to some truly excessive
				4213	* locking].
				4214	*/
				4215	int vfs_rename(struct inode old_dir, struct dentry old_dentry,
				4216	struct inode new_dir, struct dentry new_dentry,
				4217	struct inode **delegated_inode, unsigned int flags)
				4218	{
				4219	int error;
				4220	bool is_dir = d_is_dir(old_dentry);
				4221	struct inode *source = old_dentry->d_inode;
				4222	struct inode *target = new_dentry->d_inode;
				4223	bool new_is_dir = false;
				4224	unsigned max_links = new_dir->i_sb->s_max_links;
				4225	struct name_snapshot old_name;
				4226
				4227	if (source == target)
				4228	return 0;
				4229
				4230	error = may_delete(old_dir, old_dentry, is_dir);
				4231	if (error)
				4232	return error;
				4233
				4234	if (!target) {
				4235	error = may_create(new_dir, new_dentry);
				4236	} else {
				4237	new_is_dir = d_is_dir(new_dentry);
				4238
				4239	if (!(flags & RENAME_EXCHANGE))
				4240	error = may_delete(new_dir, new_dentry, is_dir);
				4241	else
				4242	error = may_delete(new_dir, new_dentry, new_is_dir);
				4243	}
				4244	if (error)
				4245	return error;
				4246
				4247	if (!old_dir->i_op->rename)
				4248	return -EPERM;
				4249
				4250	/*
				4251	* If we are going to change the parent - check write permissions,
				4252	* we'll need to flip '..'.
				4253	*/
				4254	if (new_dir != old_dir) {
				4255	if (is_dir) {
				4256	error = inode_permission(source, MAY_WRITE);
				4257	if (error)
				4258	return error;
				4259	}
				4260	if ((flags & RENAME_EXCHANGE) && new_is_dir) {
				4261	error = inode_permission(target, MAY_WRITE);
				4262	if (error)
				4263	return error;
				4264	}
				4265	}
				4266
				4267	error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry,
				4268	flags);
				4269	if (error)
				4270	return error;
				4271
				4272	take_dentry_name_snapshot(&old_name, old_dentry);
				4273	dget(new_dentry);
				4274	if (!is_dir \|\| (flags & RENAME_EXCHANGE))
				4275	lock_two_nondirectories(source, target);
				4276	else if (target)
				4277	inode_lock(target);
				4278
				4279	error = -EBUSY;
				4280	if (is_local_mountpoint(old_dentry) \|\| is_local_mountpoint(new_dentry))
				4281	goto out;
				4282
				4283	if (max_links && new_dir != old_dir) {
				4284	error = -EMLINK;
				4285	if (is_dir && !new_is_dir && new_dir->i_nlink >= max_links)
				4286	goto out;
				4287	if ((flags & RENAME_EXCHANGE) && !is_dir && new_is_dir &&
				4288	old_dir->i_nlink >= max_links)
				4289	goto out;
				4290	}
				4291	if (!is_dir) {
				4292	error = try_break_deleg(source, delegated_inode);
				4293	if (error)
				4294	goto out;
				4295	}
				4296	if (target && !new_is_dir) {
				4297	error = try_break_deleg(target, delegated_inode);
				4298	if (error)
				4299	goto out;
				4300	}
				4301	error = old_dir->i_op->rename(old_dir, old_dentry,
				4302	new_dir, new_dentry, flags);
				4303	if (error)
				4304	goto out;
				4305
				4306	if (!(flags & RENAME_EXCHANGE) && target) {
				4307	if (is_dir) {
				4308	shrink_dcache_parent(new_dentry);
				4309	target->i_flags \|= S_DEAD;
				4310	}
				4311	dont_mount(new_dentry);
				4312	detach_mounts(new_dentry);
				4313	}
				4314	if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) {
				4315	if (!(flags & RENAME_EXCHANGE))
				4316	d_move(old_dentry, new_dentry);
				4317	else
				4318	d_exchange(old_dentry, new_dentry);
				4319	}
				4320	out:
				4321	if (!is_dir \|\| (flags & RENAME_EXCHANGE))
				4322	unlock_two_nondirectories(source, target);
				4323	else if (target)
				4324	inode_unlock(target);
				4325	dput(new_dentry);
				4326	if (!error) {
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	4327	fsnotify_move(old_dir, new_dir, &old_name.name, is_dir,
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4328	!(flags & RENAME_EXCHANGE) ? target : NULL, old_dentry);
				4329	if (flags & RENAME_EXCHANGE) {
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	4330	fsnotify_move(new_dir, old_dir, &old_dentry->d_name,
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4331	new_is_dir, NULL, new_dentry);
				4332	}
				4333	}
				4334	release_dentry_name_snapshot(&old_name);
				4335
				4336	return error;
				4337	}
				4338	EXPORT_SYMBOL(vfs_rename);
				4339
				4340	static int do_renameat2(int olddfd, const char __user *oldname, int newdfd,
				4341	const char __user *newname, unsigned int flags)
				4342	{
				4343	struct dentry old_dentry, new_dentry;
				4344	struct dentry *trap;
				4345	struct path old_path, new_path;
				4346	struct qstr old_last, new_last;
				4347	int old_type, new_type;
				4348	struct inode *delegated_inode = NULL;
				4349	struct filename *from;
				4350	struct filename *to;
				4351	unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET;
				4352	bool should_retry = false;
				4353	int error;
				4354
				4355	if (flags & ~(RENAME_NOREPLACE \| RENAME_EXCHANGE \| RENAME_WHITEOUT))
				4356	return -EINVAL;
				4357
				4358	if ((flags & (RENAME_NOREPLACE \| RENAME_WHITEOUT)) &&
				4359	(flags & RENAME_EXCHANGE))
				4360	return -EINVAL;
				4361
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4362	if (flags & RENAME_EXCHANGE)
				4363	target_flags = 0;
				4364
				4365	retry:
				4366	from = filename_parentat(olddfd, getname(oldname), lookup_flags,
				4367	&old_path, &old_last, &old_type);
				4368	if (IS_ERR(from)) {
				4369	error = PTR_ERR(from);
				4370	goto exit;
				4371	}
				4372
				4373	to = filename_parentat(newdfd, getname(newname), lookup_flags,
				4374	&new_path, &new_last, &new_type);
				4375	if (IS_ERR(to)) {
				4376	error = PTR_ERR(to);
				4377	goto exit1;
				4378	}
				4379
				4380	error = -EXDEV;
				4381	if (old_path.mnt != new_path.mnt)
				4382	goto exit2;
				4383
				4384	error = -EBUSY;
				4385	if (old_type != LAST_NORM)
				4386	goto exit2;
				4387
				4388	if (flags & RENAME_NOREPLACE)
				4389	error = -EEXIST;
				4390	if (new_type != LAST_NORM)
				4391	goto exit2;
				4392
				4393	error = mnt_want_write(old_path.mnt);
				4394	if (error)
				4395	goto exit2;
				4396
				4397	retry_deleg:
				4398	trap = lock_rename(new_path.dentry, old_path.dentry);
				4399
				4400	old_dentry = __lookup_hash(&old_last, old_path.dentry, lookup_flags);
				4401	error = PTR_ERR(old_dentry);
				4402	if (IS_ERR(old_dentry))
				4403	goto exit3;
				4404	/* source must exist */
				4405	error = -ENOENT;
				4406	if (d_is_negative(old_dentry))
				4407	goto exit4;
				4408	new_dentry = __lookup_hash(&new_last, new_path.dentry, lookup_flags \| target_flags);
				4409	error = PTR_ERR(new_dentry);
				4410	if (IS_ERR(new_dentry))
				4411	goto exit4;
				4412	error = -EEXIST;
				4413	if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry))
				4414	goto exit5;
				4415	if (flags & RENAME_EXCHANGE) {
				4416	error = -ENOENT;
				4417	if (d_is_negative(new_dentry))
				4418	goto exit5;
				4419
				4420	if (!d_is_dir(new_dentry)) {
				4421	error = -ENOTDIR;
				4422	if (new_last.name[new_last.len])
				4423	goto exit5;
				4424	}
				4425	}
				4426	/* unless the source is a directory trailing slashes give -ENOTDIR */
				4427	if (!d_is_dir(old_dentry)) {
				4428	error = -ENOTDIR;
				4429	if (old_last.name[old_last.len])
				4430	goto exit5;
				4431	if (!(flags & RENAME_EXCHANGE) && new_last.name[new_last.len])
				4432	goto exit5;
				4433	}
				4434	/* source should not be ancestor of target */
				4435	error = -EINVAL;
				4436	if (old_dentry == trap)
				4437	goto exit5;
				4438	/* target should not be an ancestor of source */
				4439	if (!(flags & RENAME_EXCHANGE))
				4440	error = -ENOTEMPTY;
				4441	if (new_dentry == trap)
				4442	goto exit5;
				4443
				4444	error = security_path_rename(&old_path, old_dentry,
				4445	&new_path, new_dentry, flags);
				4446	if (error)
				4447	goto exit5;
				4448	error = vfs_rename(old_path.dentry->d_inode, old_dentry,
				4449	new_path.dentry->d_inode, new_dentry,
				4450	&delegated_inode, flags);
				4451	exit5:
				4452	dput(new_dentry);
				4453	exit4:
				4454	dput(old_dentry);
				4455	exit3:
				4456	unlock_rename(new_path.dentry, old_path.dentry);
				4457	if (delegated_inode) {
				4458	error = break_deleg_wait(&delegated_inode);
				4459	if (!error)
				4460	goto retry_deleg;
				4461	}
				4462	mnt_drop_write(old_path.mnt);
				4463	exit2:
				4464	if (retry_estale(error, lookup_flags))
				4465	should_retry = true;
				4466	path_put(&new_path);
				4467	putname(to);
				4468	exit1:
				4469	path_put(&old_path);
				4470	putname(from);
				4471	if (should_retry) {
				4472	should_retry = false;
				4473	lookup_flags \|= LOOKUP_REVAL;
				4474	goto retry;
				4475	}
				4476	exit:
				4477	return error;
				4478	}
				4479
				4480	SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
				4481	int, newdfd, const char __user *, newname, unsigned int, flags)
				4482	{
				4483	return do_renameat2(olddfd, oldname, newdfd, newname, flags);
				4484	}
				4485
				4486	SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
				4487	int, newdfd, const char __user *, newname)
				4488	{
				4489	return do_renameat2(olddfd, oldname, newdfd, newname, 0);
				4490	}
				4491
				4492	SYSCALL_DEFINE2(rename, const char __user , oldname, const char __user , newname)
				4493	{
				4494	return do_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
				4495	}
				4496
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4497	int readlink_copy(char __user buffer, int buflen, const char link)
				4498	{
				4499	int len = PTR_ERR(link);
				4500	if (IS_ERR(link))
				4501	goto out;
				4502
				4503	len = strlen(link);
				4504	if (len > (unsigned) buflen)
				4505	len = buflen;
				4506	if (copy_to_user(buffer, link, len))
				4507	len = -EFAULT;
				4508	out:
				4509	return len;
				4510	}
				4511
				4512	/**
				4513	* vfs_readlink - copy symlink body into userspace buffer
				4514	* @dentry: dentry on which to get symbolic link
				4515	* @buffer: user memory pointer
				4516	* @buflen: size of buffer
				4517	*
				4518	* Does not touch atime. That's up to the caller if necessary
				4519	*
				4520	* Does not call security hook.
				4521	*/
				4522	int vfs_readlink(struct dentry dentry, char __user buffer, int buflen)
				4523	{
				4524	struct inode *inode = d_inode(dentry);
				4525	DEFINE_DELAYED_CALL(done);
				4526	const char *link;
				4527	int res;
				4528
				4529	if (unlikely(!(inode->i_opflags & IOP_DEFAULT_READLINK))) {
				4530	if (unlikely(inode->i_op->readlink))
				4531	return inode->i_op->readlink(dentry, buffer, buflen);
				4532
				4533	if (!d_is_symlink(dentry))
				4534	return -EINVAL;
				4535
				4536	spin_lock(&inode->i_lock);
				4537	inode->i_opflags \|= IOP_DEFAULT_READLINK;
				4538	spin_unlock(&inode->i_lock);
				4539	}
				4540
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	4541	link = READ_ONCE(inode->i_link);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	4542	if (!link) {
				4543	link = inode->i_op->get_link(dentry, inode, &done);
				4544	if (IS_ERR(link))
				4545	return PTR_ERR(link);
				4546	}
				4547	res = readlink_copy(buffer, buflen, link);
				4548	do_delayed_call(&done);
				4549	return res;
				4550	}
				4551	EXPORT_SYMBOL(vfs_readlink);
				4552
				4553	/**
				4554	* vfs_get_link - get symlink body
				4555	* @dentry: dentry on which to get symbolic link
				4556	* @done: caller needs to free returned data with this
				4557	*
				4558	* Calls security hook and i_op->get_link() on the supplied inode.
				4559	*
				4560	* It does not touch atime. That's up to the caller if necessary.
				4561	*
				4562	* Does not work on "special" symlinks like /proc/$$/fd/N
				4563	*/
				4564	const char vfs_get_link(struct dentry dentry, struct delayed_call *done)
				4565	{
				4566	const char *res = ERR_PTR(-EINVAL);
				4567	struct inode *inode = d_inode(dentry);
				4568
				4569	if (d_is_symlink(dentry)) {
				4570	res = ERR_PTR(security_inode_readlink(dentry));
				4571	if (!res)
				4572	res = inode->i_op->get_link(dentry, inode, done);
				4573	}
				4574	return res;
				4575	}
				4576	EXPORT_SYMBOL(vfs_get_link);
				4577
				4578	/* get the link contents into pagecache */
				4579	const char page_get_link(struct dentry dentry, struct inode *inode,
				4580	struct delayed_call *callback)
				4581	{
				4582	char *kaddr;
				4583	struct page *page;
				4584	struct address_space *mapping = inode->i_mapping;
				4585
				4586	if (!dentry) {
				4587	page = find_get_page(mapping, 0);
				4588	if (!page)
				4589	return ERR_PTR(-ECHILD);
				4590	if (!PageUptodate(page)) {
				4591	put_page(page);
				4592	return ERR_PTR(-ECHILD);
				4593	}
				4594	} else {
				4595	page = read_mapping_page(mapping, 0, NULL);
				4596	if (IS_ERR(page))
				4597	return (char*)page;
				4598	}
				4599	set_delayed_call(callback, page_put_link, page);
				4600	BUG_ON(mapping_gfp_mask(mapping) & __GFP_HIGHMEM);
				4601	kaddr = page_address(page);
				4602	nd_terminate_link(kaddr, inode->i_size, PAGE_SIZE - 1);
				4603	return kaddr;
				4604	}
				4605
				4606	EXPORT_SYMBOL(page_get_link);
				4607
				4608	void page_put_link(void *arg)
				4609	{
				4610	put_page(arg);
				4611	}
				4612	EXPORT_SYMBOL(page_put_link);
				4613
				4614	int page_readlink(struct dentry dentry, char __user buffer, int buflen)
				4615	{
				4616	DEFINE_DELAYED_CALL(done);
				4617	int res = readlink_copy(buffer, buflen,
				4618	page_get_link(dentry, d_inode(dentry),
				4619	&done));
				4620	do_delayed_call(&done);
				4621	return res;
				4622	}
				4623	EXPORT_SYMBOL(page_readlink);
				4624
				4625	/*
				4626	* The nofs argument instructs pagecache_write_begin to pass AOP_FLAG_NOFS
				4627	*/
				4628	int __page_symlink(struct inode inode, const char symname, int len, int nofs)
				4629	{
				4630	struct address_space *mapping = inode->i_mapping;
				4631	struct page *page;
				4632	void *fsdata;
				4633	int err;
				4634	unsigned int flags = 0;
				4635	if (nofs)
				4636	flags \|= AOP_FLAG_NOFS;
				4637
				4638	retry:
				4639	err = pagecache_write_begin(NULL, mapping, 0, len-1,
				4640	flags, &page, &fsdata);
				4641	if (err)
				4642	goto fail;
				4643
				4644	memcpy(page_address(page), symname, len-1);
				4645
				4646	err = pagecache_write_end(NULL, mapping, 0, len-1, len-1,
				4647	page, fsdata);
				4648	if (err < 0)
				4649	goto fail;
				4650	if (err < len-1)
				4651	goto retry;
				4652
				4653	mark_inode_dirty(inode);
				4654	return 0;
				4655	fail:
				4656	return err;
				4657	}
				4658	EXPORT_SYMBOL(__page_symlink);
				4659
				4660	int page_symlink(struct inode inode, const char symname, int len)
				4661	{
				4662	return __page_symlink(inode, symname, len,
				4663	!mapping_gfp_constraint(inode->i_mapping, __GFP_FS));
				4664	}
				4665	EXPORT_SYMBOL(page_symlink);
				4666
				4667	const struct inode_operations page_symlink_inode_operations = {
				4668	.get_link = page_get_link,
				4669	};
				4670	EXPORT_SYMBOL(page_symlink_inode_operations);