Blame - fs/overlayfs/super.c - hafnium/third_party/linux.git

blob: 0fb0a59a5e5c2f7191754cc9fae16d34c42d5c32 [file] [log] [blame]

Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1	/*
				2	*
				3	* Copyright (C) 2011 Novell Inc.
				4	*
				5	* This program is free software; you can redistribute it and/or modify it
				6	* under the terms of the GNU General Public License version 2 as published by
				7	* the Free Software Foundation.
				8	*/
				9
				10	#include <uapi/linux/magic.h>
				11	#include <linux/fs.h>
				12	#include <linux/namei.h>
				13	#include <linux/xattr.h>
				14	#include <linux/mount.h>
				15	#include <linux/parser.h>
				16	#include <linux/module.h>
				17	#include <linux/statfs.h>
				18	#include <linux/seq_file.h>
				19	#include <linux/posix_acl_xattr.h>
				20	#include <linux/exportfs.h>
				21	#include "overlayfs.h"
				22
				23	MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
				24	MODULE_DESCRIPTION("Overlay filesystem");
				25	MODULE_LICENSE("GPL");
				26
				27
				28	struct ovl_dir_cache;
				29
				30	#define OVL_MAX_STACK 500
				31
				32	static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR);
				33	module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
				34	MODULE_PARM_DESC(ovl_redirect_dir_def,
				35	"Default to on or off for the redirect_dir feature");
				36
				37	static bool ovl_redirect_always_follow =
				38	IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW);
				39	module_param_named(redirect_always_follow, ovl_redirect_always_follow,
				40	bool, 0644);
				41	MODULE_PARM_DESC(ovl_redirect_always_follow,
				42	"Follow redirects even if redirect_dir feature is turned off");
				43
				44	static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);
				45	module_param_named(index, ovl_index_def, bool, 0644);
				46	MODULE_PARM_DESC(ovl_index_def,
				47	"Default to on or off for the inodes index feature");
				48
				49	static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT);
				50	module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644);
				51	MODULE_PARM_DESC(ovl_nfs_export_def,
				52	"Default to on or off for the NFS export feature");
				53
				54	static bool ovl_xino_auto_def = IS_ENABLED(CONFIG_OVERLAY_FS_XINO_AUTO);
				55	module_param_named(xino_auto, ovl_xino_auto_def, bool, 0644);
				56	MODULE_PARM_DESC(ovl_xino_auto_def,
				57	"Auto enable xino feature");
				58
				59	static void ovl_entry_stack_free(struct ovl_entry *oe)
				60	{
				61	unsigned int i;
				62
				63	for (i = 0; i < oe->numlower; i++)
				64	dput(oe->lowerstack[i].dentry);
				65	}
				66
				67	static bool ovl_metacopy_def = IS_ENABLED(CONFIG_OVERLAY_FS_METACOPY);
				68	module_param_named(metacopy, ovl_metacopy_def, bool, 0644);
				69	MODULE_PARM_DESC(ovl_metacopy_def,
				70	"Default to on or off for the metadata only copy up feature");
				71
				72	static void ovl_dentry_release(struct dentry *dentry)
				73	{
				74	struct ovl_entry *oe = dentry->d_fsdata;
				75
				76	if (oe) {
				77	ovl_entry_stack_free(oe);
				78	kfree_rcu(oe, rcu);
				79	}
				80	}
				81
				82	static struct dentry ovl_d_real(struct dentry dentry,
				83	const struct inode *inode)
				84	{
				85	struct dentry *real;
				86
				87	/* It's an overlay file */
				88	if (inode && d_inode(dentry) == inode)
				89	return dentry;
				90
				91	if (!d_is_reg(dentry)) {
				92	if (!inode \|\| inode == d_inode(dentry))
				93	return dentry;
				94	goto bug;
				95	}
				96
				97	real = ovl_dentry_upper(dentry);
				98	if (real && (inode == d_inode(real)))
				99	return real;
				100
				101	if (real && !inode && ovl_has_upperdata(d_inode(dentry)))
				102	return real;
				103
				104	real = ovl_dentry_lowerdata(dentry);
				105	if (!real)
				106	goto bug;
				107
				108	/* Handle recursion */
				109	real = d_real(real, inode);
				110
				111	if (!inode \|\| inode == d_inode(real))
				112	return real;
				113	bug:
				114	WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry,
				115	inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0);
				116	return dentry;
				117	}
				118
				119	static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
				120	{
				121	struct ovl_entry *oe = dentry->d_fsdata;
				122	unsigned int i;
				123	int ret = 1;
				124
				125	for (i = 0; i < oe->numlower; i++) {
				126	struct dentry *d = oe->lowerstack[i].dentry;
				127
				128	if (d->d_flags & DCACHE_OP_REVALIDATE) {
				129	ret = d->d_op->d_revalidate(d, flags);
				130	if (ret < 0)
				131	return ret;
				132	if (!ret) {
				133	if (!(flags & LOOKUP_RCU))
				134	d_invalidate(d);
				135	return -ESTALE;
				136	}
				137	}
				138	}
				139	return 1;
				140	}
				141
				142	static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)
				143	{
				144	struct ovl_entry *oe = dentry->d_fsdata;
				145	unsigned int i;
				146	int ret = 1;
				147
				148	for (i = 0; i < oe->numlower; i++) {
				149	struct dentry *d = oe->lowerstack[i].dentry;
				150
				151	if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) {
				152	ret = d->d_op->d_weak_revalidate(d, flags);
				153	if (ret <= 0)
				154	break;
				155	}
				156	}
				157	return ret;
				158	}
				159
				160	static const struct dentry_operations ovl_dentry_operations = {
				161	.d_release = ovl_dentry_release,
				162	.d_real = ovl_d_real,
				163	};
				164
				165	static const struct dentry_operations ovl_reval_dentry_operations = {
				166	.d_release = ovl_dentry_release,
				167	.d_real = ovl_d_real,
				168	.d_revalidate = ovl_dentry_revalidate,
				169	.d_weak_revalidate = ovl_dentry_weak_revalidate,
				170	};
				171
				172	static struct kmem_cache *ovl_inode_cachep;
				173
				174	static struct inode ovl_alloc_inode(struct super_block sb)
				175	{
				176	struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL);
				177
				178	if (!oi)
				179	return NULL;
				180
				181	oi->cache = NULL;
				182	oi->redirect = NULL;
				183	oi->version = 0;
				184	oi->flags = 0;
				185	oi->__upperdentry = NULL;
				186	oi->lower = NULL;
				187	oi->lowerdata = NULL;
				188	mutex_init(&oi->lock);
				189
				190	return &oi->vfs_inode;
				191	}
				192
				193	static void ovl_i_callback(struct rcu_head *head)
				194	{
				195	struct inode *inode = container_of(head, struct inode, i_rcu);
				196
				197	kmem_cache_free(ovl_inode_cachep, OVL_I(inode));
				198	}
				199
				200	static void ovl_destroy_inode(struct inode *inode)
				201	{
				202	struct ovl_inode *oi = OVL_I(inode);
				203
				204	dput(oi->__upperdentry);
				205	iput(oi->lower);
				206	if (S_ISDIR(inode->i_mode))
				207	ovl_dir_cache_free(inode);
				208	else
				209	iput(oi->lowerdata);
				210	kfree(oi->redirect);
				211	mutex_destroy(&oi->lock);
				212
				213	call_rcu(&inode->i_rcu, ovl_i_callback);
				214	}
				215
				216	static void ovl_free_fs(struct ovl_fs *ofs)
				217	{
				218	unsigned i;
				219
				220	dput(ofs->indexdir);
				221	dput(ofs->workdir);
				222	if (ofs->workdir_locked)
				223	ovl_inuse_unlock(ofs->workbasedir);
				224	dput(ofs->workbasedir);
				225	if (ofs->upperdir_locked)
				226	ovl_inuse_unlock(ofs->upper_mnt->mnt_root);
				227	mntput(ofs->upper_mnt);
				228	for (i = 0; i < ofs->numlower; i++)
				229	mntput(ofs->lower_layers[i].mnt);
				230	for (i = 0; i < ofs->numlowerfs; i++)
				231	free_anon_bdev(ofs->lower_fs[i].pseudo_dev);
				232	kfree(ofs->lower_layers);
				233	kfree(ofs->lower_fs);
				234
				235	kfree(ofs->config.lowerdir);
				236	kfree(ofs->config.upperdir);
				237	kfree(ofs->config.workdir);
				238	kfree(ofs->config.redirect_mode);
				239	if (ofs->creator_cred)
				240	put_cred(ofs->creator_cred);
				241	kfree(ofs);
				242	}
				243
				244	static void ovl_put_super(struct super_block *sb)
				245	{
				246	struct ovl_fs *ofs = sb->s_fs_info;
				247
				248	ovl_free_fs(ofs);
				249	}
				250
				251	/* Sync real dirty inodes in upper filesystem (if it exists) */
				252	static int ovl_sync_fs(struct super_block *sb, int wait)
				253	{
				254	struct ovl_fs *ofs = sb->s_fs_info;
				255	struct super_block *upper_sb;
				256	int ret;
				257
				258	if (!ofs->upper_mnt)
				259	return 0;
				260
				261	/*
				262	* If this is a sync(2) call or an emergency sync, all the super blocks
				263	* will be iterated, including upper_sb, so no need to do anything.
				264	*
				265	* If this is a syncfs(2) call, then we do need to call
				266	* sync_filesystem() on upper_sb, but enough if we do it when being
				267	* called with wait == 1.
				268	*/
				269	if (!wait)
				270	return 0;
				271
				272	upper_sb = ofs->upper_mnt->mnt_sb;
				273
				274	down_read(&upper_sb->s_umount);
				275	ret = sync_filesystem(upper_sb);
				276	up_read(&upper_sb->s_umount);
				277
				278	return ret;
				279	}
				280
				281	/**
				282	* ovl_statfs
				283	* @sb: The overlayfs super block
				284	* @buf: The struct kstatfs to fill in with stats
				285	*
				286	* Get the filesystem statistics. As writes always target the upper layer
				287	* filesystem pass the statfs to the upper filesystem (if it exists)
				288	*/
				289	static int ovl_statfs(struct dentry dentry, struct kstatfs buf)
				290	{
				291	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
				292	struct dentry *root_dentry = dentry->d_sb->s_root;
				293	struct path path;
				294	int err;
				295
				296	ovl_path_real(root_dentry, &path);
				297
				298	err = vfs_statfs(&path, buf);
				299	if (!err) {
				300	buf->f_namelen = ofs->namelen;
				301	buf->f_type = OVERLAYFS_SUPER_MAGIC;
				302	}
				303
				304	return err;
				305	}
				306
				307	/* Will this overlay be forced to mount/remount ro? */
				308	static bool ovl_force_readonly(struct ovl_fs *ofs)
				309	{
				310	return (!ofs->upper_mnt \|\| !ofs->workdir);
				311	}
				312
				313	static const char *ovl_redirect_mode_def(void)
				314	{
				315	return ovl_redirect_dir_def ? "on" : "off";
				316	}
				317
				318	enum {
				319	OVL_XINO_OFF,
				320	OVL_XINO_AUTO,
				321	OVL_XINO_ON,
				322	};
				323
				324	static const char * const ovl_xino_str[] = {
				325	"off",
				326	"auto",
				327	"on",
				328	};
				329
				330	static inline int ovl_xino_def(void)
				331	{
				332	return ovl_xino_auto_def ? OVL_XINO_AUTO : OVL_XINO_OFF;
				333	}
				334
				335	/**
				336	* ovl_show_options
				337	*
				338	* Prints the mount options for a given superblock.
				339	* Returns zero; does not fail.
				340	*/
				341	static int ovl_show_options(struct seq_file m, struct dentry dentry)
				342	{
				343	struct super_block *sb = dentry->d_sb;
				344	struct ovl_fs *ofs = sb->s_fs_info;
				345
				346	seq_show_option(m, "lowerdir", ofs->config.lowerdir);
				347	if (ofs->config.upperdir) {
				348	seq_show_option(m, "upperdir", ofs->config.upperdir);
				349	seq_show_option(m, "workdir", ofs->config.workdir);
				350	}
				351	if (ofs->config.default_permissions)
				352	seq_puts(m, ",default_permissions");
				353	if (strcmp(ofs->config.redirect_mode, ovl_redirect_mode_def()) != 0)
				354	seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode);
				355	if (ofs->config.index != ovl_index_def)
				356	seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off");
				357	if (ofs->config.nfs_export != ovl_nfs_export_def)
				358	seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ?
				359	"on" : "off");
				360	if (ofs->config.xino != ovl_xino_def())
				361	seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]);
				362	if (ofs->config.metacopy != ovl_metacopy_def)
				363	seq_printf(m, ",metacopy=%s",
				364	ofs->config.metacopy ? "on" : "off");
				365	return 0;
				366	}
				367
				368	static int ovl_remount(struct super_block sb, int flags, char *data)
				369	{
				370	struct ovl_fs *ofs = sb->s_fs_info;
				371
				372	if (!(*flags & SB_RDONLY) && ovl_force_readonly(ofs))
				373	return -EROFS;
				374
				375	return 0;
				376	}
				377
				378	static const struct super_operations ovl_super_operations = {
				379	.alloc_inode = ovl_alloc_inode,
				380	.destroy_inode = ovl_destroy_inode,
				381	.drop_inode = generic_delete_inode,
				382	.put_super = ovl_put_super,
				383	.sync_fs = ovl_sync_fs,
				384	.statfs = ovl_statfs,
				385	.show_options = ovl_show_options,
				386	.remount_fs = ovl_remount,
				387	};
				388
				389	enum {
				390	OPT_LOWERDIR,
				391	OPT_UPPERDIR,
				392	OPT_WORKDIR,
				393	OPT_DEFAULT_PERMISSIONS,
				394	OPT_REDIRECT_DIR,
				395	OPT_INDEX_ON,
				396	OPT_INDEX_OFF,
				397	OPT_NFS_EXPORT_ON,
				398	OPT_NFS_EXPORT_OFF,
				399	OPT_XINO_ON,
				400	OPT_XINO_OFF,
				401	OPT_XINO_AUTO,
				402	OPT_METACOPY_ON,
				403	OPT_METACOPY_OFF,
				404	OPT_ERR,
				405	};
				406
				407	static const match_table_t ovl_tokens = {
				408	{OPT_LOWERDIR, "lowerdir=%s"},
				409	{OPT_UPPERDIR, "upperdir=%s"},
				410	{OPT_WORKDIR, "workdir=%s"},
				411	{OPT_DEFAULT_PERMISSIONS, "default_permissions"},
				412	{OPT_REDIRECT_DIR, "redirect_dir=%s"},
				413	{OPT_INDEX_ON, "index=on"},
				414	{OPT_INDEX_OFF, "index=off"},
				415	{OPT_NFS_EXPORT_ON, "nfs_export=on"},
				416	{OPT_NFS_EXPORT_OFF, "nfs_export=off"},
				417	{OPT_XINO_ON, "xino=on"},
				418	{OPT_XINO_OFF, "xino=off"},
				419	{OPT_XINO_AUTO, "xino=auto"},
				420	{OPT_METACOPY_ON, "metacopy=on"},
				421	{OPT_METACOPY_OFF, "metacopy=off"},
				422	{OPT_ERR, NULL}
				423	};
				424
				425	static char ovl_next_opt(char *s)
				426	{
				427	char sbegin = s;
				428	char *p;
				429
				430	if (sbegin == NULL)
				431	return NULL;
				432
				433	for (p = sbegin; *p; p++) {
				434	if (*p == '\\') {
				435	p++;
				436	if (!*p)
				437	break;
				438	} else if (*p == ',') {
				439	*p = '\0';
				440	*s = p + 1;
				441	return sbegin;
				442	}
				443	}
				444	*s = NULL;
				445	return sbegin;
				446	}
				447
				448	static int ovl_parse_redirect_mode(struct ovl_config config, const char mode)
				449	{
				450	if (strcmp(mode, "on") == 0) {
				451	config->redirect_dir = true;
				452	/*
				453	* Does not make sense to have redirect creation without
				454	* redirect following.
				455	*/
				456	config->redirect_follow = true;
				457	} else if (strcmp(mode, "follow") == 0) {
				458	config->redirect_follow = true;
				459	} else if (strcmp(mode, "off") == 0) {
				460	if (ovl_redirect_always_follow)
				461	config->redirect_follow = true;
				462	} else if (strcmp(mode, "nofollow") != 0) {
				463	pr_err("overlayfs: bad mount option \"redirect_dir=%s\"\n",
				464	mode);
				465	return -EINVAL;
				466	}
				467
				468	return 0;
				469	}
				470
				471	static int ovl_parse_opt(char opt, struct ovl_config config)
				472	{
				473	char *p;
				474	int err;
				475	bool metacopy_opt = false, redirect_opt = false;
				476
				477	config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL);
				478	if (!config->redirect_mode)
				479	return -ENOMEM;
				480
				481	while ((p = ovl_next_opt(&opt)) != NULL) {
				482	int token;
				483	substring_t args[MAX_OPT_ARGS];
				484
				485	if (!*p)
				486	continue;
				487
				488	token = match_token(p, ovl_tokens, args);
				489	switch (token) {
				490	case OPT_UPPERDIR:
				491	kfree(config->upperdir);
				492	config->upperdir = match_strdup(&args[0]);
				493	if (!config->upperdir)
				494	return -ENOMEM;
				495	break;
				496
				497	case OPT_LOWERDIR:
				498	kfree(config->lowerdir);
				499	config->lowerdir = match_strdup(&args[0]);
				500	if (!config->lowerdir)
				501	return -ENOMEM;
				502	break;
				503
				504	case OPT_WORKDIR:
				505	kfree(config->workdir);
				506	config->workdir = match_strdup(&args[0]);
				507	if (!config->workdir)
				508	return -ENOMEM;
				509	break;
				510
				511	case OPT_DEFAULT_PERMISSIONS:
				512	config->default_permissions = true;
				513	break;
				514
				515	case OPT_REDIRECT_DIR:
				516	kfree(config->redirect_mode);
				517	config->redirect_mode = match_strdup(&args[0]);
				518	if (!config->redirect_mode)
				519	return -ENOMEM;
				520	redirect_opt = true;
				521	break;
				522
				523	case OPT_INDEX_ON:
				524	config->index = true;
				525	break;
				526
				527	case OPT_INDEX_OFF:
				528	config->index = false;
				529	break;
				530
				531	case OPT_NFS_EXPORT_ON:
				532	config->nfs_export = true;
				533	break;
				534
				535	case OPT_NFS_EXPORT_OFF:
				536	config->nfs_export = false;
				537	break;
				538
				539	case OPT_XINO_ON:
				540	config->xino = OVL_XINO_ON;
				541	break;
				542
				543	case OPT_XINO_OFF:
				544	config->xino = OVL_XINO_OFF;
				545	break;
				546
				547	case OPT_XINO_AUTO:
				548	config->xino = OVL_XINO_AUTO;
				549	break;
				550
				551	case OPT_METACOPY_ON:
				552	config->metacopy = true;
				553	metacopy_opt = true;
				554	break;
				555
				556	case OPT_METACOPY_OFF:
				557	config->metacopy = false;
				558	break;
				559
				560	default:
				561	pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
				562	return -EINVAL;
				563	}
				564	}
				565
				566	/* Workdir is useless in non-upper mount */
				567	if (!config->upperdir && config->workdir) {
				568	pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
				569	config->workdir);
				570	kfree(config->workdir);
				571	config->workdir = NULL;
				572	}
				573
				574	err = ovl_parse_redirect_mode(config, config->redirect_mode);
				575	if (err)
				576	return err;
				577
				578	/*
				579	* This is to make the logic below simpler. It doesn't make any other
				580	* difference, since config->redirect_dir is only used for upper.
				581	*/
				582	if (!config->upperdir && config->redirect_follow)
				583	config->redirect_dir = true;
				584
				585	/* Resolve metacopy -> redirect_dir dependency */
				586	if (config->metacopy && !config->redirect_dir) {
				587	if (metacopy_opt && redirect_opt) {
				588	pr_err("overlayfs: conflicting options: metacopy=on,redirect_dir=%s\n",
				589	config->redirect_mode);
				590	return -EINVAL;
				591	}
				592	if (redirect_opt) {
				593	/*
				594	* There was an explicit redirect_dir=... that resulted
				595	* in this conflict.
				596	*/
				597	pr_info("overlayfs: disabling metacopy due to redirect_dir=%s\n",
				598	config->redirect_mode);
				599	config->metacopy = false;
				600	} else {
				601	/* Automatically enable redirect otherwise. */
				602	config->redirect_follow = config->redirect_dir = true;
				603	}
				604	}
				605
				606	return 0;
				607	}
				608
				609	#define OVL_WORKDIR_NAME "work"
				610	#define OVL_INDEXDIR_NAME "index"
				611
				612	static struct dentry ovl_workdir_create(struct ovl_fs ofs,
				613	const char *name, bool persist)
				614	{
				615	struct inode *dir = ofs->workbasedir->d_inode;
				616	struct vfsmount *mnt = ofs->upper_mnt;
				617	struct dentry *work;
				618	int err;
				619	bool retried = false;
				620	bool locked = false;
				621
				622	inode_lock_nested(dir, I_MUTEX_PARENT);
				623	locked = true;
				624
				625	retry:
				626	work = lookup_one_len(name, ofs->workbasedir, strlen(name));
				627
				628	if (!IS_ERR(work)) {
				629	struct iattr attr = {
				630	.ia_valid = ATTR_MODE,
				631	.ia_mode = S_IFDIR \| 0,
				632	};
				633
				634	if (work->d_inode) {
				635	err = -EEXIST;
				636	if (retried)
				637	goto out_dput;
				638
				639	if (persist)
				640	goto out_unlock;
				641
				642	retried = true;
				643	ovl_workdir_cleanup(dir, mnt, work, 0);
				644	dput(work);
				645	goto retry;
				646	}
				647
				648	work = ovl_create_real(dir, work, OVL_CATTR(attr.ia_mode));
				649	err = PTR_ERR(work);
				650	if (IS_ERR(work))
				651	goto out_err;
				652
				653	/*
				654	* Try to remove POSIX ACL xattrs from workdir. We are good if:
				655	*
				656	* a) success (there was a POSIX ACL xattr and was removed)
				657	* b) -ENODATA (there was no POSIX ACL xattr)
				658	* c) -EOPNOTSUPP (POSIX ACL xattrs are not supported)
				659	*
				660	* There are various other error values that could effectively
				661	* mean that the xattr doesn't exist (e.g. -ERANGE is returned
				662	* if the xattr name is too long), but the set of filesystems
				663	* allowed as upper are limited to "normal" ones, where checking
				664	* for the above two errors is sufficient.
				665	*/
				666	err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT);
				667	if (err && err != -ENODATA && err != -EOPNOTSUPP)
				668	goto out_dput;
				669
				670	err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS);
				671	if (err && err != -ENODATA && err != -EOPNOTSUPP)
				672	goto out_dput;
				673
				674	/* Clear any inherited mode bits */
				675	inode_lock(work->d_inode);
				676	err = notify_change(work, &attr, NULL);
				677	inode_unlock(work->d_inode);
				678	if (err)
				679	goto out_dput;
				680	} else {
				681	err = PTR_ERR(work);
				682	goto out_err;
				683	}
				684	out_unlock:
				685	if (locked)
				686	inode_unlock(dir);
				687
				688	return work;
				689
				690	out_dput:
				691	dput(work);
				692	out_err:
				693	pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n",
				694	ofs->config.workdir, name, -err);
				695	work = NULL;
				696	goto out_unlock;
				697	}
				698
				699	static void ovl_unescape(char *s)
				700	{
				701	char *d = s;
				702
				703	for (;; s++, d++) {
				704	if (*s == '\\')
				705	s++;
				706	d = s;
				707	if (!*s)
				708	break;
				709	}
				710	}
				711
				712	static int ovl_mount_dir_noesc(const char name, struct path path)
				713	{
				714	int err = -EINVAL;
				715
				716	if (!*name) {
				717	pr_err("overlayfs: empty lowerdir\n");
				718	goto out;
				719	}
				720	err = kern_path(name, LOOKUP_FOLLOW, path);
				721	if (err) {
				722	pr_err("overlayfs: failed to resolve '%s': %i\n", name, err);
				723	goto out;
				724	}
				725	err = -EINVAL;
				726	if (ovl_dentry_weird(path->dentry)) {
				727	pr_err("overlayfs: filesystem on '%s' not supported\n", name);
				728	goto out_put;
				729	}
				730	if (!d_is_dir(path->dentry)) {
				731	pr_err("overlayfs: '%s' not a directory\n", name);
				732	goto out_put;
				733	}
				734	return 0;
				735
				736	out_put:
				737	path_put_init(path);
				738	out:
				739	return err;
				740	}
				741
				742	static int ovl_mount_dir(const char name, struct path path)
				743	{
				744	int err = -ENOMEM;
				745	char *tmp = kstrdup(name, GFP_KERNEL);
				746
				747	if (tmp) {
				748	ovl_unescape(tmp);
				749	err = ovl_mount_dir_noesc(tmp, path);
				750
				751	if (!err)
				752	if (ovl_dentry_remote(path->dentry)) {
				753	pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n",
				754	tmp);
				755	path_put_init(path);
				756	err = -EINVAL;
				757	}
				758	kfree(tmp);
				759	}
				760	return err;
				761	}
				762
				763	static int ovl_check_namelen(struct path path, struct ovl_fs ofs,
				764	const char *name)
				765	{
				766	struct kstatfs statfs;
				767	int err = vfs_statfs(path, &statfs);
				768
				769	if (err)
				770	pr_err("overlayfs: statfs failed on '%s'\n", name);
				771	else
				772	ofs->namelen = max(ofs->namelen, statfs.f_namelen);
				773
				774	return err;
				775	}
				776
				777	static int ovl_lower_dir(const char name, struct path path,
				778	struct ovl_fs ofs, int stack_depth, bool *remote)
				779	{
				780	int fh_type;
				781	int err;
				782
				783	err = ovl_mount_dir_noesc(name, path);
				784	if (err)
				785	goto out;
				786
				787	err = ovl_check_namelen(path, ofs, name);
				788	if (err)
				789	goto out_put;
				790
				791	stack_depth = max(stack_depth, path->mnt->mnt_sb->s_stack_depth);
				792
				793	if (ovl_dentry_remote(path->dentry))
				794	*remote = true;
				795
				796	/*
				797	* The inodes index feature and NFS export need to encode and decode
				798	* file handles, so they require that all layers support them.
				799	*/
				800	fh_type = ovl_can_decode_fh(path->dentry->d_sb);
				801	if ((ofs->config.nfs_export \|\|
				802	(ofs->config.index && ofs->config.upperdir)) && !fh_type) {
				803	ofs->config.index = false;
				804	ofs->config.nfs_export = false;
				805	pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n",
				806	name);
				807	}
				808
				809	/* Check if lower fs has 32bit inode numbers */
				810	if (fh_type != FILEID_INO32_GEN)
				811	ofs->xino_bits = 0;
				812
				813	return 0;
				814
				815	out_put:
				816	path_put_init(path);
				817	out:
				818	return err;
				819	}
				820
				821	/* Workdir should not be subdir of upperdir and vice versa */
				822	static bool ovl_workdir_ok(struct dentry workdir, struct dentry upperdir)
				823	{
				824	bool ok = false;
				825
				826	if (workdir != upperdir) {
				827	ok = (lock_rename(workdir, upperdir) == NULL);
				828	unlock_rename(workdir, upperdir);
				829	}
				830	return ok;
				831	}
				832
				833	static unsigned int ovl_split_lowerdirs(char *str)
				834	{
				835	unsigned int ctr = 1;
				836	char s, d;
				837
				838	for (s = d = str;; s++, d++) {
				839	if (*s == '\\') {
				840	s++;
				841	} else if (*s == ':') {
				842	*d = '\0';
				843	ctr++;
				844	continue;
				845	}
				846	d = s;
				847	if (!*s)
				848	break;
				849	}
				850	return ctr;
				851	}
				852
				853	static int __maybe_unused
				854	ovl_posix_acl_xattr_get(const struct xattr_handler *handler,
				855	struct dentry dentry, struct inode inode,
				856	const char name, void buffer, size_t size)
				857	{
				858	return ovl_xattr_get(dentry, inode, handler->name, buffer, size);
				859	}
				860
				861	static int __maybe_unused
				862	ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
				863	struct dentry dentry, struct inode inode,
				864	const char name, const void value,
				865	size_t size, int flags)
				866	{
				867	struct dentry *workdir = ovl_workdir(dentry);
				868	struct inode *realinode = ovl_inode_real(inode);
				869	struct posix_acl *acl = NULL;
				870	int err;
				871
				872	/* Check that everything is OK before copy-up */
				873	if (value) {
				874	acl = posix_acl_from_xattr(&init_user_ns, value, size);
				875	if (IS_ERR(acl))
				876	return PTR_ERR(acl);
				877	}
				878	err = -EOPNOTSUPP;
				879	if (!IS_POSIXACL(d_inode(workdir)))
				880	goto out_acl_release;
				881	if (!realinode->i_op->set_acl)
				882	goto out_acl_release;
				883	if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) {
				884	err = acl ? -EACCES : 0;
				885	goto out_acl_release;
				886	}
				887	err = -EPERM;
				888	if (!inode_owner_or_capable(inode))
				889	goto out_acl_release;
				890
				891	posix_acl_release(acl);
				892
				893	/*
				894	* Check if sgid bit needs to be cleared (actual setacl operation will
				895	* be done with mounter's capabilities and so that won't do it for us).
				896	*/
				897	if (unlikely(inode->i_mode & S_ISGID) &&
				898	handler->flags == ACL_TYPE_ACCESS &&
				899	!in_group_p(inode->i_gid) &&
				900	!capable_wrt_inode_uidgid(inode, CAP_FSETID)) {
				901	struct iattr iattr = { .ia_valid = ATTR_KILL_SGID };
				902
				903	err = ovl_setattr(dentry, &iattr);
				904	if (err)
				905	return err;
				906	}
				907
				908	err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags);
				909	if (!err)
				910	ovl_copyattr(ovl_inode_real(inode), inode);
				911
				912	return err;
				913
				914	out_acl_release:
				915	posix_acl_release(acl);
				916	return err;
				917	}
				918
				919	static int ovl_own_xattr_get(const struct xattr_handler *handler,
				920	struct dentry dentry, struct inode inode,
				921	const char name, void buffer, size_t size)
				922	{
				923	return -EOPNOTSUPP;
				924	}
				925
				926	static int ovl_own_xattr_set(const struct xattr_handler *handler,
				927	struct dentry dentry, struct inode inode,
				928	const char name, const void value,
				929	size_t size, int flags)
				930	{
				931	return -EOPNOTSUPP;
				932	}
				933
				934	static int ovl_other_xattr_get(const struct xattr_handler *handler,
				935	struct dentry dentry, struct inode inode,
				936	const char name, void buffer, size_t size)
				937	{
				938	return ovl_xattr_get(dentry, inode, name, buffer, size);
				939	}
				940
				941	static int ovl_other_xattr_set(const struct xattr_handler *handler,
				942	struct dentry dentry, struct inode inode,
				943	const char name, const void value,
				944	size_t size, int flags)
				945	{
				946	return ovl_xattr_set(dentry, inode, name, value, size, flags);
				947	}
				948
				949	static const struct xattr_handler __maybe_unused
				950	ovl_posix_acl_access_xattr_handler = {
				951	.name = XATTR_NAME_POSIX_ACL_ACCESS,
				952	.flags = ACL_TYPE_ACCESS,
				953	.get = ovl_posix_acl_xattr_get,
				954	.set = ovl_posix_acl_xattr_set,
				955	};
				956
				957	static const struct xattr_handler __maybe_unused
				958	ovl_posix_acl_default_xattr_handler = {
				959	.name = XATTR_NAME_POSIX_ACL_DEFAULT,
				960	.flags = ACL_TYPE_DEFAULT,
				961	.get = ovl_posix_acl_xattr_get,
				962	.set = ovl_posix_acl_xattr_set,
				963	};
				964
				965	static const struct xattr_handler ovl_own_xattr_handler = {
				966	.prefix = OVL_XATTR_PREFIX,
				967	.get = ovl_own_xattr_get,
				968	.set = ovl_own_xattr_set,
				969	};
				970
				971	static const struct xattr_handler ovl_other_xattr_handler = {
				972	.prefix = "", /* catch all */
				973	.get = ovl_other_xattr_get,
				974	.set = ovl_other_xattr_set,
				975	};
				976
				977	static const struct xattr_handler *ovl_xattr_handlers[] = {
				978	#ifdef CONFIG_FS_POSIX_ACL
				979	&ovl_posix_acl_access_xattr_handler,
				980	&ovl_posix_acl_default_xattr_handler,
				981	#endif
				982	&ovl_own_xattr_handler,
				983	&ovl_other_xattr_handler,
				984	NULL
				985	};
				986
				987	static int ovl_get_upper(struct ovl_fs ofs, struct path upperpath)
				988	{
				989	struct vfsmount *upper_mnt;
				990	int err;
				991
				992	err = ovl_mount_dir(ofs->config.upperdir, upperpath);
				993	if (err)
				994	goto out;
				995
				996	/* Upper fs should not be r/o */
				997	if (sb_rdonly(upperpath->mnt->mnt_sb)) {
				998	pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n");
				999	err = -EINVAL;
				1000	goto out;
				1001	}
				1002
				1003	err = ovl_check_namelen(upperpath, ofs, ofs->config.upperdir);
				1004	if (err)
				1005	goto out;
				1006
				1007	upper_mnt = clone_private_mount(upperpath);
				1008	err = PTR_ERR(upper_mnt);
				1009	if (IS_ERR(upper_mnt)) {
				1010	pr_err("overlayfs: failed to clone upperpath\n");
				1011	goto out;
				1012	}
				1013
				1014	/* Don't inherit atime flags */
				1015	upper_mnt->mnt_flags &= ~(MNT_NOATIME \| MNT_NODIRATIME \| MNT_RELATIME);
				1016	ofs->upper_mnt = upper_mnt;
				1017
				1018	err = -EBUSY;
				1019	if (ovl_inuse_trylock(ofs->upper_mnt->mnt_root)) {
				1020	ofs->upperdir_locked = true;
				1021	} else if (ofs->config.index) {
				1022	pr_err("overlayfs: upperdir is in-use by another mount, mount with '-o index=off' to override exclusive upperdir protection.\n");
				1023	goto out;
				1024	} else {
				1025	pr_warn("overlayfs: upperdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
				1026	}
				1027
				1028	err = 0;
				1029	out:
				1030	return err;
				1031	}
				1032
				1033	static int ovl_make_workdir(struct ovl_fs ofs, struct path workpath)
				1034	{
				1035	struct vfsmount *mnt = ofs->upper_mnt;
				1036	struct dentry *temp;
				1037	int fh_type;
				1038	int err;
				1039
				1040	err = mnt_want_write(mnt);
				1041	if (err)
				1042	return err;
				1043
				1044	ofs->workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false);
				1045	if (!ofs->workdir)
				1046	goto out;
				1047
				1048	/*
				1049	* Upper should support d_type, else whiteouts are visible. Given
				1050	* workdir and upper are on same fs, we can do iterate_dir() on
				1051	* workdir. This check requires successful creation of workdir in
				1052	* previous step.
				1053	*/
				1054	err = ovl_check_d_type_supported(workpath);
				1055	if (err < 0)
				1056	goto out;
				1057
				1058	/*
				1059	* We allowed this configuration and don't want to break users over
				1060	* kernel upgrade. So warn instead of erroring out.
				1061	*/
				1062	if (!err)
				1063	pr_warn("overlayfs: upper fs needs to support d_type.\n");
				1064
				1065	/* Check if upper/work fs supports O_TMPFILE */
				1066	temp = ovl_do_tmpfile(ofs->workdir, S_IFREG \| 0);
				1067	ofs->tmpfile = !IS_ERR(temp);
				1068	if (ofs->tmpfile)
				1069	dput(temp);
				1070	else
				1071	pr_warn("overlayfs: upper fs does not support tmpfile.\n");
				1072
				1073	/*
				1074	* Check if upper/work fs supports trusted.overlay.* xattr
				1075	*/
				1076	err = ovl_do_setxattr(ofs->workdir, OVL_XATTR_OPAQUE, "0", 1, 0);
				1077	if (err) {
				1078	ofs->noxattr = true;
				1079	ofs->config.index = false;
				1080	ofs->config.metacopy = false;
				1081	pr_warn("overlayfs: upper fs does not support xattr, falling back to index=off and metacopy=off.\n");
				1082	err = 0;
				1083	} else {
				1084	vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE);
				1085	}
				1086
				1087	/* Check if upper/work fs supports file handles */
				1088	fh_type = ovl_can_decode_fh(ofs->workdir->d_sb);
				1089	if (ofs->config.index && !fh_type) {
				1090	ofs->config.index = false;
				1091	pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n");
				1092	}
				1093
				1094	/* Check if upper fs has 32bit inode numbers */
				1095	if (fh_type != FILEID_INO32_GEN)
				1096	ofs->xino_bits = 0;
				1097
				1098	/* NFS export of r/w mount depends on index */
				1099	if (ofs->config.nfs_export && !ofs->config.index) {
				1100	pr_warn("overlayfs: NFS export requires \"index=on\", falling back to nfs_export=off.\n");
				1101	ofs->config.nfs_export = false;
				1102	}
				1103	out:
				1104	mnt_drop_write(mnt);
				1105	return err;
				1106	}
				1107
				1108	static int ovl_get_workdir(struct ovl_fs ofs, struct path upperpath)
				1109	{
				1110	int err;
				1111	struct path workpath = { };
				1112
				1113	err = ovl_mount_dir(ofs->config.workdir, &workpath);
				1114	if (err)
				1115	goto out;
				1116
				1117	err = -EINVAL;
				1118	if (upperpath->mnt != workpath.mnt) {
				1119	pr_err("overlayfs: workdir and upperdir must reside under the same mount\n");
				1120	goto out;
				1121	}
				1122	if (!ovl_workdir_ok(workpath.dentry, upperpath->dentry)) {
				1123	pr_err("overlayfs: workdir and upperdir must be separate subtrees\n");
				1124	goto out;
				1125	}
				1126
				1127	ofs->workbasedir = dget(workpath.dentry);
				1128
				1129	err = -EBUSY;
				1130	if (ovl_inuse_trylock(ofs->workbasedir)) {
				1131	ofs->workdir_locked = true;
				1132	} else if (ofs->config.index) {
				1133	pr_err("overlayfs: workdir is in-use by another mount, mount with '-o index=off' to override exclusive workdir protection.\n");
				1134	goto out;
				1135	} else {
				1136	pr_warn("overlayfs: workdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n");
				1137	}
				1138
				1139	err = ovl_make_workdir(ofs, &workpath);
				1140	if (err)
				1141	goto out;
				1142
				1143	err = 0;
				1144	out:
				1145	path_put(&workpath);
				1146
				1147	return err;
				1148	}
				1149
				1150	static int ovl_get_indexdir(struct ovl_fs ofs, struct ovl_entry oe,
				1151	struct path *upperpath)
				1152	{
				1153	struct vfsmount *mnt = ofs->upper_mnt;
				1154	int err;
				1155
				1156	err = mnt_want_write(mnt);
				1157	if (err)
				1158	return err;
				1159
				1160	/* Verify lower root is upper root origin */
				1161	err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry,
				1162	true);
				1163	if (err) {
				1164	pr_err("overlayfs: failed to verify upper root origin\n");
				1165	goto out;
				1166	}
				1167
				1168	ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
				1169	if (ofs->indexdir) {
				1170	/*
				1171	* Verify upper root is exclusively associated with index dir.
				1172	* Older kernels stored upper fh in "trusted.overlay.origin"
				1173	* xattr. If that xattr exists, verify that it is a match to
				1174	* upper dir file handle. In any case, verify or set xattr
				1175	* "trusted.overlay.upper" to indicate that index may have
				1176	* directory entries.
				1177	*/
				1178	if (ovl_check_origin_xattr(ofs->indexdir)) {
				1179	err = ovl_verify_set_fh(ofs->indexdir, OVL_XATTR_ORIGIN,
				1180	upperpath->dentry, true, false);
				1181	if (err)
				1182	pr_err("overlayfs: failed to verify index dir 'origin' xattr\n");
				1183	}
				1184	err = ovl_verify_upper(ofs->indexdir, upperpath->dentry, true);
				1185	if (err)
				1186	pr_err("overlayfs: failed to verify index dir 'upper' xattr\n");
				1187
				1188	/* Cleanup bad/stale/orphan index entries */
				1189	if (!err)
				1190	err = ovl_indexdir_cleanup(ofs);
				1191	}
				1192	if (err \|\| !ofs->indexdir)
				1193	pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
				1194
				1195	out:
				1196	mnt_drop_write(mnt);
				1197	return err;
				1198	}
				1199
				1200	/* Get a unique fsid for the layer */
				1201	static int ovl_get_fsid(struct ovl_fs ofs, struct super_block sb)
				1202	{
				1203	unsigned int i;
				1204	dev_t dev;
				1205	int err;
				1206
				1207	/* fsid 0 is reserved for upper fs even with non upper overlay */
				1208	if (ofs->upper_mnt && ofs->upper_mnt->mnt_sb == sb)
				1209	return 0;
				1210
				1211	for (i = 0; i < ofs->numlowerfs; i++) {
				1212	if (ofs->lower_fs[i].sb == sb)
				1213	return i + 1;
				1214	}
				1215
				1216	err = get_anon_bdev(&dev);
				1217	if (err) {
				1218	pr_err("overlayfs: failed to get anonymous bdev for lowerpath\n");
				1219	return err;
				1220	}
				1221
				1222	ofs->lower_fs[ofs->numlowerfs].sb = sb;
				1223	ofs->lower_fs[ofs->numlowerfs].pseudo_dev = dev;
				1224	ofs->numlowerfs++;
				1225
				1226	return ofs->numlowerfs;
				1227	}
				1228
				1229	static int ovl_get_lower_layers(struct ovl_fs ofs, struct path stack,
				1230	unsigned int numlower)
				1231	{
				1232	int err;
				1233	unsigned int i;
				1234
				1235	err = -ENOMEM;
				1236	ofs->lower_layers = kcalloc(numlower, sizeof(struct ovl_layer),
				1237	GFP_KERNEL);
				1238	if (ofs->lower_layers == NULL)
				1239	goto out;
				1240
				1241	ofs->lower_fs = kcalloc(numlower, sizeof(struct ovl_sb),
				1242	GFP_KERNEL);
				1243	if (ofs->lower_fs == NULL)
				1244	goto out;
				1245
				1246	for (i = 0; i < numlower; i++) {
				1247	struct vfsmount *mnt;
				1248	int fsid;
				1249
				1250	err = fsid = ovl_get_fsid(ofs, stack[i].mnt->mnt_sb);
				1251	if (err < 0)
				1252	goto out;
				1253
				1254	mnt = clone_private_mount(&stack[i]);
				1255	err = PTR_ERR(mnt);
				1256	if (IS_ERR(mnt)) {
				1257	pr_err("overlayfs: failed to clone lowerpath\n");
				1258	goto out;
				1259	}
				1260
				1261	/*
				1262	* Make lower layers R/O. That way fchmod/fchown on lower file
				1263	* will fail instead of modifying lower fs.
				1264	*/
				1265	mnt->mnt_flags \|= MNT_READONLY \| MNT_NOATIME;
				1266
				1267	ofs->lower_layers[ofs->numlower].mnt = mnt;
				1268	ofs->lower_layers[ofs->numlower].idx = i + 1;
				1269	ofs->lower_layers[ofs->numlower].fsid = fsid;
				1270	if (fsid) {
				1271	ofs->lower_layers[ofs->numlower].fs =
				1272	&ofs->lower_fs[fsid - 1];
				1273	}
				1274	ofs->numlower++;
				1275	}
				1276
				1277	/*
				1278	* When all layers on same fs, overlay can use real inode numbers.
				1279	* With mount option "xino=on", mounter declares that there are enough
				1280	* free high bits in underlying fs to hold the unique fsid.
				1281	* If overlayfs does encounter underlying inodes using the high xino
				1282	* bits reserved for fsid, it emits a warning and uses the original
				1283	* inode number.
				1284	*/
				1285	if (!ofs->numlowerfs \|\| (ofs->numlowerfs == 1 && !ofs->upper_mnt)) {
				1286	ofs->xino_bits = 0;
				1287	ofs->config.xino = OVL_XINO_OFF;
				1288	} else if (ofs->config.xino == OVL_XINO_ON && !ofs->xino_bits) {
				1289	/*
				1290	* This is a roundup of number of bits needed for numlowerfs+1
				1291	* (i.e. ilog2(numlowerfs+1 - 1) + 1). fsid 0 is reserved for
				1292	* upper fs even with non upper overlay.
				1293	*/
				1294	BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 31);
				1295	ofs->xino_bits = ilog2(ofs->numlowerfs) + 1;
				1296	}
				1297
				1298	if (ofs->xino_bits) {
				1299	pr_info("overlayfs: \"xino\" feature enabled using %d upper inode bits.\n",
				1300	ofs->xino_bits);
				1301	}
				1302
				1303	err = 0;
				1304	out:
				1305	return err;
				1306	}
				1307
				1308	static struct ovl_entry ovl_get_lowerstack(struct super_block sb,
				1309	struct ovl_fs *ofs)
				1310	{
				1311	int err;
				1312	char lowertmp, lower;
				1313	struct path *stack = NULL;
				1314	unsigned int stacklen, numlower = 0, i;
				1315	bool remote = false;
				1316	struct ovl_entry *oe;
				1317
				1318	err = -ENOMEM;
				1319	lowertmp = kstrdup(ofs->config.lowerdir, GFP_KERNEL);
				1320	if (!lowertmp)
				1321	goto out_err;
				1322
				1323	err = -EINVAL;
				1324	stacklen = ovl_split_lowerdirs(lowertmp);
				1325	if (stacklen > OVL_MAX_STACK) {
				1326	pr_err("overlayfs: too many lower directories, limit is %d\n",
				1327	OVL_MAX_STACK);
				1328	goto out_err;
				1329	} else if (!ofs->config.upperdir && stacklen == 1) {
				1330	pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n");
				1331	goto out_err;
				1332	} else if (!ofs->config.upperdir && ofs->config.nfs_export &&
				1333	ofs->config.redirect_follow) {
				1334	pr_warn("overlayfs: NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
				1335	ofs->config.nfs_export = false;
				1336	}
				1337
				1338	err = -ENOMEM;
				1339	stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL);
				1340	if (!stack)
				1341	goto out_err;
				1342
				1343	err = -EINVAL;
				1344	lower = lowertmp;
				1345	for (numlower = 0; numlower < stacklen; numlower++) {
				1346	err = ovl_lower_dir(lower, &stack[numlower], ofs,
				1347	&sb->s_stack_depth, &remote);
				1348	if (err)
				1349	goto out_err;
				1350
				1351	lower = strchr(lower, '\0') + 1;
				1352	}
				1353
				1354	err = -EINVAL;
				1355	sb->s_stack_depth++;
				1356	if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
				1357	pr_err("overlayfs: maximum fs stacking depth exceeded\n");
				1358	goto out_err;
				1359	}
				1360
				1361	err = ovl_get_lower_layers(ofs, stack, numlower);
				1362	if (err)
				1363	goto out_err;
				1364
				1365	err = -ENOMEM;
				1366	oe = ovl_alloc_entry(numlower);
				1367	if (!oe)
				1368	goto out_err;
				1369
				1370	for (i = 0; i < numlower; i++) {
				1371	oe->lowerstack[i].dentry = dget(stack[i].dentry);
				1372	oe->lowerstack[i].layer = &ofs->lower_layers[i];
				1373	}
				1374
				1375	if (remote)
				1376	sb->s_d_op = &ovl_reval_dentry_operations;
				1377	else
				1378	sb->s_d_op = &ovl_dentry_operations;
				1379
				1380	out:
				1381	for (i = 0; i < numlower; i++)
				1382	path_put(&stack[i]);
				1383	kfree(stack);
				1384	kfree(lowertmp);
				1385
				1386	return oe;
				1387
				1388	out_err:
				1389	oe = ERR_PTR(err);
				1390	goto out;
				1391	}
				1392
				1393	static int ovl_fill_super(struct super_block sb, void data, int silent)
				1394	{
				1395	struct path upperpath = { };
				1396	struct dentry *root_dentry;
				1397	struct ovl_entry *oe;
				1398	struct ovl_fs *ofs;
				1399	struct cred *cred;
				1400	int err;
				1401
				1402	err = -ENOMEM;
				1403	ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
				1404	if (!ofs)
				1405	goto out;
				1406
				1407	ofs->creator_cred = cred = prepare_creds();
				1408	if (!cred)
				1409	goto out_err;
				1410
				1411	ofs->config.index = ovl_index_def;
				1412	ofs->config.nfs_export = ovl_nfs_export_def;
				1413	ofs->config.xino = ovl_xino_def();
				1414	ofs->config.metacopy = ovl_metacopy_def;
				1415	err = ovl_parse_opt((char *) data, &ofs->config);
				1416	if (err)
				1417	goto out_err;
				1418
				1419	err = -EINVAL;
				1420	if (!ofs->config.lowerdir) {
				1421	if (!silent)
				1422	pr_err("overlayfs: missing 'lowerdir'\n");
				1423	goto out_err;
				1424	}
				1425
				1426	sb->s_stack_depth = 0;
				1427	sb->s_maxbytes = MAX_LFS_FILESIZE;
				1428	/* Assume underlaying fs uses 32bit inodes unless proven otherwise */
				1429	if (ofs->config.xino != OVL_XINO_OFF)
				1430	ofs->xino_bits = BITS_PER_LONG - 32;
				1431
				1432	if (ofs->config.upperdir) {
				1433	if (!ofs->config.workdir) {
				1434	pr_err("overlayfs: missing 'workdir'\n");
				1435	goto out_err;
				1436	}
				1437
				1438	err = ovl_get_upper(ofs, &upperpath);
				1439	if (err)
				1440	goto out_err;
				1441
				1442	err = ovl_get_workdir(ofs, &upperpath);
				1443	if (err)
				1444	goto out_err;
				1445
				1446	if (!ofs->workdir)
				1447	sb->s_flags \|= SB_RDONLY;
				1448
				1449	sb->s_stack_depth = ofs->upper_mnt->mnt_sb->s_stack_depth;
				1450	sb->s_time_gran = ofs->upper_mnt->mnt_sb->s_time_gran;
				1451
				1452	}
				1453	oe = ovl_get_lowerstack(sb, ofs);
				1454	err = PTR_ERR(oe);
				1455	if (IS_ERR(oe))
				1456	goto out_err;
				1457
				1458	/* If the upper fs is nonexistent, we mark overlayfs r/o too */
				1459	if (!ofs->upper_mnt)
				1460	sb->s_flags \|= SB_RDONLY;
				1461
				1462	if (!(ovl_force_readonly(ofs)) && ofs->config.index) {
				1463	err = ovl_get_indexdir(ofs, oe, &upperpath);
				1464	if (err)
				1465	goto out_free_oe;
				1466
				1467	/* Force r/o mount with no index dir */
				1468	if (!ofs->indexdir) {
				1469	dput(ofs->workdir);
				1470	ofs->workdir = NULL;
				1471	sb->s_flags \|= SB_RDONLY;
				1472	}
				1473
				1474	}
				1475
				1476	/* Show index=off in /proc/mounts for forced r/o mount */
				1477	if (!ofs->indexdir) {
				1478	ofs->config.index = false;
				1479	if (ofs->upper_mnt && ofs->config.nfs_export) {
				1480	pr_warn("overlayfs: NFS export requires an index dir, falling back to nfs_export=off.\n");
				1481	ofs->config.nfs_export = false;
				1482	}
				1483	}
				1484
				1485	if (ofs->config.metacopy && ofs->config.nfs_export) {
				1486	pr_warn("overlayfs: NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n");
				1487	ofs->config.nfs_export = false;
				1488	}
				1489
				1490	if (ofs->config.nfs_export)
				1491	sb->s_export_op = &ovl_export_operations;
				1492
				1493	/* Never override disk quota limits or use reserved space */
				1494	cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
				1495
				1496	sb->s_magic = OVERLAYFS_SUPER_MAGIC;
				1497	sb->s_op = &ovl_super_operations;
				1498	sb->s_xattr = ovl_xattr_handlers;
				1499	sb->s_fs_info = ofs;
				1500	sb->s_flags \|= SB_POSIXACL;
				1501
				1502	err = -ENOMEM;
				1503	root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0));
				1504	if (!root_dentry)
				1505	goto out_free_oe;
				1506
				1507	root_dentry->d_fsdata = oe;
				1508
				1509	mntput(upperpath.mnt);
				1510	if (upperpath.dentry) {
				1511	ovl_dentry_set_upper_alias(root_dentry);
				1512	if (ovl_is_impuredir(upperpath.dentry))
				1513	ovl_set_flag(OVL_IMPURE, d_inode(root_dentry));
				1514	}
				1515
				1516	/* Root is always merge -> can have whiteouts */
				1517	ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry));
				1518	ovl_dentry_set_flag(OVL_E_CONNECTED, root_dentry);
				1519	ovl_set_upperdata(d_inode(root_dentry));
				1520	ovl_inode_init(d_inode(root_dentry), upperpath.dentry,
				1521	ovl_dentry_lower(root_dentry), NULL);
				1522
				1523	sb->s_root = root_dentry;
				1524
				1525	return 0;
				1526
				1527	out_free_oe:
				1528	ovl_entry_stack_free(oe);
				1529	kfree(oe);
				1530	out_err:
				1531	path_put(&upperpath);
				1532	ovl_free_fs(ofs);
				1533	out:
				1534	return err;
				1535	}
				1536
				1537	static struct dentry ovl_mount(struct file_system_type fs_type, int flags,
				1538	const char dev_name, void raw_data)
				1539	{
				1540	return mount_nodev(fs_type, flags, raw_data, ovl_fill_super);
				1541	}
				1542
				1543	static struct file_system_type ovl_fs_type = {
				1544	.owner = THIS_MODULE,
				1545	.name = "overlay",
				1546	.mount = ovl_mount,
				1547	.kill_sb = kill_anon_super,
				1548	};
				1549	MODULE_ALIAS_FS("overlay");
				1550
				1551	static void ovl_inode_init_once(void *foo)
				1552	{
				1553	struct ovl_inode *oi = foo;
				1554
				1555	inode_init_once(&oi->vfs_inode);
				1556	}
				1557
				1558	static int __init ovl_init(void)
				1559	{
				1560	int err;
				1561
				1562	ovl_inode_cachep = kmem_cache_create("ovl_inode",
				1563	sizeof(struct ovl_inode), 0,
				1564	(SLAB_RECLAIM_ACCOUNT\|
				1565	SLAB_MEM_SPREAD\|SLAB_ACCOUNT),
				1566	ovl_inode_init_once);
				1567	if (ovl_inode_cachep == NULL)
				1568	return -ENOMEM;
				1569
				1570	err = register_filesystem(&ovl_fs_type);
				1571	if (err)
				1572	kmem_cache_destroy(ovl_inode_cachep);
				1573
				1574	return err;
				1575	}
				1576
				1577	static void __exit ovl_exit(void)
				1578	{
				1579	unregister_filesystem(&ovl_fs_type);
				1580
				1581	/*
				1582	* Make sure all delayed rcu free inodes are flushed before we
				1583	* destroy cache.
				1584	*/
				1585	rcu_barrier();
				1586	kmem_cache_destroy(ovl_inode_cachep);
				1587
				1588	}
				1589
				1590	module_init(ovl_init);
				1591	module_exit(ovl_exit);