/*
 * Open file cache.
 *
 * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
 */

#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/list_lru.h>
#include <linux/fsnotify_backend.h>
#include <linux/fsnotify.h>
#include <linux/seq_file.h>

#include "vfs.h"
#include "nfsd.h"
#include "nfsfh.h"
#include "netns.h"
#include "filecache.h"
#include "trace.h"

#define NFSDDBG_FACILITY	NFSDDBG_FH

/* FIXME: dynamically size this for the machine somehow? */
#define NFSD_FILE_HASH_BITS	12
#define NFSD_FILE_HASH_SIZE	(1 << NFSD_FILE_HASH_BITS)
#define NFSD_LAUNDRETTE_DELAY	(2 * HZ)

#define NFSD_FILE_LRU_RESCAN	(0)
#define NFSD_FILE_SHUTDOWN	(1)
#define NFSD_FILE_LRU_THRESHOLD	(4096UL)
#define NFSD_FILE_LRU_LIMIT	(NFSD_FILE_LRU_THRESHOLD << 2)

/* We only care about NFSD_MAY_READ/WRITE for this cache */
#define NFSD_FILE_MAY_MASK	(NFSD_MAY_READ|NFSD_MAY_WRITE)

struct nfsd_fcache_bucket {
	struct hlist_head	nfb_head;
	spinlock_t		nfb_lock;
	unsigned int		nfb_count;
	unsigned int		nfb_maxcount;
};

static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);

static struct kmem_cache		*nfsd_file_slab;
static struct kmem_cache		*nfsd_file_mark_slab;
static struct nfsd_fcache_bucket	*nfsd_file_hashtbl;
static struct list_lru			nfsd_file_lru;
static long				nfsd_file_lru_flags;
static struct fsnotify_group		*nfsd_file_fsnotify_group;
static atomic_long_t			nfsd_filecache_count;
static struct delayed_work		nfsd_filecache_laundrette;

enum nfsd_file_laundrette_ctl {
	NFSD_FILE_LAUNDRETTE_NOFLUSH = 0,
	NFSD_FILE_LAUNDRETTE_MAY_FLUSH
};

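/*
 * Scheduling policy, as implemented below: do nothing if the cache is
 * empty or shutting down. Scan immediately once the cache exceeds
 * NFSD_FILE_LRU_THRESHOLD entries, otherwise after a short delay. If
 * the caller allows it and the count has reached NFSD_FILE_LRU_LIMIT,
 * also wait synchronously for the scan to complete.
 */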
static void
nfsd_file_schedule_laundrette(enum nfsd_file_laundrette_ctl ctl)
{
	long count = atomic_long_read(&nfsd_filecache_count);

	if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags))
		return;

	/* Be more aggressive about scanning if over the threshold */
	if (count > NFSD_FILE_LRU_THRESHOLD)
		mod_delayed_work(system_wq, &nfsd_filecache_laundrette, 0);
	else
		schedule_delayed_work(&nfsd_filecache_laundrette, NFSD_LAUNDRETTE_DELAY);

	if (ctl == NFSD_FILE_LAUNDRETTE_NOFLUSH)
		return;

	/* ...and don't delay flushing if we're out of control */
	if (count >= NFSD_FILE_LRU_LIMIT)
		flush_delayed_work(&nfsd_filecache_laundrette);
}

static void
nfsd_file_slab_free(struct rcu_head *rcu)
{
	struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu);

	put_cred(nf->nf_cred);
	kmem_cache_free(nfsd_file_slab, nf);
}

static void
nfsd_file_mark_free(struct fsnotify_mark *mark)
{
	struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark,
						  nfm_mark);

	kmem_cache_free(nfsd_file_mark_slab, nfm);
}

static struct nfsd_file_mark *
nfsd_file_mark_get(struct nfsd_file_mark *nfm)
{
	if (!atomic_inc_not_zero(&nfm->nfm_ref))
		return NULL;
	return nfm;
}

static void
nfsd_file_mark_put(struct nfsd_file_mark *nfm)
{
	if (atomic_dec_and_test(&nfm->nfm_ref)) {
		fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
		fsnotify_put_mark(&nfm->nfm_mark);
	}
}

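/*
 * Find the nfsd_file_mark for this inode, or allocate and attach a new
 * one. Attaching can race with another task adding a mark for the same
 * inode; fsnotify_add_inode_mark() reports that as -EEXIST, in which
 * case we drop our new mark and retry the lookup.
 */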
static struct nfsd_file_mark *
nfsd_file_mark_find_or_create(struct nfsd_file *nf)
{
	int			err;
	struct fsnotify_mark	*mark;
	struct nfsd_file_mark	*nfm = NULL, *new;
	struct inode *inode = nf->nf_inode;

	do {
		mutex_lock(&nfsd_file_fsnotify_group->mark_mutex);
		mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
				nfsd_file_fsnotify_group);
		if (mark) {
			nfm = nfsd_file_mark_get(container_of(mark,
						 struct nfsd_file_mark,
						 nfm_mark));
			mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
			if (nfm) {
				fsnotify_put_mark(mark);
				break;
			}
			/* Avoid soft lockup race with nfsd_file_mark_put() */
			fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group);
			fsnotify_put_mark(mark);
		} else
			mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);

		/* allocate a new nfm */
		new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
		if (!new)
			return NULL;
		fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
		new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
		atomic_set(&new->nfm_ref, 1);

		err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);

		/*
		 * If the add was successful, then return the object.
		 * Otherwise, we need to put the reference we hold on the
		 * nfm_mark. The fsnotify code will take a reference and put
		 * it on failure, so we can't just free it directly. It's also
		 * not safe to call fsnotify_destroy_mark on it as the
		 * mark->group will be NULL. Thus, we can't let the nfm_ref
		 * counter drive the destruction at this point.
		 */
		if (likely(!err))
			nfm = new;
		else
			fsnotify_put_mark(&new->nfm_mark);
	} while (unlikely(err == -EEXIST));

	return nfm;
}

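/*
 * Allocate a hash-table entry for this open. The new nfsd_file starts
 * with a single reference owned by the caller and pins the current
 * task's credentials; the struct file itself is opened later, under
 * the NFSD_FILE_PENDING bit in nfsd_file_acquire().
 */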
static struct nfsd_file *
nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
		struct net *net)
{
	struct nfsd_file *nf;

	nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
	if (nf) {
		INIT_HLIST_NODE(&nf->nf_node);
		INIT_LIST_HEAD(&nf->nf_lru);
		nf->nf_file = NULL;
		nf->nf_cred = get_current_cred();
		nf->nf_net = net;
		nf->nf_flags = 0;
		nf->nf_inode = inode;
		nf->nf_hashval = hashval;
		atomic_set(&nf->nf_ref, 1);
		nf->nf_may = may & NFSD_FILE_MAY_MASK;
		if (may & NFSD_MAY_NOT_BREAK_LEASE) {
			if (may & NFSD_MAY_WRITE)
				__set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
			if (may & NFSD_MAY_READ)
				__set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
		}
		nf->nf_mark = NULL;
		trace_nfsd_file_alloc(nf);
	}
	return nf;
}

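/*
 * Tear down an nfsd_file whose last reference has been put. Note the
 * extra get_file(): filp_close() consumes one file reference, so this
 * keeps the struct file pinned across it for the explicit fput() that
 * follows. Returns true if an fput was done, so the caller knows that
 * a flush_delayed_fput() may be needed.
 */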
static bool
nfsd_file_free(struct nfsd_file *nf)
{
	bool flush = false;

	trace_nfsd_file_put_final(nf);
	if (nf->nf_mark)
		nfsd_file_mark_put(nf->nf_mark);
	if (nf->nf_file) {
		get_file(nf->nf_file);
		filp_close(nf->nf_file, NULL);
		fput(nf->nf_file);
		flush = true;
	}
	call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
	return flush;
}

static bool
nfsd_file_check_writeback(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;
	struct address_space *mapping;

	if (!file || !(file->f_mode & FMODE_WRITE))
		return false;
	mapping = file->f_mapping;
	return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
		mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
}

static int
nfsd_file_check_write_error(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;

	if (!file || !(file->f_mode & FMODE_WRITE))
		return 0;
	return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
}

static bool
nfsd_file_in_use(struct nfsd_file *nf)
{
	return nfsd_file_check_writeback(nf) ||
		nfsd_file_check_write_error(nf);
}

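/*
 * Unhash under the bucket lock. If this file has a write error pending
 * that no client has had a chance to observe, reset the boot verifier;
 * the idea, as far as the code shows, is to make clients resend any
 * UNSTABLE writes rather than lose them silently.
 */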
static void
nfsd_file_do_unhash(struct nfsd_file *nf)
{
	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);

	trace_nfsd_file_unhash(nf);

	if (nfsd_file_check_write_error(nf))
		nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id));
	--nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
	hlist_del_rcu(&nf->nf_node);
	if (!list_empty(&nf->nf_lru))
		list_lru_del(&nfsd_file_lru, &nf->nf_lru);
	atomic_long_dec(&nfsd_filecache_count);
}

static bool
nfsd_file_unhash(struct nfsd_file *nf)
{
	if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		nfsd_file_do_unhash(nf);
		return true;
	}
	return false;
}

/*
 * Return true if the file was unhashed.
 */
static bool
nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose)
{
	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);

	trace_nfsd_file_unhash_and_release_locked(nf);
	if (!nfsd_file_unhash(nf))
		return false;
	/* keep final reference for nfsd_file_lru_dispose */
	if (atomic_add_unless(&nf->nf_ref, -1, 1))
		return true;

	list_add(&nf->nf_lru, dispose);
	return true;
}

static int
nfsd_file_put_noref(struct nfsd_file *nf)
{
	int count;
	trace_nfsd_file_put(nf);

	count = atomic_dec_return(&nf->nf_ref);
	if (!count) {
		WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
		nfsd_file_free(nf);
	}
	return count;
}

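/*
 * Drop a caller's reference. REFERENCED is set first so the LRU scanner
 * gives this entry one more pass before evicting it; the flag-then-
 * decrement order pairs with the lockless check in nfsd_file_lru_cb().
 * If this put leaves only the hash table's reference and the file is
 * idle, kick the laundrette to consider closing it.
 */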
void
nfsd_file_put(struct nfsd_file *nf)
{
	bool is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0;
	bool unused = !nfsd_file_in_use(nf);

	set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
	if (nfsd_file_put_noref(nf) == 1 && is_hashed && unused)
		nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_MAY_FLUSH);
}

struct nfsd_file *
nfsd_file_get(struct nfsd_file *nf)
{
	if (likely(atomic_inc_not_zero(&nf->nf_ref)))
		return nf;
	return NULL;
}

static void
nfsd_file_dispose_list(struct list_head *dispose)
{
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del(&nf->nf_lru);
		nfsd_file_put_noref(nf);
	}
}

static void
nfsd_file_dispose_list_sync(struct list_head *dispose)
{
	bool flush = false;
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del(&nf->nf_lru);
		if (!atomic_dec_and_test(&nf->nf_ref))
			continue;
		if (nfsd_file_free(nf))
			flush = true;
	}
	if (flush)
		flush_delayed_fput();
}

/*
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static enum lru_status
nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
		 spinlock_t *lock, void *arg)
	__releases(lock)
	__acquires(lock)
{
	struct list_head *head = arg;
	struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);

	/*
	 * Do a lockless refcount check. The hashtable holds one reference, so
	 * we look to see if anything else has a reference, or if any have
	 * been put since the shrinker last ran. Those don't get unhashed and
	 * released.
	 *
	 * Note that in the put path, we set the flag and then decrement the
	 * counter. Here we check the counter and then test and clear the flag.
	 * That order is deliberate to ensure that we can do this locklessly.
	 */
	if (atomic_read(&nf->nf_ref) > 1)
		goto out_skip;

	/*
	 * Don't throw out files that are still undergoing I/O or
	 * that have uncleared errors pending.
	 */
	if (nfsd_file_check_writeback(nf))
		goto out_skip;

	if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags))
		goto out_rescan;

	if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags))
		goto out_skip;

	list_lru_isolate_move(lru, &nf->nf_lru, head);
	return LRU_REMOVED;
out_rescan:
	set_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags);
out_skip:
	return LRU_SKIP;
}

static void
nfsd_file_lru_dispose(struct list_head *head)
{
	while (!list_empty(head)) {
		struct nfsd_file *nf = list_first_entry(head,
				struct nfsd_file, nf_lru);
		list_del_init(&nf->nf_lru);
		spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
		nfsd_file_do_unhash(nf);
		spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
		nfsd_file_put_noref(nf);
	}
}

static unsigned long
nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
{
	return list_lru_count(&nfsd_file_lru);
}

static unsigned long
nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
{
	LIST_HEAD(head);
	unsigned long ret;

	ret = list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, &head);
	nfsd_file_lru_dispose(&head);
	return ret;
}

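/*
 * Hook the LRU into generic memory reclaim. seeks = 1 (below the usual
 * DEFAULT_SEEKS of 2) tells reclaim these objects are relatively cheap
 * to recreate, presumably because re-opening a file is inexpensive, so
 * this cache gets pruned comparatively aggressively under pressure.
 */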
436
437static struct shrinker nfsd_file_shrinker = {
438 .scan_objects = nfsd_file_lru_scan,
439 .count_objects = nfsd_file_lru_count,
440 .seeks = 1,
441};
442
443static void
444__nfsd_file_close_inode(struct inode *inode, unsigned int hashval,
445 struct list_head *dispose)
446{
447 struct nfsd_file *nf;
448 struct hlist_node *tmp;
449
450 spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
451 hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) {
452 if (inode == nf->nf_inode)
453 nfsd_file_unhash_and_release_locked(nf, dispose);
454 }
455 spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
456}
457
/**
 * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Walk the whole hash bucket, looking for any files that correspond to "inode".
 * If any do, then unhash them, put the hashtable's reference to them, and
 * destroy any that had their last reference put. Also ensure that any
 * resulting fputs have had their final __fput done.
 */
void
nfsd_file_close_inode_sync(struct inode *inode)
{
	unsigned int		hashval = (unsigned int)hash_long(inode->i_ino,
						NFSD_FILE_HASH_BITS);
	LIST_HEAD(dispose);

	__nfsd_file_close_inode(inode, hashval, &dispose);
	trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose));
	nfsd_file_dispose_list_sync(&dispose);
}

/**
 * nfsd_file_close_inode - attempt to forcibly close a nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Walk the whole hash bucket, looking for any files that correspond to "inode".
 * If any do, then unhash them, put the hashtable's reference to them, and
 * destroy any that had their last reference put.
 */
static void
nfsd_file_close_inode(struct inode *inode)
{
	unsigned int		hashval = (unsigned int)hash_long(inode->i_ino,
						NFSD_FILE_HASH_BITS);
	LIST_HEAD(dispose);

	__nfsd_file_close_inode(inode, hashval, &dispose);
	trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose));
	nfsd_file_dispose_list(&dispose);
}

/**
 * nfsd_file_delayed_close - close unused nfsd_files
 * @work: dummy
 *
 * Walk the LRU list and close any entries that have not been used since
 * the last scan.
 *
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static void
nfsd_file_delayed_close(struct work_struct *work)
{
	LIST_HEAD(head);

	list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, &head, LONG_MAX);

	if (test_and_clear_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags))
		nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_NOFLUSH);

	if (!list_empty(&head)) {
		nfsd_file_lru_dispose(&head);
		flush_delayed_fput();
	}
}

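/*
 * Cached opens held by this cache would look like conflicting opens to
 * anyone trying to set a lease on the inode. The lease notifier fires
 * on F_SETLEASE attempts, so closing our cached files here lets the
 * lease (and nfsd's own delegation machinery) make progress.
 */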
static int
nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
			    void *data)
{
	struct file_lock *fl = data;

	/* Only close files for F_SETLEASE leases */
	if (fl->fl_flags & FL_LEASE)
		nfsd_file_close_inode_sync(file_inode(fl->fl_file));
	return 0;
}

static struct notifier_block nfsd_file_lease_notifier = {
	.notifier_call = nfsd_file_lease_notifier_call,
};

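/*
 * fsnotify callback for the marks set up in
 * nfsd_file_mark_find_or_create(). FS_DELETE_SELF means the inode is
 * going away; FS_ATTRIB fires on attribute changes such as a link
 * count drop. Either way, once i_nlink hits zero we close the cached
 * files so the inode can actually be released.
 */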
static int
nfsd_file_fsnotify_handle_event(struct fsnotify_group *group,
				struct inode *inode,
				u32 mask, const void *data, int data_type,
				const struct qstr *file_name, u32 cookie,
				struct fsnotify_iter_info *iter_info)
{
	trace_nfsd_file_fsnotify_handle_event(inode, mask);

	/* Should be no marks on non-regular files */
	if (!S_ISREG(inode->i_mode)) {
		WARN_ON_ONCE(1);
		return 0;
	}

	/* don't close files if this was not the last link */
	if (mask & FS_ATTRIB) {
		if (inode->i_nlink)
			return 0;
	}

	nfsd_file_close_inode(inode);
	return 0;
}

static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
	.handle_event = nfsd_file_fsnotify_handle_event,
	.free_mark = nfsd_file_mark_free,
};

int
nfsd_file_cache_init(void)
{
	int		ret = -ENOMEM;
	unsigned int	i;

	clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);

	if (nfsd_file_hashtbl)
		return 0;

	nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE,
				sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
	if (!nfsd_file_hashtbl) {
		pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n");
		goto out_err;
	}

	nfsd_file_slab = kmem_cache_create("nfsd_file",
				sizeof(struct nfsd_file), 0, 0, NULL);
	if (!nfsd_file_slab) {
		pr_err("nfsd: unable to create nfsd_file_slab\n");
		goto out_err;
	}

	nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark",
					sizeof(struct nfsd_file_mark), 0, 0, NULL);
	if (!nfsd_file_mark_slab) {
		pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
		goto out_err;
	}

	ret = list_lru_init(&nfsd_file_lru);
	if (ret) {
		pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
		goto out_err;
	}

	ret = register_shrinker(&nfsd_file_shrinker);
	if (ret) {
		pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret);
		goto out_lru;
	}

	ret = lease_register_notifier(&nfsd_file_lease_notifier);
	if (ret) {
		pr_err("nfsd: unable to register lease notifier: %d\n", ret);
		goto out_shrinker;
	}

	nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops);
	if (IS_ERR(nfsd_file_fsnotify_group)) {
		pr_err("nfsd: unable to create fsnotify group: %ld\n",
			PTR_ERR(nfsd_file_fsnotify_group));
		nfsd_file_fsnotify_group = NULL;
		goto out_notifier;
	}

	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
		INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head);
		spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock);
	}

	INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_delayed_close);
out:
	return ret;
out_notifier:
	lease_unregister_notifier(&nfsd_file_lease_notifier);
out_shrinker:
	unregister_shrinker(&nfsd_file_shrinker);
out_lru:
	list_lru_destroy(&nfsd_file_lru);
out_err:
	kmem_cache_destroy(nfsd_file_slab);
	nfsd_file_slab = NULL;
	kmem_cache_destroy(nfsd_file_mark_slab);
	nfsd_file_mark_slab = NULL;
	kfree(nfsd_file_hashtbl);
	nfsd_file_hashtbl = NULL;
	goto out;
}

/*
 * Note this can deadlock with nfsd_file_lru_cb.
 */
void
nfsd_file_cache_purge(struct net *net)
{
	unsigned int		i;
	struct nfsd_file	*nf;
	struct hlist_node	*next;
	LIST_HEAD(dispose);
	bool del;

	if (!nfsd_file_hashtbl)
		return;

	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
		struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i];

		spin_lock(&nfb->nfb_lock);
		hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) {
			if (net && nf->nf_net != net)
				continue;
			del = nfsd_file_unhash_and_release_locked(nf, &dispose);

679 * Deadlock detected! Something marked this entry as
680 * unhased, but hasn't removed it from the hash list.
681 */
682 WARN_ON_ONCE(!del);
683 }
684 spin_unlock(&nfb->nfb_lock);
685 nfsd_file_dispose_list(&dispose);
686 }
687}
688
689void
690nfsd_file_cache_shutdown(void)
691{
692 LIST_HEAD(dispose);
693
694 set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);
695
696 lease_unregister_notifier(&nfsd_file_lease_notifier);
697 unregister_shrinker(&nfsd_file_shrinker);
698 /*
699 * make sure all callers of nfsd_file_lru_cb are done before
700 * calling nfsd_file_cache_purge
701 */
702 cancel_delayed_work_sync(&nfsd_filecache_laundrette);
703 nfsd_file_cache_purge(NULL);
704 list_lru_destroy(&nfsd_file_lru);
705 rcu_barrier();
706 fsnotify_put_group(nfsd_file_fsnotify_group);
707 nfsd_file_fsnotify_group = NULL;
708 kmem_cache_destroy(nfsd_file_slab);
709 nfsd_file_slab = NULL;
710 fsnotify_wait_marks_destroyed();
711 kmem_cache_destroy(nfsd_file_mark_slab);
712 nfsd_file_mark_slab = NULL;
713 kfree(nfsd_file_hashtbl);
714 nfsd_file_hashtbl = NULL;
715}
716
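/*
 * A cached open is only shared between requests whose credentials
 * would produce the same access checks: same fsuid, same fsgid, and
 * an identical supplementary group list.
 */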
static bool
nfsd_match_cred(const struct cred *c1, const struct cred *c2)
{
	int i;

	if (!uid_eq(c1->fsuid, c2->fsuid))
		return false;
	if (!gid_eq(c1->fsgid, c2->fsgid))
		return false;
	if (c1->group_info == NULL || c2->group_info == NULL)
		return c1->group_info == c2->group_info;
	if (c1->group_info->ngroups != c2->group_info->ngroups)
		return false;
	for (i = 0; i < c1->group_info->ngroups; i++) {
		if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i]))
			return false;
	}
	return true;
}

static struct nfsd_file *
nfsd_file_find_locked(struct inode *inode, unsigned int may_flags,
			unsigned int hashval, struct net *net)
{
	struct nfsd_file *nf;
	unsigned char need = may_flags & NFSD_FILE_MAY_MASK;

	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
				 nf_node) {
		if ((need & nf->nf_may) != need)
			continue;
		if (nf->nf_inode != inode)
			continue;
		if (nf->nf_net != net)
			continue;
		if (!nfsd_match_cred(nf->nf_cred, current_cred()))
			continue;
		if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags))
			continue;
		if (nfsd_file_get(nf) != NULL)
			return nf;
	}
	return NULL;
}

/**
 * nfsd_file_is_cached - are there any cached open files for this inode?
 * @inode: inode of the file to check
 *
 * Scan the hashtable for open files that match this inode. Returns true if
 * there are any, and false if not.
 */
bool
nfsd_file_is_cached(struct inode *inode)
{
	bool			ret = false;
	struct nfsd_file	*nf;
	unsigned int		hashval;

	hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);

	rcu_read_lock();
	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
				 nf_node) {
		if (inode == nf->nf_inode) {
			ret = true;
			break;
		}
	}
	rcu_read_unlock();
	trace_nfsd_file_is_cached(inode, hashval, (int)ret);
	return ret;
}

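/*
 * nfsd_file_acquire - find or open a file for an nfsd request.
 *
 * Fast path: an RCU walk of the hash chain finds a usable entry and
 * takes a reference. Otherwise a new nfsd_file is allocated, inserted
 * under the bucket lock with NFSD_FILE_PENDING set, and the actual
 * open happens outside the lock; other tasks that find the entry wait
 * on the PENDING bit until construction finishes. A construction
 * failure is retried once before returning nfserr_jukebox.
 */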
__be32
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
		  unsigned int may_flags, struct nfsd_file **pnf)
{
	__be32	status;
	struct net *net = SVC_NET(rqstp);
	struct nfsd_file *nf, *new;
	struct inode *inode;
	unsigned int hashval;
	bool retry = true;

	/* FIXME: skip this if fh_dentry is already set? */
	status = fh_verify(rqstp, fhp, S_IFREG,
				may_flags|NFSD_MAY_OWNER_OVERRIDE);
	if (status != nfs_ok)
		return status;

	inode = d_inode(fhp->fh_dentry);
	hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
retry:
	rcu_read_lock();
	nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
	rcu_read_unlock();
	if (nf)
		goto wait_for_construction;

	new = nfsd_file_alloc(inode, may_flags, hashval, net);
	if (!new) {
		trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags,
					NULL, nfserr_jukebox);
		return nfserr_jukebox;
	}

	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
	nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
	if (nf == NULL)
		goto open_file;
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
	nfsd_file_slab_free(&new->nf_rcu);

wait_for_construction:
	wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);

	/* Did construction of this file fail? */
	if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		if (!retry) {
			status = nfserr_jukebox;
			goto out;
		}
		retry = false;
		nfsd_file_put_noref(nf);
		goto retry;
	}

	this_cpu_inc(nfsd_file_cache_hits);

	if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) {
		bool write = (may_flags & NFSD_MAY_WRITE);

		if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) ||
		    (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) {
			status = nfserrno(nfsd_open_break_lease(
					file_inode(nf->nf_file), may_flags));
			if (status == nfs_ok) {
				clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
				if (write)
					clear_bit(NFSD_FILE_BREAK_WRITE,
						  &nf->nf_flags);
			}
		}
	}
out:
	if (status == nfs_ok) {
		*pnf = nf;
	} else {
		nfsd_file_put(nf);
		nf = NULL;
	}

	trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status);
	return status;
open_file:
	nf = new;
	/* Take reference for the hashtable */
	atomic_inc(&nf->nf_ref);
	__set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
	__set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
	list_lru_add(&nfsd_file_lru, &nf->nf_lru);
	hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head);
	++nfsd_file_hashtbl[hashval].nfb_count;
	nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
			nfsd_file_hashtbl[hashval].nfb_count);
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
	atomic_long_inc(&nfsd_filecache_count);

	nf->nf_mark = nfsd_file_mark_find_or_create(nf);
	if (nf->nf_mark)
		status = nfsd_open_verified(rqstp, fhp, S_IFREG,
				may_flags, &nf->nf_file);
	else
		status = nfserr_jukebox;
	/*
	 * If construction failed, or we raced with a call to unlink()
	 * then unhash.
	 */
	if (status != nfs_ok || inode->i_nlink == 0) {
		bool do_free;
		spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
		do_free = nfsd_file_unhash(nf);
		spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
		if (do_free)
			nfsd_file_put_noref(nf);
	}
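	/*
	 * Publish the result: clear_bit_unlock() has release semantics,
	 * so the stores above are visible before PENDING is cleared. The
	 * full barrier after it is the ordering wake_up_bit() requires so
	 * that waiters in wait_on_bit() observe the cleared bit.
	 */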
	clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
	smp_mb__after_atomic();
	wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
	goto out;
}

/*
 * Note that fields may be added, removed or reordered in the future. Programs
 * scraping this file for info should test the labels to ensure they're
 * getting the correct field.
 */
static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
{
	unsigned int i, count = 0, longest = 0;
	unsigned long hits = 0;

	/*
	 * No need for spinlocks here since we're not terribly interested in
	 * accuracy. We do take the nfsd_mutex simply to ensure that we
	 * don't end up racing with server shutdown.
	 */
	mutex_lock(&nfsd_mutex);
	if (nfsd_file_hashtbl) {
		for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
			count += nfsd_file_hashtbl[i].nfb_count;
			longest = max(longest, nfsd_file_hashtbl[i].nfb_count);
		}
	}
	mutex_unlock(&nfsd_mutex);

	for_each_possible_cpu(i)
		hits += per_cpu(nfsd_file_cache_hits, i);

	seq_printf(m, "total entries: %u\n", count);
	seq_printf(m, "longest chain: %u\n", longest);
	seq_printf(m, "cache hits: %lu\n", hits);
	return 0;
}

int nfsd_file_cache_stats_open(struct inode *inode, struct file *file)
{
	return single_open(file, nfsd_file_cache_stats_show, NULL);
}