Update Linux to v5.4.2

Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig
index 2a24249..c020d26 100644
--- a/fs/notify/Kconfig
+++ b/fs/notify/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config FSNOTIFY
 	def_bool n
 	select SRCU
diff --git a/fs/notify/dnotify/Kconfig b/fs/notify/dnotify/Kconfig
index f9c1ca1..3df7def 100644
--- a/fs/notify/dnotify/Kconfig
+++ b/fs/notify/dnotify/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config DNOTIFY
 	bool "Dnotify support"
 	select FSNOTIFY
diff --git a/fs/notify/dnotify/Makefile b/fs/notify/dnotify/Makefile
index f145251..121b4dd 100644
--- a/fs/notify/dnotify/Makefile
+++ b/fs/notify/dnotify/Makefile
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_DNOTIFY)		+= dnotify.o
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 58d77dc..7a42c2e 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Directory notifications for Linux.
  *
@@ -5,16 +6,6 @@
  *
  * Copyright (C) 2009 Eric Paris <Red Hat Inc>
  * dnotify was largly rewritten to use the new fsnotify infrastructure
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2, or (at your option) any
- * later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 #include <linux/fs.h>
 #include <linux/module.h>
@@ -22,6 +13,7 @@
 #include <linux/sched/signal.h>
 #include <linux/dnotify.h>
 #include <linux/init.h>
+#include <linux/security.h>
 #include <linux/spinlock.h>
 #include <linux/slab.h>
 #include <linux/fdtable.h>
@@ -81,7 +73,7 @@
 static int dnotify_handle_event(struct fsnotify_group *group,
 				struct inode *inode,
 				u32 mask, const void *data, int data_type,
-				const unsigned char *file_name, u32 cookie,
+				const struct qstr *file_name, u32 cookie,
 				struct fsnotify_iter_info *iter_info)
 {
 	struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info);
@@ -288,6 +280,17 @@
 		goto out_err;
 	}
 
+	/*
+	 * convert the userspace DN_* "arg" to the internal FS_*
+	 * defined in fsnotify
+	 */
+	mask = convert_arg(arg);
+
+	error = security_path_notify(&filp->f_path, mask,
+			FSNOTIFY_OBJ_TYPE_INODE);
+	if (error)
+		goto out_err;
+
 	/* expect most fcntl to add new rather than augment old */
 	dn = kmem_cache_alloc(dnotify_struct_cache, GFP_KERNEL);
 	if (!dn) {
@@ -302,9 +305,6 @@
 		goto out_err;
 	}
 
-	/* convert the userspace DN_* "arg" to the internal FS_* defines in fsnotify */
-	mask = convert_arg(arg);
-
 	/* set up the new_fsn_mark and new_dn_mark */
 	new_fsn_mark = &new_dn_mark->fsn_mark;
 	fsnotify_init_mark(new_fsn_mark, dnotify_group);
diff --git a/fs/notify/fanotify/Kconfig b/fs/notify/fanotify/Kconfig
index 41355ce..8b9103f 100644
--- a/fs/notify/fanotify/Kconfig
+++ b/fs/notify/fanotify/Kconfig
@@ -1,7 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config FANOTIFY
 	bool "Filesystem wide access notification"
 	select FSNOTIFY
-	select ANON_INODES
+	select EXPORTFS
 	default n
 	---help---
 	   Say Y here to enable fanotify support.  fanotify is a file access
diff --git a/fs/notify/fanotify/Makefile b/fs/notify/fanotify/Makefile
index 0999213..25ef222 100644
--- a/fs/notify/fanotify/Makefile
+++ b/fs/notify/fanotify/Makefile
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_FANOTIFY)		+= fanotify.o fanotify_user.o
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 29dee96..5778d13 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -13,22 +13,40 @@
 #include <linux/wait.h>
 #include <linux/audit.h>
 #include <linux/sched/mm.h>
+#include <linux/statfs.h>
 
 #include "fanotify.h"
 
 static bool should_merge(struct fsnotify_event *old_fsn,
 			 struct fsnotify_event *new_fsn)
 {
-	struct fanotify_event_info *old, *new;
+	struct fanotify_event *old, *new;
 
 	pr_debug("%s: old=%p new=%p\n", __func__, old_fsn, new_fsn);
 	old = FANOTIFY_E(old_fsn);
 	new = FANOTIFY_E(new_fsn);
 
-	if (old_fsn->inode == new_fsn->inode && old->tgid == new->tgid &&
-	    old->path.mnt == new->path.mnt &&
-	    old->path.dentry == new->path.dentry)
-		return true;
+	if (old_fsn->inode != new_fsn->inode || old->pid != new->pid ||
+	    old->fh_type != new->fh_type || old->fh_len != new->fh_len)
+		return false;
+
+	if (fanotify_event_has_path(old)) {
+		return old->path.mnt == new->path.mnt &&
+			old->path.dentry == new->path.dentry;
+	} else if (fanotify_event_has_fid(old)) {
+		/*
+		 * We want to merge many dirent events in the same dir (i.e.
+		 * creates/unlinks/renames), but we do not want to merge dirent
+		 * events referring to subdirs with dirent events referring to
+		 * non subdirs, otherwise, user won't be able to tell from a
+		 * mask FAN_CREATE|FAN_DELETE|FAN_ONDIR if it describes mkdir+
+		 * unlink pair or rmdir+create pair of events.
+		 */
+		return (old->mask & FS_ISDIR) == (new->mask & FS_ISDIR) &&
+			fanotify_fid_equal(&old->fid, &new->fid, old->fh_len);
+	}
+
+	/* Do not merge events if we failed to encode fid */
 	return false;
 }
 
@@ -36,20 +54,22 @@
 static int fanotify_merge(struct list_head *list, struct fsnotify_event *event)
 {
 	struct fsnotify_event *test_event;
+	struct fanotify_event *new;
 
 	pr_debug("%s: list=%p event=%p\n", __func__, list, event);
+	new = FANOTIFY_E(event);
 
 	/*
 	 * Don't merge a permission event with any other event so that we know
 	 * the event structure we have created in fanotify_handle_event() is the
 	 * one we should check for permission response.
 	 */
-	if (fanotify_is_perm_event(event->mask))
+	if (fanotify_is_perm_event(new->mask))
 		return 0;
 
 	list_for_each_entry_reverse(test_event, list, list) {
 		if (should_merge(test_event, event)) {
-			test_event->mask |= event->mask;
+			FANOTIFY_E(test_event)->mask |= new->mask;
 			return 1;
 		}
 	}
@@ -57,15 +77,44 @@
 	return 0;
 }
 
+/*
+ * Wait for response to permission event. The function also takes care of
+ * freeing the permission event (or offloads that in case the wait is canceled
+ * by a signal). The function returns 0 in case access got allowed by userspace,
+ * -EPERM in case userspace disallowed the access, and -ERESTARTSYS in case
+ * the wait got interrupted by a signal.
+ */
 static int fanotify_get_response(struct fsnotify_group *group,
-				 struct fanotify_perm_event_info *event,
+				 struct fanotify_perm_event *event,
 				 struct fsnotify_iter_info *iter_info)
 {
 	int ret;
 
 	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
-	wait_event(group->fanotify_data.access_waitq, event->response);
+	ret = wait_event_killable(group->fanotify_data.access_waitq,
+				  event->state == FAN_EVENT_ANSWERED);
+	/* Signal pending? */
+	if (ret < 0) {
+		spin_lock(&group->notification_lock);
+		/* Event reported to userspace and no answer yet? */
+		if (event->state == FAN_EVENT_REPORTED) {
+			/* Event will get freed once userspace answers to it */
+			event->state = FAN_EVENT_CANCELED;
+			spin_unlock(&group->notification_lock);
+			return ret;
+		}
+		/* Event not yet reported? Just remove it. */
+		if (event->state == FAN_EVENT_INIT)
+			fsnotify_remove_queued_event(group, &event->fae.fse);
+		/*
+		 * Event may be also answered in case signal delivery raced
+		 * with wakeup. In that case we have nothing to do besides
+		 * freeing the event and reporting error.
+		 */
+		spin_unlock(&group->notification_lock);
+		goto out;
+	}
 
 	/* userspace responded, convert to something usable */
 	switch (event->response & ~FAN_AUDIT) {
@@ -81,19 +130,27 @@
 	if (event->response & FAN_AUDIT)
 		audit_fanotify(event->response & ~FAN_AUDIT);
 
-	event->response = 0;
-
 	pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__,
 		 group, event, ret);
-	
+out:
+	fsnotify_destroy_event(group, &event->fae.fse);
+
 	return ret;
 }
 
-static bool fanotify_should_send_event(struct fsnotify_iter_info *iter_info,
-				       u32 event_mask, const void *data,
-				       int data_type)
+/*
+ * This function returns a mask for an event that only contains the flags
+ * that have been specifically requested by the user. Flags that may have
+ * been included within the event mask, but have not been explicitly
+ * requested by the user, will not be present in the returned mask.
+ */
+static u32 fanotify_group_event_mask(struct fsnotify_group *group,
+				     struct fsnotify_iter_info *iter_info,
+				     u32 event_mask, const void *data,
+				     int data_type)
 {
 	__u32 marks_mask = 0, marks_ignored_mask = 0;
+	__u32 test_mask, user_mask = FANOTIFY_OUTGOING_EVENTS;
 	const struct path *path = data;
 	struct fsnotify_mark *mark;
 	int type;
@@ -101,14 +158,14 @@
 	pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n",
 		 __func__, iter_info->report_mask, event_mask, data, data_type);
 
-	/* if we don't have enough info to send an event to userspace say no */
-	if (data_type != FSNOTIFY_EVENT_PATH)
-		return false;
-
-	/* sorry, fanotify only gives a damn about files and dirs */
-	if (!d_is_reg(path->dentry) &&
-	    !d_can_lookup(path->dentry))
-		return false;
+	if (!FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
+		/* Do we have path to open a file descriptor? */
+		if (data_type != FSNOTIFY_EVENT_PATH)
+			return 0;
+		/* Path type events are only relevant for files and dirs */
+		if (!d_is_reg(path->dentry) && !d_can_lookup(path->dentry))
+			return 0;
+	}
 
 	fsnotify_foreach_obj_type(type) {
 		if (!fsnotify_iter_should_report_type(iter_info, type))
@@ -127,55 +184,152 @@
 		marks_ignored_mask |= mark->ignored_mask;
 	}
 
-	if (d_is_dir(path->dentry) &&
+	test_mask = event_mask & marks_mask & ~marks_ignored_mask;
+
+	/*
+	 * dirent modification events (create/delete/move) do not carry the
+	 * child entry name/inode information. Instead, we report FAN_ONDIR
+	 * for mkdir/rmdir so user can differentiate them from creat/unlink.
+	 *
+	 * For backward compatibility and consistency, do not report FAN_ONDIR
+	 * to user in legacy fanotify mode (reporting fd) and report FAN_ONDIR
+	 * to user in FAN_REPORT_FID mode for all event types.
+	 */
+	if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
+		/* Do not report FAN_ONDIR without any event */
+		if (!(test_mask & ~FAN_ONDIR))
+			return 0;
+	} else {
+		user_mask &= ~FAN_ONDIR;
+	}
+
+	if (event_mask & FS_ISDIR &&
 	    !(marks_mask & FS_ISDIR & ~marks_ignored_mask))
-		return false;
+		return 0;
 
-	if (event_mask & FAN_ALL_OUTGOING_EVENTS & marks_mask &
-				 ~marks_ignored_mask)
-		return true;
-
-	return false;
+	return test_mask & user_mask;
 }
 
-struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group,
-						 struct inode *inode, u32 mask,
-						 const struct path *path)
+static int fanotify_encode_fid(struct fanotify_event *event,
+			       struct inode *inode, gfp_t gfp,
+			       __kernel_fsid_t *fsid)
 {
-	struct fanotify_event_info *event = NULL;
+	struct fanotify_fid *fid = &event->fid;
+	int dwords, bytes = 0;
+	int err, type;
+
+	fid->ext_fh = NULL;
+	dwords = 0;
+	err = -ENOENT;
+	type = exportfs_encode_inode_fh(inode, NULL, &dwords, NULL);
+	if (!dwords)
+		goto out_err;
+
+	bytes = dwords << 2;
+	if (bytes > FANOTIFY_INLINE_FH_LEN) {
+		/* Treat failure to allocate fh as failure to allocate event */
+		err = -ENOMEM;
+		fid->ext_fh = kmalloc(bytes, gfp);
+		if (!fid->ext_fh)
+			goto out_err;
+	}
+
+	type = exportfs_encode_inode_fh(inode, fanotify_fid_fh(fid, bytes),
+					&dwords, NULL);
+	err = -EINVAL;
+	if (!type || type == FILEID_INVALID || bytes != dwords << 2)
+		goto out_err;
+
+	fid->fsid = *fsid;
+	event->fh_len = bytes;
+
+	return type;
+
+out_err:
+	pr_warn_ratelimited("fanotify: failed to encode fid (fsid=%x.%x, "
+			    "type=%d, bytes=%d, err=%i)\n",
+			    fsid->val[0], fsid->val[1], type, bytes, err);
+	kfree(fid->ext_fh);
+	fid->ext_fh = NULL;
+	event->fh_len = 0;
+
+	return FILEID_INVALID;
+}
+
+/*
+ * The inode to use as identifier when reporting fid depends on the event.
+ * Report the modified directory inode on dirent modification events.
+ * Report the "victim" inode otherwise.
+ * For example:
+ * FS_ATTRIB reports the child inode even if reported on a watched parent.
+ * FS_CREATE reports the modified dir inode and not the created inode.
+ */
+static struct inode *fanotify_fid_inode(struct inode *to_tell, u32 event_mask,
+					const void *data, int data_type)
+{
+	if (event_mask & ALL_FSNOTIFY_DIRENT_EVENTS)
+		return to_tell;
+	else if (data_type == FSNOTIFY_EVENT_INODE)
+		return (struct inode *)data;
+	else if (data_type == FSNOTIFY_EVENT_PATH)
+		return d_inode(((struct path *)data)->dentry);
+	return NULL;
+}
+
+struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
+					    struct inode *inode, u32 mask,
+					    const void *data, int data_type,
+					    __kernel_fsid_t *fsid)
+{
+	struct fanotify_event *event = NULL;
 	gfp_t gfp = GFP_KERNEL_ACCOUNT;
+	struct inode *id = fanotify_fid_inode(inode, mask, data, data_type);
 
 	/*
 	 * For queues with unlimited length lost events are not expected and
 	 * can possibly have security implications. Avoid losing events when
-	 * memory is short.
+	 * memory is short. For the limited size queues, avoid OOM killer in the
+	 * target monitoring memcg as it may have security repercussion.
 	 */
 	if (group->max_events == UINT_MAX)
 		gfp |= __GFP_NOFAIL;
+	else
+		gfp |= __GFP_RETRY_MAYFAIL;
 
 	/* Whoever is interested in the event, pays for the allocation. */
 	memalloc_use_memcg(group->memcg);
 
 	if (fanotify_is_perm_event(mask)) {
-		struct fanotify_perm_event_info *pevent;
+		struct fanotify_perm_event *pevent;
 
 		pevent = kmem_cache_alloc(fanotify_perm_event_cachep, gfp);
 		if (!pevent)
 			goto out;
 		event = &pevent->fae;
 		pevent->response = 0;
+		pevent->state = FAN_EVENT_INIT;
 		goto init;
 	}
 	event = kmem_cache_alloc(fanotify_event_cachep, gfp);
 	if (!event)
 		goto out;
 init: __maybe_unused
-	fsnotify_init_event(&event->fse, inode, mask);
-	event->tgid = get_pid(task_tgid(current));
-	if (path) {
-		event->path = *path;
+	fsnotify_init_event(&event->fse, inode);
+	event->mask = mask;
+	if (FAN_GROUP_FLAG(group, FAN_REPORT_TID))
+		event->pid = get_pid(task_pid(current));
+	else
+		event->pid = get_pid(task_tgid(current));
+	event->fh_len = 0;
+	if (id && FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
+		/* Report the event without a file identifier on encode error */
+		event->fh_type = fanotify_encode_fid(event, id, gfp, fsid);
+	} else if (data_type == FSNOTIFY_EVENT_PATH) {
+		event->fh_type = FILEID_ROOT;
+		event->path = *((struct path *)data);
 		path_get(&event->path);
 	} else {
+		event->fh_type = FILEID_INVALID;
 		event->path.mnt = NULL;
 		event->path.dentry = NULL;
 	}
@@ -184,28 +338,75 @@
 	return event;
 }
 
+/*
+ * Get cached fsid of the filesystem containing the object from any connector.
+ * All connectors are supposed to have the same fsid, but we do not verify that
+ * here.
+ */
+static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info)
+{
+	int type;
+	__kernel_fsid_t fsid = {};
+
+	fsnotify_foreach_obj_type(type) {
+		struct fsnotify_mark_connector *conn;
+
+		if (!fsnotify_iter_should_report_type(iter_info, type))
+			continue;
+
+		conn = READ_ONCE(iter_info->marks[type]->connector);
+		/* Mark is just getting destroyed or created? */
+		if (!conn)
+			continue;
+		if (!(conn->flags & FSNOTIFY_CONN_FLAG_HAS_FSID))
+			continue;
+		/* Pairs with smp_wmb() in fsnotify_add_mark_list() */
+		smp_rmb();
+		fsid = conn->fsid;
+		if (WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1]))
+			continue;
+		return fsid;
+	}
+
+	return fsid;
+}
+
 static int fanotify_handle_event(struct fsnotify_group *group,
 				 struct inode *inode,
 				 u32 mask, const void *data, int data_type,
-				 const unsigned char *file_name, u32 cookie,
+				 const struct qstr *file_name, u32 cookie,
 				 struct fsnotify_iter_info *iter_info)
 {
 	int ret = 0;
-	struct fanotify_event_info *event;
+	struct fanotify_event *event;
 	struct fsnotify_event *fsn_event;
+	__kernel_fsid_t fsid = {};
 
 	BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS);
 	BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY);
+	BUILD_BUG_ON(FAN_ATTRIB != FS_ATTRIB);
 	BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
 	BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE);
 	BUILD_BUG_ON(FAN_OPEN != FS_OPEN);
+	BUILD_BUG_ON(FAN_MOVED_TO != FS_MOVED_TO);
+	BUILD_BUG_ON(FAN_MOVED_FROM != FS_MOVED_FROM);
+	BUILD_BUG_ON(FAN_CREATE != FS_CREATE);
+	BUILD_BUG_ON(FAN_DELETE != FS_DELETE);
+	BUILD_BUG_ON(FAN_DELETE_SELF != FS_DELETE_SELF);
+	BUILD_BUG_ON(FAN_MOVE_SELF != FS_MOVE_SELF);
 	BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD);
 	BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW);
 	BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM);
 	BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM);
 	BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR);
+	BUILD_BUG_ON(FAN_OPEN_EXEC != FS_OPEN_EXEC);
+	BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM);
 
-	if (!fanotify_should_send_event(iter_info, mask, data, data_type))
+	BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 19);
+
+	mask = fanotify_group_event_mask(group, iter_info, mask, data,
+					 data_type);
+	if (!mask)
 		return 0;
 
 	pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode,
@@ -220,7 +421,15 @@
 			return 0;
 	}
 
-	event = fanotify_alloc_event(group, inode, mask, data);
+	if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
+		fsid = fanotify_get_fsid(iter_info);
+		/* Racing with mark destruction or creation? */
+		if (!fsid.val[0] && !fsid.val[1])
+			return 0;
+	}
+
+	event = fanotify_alloc_event(group, inode, mask, data, data_type,
+				     &fsid);
 	ret = -ENOMEM;
 	if (unlikely(!event)) {
 		/*
@@ -236,7 +445,7 @@
 	ret = fsnotify_add_event(group, fsn_event, fanotify_merge);
 	if (ret) {
 		/* Permission events shouldn't be merged */
-		BUG_ON(ret == 1 && mask & FAN_ALL_PERM_EVENTS);
+		BUG_ON(ret == 1 && mask & FANOTIFY_PERM_EVENTS);
 		/* Our event wasn't used in the end. Free it. */
 		fsnotify_destroy_event(group, fsn_event);
 
@@ -244,7 +453,6 @@
 	} else if (fanotify_is_perm_event(mask)) {
 		ret = fanotify_get_response(group, FANOTIFY_PE(fsn_event),
 					    iter_info);
-		fsnotify_destroy_event(group, fsn_event);
 	}
 finish:
 	if (fanotify_is_perm_event(mask))
@@ -264,12 +472,15 @@
 
 static void fanotify_free_event(struct fsnotify_event *fsn_event)
 {
-	struct fanotify_event_info *event;
+	struct fanotify_event *event;
 
 	event = FANOTIFY_E(fsn_event);
-	path_put(&event->path);
-	put_pid(event->tgid);
-	if (fanotify_is_perm_event(fsn_event->mask)) {
+	if (fanotify_event_has_path(event))
+		path_put(&event->path);
+	else if (fanotify_event_has_ext_fh(event))
+		kfree(event->fid.ext_fh);
+	put_pid(event->pid);
+	if (fanotify_is_perm_event(event->mask)) {
 		kmem_cache_free(fanotify_perm_event_cachep,
 				FANOTIFY_PE(fsn_event));
 		return;
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index 8609ba0..68b3050 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -2,26 +2,112 @@
 #include <linux/fsnotify_backend.h>
 #include <linux/path.h>
 #include <linux/slab.h>
+#include <linux/exportfs.h>
 
 extern struct kmem_cache *fanotify_mark_cache;
 extern struct kmem_cache *fanotify_event_cachep;
 extern struct kmem_cache *fanotify_perm_event_cachep;
 
+/* Possible states of the permission event */
+enum {
+	FAN_EVENT_INIT,
+	FAN_EVENT_REPORTED,
+	FAN_EVENT_ANSWERED,
+	FAN_EVENT_CANCELED,
+};
+
+/*
+ * 3 dwords are sufficient for most local fs (64bit ino, 32bit generation).
+ * For 32bit arch, fid increases the size of fanotify_event by 12 bytes and
+ * fh_* fields increase the size of fanotify_event by another 4 bytes.
+ * For 64bit arch, fid increases the size of fanotify_fid by 8 bytes and
+ * fh_* fields are packed in a hole after mask.
+ */
+#if BITS_PER_LONG == 32
+#define FANOTIFY_INLINE_FH_LEN	(3 << 2)
+#else
+#define FANOTIFY_INLINE_FH_LEN	(4 << 2)
+#endif
+
+struct fanotify_fid {
+	__kernel_fsid_t fsid;
+	union {
+		unsigned char fh[FANOTIFY_INLINE_FH_LEN];
+		unsigned char *ext_fh;
+	};
+};
+
+static inline void *fanotify_fid_fh(struct fanotify_fid *fid,
+				    unsigned int fh_len)
+{
+	return fh_len <= FANOTIFY_INLINE_FH_LEN ? fid->fh : fid->ext_fh;
+}
+
+static inline bool fanotify_fid_equal(struct fanotify_fid *fid1,
+				      struct fanotify_fid *fid2,
+				      unsigned int fh_len)
+{
+	return fid1->fsid.val[0] == fid2->fsid.val[0] &&
+		fid1->fsid.val[1] == fid2->fsid.val[1] &&
+		!memcmp(fanotify_fid_fh(fid1, fh_len),
+			fanotify_fid_fh(fid2, fh_len), fh_len);
+}
+
 /*
  * Structure for normal fanotify events. It gets allocated in
  * fanotify_handle_event() and freed when the information is retrieved by
  * userspace
  */
-struct fanotify_event_info {
+struct fanotify_event {
 	struct fsnotify_event fse;
+	u32 mask;
 	/*
-	 * We hold ref to this path so it may be dereferenced at any point
-	 * during this object's lifetime
+	 * Those fields are outside fanotify_fid to pack fanotify_event nicely
+	 * on 64bit arch and to use fh_type as an indication of whether path
+	 * or fid are used in the union:
+	 * FILEID_ROOT (0) for path, > 0 for fid, FILEID_INVALID for neither.
 	 */
-	struct path path;
-	struct pid *tgid;
+	u8 fh_type;
+	u8 fh_len;
+	u16 pad;
+	union {
+		/*
+		 * We hold ref to this path so it may be dereferenced at any
+		 * point during this object's lifetime
+		 */
+		struct path path;
+		/*
+		 * With FAN_REPORT_FID, we do not hold any reference on the
+		 * victim object. Instead we store its NFS file handle and its
+		 * filesystem's fsid as a unique identifier.
+		 */
+		struct fanotify_fid fid;
+	};
+	struct pid *pid;
 };
 
+static inline bool fanotify_event_has_path(struct fanotify_event *event)
+{
+	return event->fh_type == FILEID_ROOT;
+}
+
+static inline bool fanotify_event_has_fid(struct fanotify_event *event)
+{
+	return event->fh_type != FILEID_ROOT &&
+		event->fh_type != FILEID_INVALID;
+}
+
+static inline bool fanotify_event_has_ext_fh(struct fanotify_event *event)
+{
+	return fanotify_event_has_fid(event) &&
+		event->fh_len > FANOTIFY_INLINE_FH_LEN;
+}
+
+static inline void *fanotify_event_fh(struct fanotify_event *event)
+{
+	return fanotify_fid_fh(&event->fid, event->fh_len);
+}
+
 /*
  * Structure for permission fanotify events. It gets allocated and freed in
  * fanotify_handle_event() since we wait there for user response. When the
@@ -29,29 +115,31 @@
  * group->notification_list to group->fanotify_data.access_list to wait for
  * user response.
  */
-struct fanotify_perm_event_info {
-	struct fanotify_event_info fae;
-	int response;	/* userspace answer to question */
+struct fanotify_perm_event {
+	struct fanotify_event fae;
+	unsigned short response;	/* userspace answer to the event */
+	unsigned short state;		/* state of the event */
 	int fd;		/* fd we passed to userspace for this event */
 };
 
-static inline struct fanotify_perm_event_info *
+static inline struct fanotify_perm_event *
 FANOTIFY_PE(struct fsnotify_event *fse)
 {
-	return container_of(fse, struct fanotify_perm_event_info, fae.fse);
+	return container_of(fse, struct fanotify_perm_event, fae.fse);
 }
 
 static inline bool fanotify_is_perm_event(u32 mask)
 {
 	return IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS) &&
-		mask & FAN_ALL_PERM_EVENTS;
+		mask & FANOTIFY_PERM_EVENTS;
 }
 
-static inline struct fanotify_event_info *FANOTIFY_E(struct fsnotify_event *fse)
+static inline struct fanotify_event *FANOTIFY_E(struct fsnotify_event *fse)
 {
-	return container_of(fse, struct fanotify_event_info, fse);
+	return container_of(fse, struct fanotify_event, fse);
 }
 
-struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group,
-						 struct inode *inode, u32 mask,
-						 const struct path *path);
+struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
+					    struct inode *inode, u32 mask,
+					    const void *data, int data_type,
+					    __kernel_fsid_t *fsid);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 6905488..8508ab5 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -17,6 +17,8 @@
 #include <linux/compat.h>
 #include <linux/sched/signal.h>
 #include <linux/memcontrol.h>
+#include <linux/statfs.h>
+#include <linux/exportfs.h>
 
 #include <asm/ioctls.h>
 
@@ -47,33 +49,55 @@
 struct kmem_cache *fanotify_event_cachep __read_mostly;
 struct kmem_cache *fanotify_perm_event_cachep __read_mostly;
 
+#define FANOTIFY_EVENT_ALIGN 4
+
+static int fanotify_event_info_len(struct fanotify_event *event)
+{
+	if (!fanotify_event_has_fid(event))
+		return 0;
+
+	return roundup(sizeof(struct fanotify_event_info_fid) +
+		       sizeof(struct file_handle) + event->fh_len,
+		       FANOTIFY_EVENT_ALIGN);
+}
+
 /*
  * Get an fsnotify notification event if one exists and is small
  * enough to fit in "count". Return an error pointer if the count
- * is not large enough.
- *
- * Called with the group->notification_lock held.
+ * is not large enough. When permission event is dequeued, its state is
+ * updated accordingly.
  */
 static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
 					    size_t count)
 {
-	assert_spin_locked(&group->notification_lock);
+	size_t event_size = FAN_EVENT_METADATA_LEN;
+	struct fsnotify_event *fsn_event = NULL;
 
 	pr_debug("%s: group=%p count=%zd\n", __func__, group, count);
 
+	spin_lock(&group->notification_lock);
 	if (fsnotify_notify_queue_is_empty(group))
-		return NULL;
+		goto out;
 
-	if (FAN_EVENT_METADATA_LEN > count)
-		return ERR_PTR(-EINVAL);
+	if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
+		event_size += fanotify_event_info_len(
+			FANOTIFY_E(fsnotify_peek_first_event(group)));
+	}
 
-	/* held the notification_lock the whole time, so this is the
-	 * same event we peeked above */
-	return fsnotify_remove_first_event(group);
+	if (event_size > count) {
+		fsn_event = ERR_PTR(-EINVAL);
+		goto out;
+	}
+	fsn_event = fsnotify_remove_first_event(group);
+	if (fanotify_is_perm_event(FANOTIFY_E(fsn_event)->mask))
+		FANOTIFY_PE(fsn_event)->state = FAN_EVENT_REPORTED;
+out:
+	spin_unlock(&group->notification_lock);
+	return fsn_event;
 }
 
 static int create_fd(struct fsnotify_group *group,
-		     struct fanotify_event_info *event,
+		     struct fanotify_event *event,
 		     struct file **file)
 {
 	int client_fd;
@@ -114,62 +138,32 @@
 	return client_fd;
 }
 
-static int fill_event_metadata(struct fsnotify_group *group,
-			       struct fanotify_event_metadata *metadata,
-			       struct fsnotify_event *fsn_event,
-			       struct file **file)
+/*
+ * Finish processing of permission event by setting it to ANSWERED state and
+ * drop group->notification_lock.
+ */
+static void finish_permission_event(struct fsnotify_group *group,
+				    struct fanotify_perm_event *event,
+				    unsigned int response)
+				    __releases(&group->notification_lock)
 {
-	int ret = 0;
-	struct fanotify_event_info *event;
+	bool destroy = false;
 
-	pr_debug("%s: group=%p metadata=%p event=%p\n", __func__,
-		 group, metadata, fsn_event);
-
-	*file = NULL;
-	event = container_of(fsn_event, struct fanotify_event_info, fse);
-	metadata->event_len = FAN_EVENT_METADATA_LEN;
-	metadata->metadata_len = FAN_EVENT_METADATA_LEN;
-	metadata->vers = FANOTIFY_METADATA_VERSION;
-	metadata->reserved = 0;
-	metadata->mask = fsn_event->mask & FAN_ALL_OUTGOING_EVENTS;
-	metadata->pid = pid_vnr(event->tgid);
-	if (unlikely(fsn_event->mask & FAN_Q_OVERFLOW))
-		metadata->fd = FAN_NOFD;
-	else {
-		metadata->fd = create_fd(group, event, file);
-		if (metadata->fd < 0)
-			ret = metadata->fd;
-	}
-
-	return ret;
-}
-
-static struct fanotify_perm_event_info *dequeue_event(
-				struct fsnotify_group *group, int fd)
-{
-	struct fanotify_perm_event_info *event, *return_e = NULL;
-
-	spin_lock(&group->notification_lock);
-	list_for_each_entry(event, &group->fanotify_data.access_list,
-			    fae.fse.list) {
-		if (event->fd != fd)
-			continue;
-
-		list_del_init(&event->fae.fse.list);
-		return_e = event;
-		break;
-	}
+	assert_spin_locked(&group->notification_lock);
+	event->response = response;
+	if (event->state == FAN_EVENT_CANCELED)
+		destroy = true;
+	else
+		event->state = FAN_EVENT_ANSWERED;
 	spin_unlock(&group->notification_lock);
-
-	pr_debug("%s: found return_re=%p\n", __func__, return_e);
-
-	return return_e;
+	if (destroy)
+		fsnotify_destroy_event(group, &event->fae.fse);
 }
 
 static int process_access_response(struct fsnotify_group *group,
 				   struct fanotify_response *response_struct)
 {
-	struct fanotify_perm_event_info *event;
+	struct fanotify_perm_event *event;
 	int fd = response_struct->fd;
 	int response = response_struct->response;
 
@@ -191,45 +185,128 @@
 	if (fd < 0)
 		return -EINVAL;
 
-	if ((response & FAN_AUDIT) && !group->fanotify_data.audit)
+	if ((response & FAN_AUDIT) && !FAN_GROUP_FLAG(group, FAN_ENABLE_AUDIT))
 		return -EINVAL;
 
-	event = dequeue_event(group, fd);
-	if (!event)
-		return -ENOENT;
+	spin_lock(&group->notification_lock);
+	list_for_each_entry(event, &group->fanotify_data.access_list,
+			    fae.fse.list) {
+		if (event->fd != fd)
+			continue;
 
-	event->response = response;
-	wake_up(&group->fanotify_data.access_waitq);
+		list_del_init(&event->fae.fse.list);
+		finish_permission_event(group, event, response);
+		wake_up(&group->fanotify_data.access_waitq);
+		return 0;
+	}
+	spin_unlock(&group->notification_lock);
+
+	return -ENOENT;
+}
+
+static int copy_fid_to_user(struct fanotify_event *event, char __user *buf)
+{
+	struct fanotify_event_info_fid info = { };
+	struct file_handle handle = { };
+	unsigned char bounce[FANOTIFY_INLINE_FH_LEN], *fh;
+	size_t fh_len = event->fh_len;
+	size_t len = fanotify_event_info_len(event);
+
+	if (!len)
+		return 0;
+
+	if (WARN_ON_ONCE(len < sizeof(info) + sizeof(handle) + fh_len))
+		return -EFAULT;
+
+	/* Copy event info fid header followed by vaiable sized file handle */
+	info.hdr.info_type = FAN_EVENT_INFO_TYPE_FID;
+	info.hdr.len = len;
+	info.fsid = event->fid.fsid;
+	if (copy_to_user(buf, &info, sizeof(info)))
+		return -EFAULT;
+
+	buf += sizeof(info);
+	len -= sizeof(info);
+	handle.handle_type = event->fh_type;
+	handle.handle_bytes = fh_len;
+	if (copy_to_user(buf, &handle, sizeof(handle)))
+		return -EFAULT;
+
+	buf += sizeof(handle);
+	len -= sizeof(handle);
+	/*
+	 * For an inline fh, copy through stack to exclude the copy from
+	 * usercopy hardening protections.
+	 */
+	fh = fanotify_event_fh(event);
+	if (fh_len <= FANOTIFY_INLINE_FH_LEN) {
+		memcpy(bounce, fh, fh_len);
+		fh = bounce;
+	}
+	if (copy_to_user(buf, fh, fh_len))
+		return -EFAULT;
+
+	/* Pad with 0's */
+	buf += fh_len;
+	len -= fh_len;
+	WARN_ON_ONCE(len < 0 || len >= FANOTIFY_EVENT_ALIGN);
+	if (len > 0 && clear_user(buf, len))
+		return -EFAULT;
 
 	return 0;
 }
 
 static ssize_t copy_event_to_user(struct fsnotify_group *group,
-				  struct fsnotify_event *event,
-				  char __user *buf)
+				  struct fsnotify_event *fsn_event,
+				  char __user *buf, size_t count)
 {
-	struct fanotify_event_metadata fanotify_event_metadata;
-	struct file *f;
-	int fd, ret;
+	struct fanotify_event_metadata metadata;
+	struct fanotify_event *event;
+	struct file *f = NULL;
+	int ret, fd = FAN_NOFD;
 
-	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
+	pr_debug("%s: group=%p event=%p\n", __func__, group, fsn_event);
 
-	ret = fill_event_metadata(group, &fanotify_event_metadata, event, &f);
-	if (ret < 0)
-		return ret;
+	event = container_of(fsn_event, struct fanotify_event, fse);
+	metadata.event_len = FAN_EVENT_METADATA_LEN;
+	metadata.metadata_len = FAN_EVENT_METADATA_LEN;
+	metadata.vers = FANOTIFY_METADATA_VERSION;
+	metadata.reserved = 0;
+	metadata.mask = event->mask & FANOTIFY_OUTGOING_EVENTS;
+	metadata.pid = pid_vnr(event->pid);
 
-	fd = fanotify_event_metadata.fd;
+	if (fanotify_event_has_path(event)) {
+		fd = create_fd(group, event, &f);
+		if (fd < 0)
+			return fd;
+	} else if (fanotify_event_has_fid(event)) {
+		metadata.event_len += fanotify_event_info_len(event);
+	}
+	metadata.fd = fd;
+
 	ret = -EFAULT;
-	if (copy_to_user(buf, &fanotify_event_metadata,
-			 fanotify_event_metadata.event_len))
+	/*
+	 * Sanity check copy size in case get_one_event() and
+	 * fill_event_metadata() event_len sizes ever get out of sync.
+	 */
+	if (WARN_ON_ONCE(metadata.event_len > count))
+		goto out_close_fd;
+
+	if (copy_to_user(buf, &metadata, FAN_EVENT_METADATA_LEN))
 		goto out_close_fd;
 
 	if (fanotify_is_perm_event(event->mask))
-		FANOTIFY_PE(event)->fd = fd;
+		FANOTIFY_PE(fsn_event)->fd = fd;
 
-	if (fd != FAN_NOFD)
+	if (fanotify_event_has_path(event)) {
 		fd_install(fd, f);
-	return fanotify_event_metadata.event_len;
+	} else if (fanotify_event_has_fid(event)) {
+		ret = copy_fid_to_user(event, buf + FAN_EVENT_METADATA_LEN);
+		if (ret < 0)
+			return ret;
+	}
+
+	return metadata.event_len;
 
 out_close_fd:
 	if (fd != FAN_NOFD) {
@@ -270,10 +347,7 @@
 
 	add_wait_queue(&group->notification_waitq, &wait);
 	while (1) {
-		spin_lock(&group->notification_lock);
 		kevent = get_one_event(group, count);
-		spin_unlock(&group->notification_lock);
-
 		if (IS_ERR(kevent)) {
 			ret = PTR_ERR(kevent);
 			break;
@@ -295,7 +369,7 @@
 			continue;
 		}
 
-		ret = copy_event_to_user(group, kevent, buf);
+		ret = copy_event_to_user(group, kevent, buf, count);
 		if (unlikely(ret == -EOPENSTALE)) {
 			/*
 			 * We cannot report events with stale fd so drop it.
@@ -310,11 +384,13 @@
 		 * Permission events get queued to wait for response.  Other
 		 * events can be destroyed now.
 		 */
-		if (!fanotify_is_perm_event(kevent->mask)) {
+		if (!fanotify_is_perm_event(FANOTIFY_E(kevent)->mask)) {
 			fsnotify_destroy_event(group, kevent);
 		} else {
 			if (ret <= 0) {
-				FANOTIFY_PE(kevent)->response = FAN_DENY;
+				spin_lock(&group->notification_lock);
+				finish_permission_event(group,
+					FANOTIFY_PE(kevent), FAN_DENY);
 				wake_up(&group->fanotify_data.access_waitq);
 			} else {
 				spin_lock(&group->notification_lock);
@@ -364,7 +440,7 @@
 static int fanotify_release(struct inode *ignored, struct file *file)
 {
 	struct fsnotify_group *group = file->private_data;
-	struct fanotify_perm_event_info *event, *next;
+	struct fanotify_perm_event *event;
 	struct fsnotify_event *fsn_event;
 
 	/*
@@ -379,13 +455,12 @@
 	 * and simulate reply from userspace.
 	 */
 	spin_lock(&group->notification_lock);
-	list_for_each_entry_safe(event, next, &group->fanotify_data.access_list,
-				 fae.fse.list) {
-		pr_debug("%s: found group=%p event=%p\n", __func__, group,
-			 event);
-
+	while (!list_empty(&group->fanotify_data.access_list)) {
+		event = list_first_entry(&group->fanotify_data.access_list,
+				struct fanotify_perm_event, fae.fse.list);
 		list_del_init(&event->fae.fse.list);
-		event->response = FAN_ALLOW;
+		finish_permission_event(group, event, FAN_ALLOW);
+		spin_lock(&group->notification_lock);
 	}
 
 	/*
@@ -395,13 +470,14 @@
 	 */
 	while (!fsnotify_notify_queue_is_empty(group)) {
 		fsn_event = fsnotify_remove_first_event(group);
-		if (!(fsn_event->mask & FAN_ALL_PERM_EVENTS)) {
+		if (!(FANOTIFY_E(fsn_event)->mask & FANOTIFY_PERM_EVENTS)) {
 			spin_unlock(&group->notification_lock);
 			fsnotify_destroy_event(group, fsn_event);
-			spin_lock(&group->notification_lock);
 		} else {
-			FANOTIFY_PE(fsn_event)->response = FAN_ALLOW;
+			finish_permission_event(group, FANOTIFY_PE(fsn_event),
+						FAN_ALLOW);
 		}
+		spin_lock(&group->notification_lock);
 	}
 	spin_unlock(&group->notification_lock);
 
@@ -452,7 +528,8 @@
 };
 
 static int fanotify_find_path(int dfd, const char __user *filename,
-			      struct path *path, unsigned int flags)
+			      struct path *path, unsigned int flags, __u64 mask,
+			      unsigned int obj_type)
 {
 	int ret;
 
@@ -491,8 +568,15 @@
 
 	/* you can only watch an inode if you have read permissions on it */
 	ret = inode_permission(path->dentry->d_inode, MAY_READ);
+	if (ret) {
+		path_put(path);
+		goto out;
+	}
+
+	ret = security_path_notify(path, mask, obj_type);
 	if (ret)
 		path_put(path);
+
 out:
 	return ret;
 }
@@ -506,18 +590,10 @@
 
 	spin_lock(&fsn_mark->lock);
 	if (!(flags & FAN_MARK_IGNORED_MASK)) {
-		__u32 tmask = fsn_mark->mask & ~mask;
-
-		if (flags & FAN_MARK_ONDIR)
-			tmask &= ~FAN_ONDIR;
-
 		oldmask = fsn_mark->mask;
-		fsn_mark->mask = tmask;
+		fsn_mark->mask &= ~mask;
 	} else {
-		__u32 tmask = fsn_mark->ignored_mask & ~mask;
-		if (flags & FAN_MARK_ONDIR)
-			tmask &= ~FAN_ONDIR;
-		fsn_mark->ignored_mask = tmask;
+		fsn_mark->ignored_mask &= ~mask;
 	}
 	*destroy = !(fsn_mark->mask | fsn_mark->ignored_mask);
 	spin_unlock(&fsn_mark->lock);
@@ -563,6 +639,13 @@
 				    mask, flags);
 }
 
+static int fanotify_remove_sb_mark(struct fsnotify_group *group,
+				      struct super_block *sb, __u32 mask,
+				      unsigned int flags)
+{
+	return fanotify_remove_mark(group, &sb->s_fsnotify_marks, mask, flags);
+}
+
 static int fanotify_remove_inode_mark(struct fsnotify_group *group,
 				      struct inode *inode, __u32 mask,
 				      unsigned int flags)
@@ -579,19 +662,10 @@
 
 	spin_lock(&fsn_mark->lock);
 	if (!(flags & FAN_MARK_IGNORED_MASK)) {
-		__u32 tmask = fsn_mark->mask | mask;
-
-		if (flags & FAN_MARK_ONDIR)
-			tmask |= FAN_ONDIR;
-
 		oldmask = fsn_mark->mask;
-		fsn_mark->mask = tmask;
+		fsn_mark->mask |= mask;
 	} else {
-		__u32 tmask = fsn_mark->ignored_mask | mask;
-		if (flags & FAN_MARK_ONDIR)
-			tmask |= FAN_ONDIR;
-
-		fsn_mark->ignored_mask = tmask;
+		fsn_mark->ignored_mask |= mask;
 		if (flags & FAN_MARK_IGNORED_SURV_MODIFY)
 			fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
 	}
@@ -602,7 +676,8 @@
 
 static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
 						   fsnotify_connp_t *connp,
-						   unsigned int type)
+						   unsigned int type,
+						   __kernel_fsid_t *fsid)
 {
 	struct fsnotify_mark *mark;
 	int ret;
@@ -615,7 +690,7 @@
 		return ERR_PTR(-ENOMEM);
 
 	fsnotify_init_mark(mark, group);
-	ret = fsnotify_add_mark_locked(mark, connp, type, 0);
+	ret = fsnotify_add_mark_locked(mark, connp, type, 0, fsid);
 	if (ret) {
 		fsnotify_put_mark(mark);
 		return ERR_PTR(ret);
@@ -627,7 +702,8 @@
 
 static int fanotify_add_mark(struct fsnotify_group *group,
 			     fsnotify_connp_t *connp, unsigned int type,
-			     __u32 mask, unsigned int flags)
+			     __u32 mask, unsigned int flags,
+			     __kernel_fsid_t *fsid)
 {
 	struct fsnotify_mark *fsn_mark;
 	__u32 added;
@@ -635,7 +711,7 @@
 	mutex_lock(&group->mark_mutex);
 	fsn_mark = fsnotify_find_mark(connp, group);
 	if (!fsn_mark) {
-		fsn_mark = fanotify_add_new_mark(group, connp, type);
+		fsn_mark = fanotify_add_new_mark(group, connp, type, fsid);
 		if (IS_ERR(fsn_mark)) {
 			mutex_unlock(&group->mark_mutex);
 			return PTR_ERR(fsn_mark);
@@ -652,15 +728,23 @@
 
 static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
 				      struct vfsmount *mnt, __u32 mask,
-				      unsigned int flags)
+				      unsigned int flags, __kernel_fsid_t *fsid)
 {
 	return fanotify_add_mark(group, &real_mount(mnt)->mnt_fsnotify_marks,
-				 FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags);
+				 FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags, fsid);
+}
+
+static int fanotify_add_sb_mark(struct fsnotify_group *group,
+				struct super_block *sb, __u32 mask,
+				unsigned int flags, __kernel_fsid_t *fsid)
+{
+	return fanotify_add_mark(group, &sb->s_fsnotify_marks,
+				 FSNOTIFY_OBJ_TYPE_SB, mask, flags, fsid);
 }
 
 static int fanotify_add_inode_mark(struct fsnotify_group *group,
 				   struct inode *inode, __u32 mask,
-				   unsigned int flags)
+				   unsigned int flags, __kernel_fsid_t *fsid)
 {
 	pr_debug("%s: group=%p inode=%p\n", __func__, group, inode);
 
@@ -671,11 +755,11 @@
 	 */
 	if ((flags & FAN_MARK_IGNORED_MASK) &&
 	    !(flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
-	    (atomic_read(&inode->i_writecount) > 0))
+	    inode_is_open_for_write(inode))
 		return 0;
 
 	return fanotify_add_mark(group, &inode->i_fsnotify_marks,
-				 FSNOTIFY_OBJ_TYPE_INODE, mask, flags);
+				 FSNOTIFY_OBJ_TYPE_INODE, mask, flags, fsid);
 }
 
 /* fanotify syscalls */
@@ -684,18 +768,18 @@
 	struct fsnotify_group *group;
 	int f_flags, fd;
 	struct user_struct *user;
-	struct fanotify_event_info *oevent;
+	struct fanotify_event *oevent;
 
-	pr_debug("%s: flags=%d event_f_flags=%d\n",
-		__func__, flags, event_f_flags);
+	pr_debug("%s: flags=%x event_f_flags=%x\n",
+		 __func__, flags, event_f_flags);
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
 #ifdef CONFIG_AUDITSYSCALL
-	if (flags & ~(FAN_ALL_INIT_FLAGS | FAN_ENABLE_AUDIT))
+	if (flags & ~(FANOTIFY_INIT_FLAGS | FAN_ENABLE_AUDIT))
 #else
-	if (flags & ~FAN_ALL_INIT_FLAGS)
+	if (flags & ~FANOTIFY_INIT_FLAGS)
 #endif
 		return -EINVAL;
 
@@ -711,6 +795,10 @@
 		return -EINVAL;
 	}
 
+	if ((flags & FAN_REPORT_FID) &&
+	    (flags & FANOTIFY_CLASS_BITS) != FAN_CLASS_NOTIF)
+		return -EINVAL;
+
 	user = get_current_user();
 	if (atomic_read(&user->fanotify_listeners) > FANOTIFY_DEFAULT_MAX_LISTENERS) {
 		free_uid(user);
@@ -731,10 +819,12 @@
 	}
 
 	group->fanotify_data.user = user;
+	group->fanotify_data.flags = flags;
 	atomic_inc(&user->fanotify_listeners);
 	group->memcg = get_mem_cgroup_from_mm(current->mm);
 
-	oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL);
+	oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL,
+				      FSNOTIFY_EVENT_NONE, NULL);
 	if (unlikely(!oevent)) {
 		fd = -ENOMEM;
 		goto out_destroy_group;
@@ -746,7 +836,7 @@
 	group->fanotify_data.f_flags = event_f_flags;
 	init_waitqueue_head(&group->fanotify_data.access_waitq);
 	INIT_LIST_HEAD(&group->fanotify_data.access_list);
-	switch (flags & FAN_ALL_CLASS_BITS) {
+	switch (flags & FANOTIFY_CLASS_BITS) {
 	case FAN_CLASS_NOTIF:
 		group->priority = FS_PRIO_0;
 		break;
@@ -783,7 +873,6 @@
 		fd = -EPERM;
 		if (!capable(CAP_AUDIT_WRITE))
 			goto out_destroy_group;
-		group->fanotify_data.audit = true;
 	}
 
 	fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags);
@@ -797,6 +886,64 @@
 	return fd;
 }
 
+/* Check if filesystem can encode a unique fid */
+static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid)
+{
+	__kernel_fsid_t root_fsid;
+	int err;
+
+	/*
+	 * Make sure path is not in filesystem with zero fsid (e.g. tmpfs).
+	 */
+	err = vfs_get_fsid(path->dentry, fsid);
+	if (err)
+		return err;
+
+	if (!fsid->val[0] && !fsid->val[1])
+		return -ENODEV;
+
+	/*
+	 * Make sure path is not inside a filesystem subvolume (e.g. btrfs)
+	 * which uses a different fsid than sb root.
+	 */
+	err = vfs_get_fsid(path->dentry->d_sb->s_root, &root_fsid);
+	if (err)
+		return err;
+
+	if (root_fsid.val[0] != fsid->val[0] ||
+	    root_fsid.val[1] != fsid->val[1])
+		return -EXDEV;
+
+	/*
+	 * We need to make sure that the file system supports at least
+	 * encoding a file handle so user can use name_to_handle_at() to
+	 * compare fid returned with event to the file handle of watched
+	 * objects. However, name_to_handle_at() requires that the
+	 * filesystem also supports decoding file handles.
+	 */
+	if (!path->dentry->d_sb->s_export_op ||
+	    !path->dentry->d_sb->s_export_op->fh_to_dentry)
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+static int fanotify_events_supported(struct path *path, __u64 mask)
+{
+	/*
+	 * Some filesystems such as 'proc' acquire unusual locks when opening
+	 * files. For them fanotify permission events have high chances of
+	 * deadlocking the system - open done when reporting fanotify event
+	 * blocks on this "unusual" lock while another process holding the lock
+	 * waits for fanotify permission event to be answered. Just disallow
+	 * permission events for such filesystems.
+	 */
+	if (mask & FANOTIFY_PERM_EVENTS &&
+	    path->mnt->mnt_sb->s_type->fs_flags & FS_DISALLOW_NOTIFY_PERM)
+		return -EINVAL;
+	return 0;
+}
+
 static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 			    int dfd, const char  __user *pathname)
 {
@@ -805,7 +952,10 @@
 	struct fsnotify_group *group;
 	struct fd f;
 	struct path path;
-	u32 valid_mask = FAN_ALL_EVENTS | FAN_EVENT_ON_CHILD;
+	__kernel_fsid_t __fsid, *fsid = NULL;
+	u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS;
+	unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
+	unsigned int obj_type;
 	int ret;
 
 	pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n",
@@ -815,8 +965,23 @@
 	if (mask & ((__u64)0xffffffff << 32))
 		return -EINVAL;
 
-	if (flags & ~FAN_ALL_MARK_FLAGS)
+	if (flags & ~FANOTIFY_MARK_FLAGS)
 		return -EINVAL;
+
+	switch (mark_type) {
+	case FAN_MARK_INODE:
+		obj_type = FSNOTIFY_OBJ_TYPE_INODE;
+		break;
+	case FAN_MARK_MOUNT:
+		obj_type = FSNOTIFY_OBJ_TYPE_VFSMOUNT;
+		break;
+	case FAN_MARK_FILESYSTEM:
+		obj_type = FSNOTIFY_OBJ_TYPE_SB;
+		break;
+	default:
+		return -EINVAL;
+	}
+
 	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
 	case FAN_MARK_ADD:		/* fallthrough */
 	case FAN_MARK_REMOVE:
@@ -824,20 +989,15 @@
 			return -EINVAL;
 		break;
 	case FAN_MARK_FLUSH:
-		if (flags & ~(FAN_MARK_MOUNT | FAN_MARK_FLUSH))
+		if (flags & ~(FANOTIFY_MARK_TYPE_BITS | FAN_MARK_FLUSH))
 			return -EINVAL;
 		break;
 	default:
 		return -EINVAL;
 	}
 
-	if (mask & FAN_ONDIR) {
-		flags |= FAN_MARK_ONDIR;
-		mask &= ~FAN_ONDIR;
-	}
-
 	if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS))
-		valid_mask |= FAN_ALL_PERM_EVENTS;
+		valid_mask |= FANOTIFY_PERM_EVENTS;
 
 	if (mask & ~valid_mask)
 		return -EINVAL;
@@ -857,25 +1017,54 @@
 	 * allowed to set permissions events.
 	 */
 	ret = -EINVAL;
-	if (mask & FAN_ALL_PERM_EVENTS &&
+	if (mask & FANOTIFY_PERM_EVENTS &&
 	    group->priority == FS_PRIO_0)
 		goto fput_and_out;
 
+	/*
+	 * Events with data type inode do not carry enough information to report
+	 * event->fd, so we do not allow setting a mask for inode events unless
+	 * group supports reporting fid.
+	 * inode events are not supported on a mount mark, because they do not
+	 * carry enough information (i.e. path) to be filtered by mount point.
+	 */
+	if (mask & FANOTIFY_INODE_EVENTS &&
+	    (!FAN_GROUP_FLAG(group, FAN_REPORT_FID) ||
+	     mark_type == FAN_MARK_MOUNT))
+		goto fput_and_out;
+
 	if (flags & FAN_MARK_FLUSH) {
 		ret = 0;
-		if (flags & FAN_MARK_MOUNT)
+		if (mark_type == FAN_MARK_MOUNT)
 			fsnotify_clear_vfsmount_marks_by_group(group);
+		else if (mark_type == FAN_MARK_FILESYSTEM)
+			fsnotify_clear_sb_marks_by_group(group);
 		else
 			fsnotify_clear_inode_marks_by_group(group);
 		goto fput_and_out;
 	}
 
-	ret = fanotify_find_path(dfd, pathname, &path, flags);
+	ret = fanotify_find_path(dfd, pathname, &path, flags,
+			(mask & ALL_FSNOTIFY_EVENTS), obj_type);
 	if (ret)
 		goto fput_and_out;
 
+	if (flags & FAN_MARK_ADD) {
+		ret = fanotify_events_supported(&path, mask);
+		if (ret)
+			goto path_put_and_out;
+	}
+
+	if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
+		ret = fanotify_test_fid(&path, &__fsid);
+		if (ret)
+			goto path_put_and_out;
+
+		fsid = &__fsid;
+	}
+
 	/* inode held in place by reference to path; group by fget on fd */
-	if (!(flags & FAN_MARK_MOUNT))
+	if (mark_type == FAN_MARK_INODE)
 		inode = path.dentry->d_inode;
 	else
 		mnt = path.mnt;
@@ -883,21 +1072,32 @@
 	/* create/update an inode mark */
 	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) {
 	case FAN_MARK_ADD:
-		if (flags & FAN_MARK_MOUNT)
-			ret = fanotify_add_vfsmount_mark(group, mnt, mask, flags);
+		if (mark_type == FAN_MARK_MOUNT)
+			ret = fanotify_add_vfsmount_mark(group, mnt, mask,
+							 flags, fsid);
+		else if (mark_type == FAN_MARK_FILESYSTEM)
+			ret = fanotify_add_sb_mark(group, mnt->mnt_sb, mask,
+						   flags, fsid);
 		else
-			ret = fanotify_add_inode_mark(group, inode, mask, flags);
+			ret = fanotify_add_inode_mark(group, inode, mask,
+						      flags, fsid);
 		break;
 	case FAN_MARK_REMOVE:
-		if (flags & FAN_MARK_MOUNT)
-			ret = fanotify_remove_vfsmount_mark(group, mnt, mask, flags);
+		if (mark_type == FAN_MARK_MOUNT)
+			ret = fanotify_remove_vfsmount_mark(group, mnt, mask,
+							    flags);
+		else if (mark_type == FAN_MARK_FILESYSTEM)
+			ret = fanotify_remove_sb_mark(group, mnt->mnt_sb, mask,
+						      flags);
 		else
-			ret = fanotify_remove_inode_mark(group, inode, mask, flags);
+			ret = fanotify_remove_inode_mark(group, inode, mask,
+							 flags);
 		break;
 	default:
 		ret = -EINVAL;
 	}
 
+path_put_and_out:
 	path_put(&path);
 fput_and_out:
 	fdput(f);
@@ -934,12 +1134,15 @@
  */
 static int __init fanotify_user_setup(void)
 {
+	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 8);
+	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9);
+
 	fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,
 					 SLAB_PANIC|SLAB_ACCOUNT);
-	fanotify_event_cachep = KMEM_CACHE(fanotify_event_info, SLAB_PANIC);
+	fanotify_event_cachep = KMEM_CACHE(fanotify_event, SLAB_PANIC);
 	if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) {
 		fanotify_perm_event_cachep =
-			KMEM_CACHE(fanotify_perm_event_info, SLAB_PANIC);
+			KMEM_CACHE(fanotify_perm_event, SLAB_PANIC);
 	}
 
 	return 0;
diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index 86fcf58..1e2bfd2 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -15,6 +15,7 @@
 #include <linux/exportfs.h>
 
 #include "inotify/inotify.h"
+#include "fdinfo.h"
 #include "fsnotify.h"
 
 #if defined(CONFIG_PROC_FS)
@@ -131,37 +132,20 @@
 
 		seq_printf(m, "fanotify mnt_id:%x mflags:%x mask:%x ignored_mask:%x\n",
 			   mnt->mnt_id, mflags, mark->mask, mark->ignored_mask);
+	} else if (mark->connector->type == FSNOTIFY_OBJ_TYPE_SB) {
+		struct super_block *sb = fsnotify_conn_sb(mark->connector);
+
+		seq_printf(m, "fanotify sdev:%x mflags:%x mask:%x ignored_mask:%x\n",
+			   sb->s_dev, mflags, mark->mask, mark->ignored_mask);
 	}
 }
 
 void fanotify_show_fdinfo(struct seq_file *m, struct file *f)
 {
 	struct fsnotify_group *group = f->private_data;
-	unsigned int flags = 0;
-
-	switch (group->priority) {
-	case FS_PRIO_0:
-		flags |= FAN_CLASS_NOTIF;
-		break;
-	case FS_PRIO_1:
-		flags |= FAN_CLASS_CONTENT;
-		break;
-	case FS_PRIO_2:
-		flags |= FAN_CLASS_PRE_CONTENT;
-		break;
-	}
-
-	if (group->max_events == UINT_MAX)
-		flags |= FAN_UNLIMITED_QUEUE;
-
-	if (group->fanotify_data.max_marks == UINT_MAX)
-		flags |= FAN_UNLIMITED_MARKS;
-
-	if (group->fanotify_data.audit)
-		flags |= FAN_ENABLE_AUDIT;
 
 	seq_printf(m, "fanotify flags:%x event-flags:%x\n",
-		   flags, group->fanotify_data.f_flags);
+		   group->fanotify_data.flags, group->fanotify_data.f_flags);
 
 	show_fdinfo(m, f, fanotify_fdinfo);
 }
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 170a733..2ecef61 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2, or (at your option)
- *  any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; see the file COPYING.  If not, write to
- *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #include <linux/dcache.h>
@@ -48,7 +35,7 @@
  * Called during unmount with no locks held, so needs to be safe against
  * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block.
  */
-void fsnotify_unmount_inodes(struct super_block *sb)
+static void fsnotify_unmount_inodes(struct super_block *sb)
 {
 	struct inode *inode, *iput_inode = NULL;
 
@@ -101,6 +88,12 @@
 		       !atomic_long_read(&sb->s_fsnotify_inode_refs));
 }
 
+void fsnotify_sb_delete(struct super_block *sb)
+{
+	fsnotify_unmount_inodes(sb);
+	fsnotify_clear_marks_by_sb(sb);
+}
+
 /*
  * Given an inode, first check if we care what happens to our children.  Inotify
  * and dnotify both tell their parents about events.  If we care about any event
@@ -173,10 +166,10 @@
 		take_dentry_name_snapshot(&name, dentry);
 		if (path)
 			ret = fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH,
-				       name.name, 0);
+				       &name.name, 0);
 		else
 			ret = fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
-				       name.name, 0);
+				       &name.name, 0);
 		release_dentry_name_snapshot(&name);
 	}
 
@@ -189,7 +182,7 @@
 static int send_to_group(struct inode *to_tell,
 			 __u32 mask, const void *data,
 			 int data_is, u32 cookie,
-			 const unsigned char *file_name,
+			 const struct qstr *file_name,
 			 struct fsnotify_iter_info *iter_info)
 {
 	struct fsnotify_group *group = NULL;
@@ -319,21 +312,22 @@
  * notification event in whatever means they feel necessary.
  */
 int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
-	     const unsigned char *file_name, u32 cookie)
+	     const struct qstr *file_name, u32 cookie)
 {
 	struct fsnotify_iter_info iter_info = {};
-	struct mount *mnt;
+	struct super_block *sb = to_tell->i_sb;
+	struct mount *mnt = NULL;
+	__u32 mnt_or_sb_mask = sb->s_fsnotify_mask;
 	int ret = 0;
 	__u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS);
 
-	if (data_is == FSNOTIFY_EVENT_PATH)
+	if (data_is == FSNOTIFY_EVENT_PATH) {
 		mnt = real_mount(((const struct path *)data)->mnt);
-	else
-		mnt = NULL;
-
-	/* An event "on child" is not intended for a mount mark */
+		mnt_or_sb_mask |= mnt->mnt_fsnotify_mask;
+	}
+	/* An event "on child" is not intended for a mount/sb mark */
 	if (mask & FS_EVENT_ON_CHILD)
-		mnt = NULL;
+		mnt_or_sb_mask = 0;
 
 	/*
 	 * Optimization: srcu_read_lock() has a memory barrier which can
@@ -342,31 +336,32 @@
 	 * SRCU because we have no references to any objects and do not
 	 * need SRCU to keep them "alive".
 	 */
-	if (!to_tell->i_fsnotify_marks &&
+	if (!to_tell->i_fsnotify_marks && !sb->s_fsnotify_marks &&
 	    (!mnt || !mnt->mnt_fsnotify_marks))
 		return 0;
 	/*
 	 * if this is a modify event we may need to clear the ignored masks
-	 * otherwise return if neither the inode nor the vfsmount care about
+	 * otherwise return if neither the inode nor the vfsmount/sb care about
 	 * this type of event.
 	 */
 	if (!(mask & FS_MODIFY) &&
-	    !(test_mask & to_tell->i_fsnotify_mask) &&
-	    !(mnt && test_mask & mnt->mnt_fsnotify_mask))
+	    !(test_mask & (to_tell->i_fsnotify_mask | mnt_or_sb_mask)))
 		return 0;
 
 	iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
 
 	iter_info.marks[FSNOTIFY_OBJ_TYPE_INODE] =
 		fsnotify_first_mark(&to_tell->i_fsnotify_marks);
+	iter_info.marks[FSNOTIFY_OBJ_TYPE_SB] =
+		fsnotify_first_mark(&sb->s_fsnotify_marks);
 	if (mnt) {
 		iter_info.marks[FSNOTIFY_OBJ_TYPE_VFSMOUNT] =
 			fsnotify_first_mark(&mnt->mnt_fsnotify_marks);
 	}
 
 	/*
-	 * We need to merge inode & vfsmount mark lists so that inode mark
-	 * ignore masks are properly reflected for mount mark notifications.
+	 * We need to merge inode/vfsmount/sb mark lists so that e.g. inode mark
+	 * ignore masks are properly reflected for mount/sb mark notifications.
 	 * That's why this traversal is so complicated...
 	 */
 	while (fsnotify_iter_select_report_types(&iter_info)) {
@@ -392,7 +387,7 @@
 {
 	int ret;
 
-	BUG_ON(hweight32(ALL_FSNOTIFY_BITS) != 23);
+	BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 25);
 
 	ret = init_srcu_struct(&fsnotify_mark_srcu);
 	if (ret)
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 7902653..f346282 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -21,6 +21,12 @@
 	return container_of(conn->obj, struct mount, mnt_fsnotify_marks);
 }
 
+static inline struct super_block *fsnotify_conn_sb(
+				struct fsnotify_mark_connector *conn)
+{
+	return container_of(conn->obj, struct super_block, s_fsnotify_marks);
+}
+
 /* destroy all events sitting in this groups notification queue */
 extern void fsnotify_flush_notify(struct fsnotify_group *group);
 
@@ -43,8 +49,11 @@
 {
 	fsnotify_destroy_marks(&real_mount(mnt)->mnt_fsnotify_marks);
 }
-/* Wait until all marks queued for destruction are destroyed */
-extern void fsnotify_wait_marks_destroyed(void);
+/* run the list of all marks associated with sb and destroy them */
+static inline void fsnotify_clear_marks_by_sb(struct super_block *sb)
+{
+	fsnotify_destroy_marks(&sb->s_fsnotify_marks);
+}
 
 /*
  * update the dentry->d_flags of all of inode's children to indicate if inode cares
diff --git a/fs/notify/group.c b/fs/notify/group.c
index c03b836..133f723 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2, or (at your option)
- *  any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; see the file COPYING.  If not, write to
- *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #include <linux/list.h>
@@ -121,6 +108,7 @@
 	if (refcount_dec_and_test(&group->refcnt))
 		fsnotify_final_destroy_group(group);
 }
+EXPORT_SYMBOL_GPL(fsnotify_put_group);
 
 /*
  * Create a new fsnotify_group and hold a reference for the group returned.
@@ -150,6 +138,7 @@
 
 	return group;
 }
+EXPORT_SYMBOL_GPL(fsnotify_alloc_group);
 
 int fsnotify_fasync(int fd, struct file *file, int on)
 {
diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig
index b981fc0..6736e47 100644
--- a/fs/notify/inotify/Kconfig
+++ b/fs/notify/inotify/Kconfig
@@ -1,6 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config INOTIFY_USER
 	bool "Inotify support for userspace"
-	select ANON_INODES
 	select FSNOTIFY
 	default y
 	---help---
diff --git a/fs/notify/inotify/Makefile b/fs/notify/inotify/Makefile
index a380dab..812237e 100644
--- a/fs/notify/inotify/Makefile
+++ b/fs/notify/inotify/Makefile
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_INOTIFY_USER)	+= inotify_fsnotify.o inotify_user.o
diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
index 7e4578d..3f246f7 100644
--- a/fs/notify/inotify/inotify.h
+++ b/fs/notify/inotify/inotify.h
@@ -5,6 +5,7 @@
 
 struct inotify_event_info {
 	struct fsnotify_event fse;
+	u32 mask;
 	int wd;
 	u32 sync_cookie;
 	int name_len;
@@ -26,7 +27,7 @@
 extern int inotify_handle_event(struct fsnotify_group *group,
 				struct inode *inode,
 				u32 mask, const void *data, int data_type,
-				const unsigned char *file_name, u32 cookie,
+				const struct qstr *file_name, u32 cookie,
 				struct fsnotify_iter_info *iter_info);
 
 extern const struct fsnotify_ops inotify_fsnotify_ops;
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index f4184b4..d510223 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * fs/inotify_user.c - inotify support for userspace
  *
@@ -10,16 +11,6 @@
  *
  * Copyright (C) 2009 Eric Paris <Red Hat Inc>
  * inotify was largely rewriten to make use of the fsnotify infrastructure
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2, or (at your option) any
- * later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 
 #include <linux/dcache.h> /* d_unlinked */
@@ -43,11 +34,11 @@
 {
 	struct inotify_event_info *old, *new;
 
-	if (old_fsn->mask & FS_IN_IGNORED)
-		return false;
 	old = INOTIFY_E(old_fsn);
 	new = INOTIFY_E(new_fsn);
-	if ((old_fsn->mask == new_fsn->mask) &&
+	if (old->mask & FS_IN_IGNORED)
+		return false;
+	if ((old->mask == new->mask) &&
 	    (old_fsn->inode == new_fsn->inode) &&
 	    (old->name_len == new->name_len) &&
 	    (!old->name_len || !strcmp(old->name, new->name)))
@@ -67,7 +58,7 @@
 int inotify_handle_event(struct fsnotify_group *group,
 			 struct inode *inode,
 			 u32 mask, const void *data, int data_type,
-			 const unsigned char *file_name, u32 cookie,
+			 const struct qstr *file_name, u32 cookie,
 			 struct fsnotify_iter_info *iter_info)
 {
 	struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info);
@@ -89,7 +80,7 @@
 			return 0;
 	}
 	if (file_name) {
-		len = strlen(file_name);
+		len = file_name->len;
 		alloc_len += len + 1;
 	}
 
@@ -99,9 +90,13 @@
 	i_mark = container_of(inode_mark, struct inotify_inode_mark,
 			      fsn_mark);
 
-	/* Whoever is interested in the event, pays for the allocation. */
+	/*
+	 * Whoever is interested in the event, pays for the allocation. Do not
+	 * trigger OOM killer in the target monitoring memcg as it may have
+	 * security repercussion.
+	 */
 	memalloc_use_memcg(group->memcg);
-	event = kmalloc(alloc_len, GFP_KERNEL_ACCOUNT);
+	event = kmalloc(alloc_len, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
 	memalloc_unuse_memcg();
 
 	if (unlikely(!event)) {
@@ -113,13 +108,23 @@
 		return -ENOMEM;
 	}
 
+	/*
+	 * We now report FS_ISDIR flag with MOVE_SELF and DELETE_SELF events
+	 * for fanotify. inotify never reported IN_ISDIR with those events.
+	 * It looks like an oversight, but to avoid the risk of breaking
+	 * existing inotify programs, mask the flag out from those events.
+	 */
+	if (mask & (IN_MOVE_SELF | IN_DELETE_SELF))
+		mask &= ~IN_ISDIR;
+
 	fsn_event = &event->fse;
-	fsnotify_init_event(fsn_event, inode, mask);
+	fsnotify_init_event(fsn_event, inode);
+	event->mask = mask;
 	event->wd = i_mark->wd;
 	event->sync_cookie = cookie;
 	event->name_len = len;
 	if (len)
-		strcpy(event->name, file_name);
+		strcpy(event->name, file_name->name);
 
 	ret = fsnotify_add_event(group, fsn_event, inotify_merge);
 	if (ret) {
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index ac6978d..107537a 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * fs/inotify_user.c - inotify support for userspace
  *
@@ -10,16 +11,6 @@
  *
  * Copyright (C) 2009 Eric Paris <Red Hat Inc>
  * inotify was largely rewriten to make use of the fsnotify infrastructure
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2, or (at your option) any
- * later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 
 #include <linux/file.h>
@@ -39,6 +30,7 @@
 #include <linux/poll.h>
 #include <linux/wait.h>
 #include <linux/memcontrol.h>
+#include <linux/security.h>
 
 #include "inotify.h"
 #include "../fdinfo.h"
@@ -54,8 +46,6 @@
 
 #include <linux/sysctl.h>
 
-static int zero;
-
 struct ctl_table inotify_table[] = {
 	{
 		.procname	= "max_user_instances",
@@ -63,7 +53,7 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
+		.extra1		= SYSCTL_ZERO,
 	},
 	{
 		.procname	= "max_user_watches",
@@ -71,7 +61,7 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
+		.extra1		= SYSCTL_ZERO,
 	},
 	{
 		.procname	= "max_queued_events",
@@ -79,7 +69,7 @@
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero
+		.extra1		= SYSCTL_ZERO
 	},
 	{ }
 };
@@ -189,7 +179,7 @@
 	 */
 	pad_name_len = round_event_name_len(fsn_event);
 	inotify_event.len = pad_name_len;
-	inotify_event.mask = inotify_mask_to_arg(fsn_event->mask);
+	inotify_event.mask = inotify_mask_to_arg(event->mask);
 	inotify_event.wd = event->wd;
 	inotify_event.cookie = event->sync_cookie;
 
@@ -342,7 +332,8 @@
 /*
  * find_inode - resolve a user-given path to a specific inode
  */
-static int inotify_find_inode(const char __user *dirname, struct path *path, unsigned flags)
+static int inotify_find_inode(const char __user *dirname, struct path *path,
+						unsigned int flags, __u64 mask)
 {
 	int error;
 
@@ -351,8 +342,15 @@
 		return error;
 	/* you can only watch an inode if you have read permissions on it */
 	error = inode_permission(path->dentry->d_inode, MAY_READ);
+	if (error) {
+		path_put(path);
+		return error;
+	}
+	error = security_path_notify(path, mask,
+				FSNOTIFY_OBJ_TYPE_INODE);
 	if (error)
 		path_put(path);
+
 	return error;
 }
 
@@ -519,8 +517,10 @@
 	fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group);
 	if (!fsn_mark)
 		return -ENOENT;
-	else if (create)
-		return -EEXIST;
+	else if (create) {
+		ret = -EEXIST;
+		goto out;
+	}
 
 	i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
 
@@ -548,6 +548,7 @@
 	/* return the wd */
 	ret = i_mark->wd;
 
+out:
 	/* match the get from fsnotify_find_mark() */
 	fsnotify_put_mark(fsn_mark);
 
@@ -634,7 +635,8 @@
 		return ERR_PTR(-ENOMEM);
 	}
 	group->overflow_event = &oevent->fse;
-	fsnotify_init_event(group->overflow_event, NULL, FS_Q_OVERFLOW);
+	fsnotify_init_event(group->overflow_event, NULL);
+	oevent->mask = FS_Q_OVERFLOW;
 	oevent->wd = -1;
 	oevent->sync_cookie = 0;
 	oevent->name_len = 0;
@@ -724,8 +726,10 @@
 		return -EBADF;
 
 	/* IN_MASK_ADD and IN_MASK_CREATE don't make sense together */
-	if (unlikely((mask & IN_MASK_ADD) && (mask & IN_MASK_CREATE)))
-		return -EINVAL;
+	if (unlikely((mask & IN_MASK_ADD) && (mask & IN_MASK_CREATE))) {
+		ret = -EINVAL;
+		goto fput_and_out;
+	}
 
 	/* verify that this is indeed an inotify instance */
 	if (unlikely(f.file->f_op != &inotify_fops)) {
@@ -738,7 +742,8 @@
 	if (mask & IN_ONLYDIR)
 		flags |= LOOKUP_DIRECTORY;
 
-	ret = inotify_find_inode(pathname, &path, flags);
+	ret = inotify_find_inode(pathname, &path, flags,
+			(mask & IN_ALL_EVENTS));
 	if (ret)
 		goto fput_and_out;
 
@@ -815,7 +820,7 @@
 	BUILD_BUG_ON(IN_ISDIR != FS_ISDIR);
 	BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
 
-	BUG_ON(hweight32(ALL_INOTIFY_BITS) != 22);
+	BUILD_BUG_ON(HWEIGHT32(ALL_INOTIFY_BITS) != 22);
 
 	inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark,
 					       SLAB_PANIC|SLAB_ACCOUNT);
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 09535f6..1d96216 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2, or (at your option)
- *  any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; see the file COPYING.  If not, write to
- *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 /*
@@ -82,6 +69,7 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/srcu.h>
+#include <linux/ratelimit.h>
 
 #include <linux/atomic.h>
 
@@ -115,6 +103,8 @@
 		return &fsnotify_conn_inode(conn)->i_fsnotify_mask;
 	else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT)
 		return &fsnotify_conn_mount(conn)->mnt_fsnotify_mask;
+	else if (conn->type == FSNOTIFY_OBJ_TYPE_SB)
+		return &fsnotify_conn_sb(conn)->s_fsnotify_mask;
 	return NULL;
 }
 
@@ -195,6 +185,8 @@
 		atomic_long_inc(&inode->i_sb->s_fsnotify_inode_refs);
 	} else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
 		fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0;
+	} else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) {
+		fsnotify_conn_sb(conn)->s_fsnotify_mask = 0;
 	}
 
 	rcu_assign_pointer(*(conn->obj), NULL);
@@ -234,13 +226,13 @@
 
 void fsnotify_put_mark(struct fsnotify_mark *mark)
 {
-	struct fsnotify_mark_connector *conn;
+	struct fsnotify_mark_connector *conn = READ_ONCE(mark->connector);
 	void *objp = NULL;
 	unsigned int type = FSNOTIFY_OBJ_TYPE_DETACHED;
 	bool free_conn = false;
 
 	/* Catch marks that were actually never attached to object */
-	if (!mark->connector) {
+	if (!conn) {
 		if (refcount_dec_and_test(&mark->refcnt))
 			fsnotify_final_mark_destroy(mark);
 		return;
@@ -250,10 +242,9 @@
 	 * We have to be careful so that traversals of obj_list under lock can
 	 * safely grab mark reference.
 	 */
-	if (!refcount_dec_and_lock(&mark->refcnt, &mark->connector->lock))
+	if (!refcount_dec_and_lock(&mark->refcnt, &conn->lock))
 		return;
 
-	conn = mark->connector;
 	hlist_del_init_rcu(&mark->obj_list);
 	if (hlist_empty(&conn->list)) {
 		objp = fsnotify_detach_connector_from_object(conn, &type);
@@ -261,7 +252,7 @@
 	} else {
 		__fsnotify_recalc_mask(conn);
 	}
-	mark->connector = NULL;
+	WRITE_ONCE(mark->connector, NULL);
 	spin_unlock(&conn->lock);
 
 	fsnotify_drop_object(type, objp);
@@ -285,6 +276,7 @@
 	queue_delayed_work(system_unbound_wq, &reaper_work,
 			   FSNOTIFY_REAPER_DELAY);
 }
+EXPORT_SYMBOL_GPL(fsnotify_put_mark);
 
 /*
  * Get mark reference when we found the mark via lockless traversal of object
@@ -439,6 +431,7 @@
 	mutex_unlock(&group->mark_mutex);
 	fsnotify_free_mark(mark);
 }
+EXPORT_SYMBOL_GPL(fsnotify_destroy_mark);
 
 /*
  * Sorting function for lists of fsnotify marks.
@@ -477,7 +470,8 @@
 }
 
 static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
-					       unsigned int type)
+					       unsigned int type,
+					       __kernel_fsid_t *fsid)
 {
 	struct inode *inode = NULL;
 	struct fsnotify_mark_connector *conn;
@@ -489,6 +483,14 @@
 	INIT_HLIST_HEAD(&conn->list);
 	conn->type = type;
 	conn->obj = connp;
+	/* Cache fsid of filesystem containing the object */
+	if (fsid) {
+		conn->fsid = *fsid;
+		conn->flags = FSNOTIFY_CONN_FLAG_HAS_FSID;
+	} else {
+		conn->fsid.val[0] = conn->fsid.val[1] = 0;
+		conn->flags = 0;
+	}
 	if (conn->type == FSNOTIFY_OBJ_TYPE_INODE)
 		inode = igrab(fsnotify_conn_inode(conn));
 	/*
@@ -540,7 +542,7 @@
  */
 static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
 				  fsnotify_connp_t *connp, unsigned int type,
-				  int allow_dups)
+				  int allow_dups, __kernel_fsid_t *fsid)
 {
 	struct fsnotify_mark *lmark, *last = NULL;
 	struct fsnotify_mark_connector *conn;
@@ -549,15 +551,41 @@
 
 	if (WARN_ON(!fsnotify_valid_obj_type(type)))
 		return -EINVAL;
+
+	/* Backend is expected to check for zero fsid (e.g. tmpfs) */
+	if (fsid && WARN_ON_ONCE(!fsid->val[0] && !fsid->val[1]))
+		return -ENODEV;
+
 restart:
 	spin_lock(&mark->lock);
 	conn = fsnotify_grab_connector(connp);
 	if (!conn) {
 		spin_unlock(&mark->lock);
-		err = fsnotify_attach_connector_to_object(connp, type);
+		err = fsnotify_attach_connector_to_object(connp, type, fsid);
 		if (err)
 			return err;
 		goto restart;
+	} else if (fsid && !(conn->flags & FSNOTIFY_CONN_FLAG_HAS_FSID)) {
+		conn->fsid = *fsid;
+		/* Pairs with smp_rmb() in fanotify_get_fsid() */
+		smp_wmb();
+		conn->flags |= FSNOTIFY_CONN_FLAG_HAS_FSID;
+	} else if (fsid && (conn->flags & FSNOTIFY_CONN_FLAG_HAS_FSID) &&
+		   (fsid->val[0] != conn->fsid.val[0] ||
+		    fsid->val[1] != conn->fsid.val[1])) {
+		/*
+		 * Backend is expected to check for non uniform fsid
+		 * (e.g. btrfs), but maybe we missed something?
+		 * Only allow setting conn->fsid once to non zero fsid.
+		 * inotify and non-fid fanotify groups do not set nor test
+		 * conn->fsid.
+		 */
+		pr_warn_ratelimited("%s: fsid mismatch on object of type %u: "
+				    "%x.%x != %x.%x\n", __func__, conn->type,
+				    fsid->val[0], fsid->val[1],
+				    conn->fsid.val[0], conn->fsid.val[1]);
+		err = -EXDEV;
+		goto out_err;
 	}
 
 	/* is mark the first mark? */
@@ -588,7 +616,12 @@
 	/* mark should be the last entry.  last is the current last entry */
 	hlist_add_behind_rcu(&mark->obj_list, &last->obj_list);
 added:
-	mark->connector = conn;
+	/*
+	 * Since connector is attached to object using cmpxchg() we are
+	 * guaranteed that connector initialization is fully visible by anyone
+	 * seeing mark->connector set.
+	 */
+	WRITE_ONCE(mark->connector, conn);
 out_err:
 	spin_unlock(&conn->lock);
 	spin_unlock(&mark->lock);
@@ -602,7 +635,7 @@
  */
 int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
 			     fsnotify_connp_t *connp, unsigned int type,
-			     int allow_dups)
+			     int allow_dups, __kernel_fsid_t *fsid)
 {
 	struct fsnotify_group *group = mark->group;
 	int ret = 0;
@@ -623,7 +656,7 @@
 	fsnotify_get_mark(mark); /* for g_list */
 	spin_unlock(&mark->lock);
 
-	ret = fsnotify_add_mark_list(mark, connp, type, allow_dups);
+	ret = fsnotify_add_mark_list(mark, connp, type, allow_dups, fsid);
 	if (ret)
 		goto err;
 
@@ -644,16 +677,17 @@
 }
 
 int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp,
-		      unsigned int type, int allow_dups)
+		      unsigned int type, int allow_dups, __kernel_fsid_t *fsid)
 {
 	int ret;
 	struct fsnotify_group *group = mark->group;
 
 	mutex_lock(&group->mark_mutex);
-	ret = fsnotify_add_mark_locked(mark, connp, type, allow_dups);
+	ret = fsnotify_add_mark_locked(mark, connp, type, allow_dups, fsid);
 	mutex_unlock(&group->mark_mutex);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(fsnotify_add_mark);
 
 /*
  * Given a list of marks, find the mark associated with given group. If found
@@ -680,6 +714,7 @@
 	spin_unlock(&conn->lock);
 	return NULL;
 }
+EXPORT_SYMBOL_GPL(fsnotify_find_mark);
 
 /* Clear any marks in a group with given type mask */
 void fsnotify_clear_marks_by_group(struct fsnotify_group *group,
@@ -776,7 +811,9 @@
 	refcount_set(&mark->refcnt, 1);
 	fsnotify_get_group(group);
 	mark->group = group;
+	WRITE_ONCE(mark->connector, NULL);
 }
+EXPORT_SYMBOL_GPL(fsnotify_init_mark);
 
 /*
  * Destroy all marks in destroy_list, waits for SRCU period to finish before
@@ -805,3 +842,4 @@
 {
 	flush_delayed_work(&reaper_work);
 }
+EXPORT_SYMBOL_GPL(fsnotify_wait_marks_destroyed);
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index 3c3e367..75d79d6 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2, or (at your option)
- *  any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; see the file COPYING.  If not, write to
- *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 /*
@@ -71,7 +58,7 @@
 			    struct fsnotify_event *event)
 {
 	/* Overflow events are per-group and we don't want to free them */
-	if (!event || event->mask == FS_Q_OVERFLOW)
+	if (!event || event == group->overflow_event)
 		return;
 	/*
 	 * If the event is still queued, we have a problem... Do an unreliable
@@ -141,6 +128,18 @@
 	return ret;
 }
 
+void fsnotify_remove_queued_event(struct fsnotify_group *group,
+				  struct fsnotify_event *event)
+{
+	assert_spin_locked(&group->notification_lock);
+	/*
+	 * We need to init list head for the case of overflow event so that
+	 * check in fsnotify_add_event() works
+	 */
+	list_del_init(&event->list);
+	group->q_len--;
+}
+
 /*
  * Remove and return the first event from the notification list.  It is the
  * responsibility of the caller to destroy the obtained event
@@ -155,13 +154,7 @@
 
 	event = list_first_entry(&group->notification_list,
 				 struct fsnotify_event, list);
-	/*
-	 * We need to init list head for the case of overflow event so that
-	 * check in fsnotify_add_event() works
-	 */
-	list_del_init(&event->list);
-	group->q_len--;
-
+	fsnotify_remove_queued_event(group, event);
 	return event;
 }
 
@@ -194,23 +187,3 @@
 	}
 	spin_unlock(&group->notification_lock);
 }
-
-/*
- * fsnotify_create_event - Allocate a new event which will be sent to each
- * group's handle_event function if the group was interested in this
- * particular event.
- *
- * @inode the inode which is supposed to receive the event (sometimes a
- *	parent of the inode to which the event happened.
- * @mask what actually happened.
- * @data pointer to the object which was actually affected
- * @data_type flag indication if the data is a file, path, inode, nothing...
- * @name the filename, if available
- */
-void fsnotify_init_event(struct fsnotify_event *event, struct inode *inode,
-			 u32 mask)
-{
-	INIT_LIST_HEAD(&event->list);
-	event->inode = inode;
-	event->mask = mask;
-}