Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 7a42c2e..e45ca6e 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -70,13 +70,10 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark)
* destroy the dnotify struct if it was not registered to receive multiple
* events.
*/
-static int dnotify_handle_event(struct fsnotify_group *group,
- struct inode *inode,
- u32 mask, const void *data, int data_type,
- const struct qstr *file_name, u32 cookie,
- struct fsnotify_iter_info *iter_info)
+static int dnotify_handle_event(struct fsnotify_mark *inode_mark, u32 mask,
+ struct inode *inode, struct inode *dir,
+ const struct qstr *name, u32 cookie)
{
- struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info);
struct dnotify_mark *dn_mark;
struct dnotify_struct *dn;
struct dnotify_struct **prev;
@@ -84,10 +81,7 @@ static int dnotify_handle_event(struct fsnotify_group *group,
__u32 test_mask = mask & ~FS_EVENT_ON_CHILD;
/* not a dir, dnotify doesn't care */
- if (!S_ISDIR(inode->i_mode))
- return 0;
-
- if (WARN_ON(fsnotify_iter_vfsmount_mark(iter_info)))
+ if (!dir && !(mask & FS_ISDIR))
return 0;
dn_mark = container_of(inode_mark, struct dnotify_mark, fsn_mark);
@@ -127,7 +121,7 @@ static void dnotify_free_mark(struct fsnotify_mark *fsn_mark)
}
static const struct fsnotify_ops dnotify_fsnotify_ops = {
- .handle_event = dnotify_handle_event,
+ .handle_inode_event = dnotify_handle_event,
.free_mark = dnotify_free_mark,
};
diff --git a/fs/notify/fanotify/Kconfig b/fs/notify/fanotify/Kconfig
index 8b9103f..a511f9d 100644
--- a/fs/notify/fanotify/Kconfig
+++ b/fs/notify/fanotify/Kconfig
@@ -4,7 +4,7 @@
select FSNOTIFY
select EXPORTFS
default n
- ---help---
+ help
Say Y here to enable fanotify support. fanotify is a file access
notification system which differs from inotify in that it sends
an open file descriptor to the userspace listener along with
@@ -17,7 +17,7 @@
depends on FANOTIFY
depends on SECURITY
default n
- ---help---
+ help
Say Y here is you want fanotify listeners to be able to make permissions
decisions concerning filesystem events. This is used by some fanotify
listeners which need to scan files before allowing the system access to
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index d24548e..c3af99e 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -17,8 +17,79 @@
#include "fanotify.h"
-static bool should_merge(struct fsnotify_event *old_fsn,
- struct fsnotify_event *new_fsn)
+static bool fanotify_path_equal(struct path *p1, struct path *p2)
+{
+ return p1->mnt == p2->mnt && p1->dentry == p2->dentry;
+}
+
+static inline bool fanotify_fsid_equal(__kernel_fsid_t *fsid1,
+ __kernel_fsid_t *fsid2)
+{
+ return fsid1->val[0] == fsid2->val[0] && fsid1->val[1] == fsid2->val[1];
+}
+
+static bool fanotify_fh_equal(struct fanotify_fh *fh1,
+ struct fanotify_fh *fh2)
+{
+ if (fh1->type != fh2->type || fh1->len != fh2->len)
+ return false;
+
+ return !fh1->len ||
+ !memcmp(fanotify_fh_buf(fh1), fanotify_fh_buf(fh2), fh1->len);
+}
+
+static bool fanotify_fid_event_equal(struct fanotify_fid_event *ffe1,
+ struct fanotify_fid_event *ffe2)
+{
+ /* Do not merge fid events without object fh */
+ if (!ffe1->object_fh.len)
+ return false;
+
+ return fanotify_fsid_equal(&ffe1->fsid, &ffe2->fsid) &&
+ fanotify_fh_equal(&ffe1->object_fh, &ffe2->object_fh);
+}
+
+static bool fanotify_info_equal(struct fanotify_info *info1,
+ struct fanotify_info *info2)
+{
+ if (info1->dir_fh_totlen != info2->dir_fh_totlen ||
+ info1->file_fh_totlen != info2->file_fh_totlen ||
+ info1->name_len != info2->name_len)
+ return false;
+
+ if (info1->dir_fh_totlen &&
+ !fanotify_fh_equal(fanotify_info_dir_fh(info1),
+ fanotify_info_dir_fh(info2)))
+ return false;
+
+ if (info1->file_fh_totlen &&
+ !fanotify_fh_equal(fanotify_info_file_fh(info1),
+ fanotify_info_file_fh(info2)))
+ return false;
+
+ return !info1->name_len ||
+ !memcmp(fanotify_info_name(info1), fanotify_info_name(info2),
+ info1->name_len);
+}
+
+static bool fanotify_name_event_equal(struct fanotify_name_event *fne1,
+ struct fanotify_name_event *fne2)
+{
+ struct fanotify_info *info1 = &fne1->info;
+ struct fanotify_info *info2 = &fne2->info;
+
+ /* Do not merge name events without dir fh */
+ if (!info1->dir_fh_totlen)
+ return false;
+
+ if (!fanotify_fsid_equal(&fne1->fsid, &fne2->fsid))
+ return false;
+
+ return fanotify_info_equal(info1, info2);
+}
+
+static bool fanotify_should_merge(struct fsnotify_event *old_fsn,
+ struct fsnotify_event *new_fsn)
{
struct fanotify_event *old, *new;
@@ -26,35 +97,47 @@ static bool should_merge(struct fsnotify_event *old_fsn,
old = FANOTIFY_E(old_fsn);
new = FANOTIFY_E(new_fsn);
- if (old_fsn->objectid != new_fsn->objectid || old->pid != new->pid ||
- old->fh_type != new->fh_type || old->fh_len != new->fh_len)
+ if (old_fsn->objectid != new_fsn->objectid ||
+ old->type != new->type || old->pid != new->pid)
return false;
- if (fanotify_event_has_path(old)) {
- return old->path.mnt == new->path.mnt &&
- old->path.dentry == new->path.dentry;
- } else if (fanotify_event_has_fid(old)) {
- /*
- * We want to merge many dirent events in the same dir (i.e.
- * creates/unlinks/renames), but we do not want to merge dirent
- * events referring to subdirs with dirent events referring to
- * non subdirs, otherwise, user won't be able to tell from a
- * mask FAN_CREATE|FAN_DELETE|FAN_ONDIR if it describes mkdir+
- * unlink pair or rmdir+create pair of events.
- */
- return (old->mask & FS_ISDIR) == (new->mask & FS_ISDIR) &&
- fanotify_fid_equal(&old->fid, &new->fid, old->fh_len);
+ /*
+ * We want to merge many dirent events in the same dir (i.e.
+ * creates/unlinks/renames), but we do not want to merge dirent
+ * events referring to subdirs with dirent events referring to
+ * non subdirs, otherwise, user won't be able to tell from a
+ * mask FAN_CREATE|FAN_DELETE|FAN_ONDIR if it describes mkdir+
+ * unlink pair or rmdir+create pair of events.
+ */
+ if ((old->mask & FS_ISDIR) != (new->mask & FS_ISDIR))
+ return false;
+
+ switch (old->type) {
+ case FANOTIFY_EVENT_TYPE_PATH:
+ return fanotify_path_equal(fanotify_event_path(old),
+ fanotify_event_path(new));
+ case FANOTIFY_EVENT_TYPE_FID:
+ return fanotify_fid_event_equal(FANOTIFY_FE(old),
+ FANOTIFY_FE(new));
+ case FANOTIFY_EVENT_TYPE_FID_NAME:
+ return fanotify_name_event_equal(FANOTIFY_NE(old),
+ FANOTIFY_NE(new));
+ default:
+ WARN_ON_ONCE(1);
}
- /* Do not merge events if we failed to encode fid */
return false;
}
+/* Limit event merges to limit CPU overhead per event */
+#define FANOTIFY_MAX_MERGE_EVENTS 128
+
/* and the list better be locked by something too! */
static int fanotify_merge(struct list_head *list, struct fsnotify_event *event)
{
struct fsnotify_event *test_event;
struct fanotify_event *new;
+ int i = 0;
pr_debug("%s: list=%p event=%p\n", __func__, list, event);
new = FANOTIFY_E(event);
@@ -68,7 +151,9 @@ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event)
return 0;
list_for_each_entry_reverse(test_event, list, list) {
- if (should_merge(test_event, event)) {
+ if (++i > FANOTIFY_MAX_MERGE_EVENTS)
+ break;
+ if (fanotify_should_merge(test_event, event)) {
FANOTIFY_E(test_event)->mask |= new->mask;
return 1;
}
@@ -147,24 +232,30 @@ static int fanotify_get_response(struct fsnotify_group *group,
static u32 fanotify_group_event_mask(struct fsnotify_group *group,
struct fsnotify_iter_info *iter_info,
u32 event_mask, const void *data,
- int data_type)
+ int data_type, struct inode *dir)
{
__u32 marks_mask = 0, marks_ignored_mask = 0;
- __u32 test_mask, user_mask = FANOTIFY_OUTGOING_EVENTS;
- const struct path *path = data;
+ __u32 test_mask, user_mask = FANOTIFY_OUTGOING_EVENTS |
+ FANOTIFY_EVENT_FLAGS;
+ const struct path *path = fsnotify_data_path(data, data_type);
+ unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
struct fsnotify_mark *mark;
int type;
pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n",
__func__, iter_info->report_mask, event_mask, data, data_type);
- if (!FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
+ if (!fid_mode) {
/* Do we have path to open a file descriptor? */
- if (data_type != FSNOTIFY_EVENT_PATH)
+ if (!path)
return 0;
/* Path type events are only relevant for files and dirs */
if (!d_is_reg(path->dentry) && !d_can_lookup(path->dentry))
return 0;
+ } else if (!(fid_mode & FAN_REPORT_FID)) {
+ /* Do we have a directory inode to report? */
+ if (!dir && !(event_mask & FS_ISDIR))
+ return 0;
}
fsnotify_foreach_obj_type(type) {
@@ -183,12 +274,11 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
continue;
/*
- * If the event is for a child and this mark doesn't care about
- * events on a child, don't send it!
+ * If the event is on a child and this mark is on a parent not
+ * watching children, don't send it!
*/
- if (event_mask & FS_EVENT_ON_CHILD &&
- (type != FSNOTIFY_OBJ_TYPE_INODE ||
- !(mark->mask & FS_EVENT_ON_CHILD)))
+ if (type == FSNOTIFY_OBJ_TYPE_PARENT &&
+ !(mark->mask & FS_EVENT_ON_CHILD))
continue;
marks_mask |= mark->mask;
@@ -197,69 +287,108 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
test_mask = event_mask & marks_mask & ~marks_ignored_mask;
/*
- * dirent modification events (create/delete/move) do not carry the
- * child entry name/inode information. Instead, we report FAN_ONDIR
- * for mkdir/rmdir so user can differentiate them from creat/unlink.
+ * For dirent modification events (create/delete/move) that do not carry
+ * the child entry name information, we report FAN_ONDIR for mkdir/rmdir
+ * so user can differentiate them from creat/unlink.
*
* For backward compatibility and consistency, do not report FAN_ONDIR
* to user in legacy fanotify mode (reporting fd) and report FAN_ONDIR
- * to user in FAN_REPORT_FID mode for all event types.
+ * to user in fid mode for all event types.
+ *
+ * We never report FAN_EVENT_ON_CHILD to user, but we do pass it in to
+ * fanotify_alloc_event() when group is reporting fid as indication
+ * that event happened on child.
*/
- if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
- /* Do not report FAN_ONDIR without any event */
- if (!(test_mask & ~FAN_ONDIR))
+ if (fid_mode) {
+ /* Do not report event flags without any event */
+ if (!(test_mask & ~FANOTIFY_EVENT_FLAGS))
return 0;
} else {
- user_mask &= ~FAN_ONDIR;
+ user_mask &= ~FANOTIFY_EVENT_FLAGS;
}
return test_mask & user_mask;
}
-static int fanotify_encode_fid(struct fanotify_event *event,
- struct inode *inode, gfp_t gfp,
- __kernel_fsid_t *fsid)
+/*
+ * Check size needed to encode fanotify_fh.
+ *
+ * Return size of encoded fh without fanotify_fh header.
+ * Return 0 on failure to encode.
+ */
+static int fanotify_encode_fh_len(struct inode *inode)
{
- struct fanotify_fid *fid = &event->fid;
- int dwords, bytes = 0;
- int err, type;
+ int dwords = 0;
- fid->ext_fh = NULL;
- dwords = 0;
+ if (!inode)
+ return 0;
+
+ exportfs_encode_inode_fh(inode, NULL, &dwords, NULL);
+
+ return dwords << 2;
+}
+
+/*
+ * Encode fanotify_fh.
+ *
+ * Return total size of encoded fh including fanotify_fh header.
+ * Return 0 on failure to encode.
+ */
+static int fanotify_encode_fh(struct fanotify_fh *fh, struct inode *inode,
+ unsigned int fh_len, gfp_t gfp)
+{
+ int dwords, type = 0;
+ char *ext_buf = NULL;
+ void *buf = fh->buf;
+ int err;
+
+ fh->type = FILEID_ROOT;
+ fh->len = 0;
+ fh->flags = 0;
+ if (!inode)
+ return 0;
+
+ /*
+ * !gpf means preallocated variable size fh, but fh_len could
+ * be zero in that case if encoding fh len failed.
+ */
err = -ENOENT;
- type = exportfs_encode_inode_fh(inode, NULL, &dwords, NULL);
- if (!dwords)
+ if (fh_len < 4 || WARN_ON_ONCE(fh_len % 4))
goto out_err;
- bytes = dwords << 2;
- if (bytes > FANOTIFY_INLINE_FH_LEN) {
- /* Treat failure to allocate fh as failure to allocate event */
+ /* No external buffer in a variable size allocated fh */
+ if (gfp && fh_len > FANOTIFY_INLINE_FH_LEN) {
+ /* Treat failure to allocate fh as failure to encode fh */
err = -ENOMEM;
- fid->ext_fh = kmalloc(bytes, gfp);
- if (!fid->ext_fh)
+ ext_buf = kmalloc(fh_len, gfp);
+ if (!ext_buf)
goto out_err;
+
+ *fanotify_fh_ext_buf_ptr(fh) = ext_buf;
+ buf = ext_buf;
+ fh->flags |= FANOTIFY_FH_FLAG_EXT_BUF;
}
- type = exportfs_encode_inode_fh(inode, fanotify_fid_fh(fid, bytes),
- &dwords, NULL);
+ dwords = fh_len >> 2;
+ type = exportfs_encode_inode_fh(inode, buf, &dwords, NULL);
err = -EINVAL;
- if (!type || type == FILEID_INVALID || bytes != dwords << 2)
+ if (!type || type == FILEID_INVALID || fh_len != dwords << 2)
goto out_err;
- fid->fsid = *fsid;
- event->fh_len = bytes;
+ fh->type = type;
+ fh->len = fh_len;
- return type;
+ return FANOTIFY_FH_HDR_LEN + fh_len;
out_err:
- pr_warn_ratelimited("fanotify: failed to encode fid (fsid=%x.%x, "
- "type=%d, bytes=%d, err=%i)\n",
- fsid->val[0], fsid->val[1], type, bytes, err);
- kfree(fid->ext_fh);
- fid->ext_fh = NULL;
- event->fh_len = 0;
-
- return FILEID_INVALID;
+ pr_warn_ratelimited("fanotify: failed to encode fid (type=%d, len=%d, err=%i)\n",
+ type, fh_len, err);
+ kfree(ext_buf);
+ *fanotify_fh_ext_buf_ptr(fh) = NULL;
+ /* Report the event without a file identifier on encode error */
+ fh->type = FILEID_INVALID;
+ fh->len = 0;
+ return 0;
}
/*
@@ -270,26 +399,180 @@ static int fanotify_encode_fid(struct fanotify_event *event,
* FS_ATTRIB reports the child inode even if reported on a watched parent.
* FS_CREATE reports the modified dir inode and not the created inode.
*/
-static struct inode *fanotify_fid_inode(struct inode *to_tell, u32 event_mask,
- const void *data, int data_type)
+static struct inode *fanotify_fid_inode(u32 event_mask, const void *data,
+ int data_type, struct inode *dir)
{
if (event_mask & ALL_FSNOTIFY_DIRENT_EVENTS)
- return to_tell;
- else if (data_type == FSNOTIFY_EVENT_INODE)
- return (struct inode *)data;
- else if (data_type == FSNOTIFY_EVENT_PATH)
- return d_inode(((struct path *)data)->dentry);
- return NULL;
+ return dir;
+
+ return fsnotify_data_inode(data, data_type);
}
-struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
- struct inode *inode, u32 mask,
- const void *data, int data_type,
- __kernel_fsid_t *fsid)
+/*
+ * The inode to use as identifier when reporting dir fid depends on the event.
+ * Report the modified directory inode on dirent modification events.
+ * Report the "victim" inode if "victim" is a directory.
+ * Report the parent inode if "victim" is not a directory and event is
+ * reported to parent.
+ * Otherwise, do not report dir fid.
+ */
+static struct inode *fanotify_dfid_inode(u32 event_mask, const void *data,
+ int data_type, struct inode *dir)
+{
+ struct inode *inode = fsnotify_data_inode(data, data_type);
+
+ if (event_mask & ALL_FSNOTIFY_DIRENT_EVENTS)
+ return dir;
+
+ if (S_ISDIR(inode->i_mode))
+ return inode;
+
+ return dir;
+}
+
+static struct fanotify_event *fanotify_alloc_path_event(const struct path *path,
+ gfp_t gfp)
+{
+ struct fanotify_path_event *pevent;
+
+ pevent = kmem_cache_alloc(fanotify_path_event_cachep, gfp);
+ if (!pevent)
+ return NULL;
+
+ pevent->fae.type = FANOTIFY_EVENT_TYPE_PATH;
+ pevent->path = *path;
+ path_get(path);
+
+ return &pevent->fae;
+}
+
+static struct fanotify_event *fanotify_alloc_perm_event(const struct path *path,
+ gfp_t gfp)
+{
+ struct fanotify_perm_event *pevent;
+
+ pevent = kmem_cache_alloc(fanotify_perm_event_cachep, gfp);
+ if (!pevent)
+ return NULL;
+
+ pevent->fae.type = FANOTIFY_EVENT_TYPE_PATH_PERM;
+ pevent->response = 0;
+ pevent->state = FAN_EVENT_INIT;
+ pevent->path = *path;
+ path_get(path);
+
+ return &pevent->fae;
+}
+
+static struct fanotify_event *fanotify_alloc_fid_event(struct inode *id,
+ __kernel_fsid_t *fsid,
+ gfp_t gfp)
+{
+ struct fanotify_fid_event *ffe;
+
+ ffe = kmem_cache_alloc(fanotify_fid_event_cachep, gfp);
+ if (!ffe)
+ return NULL;
+
+ ffe->fae.type = FANOTIFY_EVENT_TYPE_FID;
+ ffe->fsid = *fsid;
+ fanotify_encode_fh(&ffe->object_fh, id, fanotify_encode_fh_len(id),
+ gfp);
+
+ return &ffe->fae;
+}
+
+static struct fanotify_event *fanotify_alloc_name_event(struct inode *id,
+ __kernel_fsid_t *fsid,
+ const struct qstr *file_name,
+ struct inode *child,
+ gfp_t gfp)
+{
+ struct fanotify_name_event *fne;
+ struct fanotify_info *info;
+ struct fanotify_fh *dfh, *ffh;
+ unsigned int dir_fh_len = fanotify_encode_fh_len(id);
+ unsigned int child_fh_len = fanotify_encode_fh_len(child);
+ unsigned int size;
+
+ size = sizeof(*fne) + FANOTIFY_FH_HDR_LEN + dir_fh_len;
+ if (child_fh_len)
+ size += FANOTIFY_FH_HDR_LEN + child_fh_len;
+ if (file_name)
+ size += file_name->len + 1;
+ fne = kmalloc(size, gfp);
+ if (!fne)
+ return NULL;
+
+ fne->fae.type = FANOTIFY_EVENT_TYPE_FID_NAME;
+ fne->fsid = *fsid;
+ info = &fne->info;
+ fanotify_info_init(info);
+ dfh = fanotify_info_dir_fh(info);
+ info->dir_fh_totlen = fanotify_encode_fh(dfh, id, dir_fh_len, 0);
+ if (child_fh_len) {
+ ffh = fanotify_info_file_fh(info);
+ info->file_fh_totlen = fanotify_encode_fh(ffh, child, child_fh_len, 0);
+ }
+ if (file_name)
+ fanotify_info_copy_name(info, file_name);
+
+ pr_debug("%s: ino=%lu size=%u dir_fh_len=%u child_fh_len=%u name_len=%u name='%.*s'\n",
+ __func__, id->i_ino, size, dir_fh_len, child_fh_len,
+ info->name_len, info->name_len, fanotify_info_name(info));
+
+ return &fne->fae;
+}
+
+static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
+ u32 mask, const void *data,
+ int data_type, struct inode *dir,
+ const struct qstr *file_name,
+ __kernel_fsid_t *fsid)
{
struct fanotify_event *event = NULL;
gfp_t gfp = GFP_KERNEL_ACCOUNT;
- struct inode *id = fanotify_fid_inode(inode, mask, data, data_type);
+ struct inode *id = fanotify_fid_inode(mask, data, data_type, dir);
+ struct inode *dirid = fanotify_dfid_inode(mask, data, data_type, dir);
+ const struct path *path = fsnotify_data_path(data, data_type);
+ unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
+ struct mem_cgroup *old_memcg;
+ struct inode *child = NULL;
+ bool name_event = false;
+
+ if ((fid_mode & FAN_REPORT_DIR_FID) && dirid) {
+ /*
+ * With both flags FAN_REPORT_DIR_FID and FAN_REPORT_FID, we
+ * report the child fid for events reported on a non-dir child
+ * in addition to reporting the parent fid and maybe child name.
+ */
+ if ((fid_mode & FAN_REPORT_FID) &&
+ id != dirid && !(mask & FAN_ONDIR))
+ child = id;
+
+ id = dirid;
+
+ /*
+ * We record file name only in a group with FAN_REPORT_NAME
+ * and when we have a directory inode to report.
+ *
+ * For directory entry modification event, we record the fid of
+ * the directory and the name of the modified entry.
+ *
+ * For event on non-directory that is reported to parent, we
+ * record the fid of the parent and the name of the child.
+ *
+ * Even if not reporting name, we need a variable length
+ * fanotify_name_event if reporting both parent and child fids.
+ */
+ if (!(fid_mode & FAN_REPORT_NAME)) {
+ name_event = !!child;
+ file_name = NULL;
+ } else if ((mask & ALL_FSNOTIFY_DIRENT_EVENTS) ||
+ !(mask & FAN_ONDIR)) {
+ name_event = true;
+ }
+ }
/*
* For queues with unlimited length lost events are not expected and
@@ -303,49 +586,35 @@ struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
gfp |= __GFP_RETRY_MAYFAIL;
/* Whoever is interested in the event, pays for the allocation. */
- memalloc_use_memcg(group->memcg);
+ old_memcg = set_active_memcg(group->memcg);
if (fanotify_is_perm_event(mask)) {
- struct fanotify_perm_event *pevent;
-
- pevent = kmem_cache_alloc(fanotify_perm_event_cachep, gfp);
- if (!pevent)
- goto out;
- event = &pevent->fae;
- pevent->response = 0;
- pevent->state = FAN_EVENT_INIT;
- goto init;
+ event = fanotify_alloc_perm_event(path, gfp);
+ } else if (name_event && (file_name || child)) {
+ event = fanotify_alloc_name_event(id, fsid, file_name, child,
+ gfp);
+ } else if (fid_mode) {
+ event = fanotify_alloc_fid_event(id, fsid, gfp);
+ } else {
+ event = fanotify_alloc_path_event(path, gfp);
}
- event = kmem_cache_alloc(fanotify_event_cachep, gfp);
+
if (!event)
goto out;
-init: __maybe_unused
+
/*
* Use the victim inode instead of the watching inode as the id for
* event queue, so event reported on parent is merged with event
* reported on child when both directory and child watches exist.
*/
- fsnotify_init_event(&event->fse, (unsigned long)id);
- event->mask = mask;
+ fanotify_init_event(event, (unsigned long)id, mask);
if (FAN_GROUP_FLAG(group, FAN_REPORT_TID))
event->pid = get_pid(task_pid(current));
else
event->pid = get_pid(task_tgid(current));
- event->fh_len = 0;
- if (id && FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
- /* Report the event without a file identifier on encode error */
- event->fh_type = fanotify_encode_fid(event, id, gfp, fsid);
- } else if (data_type == FSNOTIFY_EVENT_PATH) {
- event->fh_type = FILEID_ROOT;
- event->path = *((struct path *)data);
- path_get(&event->path);
- } else {
- event->fh_type = FILEID_INVALID;
- event->path.mnt = NULL;
- event->path.dentry = NULL;
- }
+
out:
- memalloc_unuse_memcg();
+ set_active_memcg(old_memcg);
return event;
}
@@ -382,9 +651,9 @@ static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info)
return fsid;
}
-static int fanotify_handle_event(struct fsnotify_group *group,
- struct inode *inode,
- u32 mask, const void *data, int data_type,
+static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
+ const void *data, int data_type,
+ struct inode *dir,
const struct qstr *file_name, u32 cookie,
struct fsnotify_iter_info *iter_info)
{
@@ -416,12 +685,11 @@ static int fanotify_handle_event(struct fsnotify_group *group,
BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 19);
mask = fanotify_group_event_mask(group, iter_info, mask, data,
- data_type);
+ data_type, dir);
if (!mask)
return 0;
- pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode,
- mask);
+ pr_debug("%s: group=%p mask=%x\n", __func__, group, mask);
if (fanotify_is_perm_event(mask)) {
/*
@@ -432,15 +700,15 @@ static int fanotify_handle_event(struct fsnotify_group *group,
return 0;
}
- if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
+ if (FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS)) {
fsid = fanotify_get_fsid(iter_info);
/* Racing with mark destruction or creation? */
if (!fsid.val[0] && !fsid.val[1])
return 0;
}
- event = fanotify_alloc_event(group, inode, mask, data, data_type,
- &fsid);
+ event = fanotify_alloc_event(group, mask, data, data_type, dir,
+ file_name, &fsid);
ret = -ENOMEM;
if (unlikely(!event)) {
/*
@@ -462,7 +730,7 @@ static int fanotify_handle_event(struct fsnotify_group *group,
ret = 0;
} else if (fanotify_is_perm_event(mask)) {
- ret = fanotify_get_response(group, FANOTIFY_PE(fsn_event),
+ ret = fanotify_get_response(group, FANOTIFY_PERM(event),
iter_info);
}
finish:
@@ -481,22 +749,57 @@ static void fanotify_free_group_priv(struct fsnotify_group *group)
free_uid(user);
}
+static void fanotify_free_path_event(struct fanotify_event *event)
+{
+ path_put(fanotify_event_path(event));
+ kmem_cache_free(fanotify_path_event_cachep, FANOTIFY_PE(event));
+}
+
+static void fanotify_free_perm_event(struct fanotify_event *event)
+{
+ path_put(fanotify_event_path(event));
+ kmem_cache_free(fanotify_perm_event_cachep, FANOTIFY_PERM(event));
+}
+
+static void fanotify_free_fid_event(struct fanotify_event *event)
+{
+ struct fanotify_fid_event *ffe = FANOTIFY_FE(event);
+
+ if (fanotify_fh_has_ext_buf(&ffe->object_fh))
+ kfree(fanotify_fh_ext_buf(&ffe->object_fh));
+ kmem_cache_free(fanotify_fid_event_cachep, ffe);
+}
+
+static void fanotify_free_name_event(struct fanotify_event *event)
+{
+ kfree(FANOTIFY_NE(event));
+}
+
static void fanotify_free_event(struct fsnotify_event *fsn_event)
{
struct fanotify_event *event;
event = FANOTIFY_E(fsn_event);
- if (fanotify_event_has_path(event))
- path_put(&event->path);
- else if (fanotify_event_has_ext_fh(event))
- kfree(event->fid.ext_fh);
put_pid(event->pid);
- if (fanotify_is_perm_event(event->mask)) {
- kmem_cache_free(fanotify_perm_event_cachep,
- FANOTIFY_PE(fsn_event));
- return;
+ switch (event->type) {
+ case FANOTIFY_EVENT_TYPE_PATH:
+ fanotify_free_path_event(event);
+ break;
+ case FANOTIFY_EVENT_TYPE_PATH_PERM:
+ fanotify_free_perm_event(event);
+ break;
+ case FANOTIFY_EVENT_TYPE_FID:
+ fanotify_free_fid_event(event);
+ break;
+ case FANOTIFY_EVENT_TYPE_FID_NAME:
+ fanotify_free_name_event(event);
+ break;
+ case FANOTIFY_EVENT_TYPE_OVERFLOW:
+ kfree(event);
+ break;
+ default:
+ WARN_ON_ONCE(1);
}
- kmem_cache_free(fanotify_event_cachep, event);
}
static void fanotify_free_mark(struct fsnotify_mark *fsn_mark)
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index 68b3050..896c819 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -5,7 +5,8 @@
#include <linux/exportfs.h>
extern struct kmem_cache *fanotify_mark_cache;
-extern struct kmem_cache *fanotify_event_cachep;
+extern struct kmem_cache *fanotify_fid_event_cachep;
+extern struct kmem_cache *fanotify_path_event_cachep;
extern struct kmem_cache *fanotify_perm_event_cachep;
/* Possible states of the permission event */
@@ -18,94 +19,222 @@ enum {
/*
* 3 dwords are sufficient for most local fs (64bit ino, 32bit generation).
- * For 32bit arch, fid increases the size of fanotify_event by 12 bytes and
- * fh_* fields increase the size of fanotify_event by another 4 bytes.
- * For 64bit arch, fid increases the size of fanotify_fid by 8 bytes and
- * fh_* fields are packed in a hole after mask.
+ * fh buf should be dword aligned. On 64bit arch, the ext_buf pointer is
+ * stored in either the first or last 2 dwords.
*/
-#if BITS_PER_LONG == 32
#define FANOTIFY_INLINE_FH_LEN (3 << 2)
-#else
-#define FANOTIFY_INLINE_FH_LEN (4 << 2)
-#endif
+#define FANOTIFY_FH_HDR_LEN offsetof(struct fanotify_fh, buf)
-struct fanotify_fid {
- __kernel_fsid_t fsid;
- union {
- unsigned char fh[FANOTIFY_INLINE_FH_LEN];
- unsigned char *ext_fh;
- };
-};
+/* Fixed size struct for file handle */
+struct fanotify_fh {
+ u8 type;
+ u8 len;
+#define FANOTIFY_FH_FLAG_EXT_BUF 1
+ u8 flags;
+ u8 pad;
+ unsigned char buf[];
+} __aligned(4);
-static inline void *fanotify_fid_fh(struct fanotify_fid *fid,
- unsigned int fh_len)
+/* Variable size struct for dir file handle + child file handle + name */
+struct fanotify_info {
+ /* size of dir_fh/file_fh including fanotify_fh hdr size */
+ u8 dir_fh_totlen;
+ u8 file_fh_totlen;
+ u8 name_len;
+ u8 pad;
+ unsigned char buf[];
+ /*
+ * (struct fanotify_fh) dir_fh starts at buf[0]
+ * (optional) file_fh starts at buf[dir_fh_totlen]
+ * name starts at buf[dir_fh_totlen + file_fh_totlen]
+ */
+} __aligned(4);
+
+static inline bool fanotify_fh_has_ext_buf(struct fanotify_fh *fh)
{
- return fh_len <= FANOTIFY_INLINE_FH_LEN ? fid->fh : fid->ext_fh;
+ return (fh->flags & FANOTIFY_FH_FLAG_EXT_BUF);
}
-static inline bool fanotify_fid_equal(struct fanotify_fid *fid1,
- struct fanotify_fid *fid2,
- unsigned int fh_len)
+static inline char **fanotify_fh_ext_buf_ptr(struct fanotify_fh *fh)
{
- return fid1->fsid.val[0] == fid2->fsid.val[0] &&
- fid1->fsid.val[1] == fid2->fsid.val[1] &&
- !memcmp(fanotify_fid_fh(fid1, fh_len),
- fanotify_fid_fh(fid2, fh_len), fh_len);
+ BUILD_BUG_ON(FANOTIFY_FH_HDR_LEN % 4);
+ BUILD_BUG_ON(__alignof__(char *) - 4 + sizeof(char *) >
+ FANOTIFY_INLINE_FH_LEN);
+ return (char **)ALIGN((unsigned long)(fh->buf), __alignof__(char *));
+}
+
+static inline void *fanotify_fh_ext_buf(struct fanotify_fh *fh)
+{
+ return *fanotify_fh_ext_buf_ptr(fh);
+}
+
+static inline void *fanotify_fh_buf(struct fanotify_fh *fh)
+{
+ return fanotify_fh_has_ext_buf(fh) ? fanotify_fh_ext_buf(fh) : fh->buf;
+}
+
+static inline int fanotify_info_dir_fh_len(struct fanotify_info *info)
+{
+ if (!info->dir_fh_totlen ||
+ WARN_ON_ONCE(info->dir_fh_totlen < FANOTIFY_FH_HDR_LEN))
+ return 0;
+
+ return info->dir_fh_totlen - FANOTIFY_FH_HDR_LEN;
+}
+
+static inline struct fanotify_fh *fanotify_info_dir_fh(struct fanotify_info *info)
+{
+ BUILD_BUG_ON(offsetof(struct fanotify_info, buf) % 4);
+
+ return (struct fanotify_fh *)info->buf;
+}
+
+static inline int fanotify_info_file_fh_len(struct fanotify_info *info)
+{
+ if (!info->file_fh_totlen ||
+ WARN_ON_ONCE(info->file_fh_totlen < FANOTIFY_FH_HDR_LEN))
+ return 0;
+
+ return info->file_fh_totlen - FANOTIFY_FH_HDR_LEN;
+}
+
+static inline struct fanotify_fh *fanotify_info_file_fh(struct fanotify_info *info)
+{
+ return (struct fanotify_fh *)(info->buf + info->dir_fh_totlen);
+}
+
+static inline const char *fanotify_info_name(struct fanotify_info *info)
+{
+ return info->buf + info->dir_fh_totlen + info->file_fh_totlen;
+}
+
+static inline void fanotify_info_init(struct fanotify_info *info)
+{
+ info->dir_fh_totlen = 0;
+ info->file_fh_totlen = 0;
+ info->name_len = 0;
+}
+
+static inline void fanotify_info_copy_name(struct fanotify_info *info,
+ const struct qstr *name)
+{
+ info->name_len = name->len;
+ strcpy(info->buf + info->dir_fh_totlen + info->file_fh_totlen,
+ name->name);
}
/*
- * Structure for normal fanotify events. It gets allocated in
+ * Common structure for fanotify events. Concrete structs are allocated in
* fanotify_handle_event() and freed when the information is retrieved by
- * userspace
+ * userspace. The type of event determines how it was allocated, how it will
+ * be freed and which concrete struct it may be cast to.
*/
+enum fanotify_event_type {
+ FANOTIFY_EVENT_TYPE_FID, /* fixed length */
+ FANOTIFY_EVENT_TYPE_FID_NAME, /* variable length */
+ FANOTIFY_EVENT_TYPE_PATH,
+ FANOTIFY_EVENT_TYPE_PATH_PERM,
+ FANOTIFY_EVENT_TYPE_OVERFLOW, /* struct fanotify_event */
+};
+
struct fanotify_event {
struct fsnotify_event fse;
u32 mask;
- /*
- * Those fields are outside fanotify_fid to pack fanotify_event nicely
- * on 64bit arch and to use fh_type as an indication of whether path
- * or fid are used in the union:
- * FILEID_ROOT (0) for path, > 0 for fid, FILEID_INVALID for neither.
- */
- u8 fh_type;
- u8 fh_len;
- u16 pad;
- union {
- /*
- * We hold ref to this path so it may be dereferenced at any
- * point during this object's lifetime
- */
- struct path path;
- /*
- * With FAN_REPORT_FID, we do not hold any reference on the
- * victim object. Instead we store its NFS file handle and its
- * filesystem's fsid as a unique identifier.
- */
- struct fanotify_fid fid;
- };
+ enum fanotify_event_type type;
struct pid *pid;
};
-static inline bool fanotify_event_has_path(struct fanotify_event *event)
+static inline void fanotify_init_event(struct fanotify_event *event,
+ unsigned long id, u32 mask)
{
- return event->fh_type == FILEID_ROOT;
+ fsnotify_init_event(&event->fse, id);
+ event->mask = mask;
+ event->pid = NULL;
}
-static inline bool fanotify_event_has_fid(struct fanotify_event *event)
+struct fanotify_fid_event {
+ struct fanotify_event fae;
+ __kernel_fsid_t fsid;
+ struct fanotify_fh object_fh;
+ /* Reserve space in object_fh.buf[] - access with fanotify_fh_buf() */
+ unsigned char _inline_fh_buf[FANOTIFY_INLINE_FH_LEN];
+};
+
+static inline struct fanotify_fid_event *
+FANOTIFY_FE(struct fanotify_event *event)
{
- return event->fh_type != FILEID_ROOT &&
- event->fh_type != FILEID_INVALID;
+ return container_of(event, struct fanotify_fid_event, fae);
}
-static inline bool fanotify_event_has_ext_fh(struct fanotify_event *event)
+struct fanotify_name_event {
+ struct fanotify_event fae;
+ __kernel_fsid_t fsid;
+ struct fanotify_info info;
+};
+
+static inline struct fanotify_name_event *
+FANOTIFY_NE(struct fanotify_event *event)
{
- return fanotify_event_has_fid(event) &&
- event->fh_len > FANOTIFY_INLINE_FH_LEN;
+ return container_of(event, struct fanotify_name_event, fae);
}
-static inline void *fanotify_event_fh(struct fanotify_event *event)
+static inline __kernel_fsid_t *fanotify_event_fsid(struct fanotify_event *event)
{
- return fanotify_fid_fh(&event->fid, event->fh_len);
+ if (event->type == FANOTIFY_EVENT_TYPE_FID)
+ return &FANOTIFY_FE(event)->fsid;
+ else if (event->type == FANOTIFY_EVENT_TYPE_FID_NAME)
+ return &FANOTIFY_NE(event)->fsid;
+ else
+ return NULL;
+}
+
+static inline struct fanotify_fh *fanotify_event_object_fh(
+ struct fanotify_event *event)
+{
+ if (event->type == FANOTIFY_EVENT_TYPE_FID)
+ return &FANOTIFY_FE(event)->object_fh;
+ else if (event->type == FANOTIFY_EVENT_TYPE_FID_NAME)
+ return fanotify_info_file_fh(&FANOTIFY_NE(event)->info);
+ else
+ return NULL;
+}
+
+static inline struct fanotify_info *fanotify_event_info(
+ struct fanotify_event *event)
+{
+ if (event->type == FANOTIFY_EVENT_TYPE_FID_NAME)
+ return &FANOTIFY_NE(event)->info;
+ else
+ return NULL;
+}
+
+static inline int fanotify_event_object_fh_len(struct fanotify_event *event)
+{
+ struct fanotify_info *info = fanotify_event_info(event);
+ struct fanotify_fh *fh = fanotify_event_object_fh(event);
+
+ if (info)
+ return info->file_fh_totlen ? fh->len : 0;
+ else
+ return fh ? fh->len : 0;
+}
+
+static inline int fanotify_event_dir_fh_len(struct fanotify_event *event)
+{
+ struct fanotify_info *info = fanotify_event_info(event);
+
+ return info ? fanotify_info_dir_fh_len(info) : 0;
+}
+
+struct fanotify_path_event {
+ struct fanotify_event fae;
+ struct path path;
+};
+
+static inline struct fanotify_path_event *
+FANOTIFY_PE(struct fanotify_event *event)
+{
+ return container_of(event, struct fanotify_path_event, fae);
}
/*
@@ -117,15 +246,16 @@ static inline void *fanotify_event_fh(struct fanotify_event *event)
*/
struct fanotify_perm_event {
struct fanotify_event fae;
+ struct path path;
unsigned short response; /* userspace answer to the event */
unsigned short state; /* state of the event */
int fd; /* fd we passed to userspace for this event */
};
static inline struct fanotify_perm_event *
-FANOTIFY_PE(struct fsnotify_event *fse)
+FANOTIFY_PERM(struct fanotify_event *event)
{
- return container_of(fse, struct fanotify_perm_event, fae.fse);
+ return container_of(event, struct fanotify_perm_event, fae);
}
static inline bool fanotify_is_perm_event(u32 mask)
@@ -139,7 +269,18 @@ static inline struct fanotify_event *FANOTIFY_E(struct fsnotify_event *fse)
return container_of(fse, struct fanotify_event, fse);
}
-struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
- struct inode *inode, u32 mask,
- const void *data, int data_type,
- __kernel_fsid_t *fsid);
+static inline bool fanotify_event_has_path(struct fanotify_event *event)
+{
+ return event->type == FANOTIFY_EVENT_TYPE_PATH ||
+ event->type == FANOTIFY_EVENT_TYPE_PATH_PERM;
+}
+
+static inline struct path *fanotify_event_path(struct fanotify_event *event)
+{
+ if (event->type == FANOTIFY_EVENT_TYPE_PATH)
+ return &FANOTIFY_PE(event)->path;
+ else if (event->type == FANOTIFY_EVENT_TYPE_PATH_PERM)
+ return &FANOTIFY_PERM(event)->path;
+ else
+ return NULL;
+}
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 8508ab5..18e014f 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -46,32 +46,61 @@
extern const struct fsnotify_ops fanotify_fsnotify_ops;
struct kmem_cache *fanotify_mark_cache __read_mostly;
-struct kmem_cache *fanotify_event_cachep __read_mostly;
+struct kmem_cache *fanotify_fid_event_cachep __read_mostly;
+struct kmem_cache *fanotify_path_event_cachep __read_mostly;
struct kmem_cache *fanotify_perm_event_cachep __read_mostly;
#define FANOTIFY_EVENT_ALIGN 4
+#define FANOTIFY_INFO_HDR_LEN \
+ (sizeof(struct fanotify_event_info_fid) + sizeof(struct file_handle))
-static int fanotify_event_info_len(struct fanotify_event *event)
+static int fanotify_fid_info_len(int fh_len, int name_len)
{
- if (!fanotify_event_has_fid(event))
- return 0;
+ int info_len = fh_len;
- return roundup(sizeof(struct fanotify_event_info_fid) +
- sizeof(struct file_handle) + event->fh_len,
- FANOTIFY_EVENT_ALIGN);
+ if (name_len)
+ info_len += name_len + 1;
+
+ return roundup(FANOTIFY_INFO_HDR_LEN + info_len, FANOTIFY_EVENT_ALIGN);
+}
+
+static int fanotify_event_info_len(unsigned int fid_mode,
+ struct fanotify_event *event)
+{
+ struct fanotify_info *info = fanotify_event_info(event);
+ int dir_fh_len = fanotify_event_dir_fh_len(event);
+ int fh_len = fanotify_event_object_fh_len(event);
+ int info_len = 0;
+ int dot_len = 0;
+
+ if (dir_fh_len) {
+ info_len += fanotify_fid_info_len(dir_fh_len, info->name_len);
+ } else if ((fid_mode & FAN_REPORT_NAME) && (event->mask & FAN_ONDIR)) {
+ /*
+ * With group flag FAN_REPORT_NAME, if name was not recorded in
+ * event on a directory, we will report the name ".".
+ */
+ dot_len = 1;
+ }
+
+ if (fh_len)
+ info_len += fanotify_fid_info_len(fh_len, dot_len);
+
+ return info_len;
}
/*
- * Get an fsnotify notification event if one exists and is small
+ * Get an fanotify notification event if one exists and is small
* enough to fit in "count". Return an error pointer if the count
* is not large enough. When permission event is dequeued, its state is
* updated accordingly.
*/
-static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
+static struct fanotify_event *get_one_event(struct fsnotify_group *group,
size_t count)
{
size_t event_size = FAN_EVENT_METADATA_LEN;
- struct fsnotify_event *fsn_event = NULL;
+ struct fanotify_event *event = NULL;
+ unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
pr_debug("%s: group=%p count=%zd\n", __func__, group, count);
@@ -79,32 +108,29 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
if (fsnotify_notify_queue_is_empty(group))
goto out;
- if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
- event_size += fanotify_event_info_len(
+ if (fid_mode) {
+ event_size += fanotify_event_info_len(fid_mode,
FANOTIFY_E(fsnotify_peek_first_event(group)));
}
if (event_size > count) {
- fsn_event = ERR_PTR(-EINVAL);
+ event = ERR_PTR(-EINVAL);
goto out;
}
- fsn_event = fsnotify_remove_first_event(group);
- if (fanotify_is_perm_event(FANOTIFY_E(fsn_event)->mask))
- FANOTIFY_PE(fsn_event)->state = FAN_EVENT_REPORTED;
+ event = FANOTIFY_E(fsnotify_remove_first_event(group));
+ if (fanotify_is_perm_event(event->mask))
+ FANOTIFY_PERM(event)->state = FAN_EVENT_REPORTED;
out:
spin_unlock(&group->notification_lock);
- return fsn_event;
+ return event;
}
-static int create_fd(struct fsnotify_group *group,
- struct fanotify_event *event,
+static int create_fd(struct fsnotify_group *group, struct path *path,
struct file **file)
{
int client_fd;
struct file *new_file;
- pr_debug("%s: group=%p event=%p\n", __func__, group, event);
-
client_fd = get_unused_fd_flags(group->fanotify_data.f_flags);
if (client_fd < 0)
return client_fd;
@@ -113,14 +139,9 @@ static int create_fd(struct fsnotify_group *group,
* we need a new file handle for the userspace program so it can read even if it was
* originally opened O_WRONLY.
*/
- /* it's possible this event was an overflow event. in that case dentry and mnt
- * are NULL; That's fine, just don't call dentry open */
- if (event->path.dentry && event->path.mnt)
- new_file = dentry_open(&event->path,
- group->fanotify_data.f_flags | FMODE_NONOTIFY,
- current_cred());
- else
- new_file = ERR_PTR(-EOVERFLOW);
+ new_file = dentry_open(path,
+ group->fanotify_data.f_flags | FMODE_NONOTIFY,
+ current_cred());
if (IS_ERR(new_file)) {
/*
* we still send an event even if we can't open the file. this
@@ -204,90 +225,134 @@ static int process_access_response(struct fsnotify_group *group,
return -ENOENT;
}
-static int copy_fid_to_user(struct fanotify_event *event, char __user *buf)
+static int copy_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh,
+ int info_type, const char *name, size_t name_len,
+ char __user *buf, size_t count)
{
struct fanotify_event_info_fid info = { };
struct file_handle handle = { };
- unsigned char bounce[FANOTIFY_INLINE_FH_LEN], *fh;
- size_t fh_len = event->fh_len;
- size_t len = fanotify_event_info_len(event);
+ unsigned char bounce[FANOTIFY_INLINE_FH_LEN], *fh_buf;
+ size_t fh_len = fh ? fh->len : 0;
+ size_t info_len = fanotify_fid_info_len(fh_len, name_len);
+ size_t len = info_len;
- if (!len)
+ pr_debug("%s: fh_len=%zu name_len=%zu, info_len=%zu, count=%zu\n",
+ __func__, fh_len, name_len, info_len, count);
+
+ if (!fh_len)
return 0;
- if (WARN_ON_ONCE(len < sizeof(info) + sizeof(handle) + fh_len))
+ if (WARN_ON_ONCE(len < sizeof(info) || len > count))
return -EFAULT;
- /* Copy event info fid header followed by vaiable sized file handle */
- info.hdr.info_type = FAN_EVENT_INFO_TYPE_FID;
+ /*
+ * Copy event info fid header followed by variable sized file handle
+ * and optionally followed by variable sized filename.
+ */
+ switch (info_type) {
+ case FAN_EVENT_INFO_TYPE_FID:
+ case FAN_EVENT_INFO_TYPE_DFID:
+ if (WARN_ON_ONCE(name_len))
+ return -EFAULT;
+ break;
+ case FAN_EVENT_INFO_TYPE_DFID_NAME:
+ if (WARN_ON_ONCE(!name || !name_len))
+ return -EFAULT;
+ break;
+ default:
+ return -EFAULT;
+ }
+
+ info.hdr.info_type = info_type;
info.hdr.len = len;
- info.fsid = event->fid.fsid;
+ info.fsid = *fsid;
if (copy_to_user(buf, &info, sizeof(info)))
return -EFAULT;
buf += sizeof(info);
len -= sizeof(info);
- handle.handle_type = event->fh_type;
+ if (WARN_ON_ONCE(len < sizeof(handle)))
+ return -EFAULT;
+
+ handle.handle_type = fh->type;
handle.handle_bytes = fh_len;
if (copy_to_user(buf, &handle, sizeof(handle)))
return -EFAULT;
buf += sizeof(handle);
len -= sizeof(handle);
- /*
- * For an inline fh, copy through stack to exclude the copy from
- * usercopy hardening protections.
- */
- fh = fanotify_event_fh(event);
- if (fh_len <= FANOTIFY_INLINE_FH_LEN) {
- memcpy(bounce, fh, fh_len);
- fh = bounce;
- }
- if (copy_to_user(buf, fh, fh_len))
+ if (WARN_ON_ONCE(len < fh_len))
return -EFAULT;
- /* Pad with 0's */
+ /*
+ * For an inline fh and inline file name, copy through stack to exclude
+ * the copy from usercopy hardening protections.
+ */
+ fh_buf = fanotify_fh_buf(fh);
+ if (fh_len <= FANOTIFY_INLINE_FH_LEN) {
+ memcpy(bounce, fh_buf, fh_len);
+ fh_buf = bounce;
+ }
+ if (copy_to_user(buf, fh_buf, fh_len))
+ return -EFAULT;
+
buf += fh_len;
len -= fh_len;
+
+ if (name_len) {
+ /* Copy the filename with terminating null */
+ name_len++;
+ if (WARN_ON_ONCE(len < name_len))
+ return -EFAULT;
+
+ if (copy_to_user(buf, name, name_len))
+ return -EFAULT;
+
+ buf += name_len;
+ len -= name_len;
+ }
+
+ /* Pad with 0's */
WARN_ON_ONCE(len < 0 || len >= FANOTIFY_EVENT_ALIGN);
if (len > 0 && clear_user(buf, len))
return -EFAULT;
- return 0;
+ return info_len;
}
static ssize_t copy_event_to_user(struct fsnotify_group *group,
- struct fsnotify_event *fsn_event,
+ struct fanotify_event *event,
char __user *buf, size_t count)
{
struct fanotify_event_metadata metadata;
- struct fanotify_event *event;
+ struct path *path = fanotify_event_path(event);
+ struct fanotify_info *info = fanotify_event_info(event);
+ unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
struct file *f = NULL;
int ret, fd = FAN_NOFD;
+ int info_type = 0;
- pr_debug("%s: group=%p event=%p\n", __func__, group, fsn_event);
+ pr_debug("%s: group=%p event=%p\n", __func__, group, event);
- event = container_of(fsn_event, struct fanotify_event, fse);
- metadata.event_len = FAN_EVENT_METADATA_LEN;
+ metadata.event_len = FAN_EVENT_METADATA_LEN +
+ fanotify_event_info_len(fid_mode, event);
metadata.metadata_len = FAN_EVENT_METADATA_LEN;
metadata.vers = FANOTIFY_METADATA_VERSION;
metadata.reserved = 0;
metadata.mask = event->mask & FANOTIFY_OUTGOING_EVENTS;
metadata.pid = pid_vnr(event->pid);
- if (fanotify_event_has_path(event)) {
- fd = create_fd(group, event, &f);
+ if (path && path->mnt && path->dentry) {
+ fd = create_fd(group, path, &f);
if (fd < 0)
return fd;
- } else if (fanotify_event_has_fid(event)) {
- metadata.event_len += fanotify_event_info_len(event);
}
metadata.fd = fd;
ret = -EFAULT;
/*
* Sanity check copy size in case get_one_event() and
- * fill_event_metadata() event_len sizes ever get out of sync.
+ * event_len sizes ever get out of sync.
*/
if (WARN_ON_ONCE(metadata.event_len > count))
goto out_close_fd;
@@ -295,17 +360,78 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
if (copy_to_user(buf, &metadata, FAN_EVENT_METADATA_LEN))
goto out_close_fd;
- if (fanotify_is_perm_event(event->mask))
- FANOTIFY_PE(fsn_event)->fd = fd;
+ buf += FAN_EVENT_METADATA_LEN;
+ count -= FAN_EVENT_METADATA_LEN;
- if (fanotify_event_has_path(event)) {
- fd_install(fd, f);
- } else if (fanotify_event_has_fid(event)) {
- ret = copy_fid_to_user(event, buf + FAN_EVENT_METADATA_LEN);
+ if (fanotify_is_perm_event(event->mask))
+ FANOTIFY_PERM(event)->fd = fd;
+
+ /* Event info records order is: dir fid + name, child fid */
+ if (fanotify_event_dir_fh_len(event)) {
+ info_type = info->name_len ? FAN_EVENT_INFO_TYPE_DFID_NAME :
+ FAN_EVENT_INFO_TYPE_DFID;
+ ret = copy_info_to_user(fanotify_event_fsid(event),
+ fanotify_info_dir_fh(info),
+ info_type, fanotify_info_name(info),
+ info->name_len, buf, count);
if (ret < 0)
- return ret;
+ goto out_close_fd;
+
+ buf += ret;
+ count -= ret;
}
+ if (fanotify_event_object_fh_len(event)) {
+ const char *dot = NULL;
+ int dot_len = 0;
+
+ if (fid_mode == FAN_REPORT_FID || info_type) {
+ /*
+ * With only group flag FAN_REPORT_FID only type FID is
+ * reported. Second info record type is always FID.
+ */
+ info_type = FAN_EVENT_INFO_TYPE_FID;
+ } else if ((fid_mode & FAN_REPORT_NAME) &&
+ (event->mask & FAN_ONDIR)) {
+ /*
+ * With group flag FAN_REPORT_NAME, if name was not
+ * recorded in an event on a directory, report the
+ * name "." with info type DFID_NAME.
+ */
+ info_type = FAN_EVENT_INFO_TYPE_DFID_NAME;
+ dot = ".";
+ dot_len = 1;
+ } else if ((event->mask & ALL_FSNOTIFY_DIRENT_EVENTS) ||
+ (event->mask & FAN_ONDIR)) {
+ /*
+ * With group flag FAN_REPORT_DIR_FID, a single info
+ * record has type DFID for directory entry modification
+ * event and for event on a directory.
+ */
+ info_type = FAN_EVENT_INFO_TYPE_DFID;
+ } else {
+ /*
+ * With group flags FAN_REPORT_DIR_FID|FAN_REPORT_FID,
+ * a single info record has type FID for event on a
+ * non-directory, when there is no directory to report.
+ * For example, on FAN_DELETE_SELF event.
+ */
+ info_type = FAN_EVENT_INFO_TYPE_FID;
+ }
+
+ ret = copy_info_to_user(fanotify_event_fsid(event),
+ fanotify_event_object_fh(event),
+ info_type, dot, dot_len, buf, count);
+ if (ret < 0)
+ goto out_close_fd;
+
+ buf += ret;
+ count -= ret;
+ }
+
+ if (f)
+ fd_install(fd, f);
+
return metadata.event_len;
out_close_fd:
@@ -335,7 +461,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
size_t count, loff_t *pos)
{
struct fsnotify_group *group;
- struct fsnotify_event *kevent;
+ struct fanotify_event *event;
char __user *start;
int ret;
DEFINE_WAIT_FUNC(wait, woken_wake_function);
@@ -347,13 +473,18 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
add_wait_queue(&group->notification_waitq, &wait);
while (1) {
- kevent = get_one_event(group, count);
- if (IS_ERR(kevent)) {
- ret = PTR_ERR(kevent);
+ /*
+ * User can supply arbitrarily large buffer. Avoid softlockups
+ * in case there are lots of available events.
+ */
+ cond_resched();
+ event = get_one_event(group, count);
+ if (IS_ERR(event)) {
+ ret = PTR_ERR(event);
break;
}
- if (!kevent) {
+ if (!event) {
ret = -EAGAIN;
if (file->f_flags & O_NONBLOCK)
break;
@@ -369,7 +500,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
continue;
}
- ret = copy_event_to_user(group, kevent, buf, count);
+ ret = copy_event_to_user(group, event, buf, count);
if (unlikely(ret == -EOPENSTALE)) {
/*
* We cannot report events with stale fd so drop it.
@@ -384,17 +515,17 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
* Permission events get queued to wait for response. Other
* events can be destroyed now.
*/
- if (!fanotify_is_perm_event(FANOTIFY_E(kevent)->mask)) {
- fsnotify_destroy_event(group, kevent);
+ if (!fanotify_is_perm_event(event->mask)) {
+ fsnotify_destroy_event(group, &event->fse);
} else {
if (ret <= 0) {
spin_lock(&group->notification_lock);
finish_permission_event(group,
- FANOTIFY_PE(kevent), FAN_DENY);
+ FANOTIFY_PERM(event), FAN_DENY);
wake_up(&group->fanotify_data.access_waitq);
} else {
spin_lock(&group->notification_lock);
- list_add_tail(&kevent->list,
+ list_add_tail(&event->fse.list,
&group->fanotify_data.access_list);
spin_unlock(&group->notification_lock);
}
@@ -422,8 +553,10 @@ static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t
group = file->private_data;
- if (count > sizeof(response))
- count = sizeof(response);
+ if (count < sizeof(response))
+ return -EINVAL;
+
+ count = sizeof(response);
pr_debug("%s: group=%p count=%zu\n", __func__, group, count);
@@ -440,8 +573,6 @@ static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t
static int fanotify_release(struct inode *ignored, struct file *file)
{
struct fsnotify_group *group = file->private_data;
- struct fanotify_perm_event *event;
- struct fsnotify_event *fsn_event;
/*
* Stop new events from arriving in the notification queue. since
@@ -456,6 +587,8 @@ static int fanotify_release(struct inode *ignored, struct file *file)
*/
spin_lock(&group->notification_lock);
while (!list_empty(&group->fanotify_data.access_list)) {
+ struct fanotify_perm_event *event;
+
event = list_first_entry(&group->fanotify_data.access_list,
struct fanotify_perm_event, fae.fse.list);
list_del_init(&event->fae.fse.list);
@@ -469,12 +602,14 @@ static int fanotify_release(struct inode *ignored, struct file *file)
* response is consumed and fanotify_get_response() returns.
*/
while (!fsnotify_notify_queue_is_empty(group)) {
- fsn_event = fsnotify_remove_first_event(group);
- if (!(FANOTIFY_E(fsn_event)->mask & FANOTIFY_PERM_EVENTS)) {
+ struct fanotify_event *event;
+
+ event = FANOTIFY_E(fsnotify_remove_first_event(group));
+ if (!(event->mask & FANOTIFY_PERM_EVENTS)) {
spin_unlock(&group->notification_lock);
- fsnotify_destroy_event(group, fsn_event);
+ fsnotify_destroy_event(group, &event->fse);
} else {
- finish_permission_event(group, FANOTIFY_PE(fsn_event),
+ finish_permission_event(group, FANOTIFY_PERM(event),
FAN_ALLOW);
}
spin_lock(&group->notification_lock);
@@ -523,7 +658,7 @@ static const struct file_operations fanotify_fops = {
.fasync = NULL,
.release = fanotify_release,
.unlocked_ioctl = fanotify_ioctl,
- .compat_ioctl = fanotify_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.llseek = noop_llseek,
};
@@ -582,12 +717,13 @@ static int fanotify_find_path(int dfd, const char __user *filename,
}
static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
- __u32 mask,
- unsigned int flags,
- int *destroy)
+ __u32 mask, unsigned int flags,
+ __u32 umask, int *destroy)
{
__u32 oldmask = 0;
+ /* umask bits cannot be removed by user */
+ mask &= ~umask;
spin_lock(&fsn_mark->lock);
if (!(flags & FAN_MARK_IGNORED_MASK)) {
oldmask = fsn_mark->mask;
@@ -595,7 +731,13 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
} else {
fsn_mark->ignored_mask &= ~mask;
}
- *destroy = !(fsn_mark->mask | fsn_mark->ignored_mask);
+ /*
+ * We need to keep the mark around even if remaining mask cannot
+ * result in any events (e.g. mask == FAN_ONDIR) to support incremenal
+ * changes to the mask.
+ * Destroy mark when only umask bits remain.
+ */
+ *destroy = !((fsn_mark->mask | fsn_mark->ignored_mask) & ~umask);
spin_unlock(&fsn_mark->lock);
return mask & oldmask;
@@ -603,7 +745,7 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
static int fanotify_remove_mark(struct fsnotify_group *group,
fsnotify_connp_t *connp, __u32 mask,
- unsigned int flags)
+ unsigned int flags, __u32 umask)
{
struct fsnotify_mark *fsn_mark = NULL;
__u32 removed;
@@ -617,7 +759,7 @@ static int fanotify_remove_mark(struct fsnotify_group *group,
}
removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags,
- &destroy_mark);
+ umask, &destroy_mark);
if (removed & fsnotify_conn_mask(fsn_mark->connector))
fsnotify_recalc_mask(fsn_mark->connector);
if (destroy_mark)
@@ -633,25 +775,26 @@ static int fanotify_remove_mark(struct fsnotify_group *group,
static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group,
struct vfsmount *mnt, __u32 mask,
- unsigned int flags)
+ unsigned int flags, __u32 umask)
{
return fanotify_remove_mark(group, &real_mount(mnt)->mnt_fsnotify_marks,
- mask, flags);
+ mask, flags, umask);
}
static int fanotify_remove_sb_mark(struct fsnotify_group *group,
- struct super_block *sb, __u32 mask,
- unsigned int flags)
+ struct super_block *sb, __u32 mask,
+ unsigned int flags, __u32 umask)
{
- return fanotify_remove_mark(group, &sb->s_fsnotify_marks, mask, flags);
+ return fanotify_remove_mark(group, &sb->s_fsnotify_marks, mask,
+ flags, umask);
}
static int fanotify_remove_inode_mark(struct fsnotify_group *group,
struct inode *inode, __u32 mask,
- unsigned int flags)
+ unsigned int flags, __u32 umask)
{
return fanotify_remove_mark(group, &inode->i_fsnotify_marks, mask,
- flags);
+ flags, umask);
}
static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
@@ -762,13 +905,28 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
FSNOTIFY_OBJ_TYPE_INODE, mask, flags, fsid);
}
+static struct fsnotify_event *fanotify_alloc_overflow_event(void)
+{
+ struct fanotify_event *oevent;
+
+ oevent = kmalloc(sizeof(*oevent), GFP_KERNEL_ACCOUNT);
+ if (!oevent)
+ return NULL;
+
+ fanotify_init_event(oevent, 0, FS_Q_OVERFLOW);
+ oevent->type = FANOTIFY_EVENT_TYPE_OVERFLOW;
+
+ return &oevent->fse;
+}
+
/* fanotify syscalls */
SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
{
struct fsnotify_group *group;
int f_flags, fd;
struct user_struct *user;
- struct fanotify_event *oevent;
+ unsigned int fid_mode = flags & FANOTIFY_FID_BITS;
+ unsigned int class = flags & FANOTIFY_CLASS_BITS;
pr_debug("%s: flags=%x event_f_flags=%x\n",
__func__, flags, event_f_flags);
@@ -795,8 +953,14 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
return -EINVAL;
}
- if ((flags & FAN_REPORT_FID) &&
- (flags & FANOTIFY_CLASS_BITS) != FAN_CLASS_NOTIF)
+ if (fid_mode && class != FAN_CLASS_NOTIF)
+ return -EINVAL;
+
+ /*
+ * Child name is reported with parent fid so requires dir fid.
+ * We can report both child fid and dir fid with or without name.
+ */
+ if ((fid_mode & FAN_REPORT_NAME) && !(fid_mode & FAN_REPORT_DIR_FID))
return -EINVAL;
user = get_current_user();
@@ -823,20 +987,18 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
atomic_inc(&user->fanotify_listeners);
group->memcg = get_mem_cgroup_from_mm(current->mm);
- oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL,
- FSNOTIFY_EVENT_NONE, NULL);
- if (unlikely(!oevent)) {
+ group->overflow_event = fanotify_alloc_overflow_event();
+ if (unlikely(!group->overflow_event)) {
fd = -ENOMEM;
goto out_destroy_group;
}
- group->overflow_event = &oevent->fse;
if (force_o_largefile())
event_f_flags |= O_LARGEFILE;
group->fanotify_data.f_flags = event_f_flags;
init_waitqueue_head(&group->fanotify_data.access_waitq);
INIT_LIST_HEAD(&group->fanotify_data.access_list);
- switch (flags & FANOTIFY_CLASS_BITS) {
+ switch (class) {
case FAN_CLASS_NOTIF:
group->priority = FS_PRIO_0;
break;
@@ -955,7 +1117,9 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
__kernel_fsid_t __fsid, *fsid = NULL;
u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS;
unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
- unsigned int obj_type;
+ bool ignored = flags & FAN_MARK_IGNORED_MASK;
+ unsigned int obj_type, fid_mode;
+ u32 umask = 0;
int ret;
pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n",
@@ -983,7 +1147,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
}
switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
- case FAN_MARK_ADD: /* fallthrough */
+ case FAN_MARK_ADD:
case FAN_MARK_REMOVE:
if (!mask)
return -EINVAL;
@@ -1002,6 +1166,10 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
if (mask & ~valid_mask)
return -EINVAL;
+ /* Event flags (ONDIR, ON_CHILD) are meaningless in ignored mask */
+ if (ignored)
+ mask &= ~FANOTIFY_EVENT_FLAGS;
+
f = fdget(fanotify_fd);
if (unlikely(!f.file))
return -EBADF;
@@ -1028,9 +1196,9 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
* inode events are not supported on a mount mark, because they do not
* carry enough information (i.e. path) to be filtered by mount point.
*/
+ fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
if (mask & FANOTIFY_INODE_EVENTS &&
- (!FAN_GROUP_FLAG(group, FAN_REPORT_FID) ||
- mark_type == FAN_MARK_MOUNT))
+ (!fid_mode || mark_type == FAN_MARK_MOUNT))
goto fput_and_out;
if (flags & FAN_MARK_FLUSH) {
@@ -1055,7 +1223,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
goto path_put_and_out;
}
- if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
+ if (fid_mode) {
ret = fanotify_test_fid(&path, &__fsid);
if (ret)
goto path_put_and_out;
@@ -1069,6 +1237,19 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
else
mnt = path.mnt;
+ /* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */
+ if (mnt || !S_ISDIR(inode->i_mode)) {
+ mask &= ~FAN_EVENT_ON_CHILD;
+ umask = FAN_EVENT_ON_CHILD;
+ /*
+ * If group needs to report parent fid, register for getting
+ * events with parent/name info for non-directory.
+ */
+ if ((fid_mode & FAN_REPORT_DIR_FID) &&
+ (flags & FAN_MARK_ADD) && !ignored)
+ mask |= FAN_EVENT_ON_CHILD;
+ }
+
/* create/update an inode mark */
switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) {
case FAN_MARK_ADD:
@@ -1085,13 +1266,13 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
case FAN_MARK_REMOVE:
if (mark_type == FAN_MARK_MOUNT)
ret = fanotify_remove_vfsmount_mark(group, mnt, mask,
- flags);
+ flags, umask);
else if (mark_type == FAN_MARK_FILESYSTEM)
ret = fanotify_remove_sb_mark(group, mnt->mnt_sb, mask,
- flags);
+ flags, umask);
else
ret = fanotify_remove_inode_mark(group, inode, mask,
- flags);
+ flags, umask);
break;
default:
ret = -EINVAL;
@@ -1104,26 +1285,23 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
return ret;
}
+#ifndef CONFIG_ARCH_SPLIT_ARG64
SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags,
__u64, mask, int, dfd,
const char __user *, pathname)
{
return do_fanotify_mark(fanotify_fd, flags, mask, dfd, pathname);
}
+#endif
-#ifdef CONFIG_COMPAT
-COMPAT_SYSCALL_DEFINE6(fanotify_mark,
+#if defined(CONFIG_ARCH_SPLIT_ARG64) || defined(CONFIG_COMPAT)
+SYSCALL32_DEFINE6(fanotify_mark,
int, fanotify_fd, unsigned int, flags,
- __u32, mask0, __u32, mask1, int, dfd,
+ SC_ARG64(mask), int, dfd,
const char __user *, pathname)
{
- return do_fanotify_mark(fanotify_fd, flags,
-#ifdef __BIG_ENDIAN
- ((__u64)mask0 << 32) | mask1,
-#else
- ((__u64)mask1 << 32) | mask0,
-#endif
- dfd, pathname);
+ return do_fanotify_mark(fanotify_fd, flags, SC_VAL64(__u64, mask),
+ dfd, pathname);
}
#endif
@@ -1134,12 +1312,15 @@ COMPAT_SYSCALL_DEFINE6(fanotify_mark,
*/
static int __init fanotify_user_setup(void)
{
- BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 8);
+ BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 10);
BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9);
fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,
SLAB_PANIC|SLAB_ACCOUNT);
- fanotify_event_cachep = KMEM_CACHE(fanotify_event, SLAB_PANIC);
+ fanotify_fid_event_cachep = KMEM_CACHE(fanotify_fid_event,
+ SLAB_PANIC);
+ fanotify_path_event_cachep = KMEM_CACHE(fanotify_path_event,
+ SLAB_PANIC);
if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) {
fanotify_perm_event_cachep =
KMEM_CACHE(fanotify_perm_event, SLAB_PANIC);
diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index 1e2bfd2..f0d6b54 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -11,7 +11,6 @@
#include <linux/sched.h>
#include <linux/types.h>
#include <linux/seq_file.h>
-#include <linux/proc_fs.h>
#include <linux/exportfs.h>
#include "inotify/inotify.h"
@@ -50,7 +49,7 @@ static void show_mark_fhandle(struct seq_file *m, struct inode *inode)
f.handle.handle_bytes = sizeof(f.pad);
size = f.handle.handle_bytes >> 2;
- ret = exportfs_encode_inode_fh(inode, (struct fid *)f.handle.f_handle, &size, 0);
+ ret = exportfs_encode_inode_fh(inode, (struct fid *)f.handle.f_handle, &size, NULL);
if ((ret == FILEID_INVALID) || (ret < 0)) {
WARN_ONCE(1, "Can't encode file handler for inotify: %d\n", ret);
return;
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index f44e39c..30d422b 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -74,7 +74,7 @@ static void fsnotify_unmount_inodes(struct super_block *sb)
iput(iput_inode);
/* for each watch, send FS_UNMOUNT and then remove it */
- fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
+ fsnotify_inode(inode, FS_UNMOUNT);
fsnotify_inode_delete(inode);
@@ -142,52 +142,178 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
spin_unlock(&inode->i_lock);
}
-/* Notify this dentry's parent about a child's events. */
-int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask)
+/* Are inode/sb/mount interested in parent and name info with this event? */
+static bool fsnotify_event_needs_parent(struct inode *inode, struct mount *mnt,
+ __u32 mask)
{
+ __u32 marks_mask = 0;
+
+ /* We only send parent/name to inode/sb/mount for events on non-dir */
+ if (mask & FS_ISDIR)
+ return false;
+
+ /*
+ * All events that are possible on child can also may be reported with
+ * parent/name info to inode/sb/mount. Otherwise, a watching parent
+ * could result in events reported with unexpected name info to sb/mount.
+ */
+ BUILD_BUG_ON(FS_EVENTS_POSS_ON_CHILD & ~FS_EVENTS_POSS_TO_PARENT);
+
+ /* Did either inode/sb/mount subscribe for events with parent/name? */
+ marks_mask |= fsnotify_parent_needed_mask(inode->i_fsnotify_mask);
+ marks_mask |= fsnotify_parent_needed_mask(inode->i_sb->s_fsnotify_mask);
+ if (mnt)
+ marks_mask |= fsnotify_parent_needed_mask(mnt->mnt_fsnotify_mask);
+
+ /* Did they subscribe for this event with parent/name info? */
+ return mask & marks_mask;
+}
+
+/*
+ * Notify this dentry's parent about a child's events with child name info
+ * if parent is watching or if inode/sb/mount are interested in events with
+ * parent and name info.
+ *
+ * Notify only the child without name info if parent is not watching and
+ * inode/sb/mount are not interested in events with parent and name info.
+ */
+int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data,
+ int data_type)
+{
+ const struct path *path = fsnotify_data_path(data, data_type);
+ struct mount *mnt = path ? real_mount(path->mnt) : NULL;
+ struct inode *inode = d_inode(dentry);
struct dentry *parent;
- struct inode *p_inode;
+ bool parent_watched = dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED;
+ bool parent_needed, parent_interested;
+ __u32 p_mask;
+ struct inode *p_inode = NULL;
+ struct name_snapshot name;
+ struct qstr *file_name = NULL;
int ret = 0;
- if (!dentry)
- dentry = path->dentry;
-
- if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED))
+ /*
+ * Do inode/sb/mount care about parent and name info on non-dir?
+ * Do they care about any event at all?
+ */
+ if (!inode->i_fsnotify_marks && !inode->i_sb->s_fsnotify_marks &&
+ (!mnt || !mnt->mnt_fsnotify_marks) && !parent_watched)
return 0;
+ parent = NULL;
+ parent_needed = fsnotify_event_needs_parent(inode, mnt, mask);
+ if (!parent_watched && !parent_needed)
+ goto notify;
+
+ /* Does parent inode care about events on children? */
parent = dget_parent(dentry);
p_inode = parent->d_inode;
-
- if (unlikely(!fsnotify_inode_watches_children(p_inode))) {
+ p_mask = fsnotify_inode_watches_children(p_inode);
+ if (unlikely(parent_watched && !p_mask))
__fsnotify_update_child_dentry_flags(p_inode);
- } else if (p_inode->i_fsnotify_mask & mask & ALL_FSNOTIFY_EVENTS) {
- struct name_snapshot name;
- /* we are notifying a parent so come up with the new mask which
- * specifies these are events which came from a child. */
- mask |= FS_EVENT_ON_CHILD;
+ /*
+ * Include parent/name in notification either if some notification
+ * groups require parent info or the parent is interested in this event.
+ */
+ parent_interested = mask & p_mask & ALL_FSNOTIFY_EVENTS;
+ if (parent_needed || parent_interested) {
+ /* When notifying parent, child should be passed as data */
+ WARN_ON_ONCE(inode != fsnotify_data_inode(data, data_type));
+ /* Notify both parent and child with child name info */
take_dentry_name_snapshot(&name, dentry);
- if (path)
- ret = fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH,
- &name.name, 0);
- else
- ret = fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
- &name.name, 0);
- release_dentry_name_snapshot(&name);
+ file_name = &name.name;
+ if (parent_interested)
+ mask |= FS_EVENT_ON_CHILD;
}
+notify:
+ ret = fsnotify(mask, data, data_type, p_inode, file_name, inode, 0);
+
+ if (file_name)
+ release_dentry_name_snapshot(&name);
dput(parent);
return ret;
}
EXPORT_SYMBOL_GPL(__fsnotify_parent);
-static int send_to_group(struct inode *to_tell,
- __u32 mask, const void *data,
- int data_is, u32 cookie,
- const struct qstr *file_name,
- struct fsnotify_iter_info *iter_info)
+static int fsnotify_handle_inode_event(struct fsnotify_group *group,
+ struct fsnotify_mark *inode_mark,
+ u32 mask, const void *data, int data_type,
+ struct inode *dir, const struct qstr *name,
+ u32 cookie)
+{
+ const struct path *path = fsnotify_data_path(data, data_type);
+ struct inode *inode = fsnotify_data_inode(data, data_type);
+ const struct fsnotify_ops *ops = group->ops;
+
+ if (WARN_ON_ONCE(!ops->handle_inode_event))
+ return 0;
+
+ if ((inode_mark->mask & FS_EXCL_UNLINK) &&
+ path && d_unlinked(path->dentry))
+ return 0;
+
+ /* Check interest of this mark in case event was sent with two marks */
+ if (!(mask & inode_mark->mask & ALL_FSNOTIFY_EVENTS))
+ return 0;
+
+ return ops->handle_inode_event(inode_mark, mask, inode, dir, name, cookie);
+}
+
+static int fsnotify_handle_event(struct fsnotify_group *group, __u32 mask,
+ const void *data, int data_type,
+ struct inode *dir, const struct qstr *name,
+ u32 cookie, struct fsnotify_iter_info *iter_info)
+{
+ struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info);
+ struct fsnotify_mark *parent_mark = fsnotify_iter_parent_mark(iter_info);
+ int ret;
+
+ if (WARN_ON_ONCE(fsnotify_iter_sb_mark(iter_info)) ||
+ WARN_ON_ONCE(fsnotify_iter_vfsmount_mark(iter_info)))
+ return 0;
+
+ if (parent_mark) {
+ /*
+ * parent_mark indicates that the parent inode is watching
+ * children and interested in this event, which is an event
+ * possible on child. But is *this mark* watching children and
+ * interested in this event?
+ */
+ if (parent_mark->mask & FS_EVENT_ON_CHILD) {
+ ret = fsnotify_handle_inode_event(group, parent_mark, mask,
+ data, data_type, dir, name, 0);
+ if (ret)
+ return ret;
+ }
+ if (!inode_mark)
+ return 0;
+ }
+
+ if (mask & FS_EVENT_ON_CHILD) {
+ /*
+ * Some events can be sent on both parent dir and child marks
+ * (e.g. FS_ATTRIB). If both parent dir and child are
+ * watching, report the event once to parent dir with name (if
+ * interested) and once to child without name (if interested).
+ * The child watcher is expecting an event without a file name
+ * and without the FS_EVENT_ON_CHILD flag.
+ */
+ mask &= ~FS_EVENT_ON_CHILD;
+ dir = NULL;
+ name = NULL;
+ }
+
+ return fsnotify_handle_inode_event(group, inode_mark, mask, data, data_type,
+ dir, name, cookie);
+}
+
+static int send_to_group(__u32 mask, const void *data, int data_type,
+ struct inode *dir, const struct qstr *file_name,
+ u32 cookie, struct fsnotify_iter_info *iter_info)
{
struct fsnotify_group *group = NULL;
__u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS);
@@ -223,16 +349,20 @@ static int send_to_group(struct inode *to_tell,
}
}
- pr_debug("%s: group=%p to_tell=%p mask=%x marks_mask=%x marks_ignored_mask=%x"
- " data=%p data_is=%d cookie=%d\n",
- __func__, group, to_tell, mask, marks_mask, marks_ignored_mask,
- data, data_is, cookie);
+ pr_debug("%s: group=%p mask=%x marks_mask=%x marks_ignored_mask=%x data=%p data_type=%d dir=%p cookie=%d\n",
+ __func__, group, mask, marks_mask, marks_ignored_mask,
+ data, data_type, dir, cookie);
if (!(test_mask & marks_mask & ~marks_ignored_mask))
return 0;
- return group->ops->handle_event(group, to_tell, mask, data, data_is,
- file_name, cookie, iter_info);
+ if (group->ops->handle_event) {
+ return group->ops->handle_event(group, mask, data, data_type, dir,
+ file_name, cookie, iter_info);
+ }
+
+ return fsnotify_handle_event(group, mask, data, data_type, dir,
+ file_name, cookie, iter_info);
}
static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector **connp)
@@ -310,28 +440,50 @@ static void fsnotify_iter_next(struct fsnotify_iter_info *iter_info)
}
/*
- * This is the main call to fsnotify. The VFS calls into hook specific functions
- * in linux/fsnotify.h. Those functions then in turn call here. Here will call
- * out to all of the registered fsnotify_group. Those groups can then use the
- * notification event in whatever means they feel necessary.
+ * fsnotify - This is the main call to fsnotify.
+ *
+ * The VFS calls into hook specific functions in linux/fsnotify.h.
+ * Those functions then in turn call here. Here will call out to all of the
+ * registered fsnotify_group. Those groups can then use the notification event
+ * in whatever means they feel necessary.
+ *
+ * @mask: event type and flags
+ * @data: object that event happened on
+ * @data_type: type of object for fanotify_data_XXX() accessors
+ * @dir: optional directory associated with event -
+ * if @file_name is not NULL, this is the directory that
+ * @file_name is relative to
+ * @file_name: optional file name associated with event
+ * @inode: optional inode associated with event -
+ * either @dir or @inode must be non-NULL.
+ * if both are non-NULL event may be reported to both.
+ * @cookie: inotify rename cookie
*/
-int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
- const struct qstr *file_name, u32 cookie)
+int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
+ const struct qstr *file_name, struct inode *inode, u32 cookie)
{
+ const struct path *path = fsnotify_data_path(data, data_type);
struct fsnotify_iter_info iter_info = {};
- struct super_block *sb = to_tell->i_sb;
+ struct super_block *sb;
struct mount *mnt = NULL;
- __u32 mnt_or_sb_mask = sb->s_fsnotify_mask;
+ struct inode *parent = NULL;
int ret = 0;
- __u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS);
+ __u32 test_mask, marks_mask;
- if (data_is == FSNOTIFY_EVENT_PATH) {
- mnt = real_mount(((const struct path *)data)->mnt);
- mnt_or_sb_mask |= mnt->mnt_fsnotify_mask;
+ if (path)
+ mnt = real_mount(path->mnt);
+
+ if (!inode) {
+ /* Dirent event - report on TYPE_INODE to dir */
+ inode = dir;
+ } else if (mask & FS_EVENT_ON_CHILD) {
+ /*
+ * Event on child - report on TYPE_PARENT to dir if it is
+ * watching children and on TYPE_INODE to child.
+ */
+ parent = dir;
}
- /* An event "on child" is not intended for a mount/sb mark */
- if (mask & FS_EVENT_ON_CHILD)
- mnt_or_sb_mask = 0;
+ sb = inode->i_sb;
/*
* Optimization: srcu_read_lock() has a memory barrier which can
@@ -340,28 +492,45 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
* SRCU because we have no references to any objects and do not
* need SRCU to keep them "alive".
*/
- if (!to_tell->i_fsnotify_marks && !sb->s_fsnotify_marks &&
- (!mnt || !mnt->mnt_fsnotify_marks))
+ if (!sb->s_fsnotify_marks &&
+ (!mnt || !mnt->mnt_fsnotify_marks) &&
+ (!inode || !inode->i_fsnotify_marks) &&
+ (!parent || !parent->i_fsnotify_marks))
return 0;
+
+ marks_mask = sb->s_fsnotify_mask;
+ if (mnt)
+ marks_mask |= mnt->mnt_fsnotify_mask;
+ if (inode)
+ marks_mask |= inode->i_fsnotify_mask;
+ if (parent)
+ marks_mask |= parent->i_fsnotify_mask;
+
+
/*
* if this is a modify event we may need to clear the ignored masks
- * otherwise return if neither the inode nor the vfsmount/sb care about
- * this type of event.
+ * otherwise return if none of the marks care about this type of event.
*/
- if (!(mask & FS_MODIFY) &&
- !(test_mask & (to_tell->i_fsnotify_mask | mnt_or_sb_mask)))
+ test_mask = (mask & ALL_FSNOTIFY_EVENTS);
+ if (!(mask & FS_MODIFY) && !(test_mask & marks_mask))
return 0;
iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
- iter_info.marks[FSNOTIFY_OBJ_TYPE_INODE] =
- fsnotify_first_mark(&to_tell->i_fsnotify_marks);
iter_info.marks[FSNOTIFY_OBJ_TYPE_SB] =
fsnotify_first_mark(&sb->s_fsnotify_marks);
if (mnt) {
iter_info.marks[FSNOTIFY_OBJ_TYPE_VFSMOUNT] =
fsnotify_first_mark(&mnt->mnt_fsnotify_marks);
}
+ if (inode) {
+ iter_info.marks[FSNOTIFY_OBJ_TYPE_INODE] =
+ fsnotify_first_mark(&inode->i_fsnotify_marks);
+ }
+ if (parent) {
+ iter_info.marks[FSNOTIFY_OBJ_TYPE_PARENT] =
+ fsnotify_first_mark(&parent->i_fsnotify_marks);
+ }
/*
* We need to merge inode/vfsmount/sb mark lists so that e.g. inode mark
@@ -369,8 +538,8 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
* That's why this traversal is so complicated...
*/
while (fsnotify_iter_select_report_types(&iter_info)) {
- ret = send_to_group(to_tell, mask, data, data_is, cookie,
- file_name, &iter_info);
+ ret = send_to_group(mask, data, data_type, dir, file_name,
+ cookie, &iter_info);
if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
goto out;
@@ -385,8 +554,6 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
}
EXPORT_SYMBOL_GPL(fsnotify);
-extern struct kmem_cache *fsnotify_mark_connector_cachep;
-
static __init int fsnotify_init(void)
{
int ret;
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index f346282..ff2063e 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -65,4 +65,6 @@ extern void __fsnotify_update_child_dentry_flags(struct inode *inode);
extern struct fsnotify_event_holder *fsnotify_alloc_event_holder(void);
extern void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder);
+extern struct kmem_cache *fsnotify_mark_connector_cachep;
+
#endif /* __FS_NOTIFY_FSNOTIFY_H_ */
diff --git a/fs/notify/group.c b/fs/notify/group.c
index 133f723..a4a4b1c 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -25,6 +25,7 @@ static void fsnotify_final_destroy_group(struct fsnotify_group *group)
group->ops->free_group_priv(group);
mem_cgroup_put(group->memcg);
+ mutex_destroy(&group->mark_mutex);
kfree(group);
}
diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig
index 6736e47..1cc8be2 100644
--- a/fs/notify/inotify/Kconfig
+++ b/fs/notify/inotify/Kconfig
@@ -3,7 +3,7 @@
bool "Inotify support for userspace"
select FSNOTIFY
default y
- ---help---
+ help
Say Y here to enable inotify support for userspace, including the
associated system calls. Inotify allows monitoring of both files and
directories via a single open fd. Events are read from the file
@@ -12,6 +12,6 @@
new features including multiple file events, one-shot support, and
unmount notification.
- For more information, see <file:Documentation/filesystems/inotify.txt>
+ For more information, see <file:Documentation/filesystems/inotify.rst>
If unsure, say Y.
diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
index 3f246f7..2007e37 100644
--- a/fs/notify/inotify/inotify.h
+++ b/fs/notify/inotify/inotify.h
@@ -24,11 +24,10 @@ static inline struct inotify_event_info *INOTIFY_E(struct fsnotify_event *fse)
extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
struct fsnotify_group *group);
-extern int inotify_handle_event(struct fsnotify_group *group,
- struct inode *inode,
- u32 mask, const void *data, int data_type,
- const struct qstr *file_name, u32 cookie,
- struct fsnotify_iter_info *iter_info);
+extern int inotify_handle_inode_event(struct fsnotify_mark *inode_mark,
+ u32 mask, struct inode *inode,
+ struct inode *dir,
+ const struct qstr *name, u32 cookie);
extern const struct fsnotify_ops inotify_fsnotify_ops;
extern struct kmem_cache *inotify_inode_mark_cachep;
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 589dee9..1901d79 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -39,7 +39,7 @@ static bool event_compare(struct fsnotify_event *old_fsn,
if (old->mask & FS_IN_IGNORED)
return false;
if ((old->mask == new->mask) &&
- (old_fsn->objectid == new_fsn->objectid) &&
+ (old->wd == new->wd) &&
(old->name_len == new->name_len) &&
(!old->name_len || !strcmp(old->name, new->name)))
return true;
@@ -55,36 +55,25 @@ static int inotify_merge(struct list_head *list,
return event_compare(last_event, event);
}
-int inotify_handle_event(struct fsnotify_group *group,
- struct inode *inode,
- u32 mask, const void *data, int data_type,
- const struct qstr *file_name, u32 cookie,
- struct fsnotify_iter_info *iter_info)
+int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask,
+ struct inode *inode, struct inode *dir,
+ const struct qstr *name, u32 cookie)
{
- struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info);
struct inotify_inode_mark *i_mark;
struct inotify_event_info *event;
struct fsnotify_event *fsn_event;
+ struct fsnotify_group *group = inode_mark->group;
int ret;
int len = 0;
int alloc_len = sizeof(struct inotify_event_info);
+ struct mem_cgroup *old_memcg;
- if (WARN_ON(fsnotify_iter_vfsmount_mark(iter_info)))
- return 0;
-
- if ((inode_mark->mask & FS_EXCL_UNLINK) &&
- (data_type == FSNOTIFY_EVENT_PATH)) {
- const struct path *path = data;
-
- if (d_unlinked(path->dentry))
- return 0;
- }
- if (file_name) {
- len = file_name->len;
+ if (name) {
+ len = name->len;
alloc_len += len + 1;
}
- pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode,
+ pr_debug("%s: group=%p mark=%p mask=%x\n", __func__, group, inode_mark,
mask);
i_mark = container_of(inode_mark, struct inotify_inode_mark,
@@ -95,9 +84,9 @@ int inotify_handle_event(struct fsnotify_group *group,
* trigger OOM killer in the target monitoring memcg as it may have
* security repercussion.
*/
- memalloc_use_memcg(group->memcg);
+ old_memcg = set_active_memcg(group->memcg);
event = kmalloc(alloc_len, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
- memalloc_unuse_memcg();
+ set_active_memcg(old_memcg);
if (unlikely(!event)) {
/*
@@ -118,13 +107,13 @@ int inotify_handle_event(struct fsnotify_group *group,
mask &= ~IN_ISDIR;
fsn_event = &event->fse;
- fsnotify_init_event(fsn_event, (unsigned long)inode);
+ fsnotify_init_event(fsn_event, 0);
event->mask = mask;
event->wd = i_mark->wd;
event->sync_cookie = cookie;
event->name_len = len;
if (len)
- strcpy(event->name, file_name->name);
+ strcpy(event->name, name->name);
ret = fsnotify_add_event(group, fsn_event, inotify_merge);
if (ret) {
@@ -203,7 +192,7 @@ static void inotify_free_mark(struct fsnotify_mark *fsn_mark)
}
const struct fsnotify_ops inotify_fsnotify_ops = {
- .handle_event = inotify_handle_event,
+ .handle_inode_event = inotify_handle_inode_event,
.free_group_priv = inotify_free_group_priv,
.free_event = inotify_free_event,
.freeing_mark = inotify_freeing_mark,
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 81ffc86..5f6c6bf 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -75,15 +75,17 @@ struct ctl_table inotify_table[] = {
};
#endif /* CONFIG_SYSCTL */
-static inline __u32 inotify_arg_to_mask(u32 arg)
+static inline __u32 inotify_arg_to_mask(struct inode *inode, u32 arg)
{
__u32 mask;
/*
- * everything should accept their own ignored, cares about children,
- * and should receive events when the inode is unmounted
+ * Everything should accept their own ignored and should receive events
+ * when the inode is unmounted. All directories care about children.
*/
- mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD | FS_UNMOUNT);
+ mask = (FS_IN_IGNORED | FS_UNMOUNT);
+ if (S_ISDIR(inode->i_mode))
+ mask |= FS_EVENT_ON_CHILD;
/* mask off the flags used to open the fd */
mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK));
@@ -484,14 +486,10 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
struct fsnotify_group *group)
{
struct inotify_inode_mark *i_mark;
- struct fsnotify_iter_info iter_info = { };
-
- fsnotify_iter_set_report_type_mark(&iter_info, FSNOTIFY_OBJ_TYPE_INODE,
- fsn_mark);
/* Queue ignore event for the watch */
- inotify_handle_event(group, NULL, FS_IN_IGNORED, NULL,
- FSNOTIFY_EVENT_NONE, NULL, 0, &iter_info);
+ inotify_handle_inode_event(fsn_mark, FS_IN_IGNORED, NULL, NULL, NULL,
+ 0);
i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
/* remove this mark from the idr */
@@ -512,7 +510,7 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
int create = (arg & IN_MASK_CREATE);
int ret;
- mask = inotify_arg_to_mask(arg);
+ mask = inotify_arg_to_mask(inode, arg);
fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group);
if (!fsn_mark)
@@ -565,7 +563,7 @@ static int inotify_new_watch(struct fsnotify_group *group,
struct idr *idr = &group->inotify_data.idr;
spinlock_t *idr_lock = &group->inotify_data.idr_lock;
- mask = inotify_arg_to_mask(arg);
+ mask = inotify_arg_to_mask(inode, arg);
tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
if (unlikely(!tmp_i_mark))
@@ -764,20 +762,18 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
struct fsnotify_group *group;
struct inotify_inode_mark *i_mark;
struct fd f;
- int ret = 0;
+ int ret = -EINVAL;
f = fdget(fd);
if (unlikely(!f.file))
return -EBADF;
/* verify that this is indeed an inotify instance */
- ret = -EINVAL;
if (unlikely(f.file->f_op != &inotify_fops))
goto out;
group = f.file->private_data;
- ret = -EINVAL;
i_mark = inotify_idr_find(group, wd);
if (unlikely(!i_mark))
goto out;
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 1d96216..8387937 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -325,13 +325,16 @@ static void fsnotify_put_mark_wake(struct fsnotify_mark *mark)
}
bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
+ __releases(&fsnotify_mark_srcu)
{
int type;
fsnotify_foreach_obj_type(type) {
/* This can fail if mark is being removed */
- if (!fsnotify_get_mark_safe(iter_info->marks[type]))
+ if (!fsnotify_get_mark_safe(iter_info->marks[type])) {
+ __release(&fsnotify_mark_srcu);
goto fail;
+ }
}
/*
@@ -350,6 +353,7 @@ bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
}
void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info)
+ __acquires(&fsnotify_mark_srcu)
{
int type;