diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index d77c624c61f6..53fcbf79bdca 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -600,7 +600,7 @@ static struct notifier_block nfsd_file_lease_notifier = { static int nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask, struct inode *inode, struct inode *dir, - const struct qstr *name) + const struct qstr *name, u32 cookie) { trace_nfsd_file_fsnotify_handle_event(inode, mask); diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index 5486aaca60b0..e85e13c50d6d 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -72,7 +72,7 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark) */ static int dnotify_handle_event(struct fsnotify_mark *inode_mark, u32 mask, struct inode *inode, struct inode *dir, - const struct qstr *name) + const struct qstr *name, u32 cookie) { struct dnotify_mark *dn_mark; struct dnotify_struct *dn; diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 9167884a61ec..1192c9953620 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -268,12 +268,11 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group, continue; /* - * If the event is for a child and this mark is on a parent not + * If the event is on a child and this mark is on a parent not * watching children, don't send it! */ - if (event_mask & FS_EVENT_ON_CHILD && - type == FSNOTIFY_OBJ_TYPE_INODE && - !(mark->mask & FS_EVENT_ON_CHILD)) + if (type == FSNOTIFY_OBJ_TYPE_PARENT && + !(mark->mask & FS_EVENT_ON_CHILD)) continue; marks_mask |= mark->mask; diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 8d3ad5ef2925..30d422b8c0fc 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -152,6 +152,13 @@ static bool fsnotify_event_needs_parent(struct inode *inode, struct mount *mnt, if (mask & FS_ISDIR) return false; + /* + * All events that are possible on child can also may be reported with + * parent/name info to inode/sb/mount. Otherwise, a watching parent + * could result in events reported with unexpected name info to sb/mount. + */ + BUILD_BUG_ON(FS_EVENTS_POSS_ON_CHILD & ~FS_EVENTS_POSS_TO_PARENT); + /* Did either inode/sb/mount subscribe for events with parent/name? */ marks_mask |= fsnotify_parent_needed_mask(inode->i_fsnotify_mask); marks_mask |= fsnotify_parent_needed_mask(inode->i_sb->s_fsnotify_mask); @@ -232,47 +239,76 @@ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, } EXPORT_SYMBOL_GPL(__fsnotify_parent); +static int fsnotify_handle_inode_event(struct fsnotify_group *group, + struct fsnotify_mark *inode_mark, + u32 mask, const void *data, int data_type, + struct inode *dir, const struct qstr *name, + u32 cookie) +{ + const struct path *path = fsnotify_data_path(data, data_type); + struct inode *inode = fsnotify_data_inode(data, data_type); + const struct fsnotify_ops *ops = group->ops; + + if (WARN_ON_ONCE(!ops->handle_inode_event)) + return 0; + + if ((inode_mark->mask & FS_EXCL_UNLINK) && + path && d_unlinked(path->dentry)) + return 0; + + /* Check interest of this mark in case event was sent with two marks */ + if (!(mask & inode_mark->mask & ALL_FSNOTIFY_EVENTS)) + return 0; + + return ops->handle_inode_event(inode_mark, mask, inode, dir, name, cookie); +} + static int fsnotify_handle_event(struct fsnotify_group *group, __u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *name, u32 cookie, struct fsnotify_iter_info *iter_info) { struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info); - struct fsnotify_mark *child_mark = fsnotify_iter_child_mark(iter_info); - struct inode *inode = fsnotify_data_inode(data, data_type); - const struct fsnotify_ops *ops = group->ops; + struct fsnotify_mark *parent_mark = fsnotify_iter_parent_mark(iter_info); int ret; - if (WARN_ON_ONCE(!ops->handle_inode_event)) - return 0; - if (WARN_ON_ONCE(fsnotify_iter_sb_mark(iter_info)) || WARN_ON_ONCE(fsnotify_iter_vfsmount_mark(iter_info))) return 0; - /* - * An event can be sent on child mark iterator instead of inode mark - * iterator because of other groups that have interest of this inode - * and have marks on both parent and child. We can simplify this case. - */ - if (!inode_mark) { - inode_mark = child_mark; - child_mark = NULL; + if (parent_mark) { + /* + * parent_mark indicates that the parent inode is watching + * children and interested in this event, which is an event + * possible on child. But is *this mark* watching children and + * interested in this event? + */ + if (parent_mark->mask & FS_EVENT_ON_CHILD) { + ret = fsnotify_handle_inode_event(group, parent_mark, mask, + data, data_type, dir, name, 0); + if (ret) + return ret; + } + if (!inode_mark) + return 0; + } + + if (mask & FS_EVENT_ON_CHILD) { + /* + * Some events can be sent on both parent dir and child marks + * (e.g. FS_ATTRIB). If both parent dir and child are + * watching, report the event once to parent dir with name (if + * interested) and once to child without name (if interested). + * The child watcher is expecting an event without a file name + * and without the FS_EVENT_ON_CHILD flag. + */ + mask &= ~FS_EVENT_ON_CHILD; dir = NULL; name = NULL; } - ret = ops->handle_inode_event(inode_mark, mask, inode, dir, name); - if (ret || !child_mark) - return ret; - - /* - * Some events can be sent on both parent dir and child marks - * (e.g. FS_ATTRIB). If both parent dir and child are watching, - * report the event once to parent dir with name and once to child - * without name. - */ - return ops->handle_inode_event(child_mark, mask, inode, NULL, NULL); + return fsnotify_handle_inode_event(group, inode_mark, mask, data, data_type, + dir, name, cookie); } static int send_to_group(__u32 mask, const void *data, int data_type, @@ -430,7 +466,7 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, struct fsnotify_iter_info iter_info = {}; struct super_block *sb; struct mount *mnt = NULL; - struct inode *child = NULL; + struct inode *parent = NULL; int ret = 0; __u32 test_mask, marks_mask; @@ -442,11 +478,10 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, inode = dir; } else if (mask & FS_EVENT_ON_CHILD) { /* - * Event on child - report on TYPE_INODE to dir if it is - * watching children and on TYPE_CHILD to child. + * Event on child - report on TYPE_PARENT to dir if it is + * watching children and on TYPE_INODE to child. */ - child = inode; - inode = dir; + parent = dir; } sb = inode->i_sb; @@ -460,7 +495,7 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, if (!sb->s_fsnotify_marks && (!mnt || !mnt->mnt_fsnotify_marks) && (!inode || !inode->i_fsnotify_marks) && - (!child || !child->i_fsnotify_marks)) + (!parent || !parent->i_fsnotify_marks)) return 0; marks_mask = sb->s_fsnotify_mask; @@ -468,8 +503,8 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, marks_mask |= mnt->mnt_fsnotify_mask; if (inode) marks_mask |= inode->i_fsnotify_mask; - if (child) - marks_mask |= child->i_fsnotify_mask; + if (parent) + marks_mask |= parent->i_fsnotify_mask; /* @@ -492,9 +527,9 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, iter_info.marks[FSNOTIFY_OBJ_TYPE_INODE] = fsnotify_first_mark(&inode->i_fsnotify_marks); } - if (child) { - iter_info.marks[FSNOTIFY_OBJ_TYPE_CHILD] = - fsnotify_first_mark(&child->i_fsnotify_marks); + if (parent) { + iter_info.marks[FSNOTIFY_OBJ_TYPE_PARENT] = + fsnotify_first_mark(&parent->i_fsnotify_marks); } /* diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h index 4327d0e9c364..2007e3711916 100644 --- a/fs/notify/inotify/inotify.h +++ b/fs/notify/inotify/inotify.h @@ -24,11 +24,10 @@ static inline struct inotify_event_info *INOTIFY_E(struct fsnotify_event *fse) extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group); -extern int inotify_handle_event(struct fsnotify_group *group, u32 mask, - const void *data, int data_type, - struct inode *dir, - const struct qstr *file_name, u32 cookie, - struct fsnotify_iter_info *iter_info); +extern int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, + u32 mask, struct inode *inode, + struct inode *dir, + const struct qstr *name, u32 cookie); extern const struct fsnotify_ops inotify_fsnotify_ops; extern struct kmem_cache *inotify_inode_mark_cachep; diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 9ddcbadc98e2..1901d799909b 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -55,25 +55,21 @@ static int inotify_merge(struct list_head *list, return event_compare(last_event, event); } -static int inotify_one_event(struct fsnotify_group *group, u32 mask, - struct fsnotify_mark *inode_mark, - const struct path *path, - const struct qstr *file_name, u32 cookie) +int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask, + struct inode *inode, struct inode *dir, + const struct qstr *name, u32 cookie) { struct inotify_inode_mark *i_mark; struct inotify_event_info *event; struct fsnotify_event *fsn_event; + struct fsnotify_group *group = inode_mark->group; int ret; int len = 0; int alloc_len = sizeof(struct inotify_event_info); struct mem_cgroup *old_memcg; - if ((inode_mark->mask & FS_EXCL_UNLINK) && - path && d_unlinked(path->dentry)) - return 0; - - if (file_name) { - len = file_name->len; + if (name) { + len = name->len; alloc_len += len + 1; } @@ -117,7 +113,7 @@ static int inotify_one_event(struct fsnotify_group *group, u32 mask, event->sync_cookie = cookie; event->name_len = len; if (len) - strcpy(event->name, file_name->name); + strcpy(event->name, name->name); ret = fsnotify_add_event(group, fsn_event, inotify_merge); if (ret) { @@ -131,37 +127,6 @@ static int inotify_one_event(struct fsnotify_group *group, u32 mask, return 0; } -int inotify_handle_event(struct fsnotify_group *group, u32 mask, - const void *data, int data_type, struct inode *dir, - const struct qstr *file_name, u32 cookie, - struct fsnotify_iter_info *iter_info) -{ - const struct path *path = fsnotify_data_path(data, data_type); - struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info); - struct fsnotify_mark *child_mark = fsnotify_iter_child_mark(iter_info); - int ret = 0; - - if (WARN_ON(fsnotify_iter_vfsmount_mark(iter_info))) - return 0; - - /* - * Some events cannot be sent on both parent and child marks - * (e.g. IN_CREATE). Those events are always sent on inode_mark. - * For events that are possible on both parent and child (e.g. IN_OPEN), - * event is sent on inode_mark with name if the parent is watching and - * is sent on child_mark without name if child is watching. - * If both parent and child are watching, report the event with child's - * name here and report another event without child's name below. - */ - if (inode_mark) - ret = inotify_one_event(group, mask, inode_mark, path, - file_name, cookie); - if (ret || !child_mark) - return ret; - - return inotify_one_event(group, mask, child_mark, path, NULL, 0); -} - static void inotify_freeing_mark(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group) { inotify_ignored_and_remove_idr(fsn_mark, group); @@ -227,7 +192,7 @@ static void inotify_free_mark(struct fsnotify_mark *fsn_mark) } const struct fsnotify_ops inotify_fsnotify_ops = { - .handle_event = inotify_handle_event, + .handle_inode_event = inotify_handle_inode_event, .free_group_priv = inotify_free_group_priv, .free_event = inotify_free_event, .freeing_mark = inotify_freeing_mark, diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 186722ba3894..59c177011a0f 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -37,6 +37,15 @@ #include +/* + * An inotify watch requires allocating an inotify_inode_mark structure as + * well as pinning the watched inode. Doubling the size of a VFS inode + * should be more than enough to cover the additional filesystem inode + * size increase. + */ +#define INOTIFY_WATCH_COST (sizeof(struct inotify_inode_mark) + \ + 2 * sizeof(struct inode)) + /* configurable via /proc/sys/fs/inotify/ */ static int inotify_max_queued_events __read_mostly; @@ -486,14 +495,10 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group) { struct inotify_inode_mark *i_mark; - struct fsnotify_iter_info iter_info = { }; - - fsnotify_iter_set_report_type_mark(&iter_info, FSNOTIFY_OBJ_TYPE_INODE, - fsn_mark); /* Queue ignore event for the watch */ - inotify_handle_event(group, FS_IN_IGNORED, NULL, FSNOTIFY_EVENT_NONE, - NULL, NULL, 0, &iter_info); + inotify_handle_inode_event(fsn_mark, FS_IN_IGNORED, NULL, NULL, NULL, + 0); i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); /* remove this mark from the idr */ @@ -801,6 +806,18 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd) */ static int __init inotify_user_setup(void) { + unsigned long watches_max; + struct sysinfo si; + + si_meminfo(&si); + /* + * Allow up to 1% of addressable memory to be allocated for inotify + * watches (per user) limited to the range [8192, 1048576]. + */ + watches_max = (((si.totalram - si.totalhigh) / 100) << PAGE_SHIFT) / + INOTIFY_WATCH_COST; + watches_max = clamp(watches_max, 8192UL, 1048576UL); + BUILD_BUG_ON(IN_ACCESS != FS_ACCESS); BUILD_BUG_ON(IN_MODIFY != FS_MODIFY); BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB); @@ -827,7 +844,7 @@ static int __init inotify_user_setup(void) inotify_max_queued_events = 16384; init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES] = 128; - init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = 8192; + init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = watches_max; return 0; } diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index f8529a3a2923..a2e42d3cd87c 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -137,6 +137,7 @@ struct mem_cgroup; * if @file_name is not NULL, this is the directory that * @file_name is relative to. * @file_name: optional file name associated with event + * @cookie: inotify rename cookie * * free_group_priv - called when a group refcnt hits 0 to clean up the private union * freeing_mark - called when a mark is being destroyed for some reason. The group @@ -151,7 +152,7 @@ struct fsnotify_ops { struct fsnotify_iter_info *iter_info); int (*handle_inode_event)(struct fsnotify_mark *mark, u32 mask, struct inode *inode, struct inode *dir, - const struct qstr *file_name); + const struct qstr *file_name, u32 cookie); void (*free_group_priv)(struct fsnotify_group *group); void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group); void (*free_event)(struct fsnotify_event *event); @@ -277,7 +278,7 @@ static inline const struct path *fsnotify_data_path(const void *data, enum fsnotify_obj_type { FSNOTIFY_OBJ_TYPE_INODE, - FSNOTIFY_OBJ_TYPE_CHILD, + FSNOTIFY_OBJ_TYPE_PARENT, FSNOTIFY_OBJ_TYPE_VFSMOUNT, FSNOTIFY_OBJ_TYPE_SB, FSNOTIFY_OBJ_TYPE_COUNT, @@ -285,7 +286,7 @@ enum fsnotify_obj_type { }; #define FSNOTIFY_OBJ_TYPE_INODE_FL (1U << FSNOTIFY_OBJ_TYPE_INODE) -#define FSNOTIFY_OBJ_TYPE_CHILD_FL (1U << FSNOTIFY_OBJ_TYPE_CHILD) +#define FSNOTIFY_OBJ_TYPE_PARENT_FL (1U << FSNOTIFY_OBJ_TYPE_PARENT) #define FSNOTIFY_OBJ_TYPE_VFSMOUNT_FL (1U << FSNOTIFY_OBJ_TYPE_VFSMOUNT) #define FSNOTIFY_OBJ_TYPE_SB_FL (1U << FSNOTIFY_OBJ_TYPE_SB) #define FSNOTIFY_OBJ_ALL_TYPES_MASK ((1U << FSNOTIFY_OBJ_TYPE_COUNT) - 1) @@ -330,7 +331,7 @@ static inline struct fsnotify_mark *fsnotify_iter_##name##_mark( \ } FSNOTIFY_ITER_FUNCS(inode, INODE) -FSNOTIFY_ITER_FUNCS(child, CHILD) +FSNOTIFY_ITER_FUNCS(parent, PARENT) FSNOTIFY_ITER_FUNCS(vfsmount, VFSMOUNT) FSNOTIFY_ITER_FUNCS(sb, SB) diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c index bfcfcd61adb6..5b3f01da172b 100644 --- a/kernel/audit_fsnotify.c +++ b/kernel/audit_fsnotify.c @@ -154,7 +154,7 @@ static void audit_autoremove_mark_rule(struct audit_fsnotify_mark *audit_mark) /* Update mark data in audit rules based on fsnotify events. */ static int audit_mark_handle_event(struct fsnotify_mark *inode_mark, u32 mask, struct inode *inode, struct inode *dir, - const struct qstr *dname) + const struct qstr *dname, u32 cookie) { struct audit_fsnotify_mark *audit_mark; diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 83e1c07fc99e..6c91902f4f45 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -1037,7 +1037,7 @@ static void evict_chunk(struct audit_chunk *chunk) static int audit_tree_handle_event(struct fsnotify_mark *mark, u32 mask, struct inode *inode, struct inode *dir, - const struct qstr *file_name) + const struct qstr *file_name, u32 cookie) { return 0; } diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 246e5ba704c0..2acf7ca49154 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -466,7 +466,7 @@ void audit_remove_watch_rule(struct audit_krule *krule) /* Update watch data in audit rules based on fsnotify events. */ static int audit_watch_handle_event(struct fsnotify_mark *inode_mark, u32 mask, struct inode *inode, struct inode *dir, - const struct qstr *dname) + const struct qstr *dname, u32 cookie) { struct audit_parent *parent;