diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 5375582ea5f6..f2ff578fc03a 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -764,14 +764,14 @@ struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss); * @pos: the css * to use as the loop cursor * @parent: css whose children to walk * - * Walk @parent's children. Must be called under rcu_read_lock(). A child - * css which hasn't finished ->css_online() or already has finished - * ->css_offline() may show up during traversal and it's each subsystem's - * responsibility to verify that each @pos is alive. + * Walk @parent's children. Must be called under rcu_read_lock(). * - * If a subsystem synchronizes against the parent in its ->css_online() and - * before starting iterating, a css which finished ->css_online() is - * guaranteed to be visible in the future iterations. + * If a subsystem synchronizes ->css_online() and the start of iteration, a + * css which finished ->css_online() is guaranteed to be visible in the + * future iterations and will stay visible until the last reference is put. + * A css which hasn't finished ->css_online() or already finished + * ->css_offline() may show up during traversal. It's each subsystem's + * responsibility to synchronize against on/offlining. * * It is allowed to temporarily drop RCU read lock during iteration. The * caller is responsible for ensuring that @pos remains accessible until @@ -794,17 +794,16 @@ css_rightmost_descendant(struct cgroup_subsys_state *pos); * @root: css whose descendants to walk * * Walk @root's descendants. @root is included in the iteration and the - * first node to be visited. Must be called under rcu_read_lock(). A - * descendant css which hasn't finished ->css_online() or already has - * finished ->css_offline() may show up during traversal and it's each - * subsystem's responsibility to verify that each @pos is alive. + * first node to be visited. Must be called under rcu_read_lock(). 
* - * If a subsystem synchronizes against the parent in its ->css_online() and - * before starting iterating, and synchronizes against @pos on each - * iteration, any descendant css which finished ->css_online() is - * guaranteed to be visible in the future iterations. + * If a subsystem synchronizes ->css_online() and the start of iteration, a + * css which finished ->css_online() is guaranteed to be visible in the + * future iterations and will stay visible until the last reference is put. + * A css which hasn't finished ->css_online() or already finished + * ->css_offline() may show up during traversal. It's each subsystem's + * responsibility to synchronize against on/offlining. * - * In other words, the following guarantees that a descendant can't escape + * For example, the following guarantees that a descendant can't escape * state updates of its ancestors. * * my_online(@css) @@ -860,8 +859,17 @@ css_next_descendant_post(struct cgroup_subsys_state *pos, * * Similar to css_for_each_descendant_pre() but performs post-order * traversal instead. @root is included in the iteration and the last - * node to be visited. Note that the walk visibility guarantee described - * in pre-order walk doesn't apply the same to post-order walks. + * node to be visited. + * + * If a subsystem synchronizes ->css_online() and the start of iteration, a + * css which finished ->css_online() is guaranteed to be visible in the + * future iterations and will stay visible until the last reference is put. + * A css which hasn't finished ->css_online() or already finished + * ->css_offline() may show up during traversal. It's each subsystem's + * responsibility to synchronize against on/offlining. + * + * Note that the walk visibility guarantee example described in pre-order + * walk doesn't apply in the same way to post-order walks. 
*/ #define css_for_each_descendant_post(pos, css) \ for ((pos) = css_next_descendant_post(NULL, (css)); (pos); \ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 5544e685f2da..097a1fc1e1e8 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3089,21 +3089,25 @@ static int cgroup_task_count(const struct cgroup *cgrp) /** * css_next_child - find the next child of a given css - * @pos_css: the current position (%NULL to initiate traversal) - * @parent_css: css whose children to walk + * @pos: the current position (%NULL to initiate traversal) + * @parent: css whose children to walk * - * This function returns the next child of @parent_css and should be called + * This function returns the next child of @parent and should be called * under either cgroup_mutex or RCU read lock. The only requirement is - * that @parent_css and @pos_css are accessible. The next sibling is - * guaranteed to be returned regardless of their states. + * that @parent and @pos are accessible. The next sibling is guaranteed to + * be returned regardless of their states. + * + * If a subsystem synchronizes ->css_online() and the start of iteration, a + * css which finished ->css_online() is guaranteed to be visible in the + * future iterations and will stay visible until the last reference is put. + * A css which hasn't finished ->css_online() or already finished + * ->css_offline() may show up during traversal. It's each subsystem's + * responsibility to synchronize against on/offlining. */ -struct cgroup_subsys_state * -css_next_child(struct cgroup_subsys_state *pos_css, - struct cgroup_subsys_state *parent_css) +struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos, + struct cgroup_subsys_state *parent) { - struct cgroup *pos = pos_css ? 
pos_css->cgroup : NULL; - struct cgroup *cgrp = parent_css->cgroup; - struct cgroup *next; + struct cgroup_subsys_state *next; cgroup_assert_mutex_or_rcu_locked(); @@ -3128,27 +3132,21 @@ css_next_child(struct cgroup_subsys_state *pos_css, * races against release and the race window is very small. */ if (!pos) { - next = list_entry_rcu(cgrp->self.children.next, struct cgroup, self.sibling); - } else if (likely(!(pos->self.flags & CSS_RELEASED))) { - next = list_entry_rcu(pos->self.sibling.next, struct cgroup, self.sibling); + next = list_entry_rcu(parent->children.next, struct cgroup_subsys_state, sibling); + } else if (likely(!(pos->flags & CSS_RELEASED))) { + next = list_entry_rcu(pos->sibling.next, struct cgroup_subsys_state, sibling); } else { - list_for_each_entry_rcu(next, &cgrp->self.children, self.sibling) - if (next->self.serial_nr > pos->self.serial_nr) + list_for_each_entry_rcu(next, &parent->children, sibling) + if (next->serial_nr > pos->serial_nr) break; } /* * @next, if not pointing to the head, can be dereferenced and is - * the next sibling; however, it might have @ss disabled. If so, - * fast-forward to the next enabled one. + * the next sibling. */ - while (&next->self.sibling != &cgrp->self.children) { - struct cgroup_subsys_state *next_css = cgroup_css(next, parent_css->ss); - - if (next_css) - return next_css; - next = list_entry_rcu(next->self.sibling.next, struct cgroup, self.sibling); - } + if (&next->sibling != &parent->children) + return next; return NULL; } @@ -3165,6 +3163,13 @@ css_next_child(struct cgroup_subsys_state *pos_css, * doesn't require the whole traversal to be contained in a single critical * section. This function will return the correct next descendant as long * as both @pos and @root are accessible and @pos is a descendant of @root. 
+ * + * If a subsystem synchronizes ->css_online() and the start of iteration, a + * css which finished ->css_online() is guaranteed to be visible in the + * future iterations and will stay visible until the last reference is put. + * A css which hasn't finished ->css_online() or already finished + * ->css_offline() may show up during traversal. It's each subsystem's + * responsibility to synchronize against on/offlining. */ struct cgroup_subsys_state * css_next_descendant_pre(struct cgroup_subsys_state *pos, @@ -3252,6 +3257,13 @@ css_leftmost_descendant(struct cgroup_subsys_state *pos) * section. This function will return the correct next descendant as long * as both @pos and @cgroup are accessible and @pos is a descendant of * @cgroup. + * + * If a subsystem synchronizes ->css_online() and the start of iteration, a + * css which finished ->css_online() is guaranteed to be visible in the + * future iterations and will stay visible until the last reference is put. + * A css which hasn't finished ->css_online() or already finished + * ->css_offline() may show up during traversal. It's each subsystem's + * responsibility to synchronize against on/offlining. */ struct cgroup_subsys_state * css_next_descendant_post(struct cgroup_subsys_state *pos,