diff --git a/fs/proc/base.c b/fs/proc/base.c index fbff900fd5ad..6afca09a6534 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -350,18 +350,21 @@ struct proc_mounts { static int mounts_open(struct inode *inode, struct file *file) { struct task_struct *task = get_proc_task(inode); + struct nsproxy *nsp; struct mnt_namespace *ns = NULL; struct proc_mounts *p; int ret = -EINVAL; if (task) { - task_lock(task); - if (task->nsproxy) { - ns = task->nsproxy->mnt_ns; + rcu_read_lock(); + nsp = task_nsproxy(task); + if (nsp) { + ns = nsp->mnt_ns; if (ns) get_mnt_ns(ns); } - task_unlock(task); + rcu_read_unlock(); + put_task_struct(task); } @@ -424,16 +427,20 @@ static int mountstats_open(struct inode *inode, struct file *file) if (!ret) { struct seq_file *m = file->private_data; + struct nsproxy *nsp; struct mnt_namespace *mnt_ns = NULL; struct task_struct *task = get_proc_task(inode); if (task) { - task_lock(task); - if (task->nsproxy) - mnt_ns = task->nsproxy->mnt_ns; - if (mnt_ns) - get_mnt_ns(mnt_ns); - task_unlock(task); + rcu_read_lock(); + nsp = task_nsproxy(task); + if (nsp) { + mnt_ns = nsp->mnt_ns; + if (mnt_ns) + get_mnt_ns(mnt_ns); + } + rcu_read_unlock(); + put_task_struct(task); } diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index f1eca68751a9..0e66b57631fc 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -32,8 +32,39 @@ struct nsproxy { }; extern struct nsproxy init_nsproxy; +/* + * the namespaces access rules are: + * + * 1. only current task is allowed to change tsk->nsproxy pointer or + * any pointer on the nsproxy itself + * + * 2. when accessing (i.e. reading) current task's namespaces - no + * precautions should be taken - just dereference the pointers + * + * 3. the access to other task namespaces is performed like this + * rcu_read_lock(); + * nsproxy = task_nsproxy(tsk); + * if (nsproxy != NULL) { + * / * + * * work with the namespaces here + * * e.g. get the reference on one of them + * * / + * } / * + * * NULL task_nsproxy() means that this task is + * * almost dead (zombie) + * * / + * rcu_read_unlock(); + * + */ + +static inline struct nsproxy *task_nsproxy(struct task_struct *tsk) +{ + return rcu_dereference(tsk->nsproxy); +} + int copy_namespaces(unsigned long flags, struct task_struct *tsk); -void get_task_namespaces(struct task_struct *tsk); +void exit_task_namespaces(struct task_struct *tsk); +void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new); void free_nsproxy(struct nsproxy *ns); int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **, struct fs_struct *); @@ -45,15 +76,9 @@ static inline void put_nsproxy(struct nsproxy *ns) } } -static inline void exit_task_namespaces(struct task_struct *p) +static inline void get_nsproxy(struct nsproxy *ns) { - struct nsproxy *ns = p->nsproxy; - if (ns) { - task_lock(p); - p->nsproxy = NULL; - task_unlock(p); - put_nsproxy(ns); - } + atomic_inc(&ns->count); } #ifdef CONFIG_CGROUP_NS diff --git a/kernel/exit.c b/kernel/exit.c index d22aefabb129..db9764186d5a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -400,9 +400,10 @@ void daemonize(const char *name, ...) current->fs = fs; atomic_inc(&fs->count); - exit_task_namespaces(current); - current->nsproxy = init_task.nsproxy; - get_task_namespaces(current); + if (current->nsproxy != init_task.nsproxy) { + get_nsproxy(init_task.nsproxy); + switch_task_namespaces(current, init_task.nsproxy); + } exit_files(current); current->files = init_task.files; diff --git a/kernel/fork.c b/kernel/fork.c index 2deaf481efab..d2f4a420a5b9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1632,7 +1632,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL; struct files_struct *fd, *new_fd = NULL; struct sem_undo_list *new_ulist = NULL; - struct nsproxy *new_nsproxy = NULL, *old_nsproxy = NULL; + struct nsproxy *new_nsproxy = NULL; check_unshare_flags(&unshare_flags); @@ -1662,14 +1662,13 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) if (new_fs || new_mm || new_fd || new_ulist || new_nsproxy) { - task_lock(current); - if (new_nsproxy) { - old_nsproxy = current->nsproxy; - current->nsproxy = new_nsproxy; - new_nsproxy = old_nsproxy; + switch_task_namespaces(current, new_nsproxy); + new_nsproxy = NULL; } + task_lock(current); + if (new_fs) { fs = current->fs; current->fs = new_fs; diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index e981c61304f1..c8ef7c2992ed 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -26,19 +26,6 @@ static struct kmem_cache *nsproxy_cachep; struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy); -static inline void get_nsproxy(struct nsproxy *ns) -{ - atomic_inc(&ns->count); -} - -void get_task_namespaces(struct task_struct *tsk) -{ - struct nsproxy *ns = tsk->nsproxy; - if (ns) { - get_nsproxy(ns); - } -} - /* * creates a copy of "orig" with refcount 1. */ @@ -216,6 +203,33 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags, return err; } +void switch_task_namespaces(struct task_struct *p, struct nsproxy *new) +{ + struct nsproxy *ns; + + might_sleep(); + + ns = p->nsproxy; + + rcu_assign_pointer(p->nsproxy, new); + + if (ns && atomic_dec_and_test(&ns->count)) { + /* + * wait for others to get what they want from this nsproxy. + * + * cannot release this nsproxy via the call_rcu() since + * put_mnt_ns() will want to sleep + */ + synchronize_rcu(); + free_nsproxy(ns); + } +} + +void exit_task_namespaces(struct task_struct *p) +{ + switch_task_namespaces(p, NULL); +} + static int __init nsproxy_cache_init(void) { nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1072d16696c3..4a2640d38261 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -744,10 +744,10 @@ static struct net *get_net_ns_by_pid(pid_t pid) rcu_read_lock(); tsk = find_task_by_pid(pid); if (tsk) { - task_lock(tsk); - if (tsk->nsproxy) - net = get_net(tsk->nsproxy->net_ns); - task_unlock(tsk); + struct nsproxy *nsproxy; + nsproxy = task_nsproxy(tsk); + if (nsproxy) + net = get_net(nsproxy->net_ns); } rcu_read_unlock(); return net;