linux/fs/proc/root.c
Linus Torvalds c7c4591db6 Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
Pull namespace changes from Eric Biederman:
 "This is an assorted mishmash of small cleanups, enhancements and bug
  fixes.

  The major theme is user namespace mount restrictions.  nsown_capable
  is killed as it encourages not thinking about details that need to be
  considered.  A very hard to hit pid namespace exiting bug was finally
  tracked and fixed.  A couple of cleanups to the basic namespace
  infrastructure.

  Finally there is an enhancement that makes per user namespace
  capabilities usable as capabilities, and an enhancement that allows
  the per userns root to nice other processes in the user namespace"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace:
  userns:  Kill nsown_capable it makes the wrong thing easy
  capabilities: allow nice if we are privileged
  pidns: Don't have unshare(CLONE_NEWPID) imply CLONE_THREAD
  userns: Allow PR_CAPBSET_DROP in a user namespace.
  namespaces: Simplify copy_namespaces so it is clear what is going on.
  pidns: Fix hang in zap_pid_ns_processes by sending a potentially extra wakeup
  sysfs: Restrict mounting sysfs
  userns: Better restrictions on when proc and sysfs can be mounted
  vfs: Don't copy mount bind mounts of /proc/<pid>/ns/mnt between namespaces
  kernel/nsproxy.c: Improving a snippet of code.
  proc: Restrict mounting the proc filesystem
  vfs: Lock in place mounts from more privileged users
2013-09-07 14:35:32 -07:00

271 lines
5.6 KiB
C

/*
* linux/fs/proc/root.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
*
* proc root directory handling functions
*/
#include <asm/uaccess.h>
#include <linux/errno.h>
#include <linux/time.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/bitops.h>
#include <linux/user_namespace.h>
#include <linux/mount.h>
#include <linux/pid_namespace.h>
#include <linux/parser.h>
#include "internal.h"
static int proc_test_super(struct super_block *sb, void *data)
{
return sb->s_fs_info == data;
}
static int proc_set_super(struct super_block *sb, void *data)
{
int err = set_anon_super(sb, NULL);
if (!err) {
struct pid_namespace *ns = (struct pid_namespace *)data;
sb->s_fs_info = get_pid_ns(ns);
}
return err;
}
enum {
Opt_gid, Opt_hidepid, Opt_err,
};
static const match_table_t tokens = {
{Opt_hidepid, "hidepid=%u"},
{Opt_gid, "gid=%u"},
{Opt_err, NULL},
};
static int proc_parse_options(char *options, struct pid_namespace *pid)
{
char *p;
substring_t args[MAX_OPT_ARGS];
int option;
if (!options)
return 1;
while ((p = strsep(&options, ",")) != NULL) {
int token;
if (!*p)
continue;
args[0].to = args[0].from = NULL;
token = match_token(p, tokens, args);
switch (token) {
case Opt_gid:
if (match_int(&args[0], &option))
return 0;
pid->pid_gid = make_kgid(current_user_ns(), option);
break;
case Opt_hidepid:
if (match_int(&args[0], &option))
return 0;
if (option < 0 || option > 2) {
pr_err("proc: hidepid value must be between 0 and 2.\n");
return 0;
}
pid->hide_pid = option;
break;
default:
pr_err("proc: unrecognized mount option \"%s\" "
"or missing value\n", p);
return 0;
}
}
return 1;
}
int proc_remount(struct super_block *sb, int *flags, char *data)
{
struct pid_namespace *pid = sb->s_fs_info;
return !proc_parse_options(data, pid);
}
static struct dentry *proc_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
int err;
struct super_block *sb;
struct pid_namespace *ns;
char *options;
if (flags & MS_KERNMOUNT) {
ns = (struct pid_namespace *)data;
options = NULL;
} else {
ns = task_active_pid_ns(current);
options = data;
if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type))
return ERR_PTR(-EPERM);
/* Does the mounter have privilege over the pid namespace? */
if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
}
sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns);
if (IS_ERR(sb))
return ERR_CAST(sb);
if (!proc_parse_options(options, ns)) {
deactivate_locked_super(sb);
return ERR_PTR(-EINVAL);
}
if (!sb->s_root) {
err = proc_fill_super(sb);
if (err) {
deactivate_locked_super(sb);
return ERR_PTR(err);
}
sb->s_flags |= MS_ACTIVE;
}
return dget(sb->s_root);
}
static void proc_kill_sb(struct super_block *sb)
{
struct pid_namespace *ns;
ns = (struct pid_namespace *)sb->s_fs_info;
if (ns->proc_self)
dput(ns->proc_self);
kill_anon_super(sb);
put_pid_ns(ns);
}
static struct file_system_type proc_fs_type = {
.name = "proc",
.mount = proc_mount,
.kill_sb = proc_kill_sb,
.fs_flags = FS_USERNS_MOUNT,
};
void __init proc_root_init(void)
{
int err;
proc_init_inodecache();
err = register_filesystem(&proc_fs_type);
if (err)
return;
proc_self_init();
proc_symlink("mounts", NULL, "self/mounts");
proc_net_init();
#ifdef CONFIG_SYSVIPC
proc_mkdir("sysvipc", NULL);
#endif
proc_mkdir("fs", NULL);
proc_mkdir("driver", NULL);
proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */
#if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE)
/* just give it a mountpoint */
proc_mkdir("openprom", NULL);
#endif
proc_tty_init();
#ifdef CONFIG_PROC_DEVICETREE
proc_device_tree_init();
#endif
proc_mkdir("bus", NULL);
proc_sys_init();
}
static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat
)
{
generic_fillattr(dentry->d_inode, stat);
stat->nlink = proc_root.nlink + nr_processes();
return 0;
}
static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, unsigned int flags)
{
if (!proc_lookup(dir, dentry, flags))
return NULL;
return proc_pid_lookup(dir, dentry, flags);
}
static int proc_root_readdir(struct file *file, struct dir_context *ctx)
{
if (ctx->pos < FIRST_PROCESS_ENTRY) {
int error = proc_readdir(file, ctx);
if (unlikely(error <= 0))
return error;
ctx->pos = FIRST_PROCESS_ENTRY;
}
return proc_pid_readdir(file, ctx);
}
/*
* The root /proc directory is special, as it has the
* <pid> directories. Thus we don't use the generic
* directory handling functions for that..
*/
static const struct file_operations proc_root_operations = {
.read = generic_read_dir,
.iterate = proc_root_readdir,
.llseek = default_llseek,
};
/*
* proc root can do almost nothing..
*/
static const struct inode_operations proc_root_inode_operations = {
.lookup = proc_root_lookup,
.getattr = proc_root_getattr,
};
/*
* This is the root "inode" in the /proc tree..
*/
struct proc_dir_entry proc_root = {
.low_ino = PROC_ROOT_INO,
.namelen = 5,
.mode = S_IFDIR | S_IRUGO | S_IXUGO,
.nlink = 2,
.count = ATOMIC_INIT(1),
.proc_iops = &proc_root_inode_operations,
.proc_fops = &proc_root_operations,
.parent = &proc_root,
.name = "/proc",
};
int pid_ns_prepare_proc(struct pid_namespace *ns)
{
struct vfsmount *mnt;
mnt = kern_mount_data(&proc_fs_type, ns);
if (IS_ERR(mnt))
return PTR_ERR(mnt);
ns->proc_mnt = mnt;
return 0;
}
void pid_ns_release_proc(struct pid_namespace *ns)
{
kern_unmount(ns->proc_mnt);
}