linux/fs/stat.c
Linus Torvalds 644473e9c6 Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
Pull user namespace enhancements from Eric Biederman:
 "This is a course correction for the user namespace, so that we can
  reach an inexpensive, maintainable, and reasonably complete
  implementation.

  Highlights:
   - Config guards make it impossible to enable the user namespace
     together with code that has not been converted to be user namespace
     safe.

   - Use of the new kuid_t type ensures that, even if you somehow get
     past the config guards, you will get compile-time type errors if
     you enable user namespaces and attempt to build code whose
     permission checks have not been updated to be user namespace safe.
     [A minimal sketch of the kuid_t idiom follows the quoted message
     below.]

   - All uids from child user namespaces are mapped into the initial
     user namespace before they are processed, removing the need for an
     additional check that the compared uids come from the same user
     namespace.

   - With user namespaces compiled out, performance is as good as or
     better than it is today.

   - For most operations, absolutely nothing changes, either in
     performance or operationally, with user namespaces enabled.

   - The worst-case performance hit I could come up with was timing 1
     billion cache-cold stat operations with the user namespace code
     enabled.  This went from 156s to 164s on my laptop (i.e. from 156ns
     to 164ns per stat operation).

   - (uid_t)-1 and (gid_t)-1 are reserved as internal error values.
     Most uid/gid-setting system calls treat these values specially
     anyway, so attempting to use -1 as a uid would likely cause
     entertaining failures in userspace.

   - If setuid is called with a uid that cannot be mapped, setuid fails.
     I have looked at sendmail, login, ssh and every other program I
     could think of that calls setuid, and they all check for and handle
     the case where setuid fails.  [A userspace sketch of this check
     follows the shortlog below.]

   - If stat or a similar system call is made from a context in which we
     cannot map a uid, we lie and return overflowuid.  The LFS
     experience suggests that not lying and returning an error code
     might be better, but the historical precedent with uids is
     different, and I cannot think of anything that would break by lying
     about a uid we can't map.

   - Capabilities are localized to the current user namespace, making it
     safe to give the initial user in a user namespace all capabilities.

  My git tree covers all of the modifications needed to convert the core
  kernel and enough changes to make a system bootable to runlevel 1."
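
A minimal sketch of the kuid_t idiom described above (not part of the
commit): ids are converted at the userspace boundary with make_kuid() /
from_kuid_munged(), and only kuid_t values are compared inside the kernel
with uid_eq().  The helpers are the real <linux/uidgid.h> API; the
functions check_owner(), set_owner() and report_owner() are made-up names
used purely for illustration.

	#include <linux/fs.h>
	#include <linux/cred.h>
	#include <linux/uidgid.h>

	/*
	 * Hypothetical permission check.  With CONFIG_UIDGID_STRICT_TYPE_CHECKS
	 * enabled, kuid_t is a distinct struct type, so comparing it against a
	 * raw uid_t with "==" no longer compiles; uid_eq() must be used.
	 */
	static bool check_owner(kuid_t owner)
	{
		return uid_eq(current_uid(), owner);
	}

	/*
	 * Converting a user-supplied uid_t at the syscall boundary.  make_kuid()
	 * returns INVALID_UID if the id has no mapping in the caller's user
	 * namespace, which is why (uid_t)-1 is reserved as an error value.
	 */
	static int set_owner(struct inode *inode, uid_t user_uid)
	{
		kuid_t kuid = make_kuid(current_user_ns(), user_uid);

		if (!uid_valid(kuid))
			return -EINVAL;
		inode->i_uid = kuid;
		return 0;
	}

	/*
	 * Going the other way (as stat does below): a kuid with no mapping in
	 * the caller's namespace is reported as overflowuid rather than as an
	 * error.
	 */
	static uid_t report_owner(const struct inode *inode)
	{
		return from_kuid_munged(current_user_ns(), inode->i_uid);
	}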

Fix up trivial conflicts due to nearby independent changes in fs/stat.c

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace: (46 commits)
  userns:  Silence silly gcc warning.
  cred: use correct cred accessor with regards to rcu read lock
  userns: Convert the move_pages, and migrate_pages permission checks to use uid_eq
  userns: Convert cgroup permission checks to use uid_eq
  userns: Convert tmpfs to use kuid and kgid where appropriate
  userns: Convert sysfs to use kgid/kuid where appropriate
  userns: Convert sysctl permission checks to use kuid and kgids.
  userns: Convert proc to use kuid/kgid where appropriate
  userns: Convert ext4 to user kuid/kgid where appropriate
  userns: Convert ext3 to use kuid/kgid where appropriate
  userns: Convert ext2 to use kuid/kgid where appropriate.
  userns: Convert devpts to use kuid/kgid where appropriate
  userns: Convert binary formats to use kuid/kgid where appropriate
  userns: Add negative depends on entries to avoid building code that is userns unsafe
  userns: signal remove unnecessary map_cred_ns
  userns: Teach inode_capable to understand inodes whose uids map to other namespaces.
  userns: Fail exec for suid and sgid binaries with ids outside our user namespace.
  userns: Convert stat to return values mapped from kuids and kgids
  userns: Convert user specfied uids and gids in chown into kuids and kgid
  userns: Use uid_eq gid_eq helpers when comparing kuids and kgids in the vfs
  ...
2012-05-23 17:42:39 -07:00
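
A minimal userspace sketch of the setuid failure handling the message
relies on (illustrative only, not taken from sendmail, login or ssh;
drop_privileges() and TARGET_UID are made-up names).  If the target uid
has no mapping in the caller's user namespace, setuid() now fails, so the
caller must check the return value rather than assume the privilege drop
succeeded:

	#include <errno.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <unistd.h>

	#define TARGET_UID 1000		/* example uid, assumed to exist */

	static void drop_privileges(void)
	{
		if (setuid(TARGET_UID) != 0) {
			/* e.g. EINVAL when the uid cannot be mapped */
			perror("setuid");
			exit(EXIT_FAILURE);	/* never keep running with the old uid */
		}
	}

	int main(void)
	{
		drop_privileges();
		/* ... continue as the unprivileged user ... */
		return 0;
	}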

/*
 *  linux/fs/stat.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/export.h>
#include <linux/mm.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/highuid.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/pagemap.h>

#include <asm/uaccess.h>
#include <asm/unistd.h>

void generic_fillattr(struct inode *inode, struct kstat *stat)
{
	stat->dev = inode->i_sb->s_dev;
	stat->ino = inode->i_ino;
	stat->mode = inode->i_mode;
	stat->nlink = inode->i_nlink;
	stat->uid = inode->i_uid;
	stat->gid = inode->i_gid;
	stat->rdev = inode->i_rdev;
	stat->size = i_size_read(inode);
	stat->atime = inode->i_atime;
	stat->mtime = inode->i_mtime;
	stat->ctime = inode->i_ctime;
	stat->blksize = (1 << inode->i_blkbits);
	stat->blocks = inode->i_blocks;
}
EXPORT_SYMBOL(generic_fillattr);

int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
{
	struct inode *inode = dentry->d_inode;
	int retval;

	retval = security_inode_getattr(mnt, dentry);
	if (retval)
		return retval;

	if (inode->i_op->getattr)
		return inode->i_op->getattr(mnt, dentry, stat);

	generic_fillattr(inode, stat);
	return 0;
}
EXPORT_SYMBOL(vfs_getattr);

int vfs_fstat(unsigned int fd, struct kstat *stat)
{
	int fput_needed;
	struct file *f = fget_light(fd, &fput_needed);
	int error = -EBADF;

	if (f) {
		error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat);
		fput_light(f, fput_needed);
	}
	return error;
}
EXPORT_SYMBOL(vfs_fstat);

int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat,
		int flag)
{
	struct path path;
	int error = -EINVAL;
	int lookup_flags = 0;

	if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT |
		      AT_EMPTY_PATH)) != 0)
		goto out;

	if (!(flag & AT_SYMLINK_NOFOLLOW))
		lookup_flags |= LOOKUP_FOLLOW;
	if (flag & AT_EMPTY_PATH)
		lookup_flags |= LOOKUP_EMPTY;

	error = user_path_at(dfd, filename, lookup_flags, &path);
	if (error)
		goto out;

	error = vfs_getattr(path.mnt, path.dentry, stat);
	path_put(&path);
out:
	return error;
}
EXPORT_SYMBOL(vfs_fstatat);

int vfs_stat(const char __user *name, struct kstat *stat)
{
	return vfs_fstatat(AT_FDCWD, name, stat, 0);
}
EXPORT_SYMBOL(vfs_stat);

int vfs_lstat(const char __user *name, struct kstat *stat)
{
	return vfs_fstatat(AT_FDCWD, name, stat, AT_SYMLINK_NOFOLLOW);
}
EXPORT_SYMBOL(vfs_lstat);

#ifdef __ARCH_WANT_OLD_STAT

/*
 * For backward compatibility?  Maybe this should be moved
 * into arch/i386 instead?
 */
static int cp_old_stat(struct kstat *stat, struct __old_kernel_stat __user * statbuf)
{
	static int warncount = 5;
	struct __old_kernel_stat tmp;

	if (warncount > 0) {
		warncount--;
		printk(KERN_WARNING "VFS: Warning: %s using old stat() call. Recompile your binary.\n",
			current->comm);
	} else if (warncount < 0) {
		/* it's laughable, but... */
		warncount = 0;
	}

	memset(&tmp, 0, sizeof(struct __old_kernel_stat));
	tmp.st_dev = old_encode_dev(stat->dev);
	tmp.st_ino = stat->ino;
	if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino)
		return -EOVERFLOW;
	tmp.st_mode = stat->mode;
	tmp.st_nlink = stat->nlink;
	if (tmp.st_nlink != stat->nlink)
		return -EOVERFLOW;
	SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid));
	SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid));
	tmp.st_rdev = old_encode_dev(stat->rdev);
#if BITS_PER_LONG == 32
	if (stat->size > MAX_NON_LFS)
		return -EOVERFLOW;
#endif
	tmp.st_size = stat->size;
	tmp.st_atime = stat->atime.tv_sec;
	tmp.st_mtime = stat->mtime.tv_sec;
	tmp.st_ctime = stat->ctime.tv_sec;
	return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? -EFAULT : 0;
}

SYSCALL_DEFINE2(stat, const char __user *, filename,
		struct __old_kernel_stat __user *, statbuf)
{
	struct kstat stat;
	int error;

	error = vfs_stat(filename, &stat);
	if (error)
		return error;

	return cp_old_stat(&stat, statbuf);
}

SYSCALL_DEFINE2(lstat, const char __user *, filename,
		struct __old_kernel_stat __user *, statbuf)
{
	struct kstat stat;
	int error;

	error = vfs_lstat(filename, &stat);
	if (error)
		return error;

	return cp_old_stat(&stat, statbuf);
}

SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, statbuf)
{
	struct kstat stat;
	int error = vfs_fstat(fd, &stat);

	if (!error)
		error = cp_old_stat(&stat, statbuf);

	return error;
}

#endif /* __ARCH_WANT_OLD_STAT */

#if BITS_PER_LONG == 32
# define choose_32_64(a,b) a
#else
# define choose_32_64(a,b) b
#endif

#define valid_dev(x)  choose_32_64(old_valid_dev,new_valid_dev)(x)
#define encode_dev(x) choose_32_64(old_encode_dev,new_encode_dev)(x)

#ifndef INIT_STRUCT_STAT_PADDING
# define INIT_STRUCT_STAT_PADDING(st) memset(&st, 0, sizeof(st))
#endif

static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf)
{
	struct stat tmp;

	if (!valid_dev(stat->dev) || !valid_dev(stat->rdev))
		return -EOVERFLOW;
#if BITS_PER_LONG == 32
	if (stat->size > MAX_NON_LFS)
		return -EOVERFLOW;
#endif

	INIT_STRUCT_STAT_PADDING(tmp);
	tmp.st_dev = encode_dev(stat->dev);
	tmp.st_ino = stat->ino;
	if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino)
		return -EOVERFLOW;
	tmp.st_mode = stat->mode;
	tmp.st_nlink = stat->nlink;
	if (tmp.st_nlink != stat->nlink)
		return -EOVERFLOW;
	SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid));
	SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid));
	tmp.st_rdev = encode_dev(stat->rdev);
	tmp.st_size = stat->size;
	tmp.st_atime = stat->atime.tv_sec;
	tmp.st_mtime = stat->mtime.tv_sec;
	tmp.st_ctime = stat->ctime.tv_sec;
#ifdef STAT_HAVE_NSEC
	tmp.st_atime_nsec = stat->atime.tv_nsec;
	tmp.st_mtime_nsec = stat->mtime.tv_nsec;
	tmp.st_ctime_nsec = stat->ctime.tv_nsec;
#endif
	tmp.st_blocks = stat->blocks;
	tmp.st_blksize = stat->blksize;
	return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? -EFAULT : 0;
}

SYSCALL_DEFINE2(newstat, const char __user *, filename,
		struct stat __user *, statbuf)
{
	struct kstat stat;
	int error = vfs_stat(filename, &stat);

	if (error)
		return error;
	return cp_new_stat(&stat, statbuf);
}

SYSCALL_DEFINE2(newlstat, const char __user *, filename,
		struct stat __user *, statbuf)
{
	struct kstat stat;
	int error;

	error = vfs_lstat(filename, &stat);
	if (error)
		return error;

	return cp_new_stat(&stat, statbuf);
}

#if !defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_SYS_NEWFSTATAT)
SYSCALL_DEFINE4(newfstatat, int, dfd, const char __user *, filename,
		struct stat __user *, statbuf, int, flag)
{
	struct kstat stat;
	int error;

	error = vfs_fstatat(dfd, filename, &stat, flag);
	if (error)
		return error;
	return cp_new_stat(&stat, statbuf);
}
#endif

SYSCALL_DEFINE2(newfstat, unsigned int, fd, struct stat __user *, statbuf)
{
	struct kstat stat;
	int error = vfs_fstat(fd, &stat);

	if (!error)
		error = cp_new_stat(&stat, statbuf);

	return error;
}

SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname,
		char __user *, buf, int, bufsiz)
{
	struct path path;
	int error;
	int empty = 0;

	if (bufsiz <= 0)
		return -EINVAL;

	error = user_path_at_empty(dfd, pathname, LOOKUP_EMPTY, &path, &empty);
	if (!error) {
		struct inode *inode = path.dentry->d_inode;

		error = empty ? -ENOENT : -EINVAL;
		if (inode->i_op->readlink) {
			error = security_inode_readlink(path.dentry);
			if (!error) {
				touch_atime(&path);
				error = inode->i_op->readlink(path.dentry,
							      buf, bufsiz);
			}
		}
		path_put(&path);
	}
	return error;
}

SYSCALL_DEFINE3(readlink, const char __user *, path, char __user *, buf,
		int, bufsiz)
{
	return sys_readlinkat(AT_FDCWD, path, buf, bufsiz);
}

/* ---------- LFS-64 ----------- */
#ifdef __ARCH_WANT_STAT64

#ifndef INIT_STRUCT_STAT64_PADDING
# define INIT_STRUCT_STAT64_PADDING(st) memset(&st, 0, sizeof(st))
#endif

static long cp_new_stat64(struct kstat *stat, struct stat64 __user *statbuf)
{
	struct stat64 tmp;

	INIT_STRUCT_STAT64_PADDING(tmp);
#ifdef CONFIG_MIPS
	/* mips has weird padding, so we don't get 64 bits there */
	if (!new_valid_dev(stat->dev) || !new_valid_dev(stat->rdev))
		return -EOVERFLOW;
	tmp.st_dev = new_encode_dev(stat->dev);
	tmp.st_rdev = new_encode_dev(stat->rdev);
#else
	tmp.st_dev = huge_encode_dev(stat->dev);
	tmp.st_rdev = huge_encode_dev(stat->rdev);
#endif
	tmp.st_ino = stat->ino;
	if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino)
		return -EOVERFLOW;
#ifdef STAT64_HAS_BROKEN_ST_INO
	tmp.__st_ino = stat->ino;
#endif
	tmp.st_mode = stat->mode;
	tmp.st_nlink = stat->nlink;
	tmp.st_uid = from_kuid_munged(current_user_ns(), stat->uid);
	tmp.st_gid = from_kgid_munged(current_user_ns(), stat->gid);
	tmp.st_atime = stat->atime.tv_sec;
	tmp.st_atime_nsec = stat->atime.tv_nsec;
	tmp.st_mtime = stat->mtime.tv_sec;
	tmp.st_mtime_nsec = stat->mtime.tv_nsec;
	tmp.st_ctime = stat->ctime.tv_sec;
	tmp.st_ctime_nsec = stat->ctime.tv_nsec;
	tmp.st_size = stat->size;
	tmp.st_blocks = stat->blocks;
	tmp.st_blksize = stat->blksize;
	return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? -EFAULT : 0;
}

SYSCALL_DEFINE2(stat64, const char __user *, filename,
		struct stat64 __user *, statbuf)
{
	struct kstat stat;
	int error = vfs_stat(filename, &stat);

	if (!error)
		error = cp_new_stat64(&stat, statbuf);

	return error;
}

SYSCALL_DEFINE2(lstat64, const char __user *, filename,
		struct stat64 __user *, statbuf)
{
	struct kstat stat;
	int error = vfs_lstat(filename, &stat);

	if (!error)
		error = cp_new_stat64(&stat, statbuf);

	return error;
}

SYSCALL_DEFINE2(fstat64, unsigned long, fd, struct stat64 __user *, statbuf)
{
	struct kstat stat;
	int error = vfs_fstat(fd, &stat);

	if (!error)
		error = cp_new_stat64(&stat, statbuf);

	return error;
}

SYSCALL_DEFINE4(fstatat64, int, dfd, const char __user *, filename,
		struct stat64 __user *, statbuf, int, flag)
{
	struct kstat stat;
	int error;

	error = vfs_fstatat(dfd, filename, &stat, flag);
	if (error)
		return error;
	return cp_new_stat64(&stat, statbuf);
}
#endif /* __ARCH_WANT_STAT64 */

/* Caller is here responsible for sufficient locking (ie. inode->i_lock) */
void __inode_add_bytes(struct inode *inode, loff_t bytes)
{
	inode->i_blocks += bytes >> 9;
	bytes &= 511;
	inode->i_bytes += bytes;
	if (inode->i_bytes >= 512) {
		inode->i_blocks++;
		inode->i_bytes -= 512;
	}
}

void inode_add_bytes(struct inode *inode, loff_t bytes)
{
	spin_lock(&inode->i_lock);
	__inode_add_bytes(inode, bytes);
	spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(inode_add_bytes);

void inode_sub_bytes(struct inode *inode, loff_t bytes)
{
	spin_lock(&inode->i_lock);
	inode->i_blocks -= bytes >> 9;
	bytes &= 511;
	if (inode->i_bytes < bytes) {
		inode->i_blocks--;
		inode->i_bytes += 512;
	}
	inode->i_bytes -= bytes;
	spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(inode_sub_bytes);

loff_t inode_get_bytes(struct inode *inode)
{
	loff_t ret;

	spin_lock(&inode->i_lock);
	ret = (((loff_t)inode->i_blocks) << 9) + inode->i_bytes;
	spin_unlock(&inode->i_lock);
	return ret;
}
EXPORT_SYMBOL(inode_get_bytes);

void inode_set_bytes(struct inode *inode, loff_t bytes)
{
	/* Caller is here responsible for sufficient locking
	 * (ie. inode->i_lock) */
	inode->i_blocks = bytes >> 9;
	inode->i_bytes = bytes & 511;
}
EXPORT_SYMBOL(inode_set_bytes);