diff --git a/fs/namespace.c b/fs/namespace.c index df137ba19d37..e276c0e7ecea 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -75,6 +75,22 @@ static DECLARE_RWSEM(namespace_sem); static HLIST_HEAD(unmounted); /* protected by namespace_sem */ static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */ +struct mnt_idmap { + struct user_namespace *owner; + refcount_t count; +}; + +/* + * Carries the initial idmapping of 0:0:4294967295 which is an identity + * mapping. This means that {g,u}id 0 is mapped to {g,u}id 0, {g,u}id 1 is + * mapped to {g,u}id 1, [...], {g,u}id 1000 to {g,u}id 1000, [...]. + */ +struct mnt_idmap nop_mnt_idmap = { + .owner = &init_user_ns, + .count = REFCOUNT_INIT(1), +}; +EXPORT_SYMBOL_GPL(nop_mnt_idmap); + struct mount_kattr { unsigned int attr_set; unsigned int attr_clr; @@ -82,6 +98,7 @@ struct mount_kattr { unsigned int lookup_flags; bool recurse; struct user_namespace *mnt_userns; + struct mnt_idmap *mnt_idmap; }; /* /sys/fs */ @@ -193,6 +210,104 @@ int mnt_get_count(struct mount *mnt) #endif } +/** + * mnt_idmap_owner - retrieve owner of the mount's idmapping + * @idmap: mount idmapping + * + * This helper will go away once the conversion to use struct mnt_idmap + * everywhere has finished at which point the helper will be unexported. + * + * Only code that needs to perform permission checks based on the owner of the + * idmapping will get access to it. All other code will solely rely on + * idmappings. This will get us type safety so it's impossible to conflate + * filesystems idmappings with mount idmappings. + * + * Return: The owner of the idmapping. + */ +struct user_namespace *mnt_idmap_owner(const struct mnt_idmap *idmap) +{ + return idmap->owner; +} +EXPORT_SYMBOL_GPL(mnt_idmap_owner); + +/** + * mnt_user_ns - retrieve owner of an idmapped mount + * @mnt: the relevant vfsmount + * + * This helper will go away once the conversion to use struct mnt_idmap + * everywhere has finished at which point the helper will be unexported. + * + * Only code that needs to perform permission checks based on the owner of the + * idmapping will get access to it. All other code will solely rely on + * idmappings. This will get us type safety so it's impossible to conflate + * filesystems idmappings with mount idmappings. + * + * Return: The owner of the idmapped. + */ +struct user_namespace *mnt_user_ns(const struct vfsmount *mnt) +{ + struct mnt_idmap *idmap = mnt_idmap(mnt); + + /* Return the actual owner of the filesystem instead of the nop. */ + if (idmap == &nop_mnt_idmap && + !initial_idmapping(mnt->mnt_sb->s_user_ns)) + return mnt->mnt_sb->s_user_ns; + return mnt_idmap_owner(idmap); +} +EXPORT_SYMBOL_GPL(mnt_user_ns); + +/** + * alloc_mnt_idmap - allocate a new idmapping for the mount + * @mnt_userns: owning userns of the idmapping + * + * Allocate a new struct mnt_idmap which carries the idmapping of the mount. + * + * Return: On success a new idmap, on error an error pointer is returned. + */ +static struct mnt_idmap *alloc_mnt_idmap(struct user_namespace *mnt_userns) +{ + struct mnt_idmap *idmap; + + idmap = kzalloc(sizeof(struct mnt_idmap), GFP_KERNEL_ACCOUNT); + if (!idmap) + return ERR_PTR(-ENOMEM); + + idmap->owner = get_user_ns(mnt_userns); + refcount_set(&idmap->count, 1); + return idmap; +} + +/** + * mnt_idmap_get - get a reference to an idmapping + * @idmap: the idmap to bump the reference on + * + * If @idmap is not the @nop_mnt_idmap bump the reference count. + * + * Return: @idmap with reference count bumped if @not_mnt_idmap isn't passed. + */ +static inline struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap) +{ + if (idmap != &nop_mnt_idmap) + refcount_inc(&idmap->count); + + return idmap; +} + +/** + * mnt_idmap_put - put a reference to an idmapping + * @idmap: the idmap to put the reference on + * + * If this is a non-initial idmapping, put the reference count when a mount is + * released and free it if we're the last user. + */ +static inline void mnt_idmap_put(struct mnt_idmap *idmap) +{ + if (idmap != &nop_mnt_idmap && refcount_dec_and_test(&idmap->count)) { + put_user_ns(idmap->owner); + kfree(idmap); + } +} + static struct mount *alloc_vfsmnt(const char *name) { struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); @@ -232,7 +347,7 @@ static struct mount *alloc_vfsmnt(const char *name) INIT_HLIST_NODE(&mnt->mnt_mp_list); INIT_LIST_HEAD(&mnt->mnt_umounting); INIT_HLIST_HEAD(&mnt->mnt_stuck_children); - mnt->mnt.mnt_userns = &init_user_ns; + mnt->mnt.mnt_idmap = &nop_mnt_idmap; } return mnt; @@ -602,11 +717,7 @@ int sb_prepare_remount_readonly(struct super_block *sb) static void free_vfsmnt(struct mount *mnt) { - struct user_namespace *mnt_userns; - - mnt_userns = mnt_user_ns(&mnt->mnt); - if (!initial_idmapping(mnt_userns)) - put_user_ns(mnt_userns); + mnt_idmap_put(mnt_idmap(&mnt->mnt)); kfree_const(mnt->mnt_devname); #ifdef CONFIG_SMP free_percpu(mnt->mnt_pcp); @@ -1009,7 +1120,6 @@ static struct mount *skip_mnt_tree(struct mount *p) struct vfsmount *vfs_create_mount(struct fs_context *fc) { struct mount *mnt; - struct user_namespace *fs_userns; if (!fc->root) return ERR_PTR(-EINVAL); @@ -1027,10 +1137,6 @@ struct vfsmount *vfs_create_mount(struct fs_context *fc) mnt->mnt_mountpoint = mnt->mnt.mnt_root; mnt->mnt_parent = mnt; - fs_userns = mnt->mnt.mnt_sb->s_user_ns; - if (!initial_idmapping(fs_userns)) - mnt->mnt.mnt_userns = get_user_ns(fs_userns); - lock_mount_hash(); list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts); unlock_mount_hash(); @@ -1120,9 +1226,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL); atomic_inc(&sb->s_active); - mnt->mnt.mnt_userns = mnt_user_ns(&old->mnt); - if (!initial_idmapping(mnt->mnt.mnt_userns)) - mnt->mnt.mnt_userns = get_user_ns(mnt->mnt.mnt_userns); + mnt->mnt.mnt_idmap = mnt_idmap_get(mnt_idmap(&old->mnt)); + mnt->mnt.mnt_sb = sb; mnt->mnt.mnt_root = dget(root); mnt->mnt_mountpoint = mnt->mnt.mnt_root; @@ -3981,14 +4086,14 @@ static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt) struct vfsmount *m = &mnt->mnt; struct user_namespace *fs_userns = m->mnt_sb->s_user_ns; - if (!kattr->mnt_userns) + if (!kattr->mnt_idmap) return 0; /* * Creating an idmapped mount with the filesystem wide idmapping * doesn't make sense so block that. We don't allow mushy semantics. */ - if (kattr->mnt_userns == fs_userns) + if (mnt_idmap_owner(kattr->mnt_idmap) == fs_userns) return -EINVAL; /* @@ -4028,7 +4133,7 @@ static inline bool mnt_allow_writers(const struct mount_kattr *kattr, { return (!(kattr->attr_set & MNT_READONLY) || (mnt->mnt.mnt_flags & MNT_READONLY)) && - !kattr->mnt_userns; + !kattr->mnt_idmap; } static int mount_setattr_prepare(struct mount_kattr *kattr, struct mount *mnt) @@ -4082,27 +4187,18 @@ static int mount_setattr_prepare(struct mount_kattr *kattr, struct mount *mnt) static void do_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt) { - struct user_namespace *mnt_userns, *old_mnt_userns; - - if (!kattr->mnt_userns) + if (!kattr->mnt_idmap) return; /* - * We're the only ones able to change the mount's idmapping. So - * mnt->mnt.mnt_userns is stable and we can retrieve it directly. + * Pairs with smp_load_acquire() in mnt_idmap(). + * + * Since we only allow a mount to change the idmapping once and + * verified this in can_idmap_mount() we know that the mount has + * @nop_mnt_idmap attached to it. So there's no need to drop any + * references. */ - old_mnt_userns = mnt->mnt.mnt_userns; - - mnt_userns = get_user_ns(kattr->mnt_userns); - /* Pairs with smp_load_acquire() in mnt_user_ns(). */ - smp_store_release(&mnt->mnt.mnt_userns, mnt_userns); - - /* - * If this is an idmapped filesystem drop the reference we've taken - * in vfs_create_mount() before. - */ - if (!initial_idmapping(old_mnt_userns)) - put_user_ns(old_mnt_userns); + smp_store_release(&mnt->mnt.mnt_idmap, mnt_idmap_get(kattr->mnt_idmap)); } static void mount_setattr_commit(struct mount_kattr *kattr, struct mount *mnt) @@ -4136,6 +4232,15 @@ static int do_mount_setattr(struct path *path, struct mount_kattr *kattr) if (path->dentry != mnt->mnt.mnt_root) return -EINVAL; + if (kattr->mnt_userns) { + struct mnt_idmap *mnt_idmap; + + mnt_idmap = alloc_mnt_idmap(kattr->mnt_userns); + if (IS_ERR(mnt_idmap)) + return PTR_ERR(mnt_idmap); + kattr->mnt_idmap = mnt_idmap; + } + if (kattr->propagation) { /* * Only take namespace_lock() if we're actually changing @@ -4323,6 +4428,9 @@ static void finish_mount_kattr(struct mount_kattr *kattr) { put_user_ns(kattr->mnt_userns); kattr->mnt_userns = NULL; + + if (kattr->mnt_idmap) + mnt_idmap_put(kattr->mnt_idmap); } SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path, diff --git a/include/linux/fs.h b/include/linux/fs.h index cffc402c2451..be56c2192926 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2700,18 +2700,22 @@ static inline struct user_namespace *file_mnt_user_ns(struct file *file) return mnt_user_ns(file->f_path.mnt); } +static inline struct mnt_idmap *file_mnt_idmap(struct file *file) +{ + return mnt_idmap(file->f_path.mnt); +} + /** * is_idmapped_mnt - check whether a mount is mapped * @mnt: the mount to check * - * If @mnt has an idmapping attached different from the - * filesystem's idmapping then @mnt is mapped. + * If @mnt has an non @nop_mnt_idmap attached to it then @mnt is mapped. * * Return: true if mount is mapped, false if not. */ static inline bool is_idmapped_mnt(const struct vfsmount *mnt) { - return mnt_user_ns(mnt) != mnt->mnt_sb->s_user_ns; + return mnt_idmap(mnt) != &nop_mnt_idmap; } extern long vfs_truncate(const struct path *, loff_t); diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index c8002294a72d..092c52aa6c2c 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -5,12 +5,10 @@ #include #include +struct mnt_idmap; struct user_namespace; -/* - * Carries the initial idmapping of 0:0:4294967295 which is an identity - * mapping. This means that {g,u}id 0 is mapped to {g,u}id 0, {g,u}id 1 is - * mapped to {g,u}id 1, [...], {g,u}id 1000 to {g,u}id 1000, [...]. - */ + +extern struct mnt_idmap nop_mnt_idmap; extern struct user_namespace init_user_ns; typedef struct { diff --git a/include/linux/mount.h b/include/linux/mount.h index 55a4abaf6715..62475996fac6 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -16,6 +16,7 @@ struct super_block; struct dentry; struct user_namespace; +struct mnt_idmap; struct file_system_type; struct fs_context; struct file; @@ -70,13 +71,15 @@ struct vfsmount { struct dentry *mnt_root; /* root of the mounted tree */ struct super_block *mnt_sb; /* pointer to superblock */ int mnt_flags; - struct user_namespace *mnt_userns; + struct mnt_idmap *mnt_idmap; } __randomize_layout; -static inline struct user_namespace *mnt_user_ns(const struct vfsmount *mnt) +struct user_namespace *mnt_user_ns(const struct vfsmount *mnt); +struct user_namespace *mnt_idmap_owner(const struct mnt_idmap *idmap); +static inline struct mnt_idmap *mnt_idmap(const struct vfsmount *mnt) { /* Pairs with smp_store_release() in do_idmap_mount(). */ - return smp_load_acquire(&mnt->mnt_userns); + return smp_load_acquire(&mnt->mnt_idmap); } extern int mnt_want_write(struct vfsmount *mnt);