userfaultfd: fix regression in userfaultfd_unmap_prep()

Android reported a performance regression in the userfaultfd unmap path. 
A closer inspection on the userfaultfd_unmap_prep() change showed that a
second tree walk would be necessary in the reworked code.

Fix the regression by passing each VMA that will be unmapped through to
the userfaultfd_unmap_prep() function as they are added to the unmap list,
instead of re-walking the tree for the VMA.

Link: https://lkml.kernel.org/r/20230601015402.2819343-1-Liam.Howlett@oracle.com
Fixes: 69dbe6daf1 ("userfaultfd: use maple tree iterator to iterate VMAs")
Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Reported-by: Suren Baghdasaryan <surenb@google.com>
Suggested-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Liam R. Howlett 2023-05-31 21:54:02 -04:00 committed by Andrew Morton
parent 1e3be4856f
commit 65ac132027
3 changed files with 33 additions and 39 deletions

View file

@ -852,31 +852,26 @@ static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps,
return false;
}
int userfaultfd_unmap_prep(struct mm_struct *mm, unsigned long start,
int userfaultfd_unmap_prep(struct vm_area_struct *vma, unsigned long start,
unsigned long end, struct list_head *unmaps)
{
VMA_ITERATOR(vmi, mm, start);
struct vm_area_struct *vma;
struct userfaultfd_unmap_ctx *unmap_ctx;
struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;
for_each_vma_range(vmi, vma, end) {
struct userfaultfd_unmap_ctx *unmap_ctx;
struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;
if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_UNMAP) ||
has_unmap_ctx(ctx, unmaps, start, end))
return 0;
if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_UNMAP) ||
has_unmap_ctx(ctx, unmaps, start, end))
continue;
unmap_ctx = kzalloc(sizeof(*unmap_ctx), GFP_KERNEL);
if (!unmap_ctx)
return -ENOMEM;
unmap_ctx = kzalloc(sizeof(*unmap_ctx), GFP_KERNEL);
if (!unmap_ctx)
return -ENOMEM;
userfaultfd_ctx_get(ctx);
atomic_inc(&ctx->mmap_changing);
unmap_ctx->ctx = ctx;
unmap_ctx->start = start;
unmap_ctx->end = end;
list_add_tail(&unmap_ctx->list, unmaps);
}
userfaultfd_ctx_get(ctx);
atomic_inc(&ctx->mmap_changing);
unmap_ctx->ctx = ctx;
unmap_ctx->start = start;
unmap_ctx->end = end;
list_add_tail(&unmap_ctx->list, unmaps);
return 0;
}

View file

@ -188,8 +188,8 @@ extern bool userfaultfd_remove(struct vm_area_struct *vma,
unsigned long start,
unsigned long end);
extern int userfaultfd_unmap_prep(struct mm_struct *mm, unsigned long start,
unsigned long end, struct list_head *uf);
extern int userfaultfd_unmap_prep(struct vm_area_struct *vma,
unsigned long start, unsigned long end, struct list_head *uf);
extern void userfaultfd_unmap_complete(struct mm_struct *mm,
struct list_head *uf);
extern bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma);
@ -271,7 +271,7 @@ static inline bool userfaultfd_remove(struct vm_area_struct *vma,
return true;
}
static inline int userfaultfd_unmap_prep(struct mm_struct *mm,
static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma,
unsigned long start, unsigned long end,
struct list_head *uf)
{

View file

@ -2417,6 +2417,21 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
goto munmap_sidetree_failed;
count++;
if (unlikely(uf)) {
/*
* If userfaultfd_unmap_prep returns an error the vmas
* will remain split, but userland will get a
* highly unexpected error anyway. This is no
* different than the case where the first of the two
* __split_vma fails, but we don't undo the first
* split, despite we could. This is unlikely enough
* failure that it's not worth optimizing it for.
*/
error = userfaultfd_unmap_prep(next, start, end, uf);
if (error)
goto userfaultfd_error;
}
#ifdef CONFIG_DEBUG_VM_MAPLE_TREE
BUG_ON(next->vm_start < start);
BUG_ON(next->vm_start > end);
@ -2429,22 +2444,6 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
if (!next)
next = vma_next(vmi);
if (unlikely(uf)) {
/*
* If userfaultfd_unmap_prep returns an error the vmas
* will remain split, but userland will get a
* highly unexpected error anyway. This is no
* different than the case where the first of the two
* __split_vma fails, but we don't undo the first
* split, despite we could. This is unlikely enough
* failure that it's not worth optimizing it for.
*/
error = userfaultfd_unmap_prep(mm, start, end, uf);
if (error)
goto userfaultfd_error;
}
#if defined(CONFIG_DEBUG_VM_MAPLE_TREE)
/* Make sure no VMAs are about to be lost. */
{