xen/privcmd: improve performance of MMAPBATCH_V2
Make the IOCTL_PRIVCMD_MMAPBATCH_V2 (and older V1 version) map
multiple frames at a time rather than one at a time, despite the
pages being non-consecutive GFNs.

xen_remap_domain_mfn_array() is added which maps an array of GFNs
(instead of a consecutive range of GFNs).

Since per-frame errors are returned in an array, privcmd must set the
MMAPBATCH_V1 error bits as part of the "report errors" phase, after
all the frames are mapped.

Migrate times are significantly improved (when using a PV toolstack
domain).  For example, for an idle 12 GiB PV guest:

        Before      After
  real  0m38.179s   0m26.868s
  user  0m15.096s   0m13.652s
  sys   0m28.988s   0m18.732s

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
parent 628c28eefd
commit 4e8c0c8c4b

5 changed files with 249 additions and 80 deletions
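For orientation before the diff: a minimal userspace sketch (not part of this commit) of how a toolstack process might drive IOCTL_PRIVCMD_MMAPBATCH_V2. The struct layout and ioctl number mirror include/uapi/xen/privcmd.h; the domid, the GFN values, and the 4 KiB page-size assumption are made up for illustration.

/* Hedged sketch, not from this commit: batch-mapping foreign GFNs via
 * /dev/xen/privcmd.  Struct layout mirrors include/uapi/xen/privcmd.h;
 * PAGE_SHIFT is assumed to be 12 and error handling is minimal. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>

typedef uint64_t xen_pfn_t;

struct privcmd_mmapbatch_v2 {
	unsigned int num;     /* number of pages to populate */
	uint16_t dom;         /* target domain (domid_t) */
	uint64_t addr;        /* virtual address to map at */
	const xen_pfn_t *arr; /* array of GFNs to map */
	int *err;             /* array of per-frame error codes */
};

#define IOCTL_PRIVCMD_MMAPBATCH_V2 \
	_IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))

int main(void)
{
	xen_pfn_t gfns[4] = { 0x1000, 0x2001, 0x3002, 0x4003 };
	int errs[4];
	struct privcmd_mmapbatch_v2 batch;
	unsigned int i;
	void *va;

	int fd = open("/dev/xen/privcmd", O_RDWR | O_CLOEXEC);
	if (fd < 0)
		return 1;

	/* Reserve a VA range; the ioctl fills in the mappings. */
	va = mmap(NULL, 4ul << 12, PROT_READ, MAP_SHARED, fd, 0);
	if (va == MAP_FAILED)
		return 1;

	batch.num  = 4;
	batch.dom  = 1;            /* assumed guest domid */
	batch.addr = (uintptr_t)va;
	batch.arr  = gfns;
	batch.err  = errs;

	if (ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH_V2, &batch) < 0)
		perror("mmapbatch_v2");
	for (i = 0; i < 4; i++)    /* per-frame status, as V2 defines */
		printf("gfn %#lx -> err %d\n", (unsigned long)gfns[i], errs[i]);

	return 0;
}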
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
@@ -53,15 +53,27 @@ EXPORT_SYMBOL_GPL(xen_platform_pci_unplug);
 
 static __read_mostly int xen_events_irq = -1;
 
-int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
+int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
 			       unsigned long addr,
-			       xen_pfn_t mfn, int nr,
-			       pgprot_t prot, unsigned domid,
+			       xen_pfn_t *mfn, int nr,
+			       int *err_ptr, pgprot_t prot,
+			       unsigned domid,
 			       struct page **pages)
 {
-	return xen_xlate_remap_gfn_range(vma, addr, mfn, nr,
+	return xen_xlate_remap_gfn_array(vma, addr, mfn, nr, err_ptr,
 					 prot, domid, pages);
 }
+EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array);
+
+/* Not used by XENFEAT_auto_translated guests. */
+int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
+			       unsigned long addr,
+			       xen_pfn_t mfn, int nr,
+			       pgprot_t prot, unsigned domid,
+			       struct page **pages)
+{
+	return -ENOSYS;
+}
 EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
 
 int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
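The ARM side now provides only the array primitive and stubs out the range variant with -ENOSYS, since auto-translated guests always take the xlate array path. As a side illustration (hypothetical helper, not kernel code), a consecutive range can always be lowered onto an array-style interface:

#include <stdio.h>
#include <stdlib.h>

typedef unsigned long xen_pfn_t;

/* Hypothetical helper: expand [first, first + nr) into the array form
 * that the *_mfn_array() interfaces consume.  Caller frees the result. */
static xen_pfn_t *expand_gfn_range(xen_pfn_t first, int nr)
{
	xen_pfn_t *arr = malloc(sizeof(*arr) * nr);
	int i;

	if (!arr)
		return NULL;
	for (i = 0; i < nr; i++)
		arr[i] = first + i;
	return arr;
}

int main(void)
{
	xen_pfn_t *arr = expand_gfn_range(0x1000, 4);
	int i;

	if (!arr)
		return 1;
	for (i = 0; i < 4; i++)
		printf("%#lx\n", arr[i]); /* prints 0x1000 .. 0x1003 */
	free(arr);
	return 0;
}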
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
@@ -2439,7 +2439,8 @@ void __init xen_hvm_init_mmu_ops(void)
 #define REMAP_BATCH_SIZE 16
 
 struct remap_data {
-	unsigned long mfn;
+	xen_pfn_t *mfn;
+	bool contiguous;
 	pgprot_t prot;
 	struct mmu_update *mmu_update;
 };
@@ -2448,7 +2449,14 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
 				 unsigned long addr, void *data)
 {
 	struct remap_data *rmd = data;
-	pte_t pte = pte_mkspecial(mfn_pte(rmd->mfn++, rmd->prot));
+	pte_t pte = pte_mkspecial(mfn_pte(*rmd->mfn, rmd->prot));
+
+	/* If we have a contiguous range, just update the mfn itself,
+	   else update pointer to be "next mfn". */
+	if (rmd->contiguous)
+		(*rmd->mfn)++;
+	else
+		rmd->mfn++;
 
 	rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
 	rmd->mmu_update->val = pte_val_ma(pte);
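remap_area_mfn_pte_fn() above now serves both call styles through one cursor: for a contiguous range the single frame value is incremented in place, while for an array the pointer is advanced to the next element. A standalone sketch of that trick (illustrative names only, not kernel code):

/* Minimal sketch of the cursor trick in remap_area_mfn_pte_fn():
 * contiguous mode steps the value, array mode steps the pointer. */
#include <stdbool.h>
#include <stdio.h>

typedef unsigned long xen_pfn_t;

struct cursor {
	xen_pfn_t *mfn;   /* current frame (or start of a range) */
	bool contiguous;  /* true: one value, stepped in place */
};

static xen_pfn_t next_frame(struct cursor *c)
{
	xen_pfn_t cur = *c->mfn;

	if (c->contiguous)
		(*c->mfn)++;  /* step the value itself */
	else
		c->mfn++;     /* step to the next array element */
	return cur;
}

int main(void)
{
	xen_pfn_t start = 100;
	xen_pfn_t arr[] = { 7, 42, 13 };
	struct cursor range = { .mfn = &start, .contiguous = true };
	struct cursor array = { .mfn = arr, .contiguous = false };
	int i;

	for (i = 0; i < 3; i++)  /* range: 100 101 102; array: 7 42 13 */
		printf("range: %lu  array: %lu\n",
		       next_frame(&range), next_frame(&array));
	return 0;
}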
@@ -2457,26 +2465,26 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
 	return 0;
 }
 
-int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
-			       unsigned long addr,
-			       xen_pfn_t mfn, int nr,
-			       pgprot_t prot, unsigned domid,
-			       struct page **pages)
+static int do_remap_mfn(struct vm_area_struct *vma,
+			unsigned long addr,
+			xen_pfn_t *mfn, int nr,
+			int *err_ptr, pgprot_t prot,
+			unsigned domid,
+			struct page **pages)
 {
+	int err = 0;
 	struct remap_data rmd;
 	struct mmu_update mmu_update[REMAP_BATCH_SIZE];
-	int batch;
 	unsigned long range;
-	int err = 0;
+	int mapped = 0;
 
 	BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
 
 	if (xen_feature(XENFEAT_auto_translated_physmap)) {
 #ifdef CONFIG_XEN_PVH
 		/* We need to update the local page tables and the xen HAP */
-		return xen_xlate_remap_gfn_range(vma, addr, mfn, nr, prot,
-						 domid, pages);
+		return xen_xlate_remap_gfn_array(vma, addr, mfn, nr, err_ptr,
+						 prot, domid, pages);
 #else
 		return -EINVAL;
 #endif
@@ -2484,9 +2492,15 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
 	}
 
 	rmd.mfn = mfn;
 	rmd.prot = prot;
+	/* We use the err_ptr to indicate whether we are doing a contiguous
+	 * mapping or a discontiguous mapping. */
+	rmd.contiguous = !err_ptr;
 
 	while (nr) {
-		batch = min(REMAP_BATCH_SIZE, nr);
+		int index = 0;
+		int done = 0;
+		int batch = min(REMAP_BATCH_SIZE, nr);
+		int batch_left = batch;
 		range = (unsigned long)batch << PAGE_SHIFT;
 
 		rmd.mmu_update = mmu_update;
@@ -2495,23 +2509,72 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
 		if (err)
 			goto out;
 
-		err = HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid);
-		if (err < 0)
-			goto out;
+		/* We record the error for each page that gives an error, but
+		 * continue mapping until the whole set is done */
+		do {
+			int i;
+
+			err = HYPERVISOR_mmu_update(&mmu_update[index],
+						    batch_left, &done, domid);
+
+			/*
+			 * @err_ptr may be the same buffer as @mfn, so
+			 * only clear it after each chunk of @mfn is
+			 * used.
+			 */
+			if (err_ptr) {
+				for (i = index; i < index + done; i++)
+					err_ptr[i] = 0;
+			}
+			if (err < 0) {
+				if (!err_ptr)
+					goto out;
+				err_ptr[i] = err;
+				done++; /* Skip failed frame. */
+			} else
+				mapped += done;
+			batch_left -= done;
+			index += done;
+		} while (batch_left);
 
 		nr -= batch;
 		addr += range;
+		if (err_ptr)
+			err_ptr += batch;
 	}
 
 	err = 0;
 out:
 
 	xen_flush_tlb_all();
 
-	return err;
+	return err < 0 ? err : mapped;
 }
 
+int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
+			       unsigned long addr,
+			       xen_pfn_t mfn, int nr,
+			       pgprot_t prot, unsigned domid,
+			       struct page **pages)
+{
+	return do_remap_mfn(vma, addr, &mfn, nr, NULL, prot, domid, pages);
+}
+EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
+
+int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
+			       unsigned long addr,
+			       xen_pfn_t *mfn, int nr,
+			       int *err_ptr, pgprot_t prot,
+			       unsigned domid, struct page **pages)
+{
+	/* We BUG_ON because it's a programmer error to pass a NULL err_ptr,
+	 * and it is otherwise quite hard to track down the actual cause of
+	 * "wrong memory was mapped in".
+	 */
+	BUG_ON(err_ptr == NULL);
+	return do_remap_mfn(vma, addr, mfn, nr, err_ptr, prot, domid, pages);
+}
+EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array);
+
+
 /* Returns: 0 success */
 int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
 			       int numpgs, struct page **pages)
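The heart of the change is the do/while loop in do_remap_mfn(): HYPERVISOR_mmu_update() reports through @done how many updates it completed before an error, so the loop records the failing frame's error in err_ptr[], skips that frame, and resumes with the remainder. A standalone sketch of that control flow, with a stand-in for the hypercall (illustrative code, not from the commit):

/* Sketch of the resume-after-failure loop in do_remap_mfn(). */
#include <stdio.h>

/* Stand-in for HYPERVISOR_mmu_update(): processes entries until it
 * hits a negative value, reporting the completed count via *done. */
static int batch_op(const int *vals, int left, int *done)
{
	int i;

	for (i = 0; i < left; i++) {
		if (vals[i] < 0) {
			*done = i;
			return vals[i]; /* error code of the failing entry */
		}
	}
	*done = left;
	return 0;
}

int main(void)
{
	int vals[] = { 1, 2, -14, 4, -22, 6 };
	int err_ptr[6];
	int index = 0, batch_left = 6, mapped = 0;

	do {
		int i, done = 0;
		int err = batch_op(&vals[index], batch_left, &done);

		for (i = index; i < index + done; i++)
			err_ptr[i] = 0;       /* completed entries succeeded */
		if (err < 0) {
			err_ptr[index + done] = err;
			done++;               /* skip the failed entry */
		} else
			mapped += done;       /* only error-free calls count,
						 matching the kernel's accounting */
		batch_left -= done;
		index += done;
	} while (batch_left);

	printf("mapped %d of 6\n", mapped);   /* prints "mapped 1 of 6" */
	return 0;
}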
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
@@ -159,6 +159,40 @@ static int traverse_pages(unsigned nelem, size_t size,
 	return ret;
 }
 
+/*
+ * Similar to traverse_pages, but use each page as a "block" of
+ * data to be processed as one unit.
+ */
+static int traverse_pages_block(unsigned nelem, size_t size,
+				struct list_head *pos,
+				int (*fn)(void *data, int nr, void *state),
+				void *state)
+{
+	void *pagedata;
+	unsigned pageidx;
+	int ret = 0;
+
+	BUG_ON(size > PAGE_SIZE);
+
+	pageidx = PAGE_SIZE;
+
+	while (nelem) {
+		int nr = (PAGE_SIZE/size);
+		struct page *page;
+		if (nr > nelem)
+			nr = nelem;
+		pos = pos->next;
+		page = list_entry(pos, struct page, lru);
+		pagedata = page_address(page);
+		ret = (*fn)(pagedata, nr, state);
+		if (ret)
+			break;
+		nelem -= nr;
+	}
+
+	return ret;
+}
+
 struct mmap_mfn_state {
 	unsigned long va;
 	struct vm_area_struct *vma;
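traverse_pages_block() hands the callback one page-sized block of elements per call instead of a single element, which is what lets mmap_batch_fn() map a whole page's worth of GFNs per invocation. A simplified standalone sketch of the same idea over a plain array (not the list-based kernel version):

/* Sketch of block-wise traversal: one callback per block, not per
 * element.  BLOCK_BYTES stands in for PAGE_SIZE. */
#include <stdio.h>

#define BLOCK_BYTES 4096

static int sum_block(const int *data, int nr, void *state)
{
	int *total = state;
	int i;

	for (i = 0; i < nr; i++)
		*total += data[i];
	return 0;
}

static int traverse_block(const int *elems, unsigned nelem,
			  int (*fn)(const int *, int, void *), void *state)
{
	while (nelem) {
		int nr = BLOCK_BYTES / sizeof(*elems);
		int ret;

		if ((unsigned)nr > nelem)
			nr = nelem;
		ret = fn(elems, nr, state);  /* one call per block */
		if (ret)
			return ret;
		elems += nr;
		nelem -= nr;
	}
	return 0;
}

int main(void)
{
	int vals[3000], total = 0;
	unsigned i;

	for (i = 0; i < 3000; i++)
		vals[i] = 1;
	/* 3000 ints at 1024 per 4 KiB block: only 3 callback calls. */
	traverse_block(vals, 3000, sum_block, &total);
	printf("%d\n", total); /* 3000 */
	return 0;
}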
@@ -274,39 +308,25 @@ struct mmap_batch_state {
 /* auto translated dom0 note: if domU being created is PV, then mfn is
  * mfn(addr on bus). If it's auto xlated, then mfn is pfn (input to HAP).
  */
-static int mmap_batch_fn(void *data, void *state)
+static int mmap_batch_fn(void *data, int nr, void *state)
 {
 	xen_pfn_t *mfnp = data;
 	struct mmap_batch_state *st = state;
 	struct vm_area_struct *vma = st->vma;
 	struct page **pages = vma->vm_private_data;
-	struct page *cur_page = NULL;
+	struct page **cur_pages = NULL;
 	int ret;
 
 	if (xen_feature(XENFEAT_auto_translated_physmap))
-		cur_page = pages[st->index++];
+		cur_pages = &pages[st->index];
 
-	ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
-					 st->vma->vm_page_prot, st->domain,
-					 &cur_page);
+	BUG_ON(nr < 0);
+	ret = xen_remap_domain_mfn_array(st->vma, st->va & PAGE_MASK, mfnp, nr,
+					 (int *)mfnp, st->vma->vm_page_prot,
+					 st->domain, cur_pages);
 
-	/* Store error code for second pass. */
-	if (st->version == 1) {
-		if (ret < 0) {
-			/*
-			 * V1 encodes the error codes in the 32bit top nibble of the
-			 * mfn (with its known limitations vis-a-vis 64 bit callers).
-			 */
-			*mfnp |= (ret == -ENOENT) ?
-				PRIVCMD_MMAPBATCH_PAGED_ERROR :
-				PRIVCMD_MMAPBATCH_MFN_ERROR;
-		}
-	} else { /* st->version == 2 */
-		*((int *) mfnp) = ret;
-	}
-
-	/* And see if it affects the global_error. */
-	if (ret < 0) {
+	/* Adjust the global_error? */
+	if (ret != nr) {
 		if (ret == -ENOENT)
 			st->global_error = -ENOENT;
 		else {
@@ -315,23 +335,35 @@ static int mmap_batch_fn(void *data, void *state)
 			st->global_error = 1;
 		}
 	}
-	st->va += PAGE_SIZE;
+	st->va += PAGE_SIZE * nr;
+	st->index += nr;
 
 	return 0;
 }
 
-static int mmap_return_errors(void *data, void *state)
+static int mmap_return_error(int err, struct mmap_batch_state *st)
 {
-	struct mmap_batch_state *st = state;
+	int ret;
 
 	if (st->version == 1) {
-		xen_pfn_t mfnp = *((xen_pfn_t *) data);
-		if (mfnp & PRIVCMD_MMAPBATCH_MFN_ERROR)
-			return __put_user(mfnp, st->user_mfn++);
-		else
+		if (err) {
+			xen_pfn_t mfn;
+
+			ret = get_user(mfn, st->user_mfn);
+			if (ret < 0)
+				return ret;
+			/*
+			 * V1 encodes the error codes in the 32bit top
+			 * nibble of the mfn (with its known
+			 * limitations vis-a-vis 64 bit callers).
+			 */
+			mfn |= (err == -ENOENT) ?
+				PRIVCMD_MMAPBATCH_PAGED_ERROR :
+				PRIVCMD_MMAPBATCH_MFN_ERROR;
+			return __put_user(mfn, st->user_mfn++);
+		} else
 			st->user_mfn++;
 	} else { /* st->version == 2 */
-		int err = *((int *) data);
 		if (err)
 			return __put_user(err, st->user_err++);
 		else
@@ -341,6 +373,21 @@ static int mmap_return_errors(void *data, void *state)
 	return 0;
 }
 
+static int mmap_return_errors(void *data, int nr, void *state)
+{
+	struct mmap_batch_state *st = state;
+	int *errs = data;
+	int i;
+	int ret;
+
+	for (i = 0; i < nr; i++) {
+		ret = mmap_return_error(errs[i], st);
+		if (ret < 0)
+			return ret;
+	}
+	return 0;
+}
+
 /* Allocate pfns that are then mapped with gmfns from foreign domid. Update
  * the vma with the page info to use later.
  * Returns: 0 if success, otherwise -errno
@@ -472,8 +519,8 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
 	state.version = version;
 
 	/* mmap_batch_fn guarantees ret == 0 */
-	BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
-			      &pagelist, mmap_batch_fn, &state));
+	BUG_ON(traverse_pages_block(m.num, sizeof(xen_pfn_t),
+				    &pagelist, mmap_batch_fn, &state));
 
 	up_write(&mm->mmap_sem);
@@ -481,8 +528,8 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
 		/* Write back errors in second pass. */
 		state.user_mfn = (xen_pfn_t *)m.arr;
 		state.user_err = m.err;
-		ret = traverse_pages(m.num, sizeof(xen_pfn_t),
-				     &pagelist, mmap_return_errors, &state);
+		ret = traverse_pages_block(m.num, sizeof(xen_pfn_t),
+					   &pagelist, mmap_return_errors, &state);
 	} else
 		ret = 0;
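For V1 callers, errors are folded into the top nibble of each 32-bit frame number during the second, error-reporting pass. A sketch of that encoding; the two mask values mirror include/uapi/xen/privcmd.h, and -2 stands in for -ENOENT:

/* Sketch of the V1 error encoding written back by mmap_return_error(). */
#include <stdint.h>
#include <stdio.h>

#define PRIVCMD_MMAPBATCH_MFN_ERROR   0xf0000000U /* mapping failed */
#define PRIVCMD_MMAPBATCH_PAGED_ERROR 0x80000000U /* page was paged out */

static uint32_t encode_v1_error(uint32_t mfn, int err)
{
	if (!err)
		return mfn;
	return mfn | (err == -2 /* -ENOENT */ ?
		      PRIVCMD_MMAPBATCH_PAGED_ERROR :
		      PRIVCMD_MMAPBATCH_MFN_ERROR);
}

int main(void)
{
	/* A frame that failed with -ENOENT keeps its low bits but gains
	 * the "paged" marker in the top nibble. */
	printf("%#x\n", (unsigned)encode_v1_error(0x1234, -2));  /* 0x80001234 */
	printf("%#x\n", (unsigned)encode_v1_error(0x1234, -22)); /* 0xf0001234 */
	return 0;
}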
diff --git a/drivers/xen/xlate_mmu.c b/drivers/xen/xlate_mmu.c
@@ -62,13 +62,15 @@ static int map_foreign_page(unsigned long lpfn, unsigned long fgmfn,
 }
 
 struct remap_data {
-	xen_pfn_t fgmfn; /* foreign domain's gmfn */
+	xen_pfn_t *fgmfn; /* foreign domain's gmfn */
 	pgprot_t prot;
 	domid_t domid;
 	struct vm_area_struct *vma;
 	int index;
 	struct page **pages;
 	struct xen_remap_mfn_info *info;
+	int *err_ptr;
+	int mapped;
 };
 
 static int remap_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
@@ -80,38 +82,46 @@ static int remap_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
 	pte_t pte = pte_mkspecial(pfn_pte(pfn, info->prot));
 	int rc;
 
-	rc = map_foreign_page(pfn, info->fgmfn, info->domid);
-	if (rc < 0)
-		return rc;
-	set_pte_at(info->vma->vm_mm, addr, ptep, pte);
+	rc = map_foreign_page(pfn, *info->fgmfn, info->domid);
+	*info->err_ptr++ = rc;
+	if (!rc) {
+		set_pte_at(info->vma->vm_mm, addr, ptep, pte);
+		info->mapped++;
+	}
+	info->fgmfn++;
 
 	return 0;
 }
 
-int xen_xlate_remap_gfn_range(struct vm_area_struct *vma,
+int xen_xlate_remap_gfn_array(struct vm_area_struct *vma,
 			      unsigned long addr,
-			      xen_pfn_t gfn, int nr,
-			      pgprot_t prot, unsigned domid,
+			      xen_pfn_t *mfn, int nr,
+			      int *err_ptr, pgprot_t prot,
+			      unsigned domid,
 			      struct page **pages)
 {
 	int err;
 	struct remap_data data;
+	unsigned long range = nr << PAGE_SHIFT;
 
-	/* TBD: Batching, current sole caller only does page at a time */
-	if (nr > 1)
-		return -EINVAL;
+	/* Kept here for the purpose of making sure code doesn't break
+	   x86 PVOPS */
+	BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
 
-	data.fgmfn = gfn;
-	data.prot  = prot;
+	data.fgmfn = mfn;
+	data.prot = prot;
 	data.domid = domid;
-	data.vma   = vma;
-	data.index = 0;
+	data.vma = vma;
 	data.pages = pages;
-	err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT,
+	data.index = 0;
+	data.err_ptr = err_ptr;
+	data.mapped = 0;
+
+	err = apply_to_page_range(vma->vm_mm, addr, range,
 				  remap_pte_fn, &data);
-	return err;
+	return err < 0 ? err : data.mapped;
 }
-EXPORT_SYMBOL_GPL(xen_xlate_remap_gfn_range);
+EXPORT_SYMBOL_GPL(xen_xlate_remap_gfn_array);
 
 int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma,
 			      int nr, struct page **pages)
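With this change, both xen_xlate_remap_gfn_array() and do_remap_mfn() return the number of successfully mapped frames (or a negative overall error), with per-frame status in err_ptr[]. A hypothetical caller-side sketch of interpreting that convention:

/* Illustrative caller-side handling of the new return convention. */
#include <stdio.h>

static void report(int rc, const int *err_ptr, int nr)
{
	int i;

	if (rc < 0) {
		printf("mapping failed outright: %d\n", rc);
		return;
	}
	printf("%d of %d frames mapped\n", rc, nr);
	for (i = 0; i < nr; i++)
		if (err_ptr[i])
			printf("  frame %d: error %d\n", i, err_ptr[i]);
}

int main(void)
{
	/* Pretend two of four frames failed (-2 ~ -ENOENT, -22 ~ -EINVAL). */
	int errs[] = { 0, -2, 0, -22 };

	report(2, errs, 4);
	return 0;
}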
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
@@ -27,17 +27,54 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
 void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order);
 
 struct vm_area_struct;
+
+/*
+ * xen_remap_domain_mfn_array() - map an array of foreign frames
+ * @vma:     VMA to map the pages into
+ * @addr:    Address at which to map the pages
+ * @gfn:     Array of GFNs to map
+ * @nr:      Number of entries in the GFN array
+ * @err_ptr: Returns per-GFN error status.
+ * @prot:    page protection mask
+ * @domid:   Domain owning the pages
+ * @pages:   Array of pages if this domain has an auto-translated physmap
+ *
+ * @gfn and @err_ptr may point to the same buffer; the GFNs will be
+ * overwritten by the error codes after they are mapped.
+ *
+ * Returns the number of successfully mapped frames, or a -ve error
+ * code.
+ */
+int xen_remap_domain_mfn_array(struct vm_area_struct *vma,
+			       unsigned long addr,
+			       xen_pfn_t *gfn, int nr,
+			       int *err_ptr, pgprot_t prot,
+			       unsigned domid,
+			       struct page **pages);
+
+/* xen_remap_domain_mfn_range() - map a range of foreign frames
+ * @vma:   VMA to map the pages into
+ * @addr:  Address at which to map the pages
+ * @gfn:   First GFN to map.
+ * @nr:    Number of frames to map
+ * @prot:  page protection mask
+ * @domid: Domain owning the pages
+ * @pages: Array of pages if this domain has an auto-translated physmap
+ *
+ * Returns the number of successfully mapped frames, or a -ve error
+ * code.
+ */
 int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
 			       unsigned long addr,
-			       xen_pfn_t mfn, int nr,
+			       xen_pfn_t gfn, int nr,
 			       pgprot_t prot, unsigned domid,
 			       struct page **pages);
 int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
 			       int numpgs, struct page **pages);
-int xen_xlate_remap_gfn_range(struct vm_area_struct *vma,
+int xen_xlate_remap_gfn_array(struct vm_area_struct *vma,
 			      unsigned long addr,
-			      xen_pfn_t gfn, int nr,
-			      pgprot_t prot,
+			      xen_pfn_t *gfn, int nr,
+			      int *err_ptr, pgprot_t prot,
 			      unsigned domid,
 			      struct page **pages);
 int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma,
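The header comment permits @gfn and @err_ptr to alias, which privcmd exploits by passing (int *)mfnp as the error array: each GFN just has to be consumed before its slot is overwritten with a status code. A standalone sketch of that in-place pattern (the cast mirrors privcmd's, while map_one() and its failure rule are made up):

/* Sketch of the GFN/error buffer aliasing allowed by the header. */
#include <stdio.h>

typedef unsigned long xen_pfn_t;

/* Pretend to map one frame; fails for odd GFNs. */
static int map_one(xen_pfn_t gfn)
{
	return (gfn & 1) ? -22 /* stand-in for -EINVAL */ : 0;
}

static int map_array(xen_pfn_t *gfn, int nr, int *err_ptr)
{
	int i, mapped = 0;

	for (i = 0; i < nr; i++) {
		xen_pfn_t cur = gfn[i];	/* read before overwriting */
		int rc = map_one(cur);

		err_ptr[i] = rc;	/* may alias gfn[i] */
		if (!rc)
			mapped++;
	}
	return mapped;
}

int main(void)
{
	xen_pfn_t buf[] = { 10, 11, 12, 13 };
	/* Same buffer carries GFNs in and error codes out. */
	int mapped = map_array(buf, 4, (int *)buf);

	printf("mapped %d; statuses: %d %d %d %d\n", mapped,
	       ((int *)buf)[0], ((int *)buf)[1],
	       ((int *)buf)[2], ((int *)buf)[3]);
	return 0;
}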