Do not map small IOCTL buffers to KVA, but copy.

CAM IOCTL interfaces traditionally mapped user-space data buffers to KVA.
It was nice originally, but now it takes too much to handle respective
TLB shootdowns, while small kernel memory allocations up to 64KB backed
by UMA and accompanied by copyin()/copyout() can be much cheaper.

For large buffers mapping still may have sense, and unmapped I/O would
be even better, but the last unfortunately is more tricky, since unmapped
I/O API is too specific to struct bio now.

MFC after:	2 weeks
Sponsored by:	iXsystems, Inc.
This commit is contained in:
Alexander Motin 2019-03-28 20:41:02 +00:00
parent 2f59b04af1
commit b059686a71
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=345656
2 changed files with 108 additions and 48 deletions

View file

@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
#include <sys/devicestat.h>
#include <sys/bus.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
@ -104,6 +105,9 @@ TUNABLE_INT("kern.cam.periph_noresrc_delay", &periph_noresrc_delay);
static int periph_busy_delay = 500;
TUNABLE_INT("kern.cam.periph_busy_delay", &periph_busy_delay);
static u_int periph_mapmem_thresh = 65536;
SYSCTL_UINT(_kern_cam, OID_AUTO, mapmem_thresh, CTLFLAG_RWTUN,
&periph_mapmem_thresh, 0, "Threshold for user-space buffer mapping");
void
periphdriver_register(void *data)
@ -776,12 +780,12 @@ int
cam_periph_mapmem(union ccb *ccb, struct cam_periph_map_info *mapinfo,
u_int maxmap)
{
int numbufs, i, j;
int flags[CAM_PERIPH_MAXMAPS];
int numbufs, i;
u_int8_t **data_ptrs[CAM_PERIPH_MAXMAPS];
u_int32_t lengths[CAM_PERIPH_MAXMAPS];
u_int32_t dirs[CAM_PERIPH_MAXMAPS];
bzero(mapinfo, sizeof(*mapinfo));
if (maxmap == 0)
maxmap = DFLTPHYS; /* traditional default */
else if (maxmap > MAXPHYS)
@ -892,8 +896,6 @@ cam_periph_mapmem(union ccb *ccb, struct cam_periph_map_info *mapinfo,
*/
for (i = 0; i < numbufs; i++) {
flags[i] = 0;
/*
* The userland data pointer passed in may not be page
* aligned. vmapbuf() truncates the address to a page
@ -911,15 +913,6 @@ cam_periph_mapmem(union ccb *ccb, struct cam_periph_map_info *mapinfo,
(u_long)maxmap);
return(E2BIG);
}
if (dirs[i] & CAM_DIR_OUT) {
flags[i] = BIO_WRITE;
}
if (dirs[i] & CAM_DIR_IN) {
flags[i] = BIO_READ;
}
}
/*
@ -933,6 +926,32 @@ cam_periph_mapmem(union ccb *ccb, struct cam_periph_map_info *mapinfo,
PHOLD(curproc);
for (i = 0; i < numbufs; i++) {
/* Save the user's data address. */
mapinfo->orig[i] = *data_ptrs[i];
/*
* For small buffers use malloc+copyin/copyout instead of
* mapping to KVA to avoid expensive TLB shootdowns. For
* small allocations malloc is backed by UMA, and so much
* cheaper on SMP systems.
*/
if (lengths[i] <= periph_mapmem_thresh &&
ccb->ccb_h.func_code != XPT_MMC_IO) {
*data_ptrs[i] = malloc(lengths[i], M_CAMPERIPH,
M_WAITOK);
if (dirs[i] != CAM_DIR_IN) {
if (copyin(mapinfo->orig[i], *data_ptrs[i],
lengths[i]) != 0) {
free(*data_ptrs[i], M_CAMPERIPH);
*data_ptrs[i] = mapinfo->orig[i];
goto fail;
}
} else
bzero(*data_ptrs[i], lengths[i]);
continue;
}
/*
* Get the buffer.
*/
@ -941,14 +960,12 @@ cam_periph_mapmem(union ccb *ccb, struct cam_periph_map_info *mapinfo,
/* put our pointer in the data slot */
mapinfo->bp[i]->b_data = *data_ptrs[i];
/* save the user's data address */
mapinfo->bp[i]->b_caller1 = *data_ptrs[i];
/* set the transfer length, we know it's < MAXPHYS */
mapinfo->bp[i]->b_bufsize = lengths[i];
/* set the direction */
mapinfo->bp[i]->b_iocmd = flags[i];
mapinfo->bp[i]->b_iocmd = (dirs[i] == CAM_DIR_OUT) ?
BIO_WRITE : BIO_READ;
/*
* Map the buffer into kernel memory.
@ -959,20 +976,12 @@ cam_periph_mapmem(union ccb *ccb, struct cam_periph_map_info *mapinfo,
* vmapbuf() after the useracc() check.
*/
if (vmapbuf(mapinfo->bp[i], 1) < 0) {
for (j = 0; j < i; ++j) {
*data_ptrs[j] = mapinfo->bp[j]->b_caller1;
vunmapbuf(mapinfo->bp[j]);
uma_zfree(pbuf_zone, mapinfo->bp[j]);
}
uma_zfree(pbuf_zone, mapinfo->bp[i]);
PRELE(curproc);
return(EACCES);
goto fail;
}
/* set our pointer to the new mapped area */
*data_ptrs[i] = mapinfo->bp[i]->b_data;
mapinfo->num_bufs_used++;
}
/*
@ -981,11 +990,24 @@ cam_periph_mapmem(union ccb *ccb, struct cam_periph_map_info *mapinfo,
* space with locks (on the buffer) held.
*/
for (i = 0; i < numbufs; i++) {
BUF_KERNPROC(mapinfo->bp[i]);
if (mapinfo->bp[i])
BUF_KERNPROC(mapinfo->bp[i]);
}
mapinfo->num_bufs_used = numbufs;
return(0);
fail:
for (i--; i >= 0; i--) {
if (mapinfo->bp[i]) {
vunmapbuf(mapinfo->bp[i]);
uma_zfree(pbuf_zone, mapinfo->bp[i]);
} else
free(*data_ptrs[i], M_CAMPERIPH);
*data_ptrs[i] = mapinfo->orig[i];
}
PRELE(curproc);
return(EACCES);
}
/*
@ -997,6 +1019,8 @@ cam_periph_unmapmem(union ccb *ccb, struct cam_periph_map_info *mapinfo)
{
int numbufs, i;
u_int8_t **data_ptrs[CAM_PERIPH_MAXMAPS];
u_int32_t lengths[CAM_PERIPH_MAXMAPS];
u_int32_t dirs[CAM_PERIPH_MAXMAPS];
if (mapinfo->num_bufs_used <= 0) {
/* nothing to free and the process wasn't held. */
@ -1005,37 +1029,64 @@ cam_periph_unmapmem(union ccb *ccb, struct cam_periph_map_info *mapinfo)
switch (ccb->ccb_h.func_code) {
case XPT_DEV_MATCH:
numbufs = min(mapinfo->num_bufs_used, 2);
if (numbufs == 1) {
data_ptrs[0] = (u_int8_t **)&ccb->cdm.matches;
} else {
if (ccb->cdm.pattern_buf_len > 0) {
data_ptrs[0] = (u_int8_t **)&ccb->cdm.patterns;
lengths[0] = ccb->cdm.pattern_buf_len;
dirs[0] = CAM_DIR_OUT;
data_ptrs[1] = (u_int8_t **)&ccb->cdm.matches;
lengths[1] = ccb->cdm.match_buf_len;
dirs[1] = CAM_DIR_IN;
numbufs = 2;
} else {
data_ptrs[0] = (u_int8_t **)&ccb->cdm.matches;
lengths[0] = ccb->cdm.match_buf_len;
dirs[0] = CAM_DIR_IN;
numbufs = 1;
}
break;
case XPT_SCSI_IO:
case XPT_CONT_TARGET_IO:
data_ptrs[0] = &ccb->csio.data_ptr;
numbufs = min(mapinfo->num_bufs_used, 1);
lengths[0] = ccb->csio.dxfer_len;
dirs[0] = ccb->ccb_h.flags & CAM_DIR_MASK;
numbufs = 1;
break;
case XPT_ATA_IO:
data_ptrs[0] = &ccb->ataio.data_ptr;
numbufs = min(mapinfo->num_bufs_used, 1);
lengths[0] = ccb->ataio.dxfer_len;
dirs[0] = ccb->ccb_h.flags & CAM_DIR_MASK;
numbufs = 1;
break;
case XPT_MMC_IO:
data_ptrs[0] = (u_int8_t **)&ccb->mmcio.cmd.data;
lengths[0] = sizeof(struct mmc_data *);
dirs[0] = ccb->ccb_h.flags & CAM_DIR_MASK;
data_ptrs[1] = (u_int8_t **)&ccb->mmcio.cmd.data->data;
lengths[1] = ccb->mmcio.cmd.data->len;
dirs[1] = ccb->ccb_h.flags & CAM_DIR_MASK;
numbufs = 2;
break;
case XPT_SMP_IO:
numbufs = min(mapinfo->num_bufs_used, 2);
data_ptrs[0] = &ccb->smpio.smp_request;
lengths[0] = ccb->smpio.smp_request_len;
dirs[0] = CAM_DIR_OUT;
data_ptrs[1] = &ccb->smpio.smp_response;
break;
case XPT_DEV_ADVINFO:
numbufs = min(mapinfo->num_bufs_used, 1);
data_ptrs[0] = (uint8_t **)&ccb->cdai.buf;
lengths[1] = ccb->smpio.smp_response_len;
dirs[1] = CAM_DIR_IN;
numbufs = 2;
break;
case XPT_NVME_IO:
case XPT_NVME_ADMIN:
data_ptrs[0] = &ccb->nvmeio.data_ptr;
numbufs = min(mapinfo->num_bufs_used, 1);
lengths[0] = ccb->nvmeio.dxfer_len;
dirs[0] = ccb->ccb_h.flags & CAM_DIR_MASK;
numbufs = 1;
break;
case XPT_DEV_ADVINFO:
data_ptrs[0] = (uint8_t **)&ccb->cdai.buf;
lengths[0] = ccb->cdai.bufsiz;
dirs[0] = CAM_DIR_IN;
numbufs = 1;
break;
default:
/* allow ourselves to be swapped once again */
@ -1045,14 +1096,22 @@ cam_periph_unmapmem(union ccb *ccb, struct cam_periph_map_info *mapinfo)
}
for (i = 0; i < numbufs; i++) {
if (mapinfo->bp[i]) {
/* unmap the buffer */
vunmapbuf(mapinfo->bp[i]);
/* release the buffer */
uma_zfree(pbuf_zone, mapinfo->bp[i]);
} else {
if (dirs[i] != CAM_DIR_OUT) {
copyout(*data_ptrs[i], mapinfo->orig[i],
lengths[i]);
}
free(*data_ptrs[i], M_CAMPERIPH);
}
/* Set the user's pointer back to the original value */
*data_ptrs[i] = mapinfo->bp[i]->b_caller1;
/* unmap the buffer */
vunmapbuf(mapinfo->bp[i]);
/* release the buffer */
uma_zfree(pbuf_zone, mapinfo->bp[i]);
*data_ptrs[i] = mapinfo->orig[i];
}
/* allow ourselves to be swapped once again */

View file

@ -149,6 +149,7 @@ struct cam_periph {
struct cam_periph_map_info {
int num_bufs_used;
void *orig[CAM_PERIPH_MAXMAPS];
struct buf *bp[CAM_PERIPH_MAXMAPS];
};