Merge GEOM direct dispatch changes from the projects/camlock branch.

When safety requirements are met, this allows I/O requests to bypass the
GEOM g_up/g_down threads and execute directly in the caller's context.
That avoids CPU bottlenecks in the g_up/g_down threads and saves several
context switches per I/O.

The safety requirements now defined are:
 - the caller should not hold any locks and should be reentrant;
 - the callee should not depend on GEOM's dual-threaded concurrency semantics;
 - on the way down, if the request is unmapped but the callee does not
   support unmapped I/O, the context should be sleepable;
 - kernel thread stack usage should be below 50% (see the sketch below).
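For reference, g_io_request() checks these conditions before dispatching
directly; condensed from the g_io_request() hunk below:

#ifdef GET_STACK_USAGE
	direct = (cp->flags & G_CF_DIRECT_SEND) &&
	    (pp->flags & G_PF_DIRECT_RECEIVE) &&
	    !g_is_geom_thread(curthread) &&
	    (((pp->flags & G_PF_ACCEPT_UNMAPPED) == 0 &&
	    (bp->bio_flags & BIO_UNMAPPED) != 0) || THREAD_CAN_SLEEP());
	if (direct) {
		/* Block direct execution if less than half of stack left. */
		size_t st, su;

		GET_STACK_USAGE(st, su);
		if (su * 2 > st)
			direct = 0;
	}
#else
	direct = 0;
#endif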

To keep compatibility with GEOM classes not meeting the above requirements,
new provider and consumer flags have been added:
 - G_CF_DIRECT_SEND -- consumer code meets caller requirements (request);
 - G_CF_DIRECT_RECEIVE -- consumer code meets callee requirements (done);
 - G_PF_DIRECT_SEND -- provider code meets caller requirements (done);
 - G_PF_DIRECT_RECEIVE -- provider code meets callee requirements (request).
A capable GEOM class can set them, allowing direct dispatch in cases where
it is safe; the opt-in pattern is sketched below.  If any requirement is
not met, the request is queued to the g_up or g_down thread as before.
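For illustration, a class that meets the requirements in both directions
opts in when creating its consumer and provider; this is the pattern
repeated throughout the hunks below (gp, cp and pp are the usual geom,
consumer and provider variables):

	cp = g_new_consumer(gp);
	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;

	pp = g_new_providerf(gp, "%s", gp->name);
	pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;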

The following GEOM classes were reviewed and updated to support direct
dispatch: CONCAT, DEV, DISK, GATE, MD, MIRROR, MULTIPATH, NOP, PART, RAID,
STRIPE, VFS, ZERO, ZFS::VDEV, ZFS::ZVOL, and all classes based on the
g_slice KPI (LABEL, MAP, FLASHMAP, etc.).

To declare direct completion capability, the disk(9) KPI gained a new flag
equivalent to G_PF_DIRECT_SEND: DISKFLAG_DIRECT_COMPLETION.  The da(4) and
ada(4) disk drivers now set it, thanks to the earlier CAM locking work.
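In a driver this is a one-line change at disk registration time, as the
adaregister() and daregister() hunks below show:

	softc->disk->d_flags = DISKFLAG_DIRECT_COMPLETION;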

This change more than doubles peak block storage performance on systems
with many CPUs and, together with the earlier CAM locking changes, reaches
more than 1 million IOPS (512-byte raw reads from 16 SATA SSDs on 4 HBAs
to 256 user-level threads).

Sponsored by:	iXsystems, Inc.
MFC after:	2 months
Author:	Alexander Motin
Date:	2013-10-22 08:22:19 +00:00
Parent:	c614125e67
Commit:	40ea77a036
Notes:	svn2git 2020-12-20 02:59:44 +00:00
	svn path=/head/; revision=256880
35 changed files with 465 additions and 214 deletions


@@ -1253,7 +1253,7 @@ adaregister(struct cam_periph *periph, void *arg)
maxio = min(maxio, 256 * softc->params.secsize);
softc->disk->d_maxsize = maxio;
softc->disk->d_unit = periph->unit_number;
softc->disk->d_flags = 0;
softc->disk->d_flags = DISKFLAG_DIRECT_COMPLETION;
if (softc->flags & ADA_FLAG_CAN_FLUSHCACHE)
softc->disk->d_flags |= DISKFLAG_CANFLUSHCACHE;
if (softc->flags & ADA_FLAG_CAN_TRIM) {


@@ -2125,7 +2125,7 @@ daregister(struct cam_periph *periph, void *arg)
else
softc->disk->d_maxsize = cpi.maxio;
softc->disk->d_unit = periph->unit_number;
softc->disk->d_flags = 0;
softc->disk->d_flags = DISKFLAG_DIRECT_COMPLETION;
if ((softc->quirks & DA_Q_NO_SYNC_CACHE) == 0)
softc->disk->d_flags |= DISKFLAG_CANFLUSHCACHE;
if ((cpi.hba_misc & PIM_UNMAPPED) != 0)


@@ -147,6 +147,7 @@ vdev_geom_attach(struct g_provider *pp)
ZFS_LOG(1, "Used existing consumer for %s.", pp->name);
}
}
cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
return (cp);
}


@@ -2153,6 +2153,7 @@ zvol_geom_create(const char *name)
gp->start = zvol_geom_start;
gp->access = zvol_geom_access;
pp = g_new_providerf(gp, "%s/%s", ZVOL_DRIVER, name);
pp->flags |= G_PF_DIRECT_RECEIVE | G_PF_DIRECT_SEND;
pp->sectorsize = DEV_BSIZE;
zv = kmem_zalloc(sizeof(*zv), KM_SLEEP);
@@ -2256,18 +2257,20 @@ zvol_geom_start(struct bio *bp)
zvol_state_t *zv;
boolean_t first;
zv = bp->bio_to->private;
ASSERT(zv != NULL);
switch (bp->bio_cmd) {
case BIO_FLUSH:
if (!THREAD_CAN_SLEEP())
goto enqueue;
zil_commit(zv->zv_zilog, ZVOL_OBJ);
g_io_deliver(bp, 0);
break;
case BIO_READ:
case BIO_WRITE:
case BIO_FLUSH:
zv = bp->bio_to->private;
ASSERT(zv != NULL);
mtx_lock(&zv->zv_queue_mtx);
first = (bioq_first(&zv->zv_queue) == NULL);
bioq_insert_tail(&zv->zv_queue, bp);
mtx_unlock(&zv->zv_queue_mtx);
if (first)
wakeup_one(&zv->zv_queue);
if (!THREAD_CAN_SLEEP())
goto enqueue;
zvol_strategy(bp);
break;
case BIO_GETATTR:
case BIO_DELETE:
@@ -2275,6 +2278,15 @@ zvol_geom_start(struct bio *bp)
g_io_deliver(bp, EOPNOTSUPP);
break;
}
return;
enqueue:
mtx_lock(&zv->zv_queue_mtx);
first = (bioq_first(&zv->zv_queue) == NULL);
bioq_insert_tail(&zv->zv_queue, bp);
mtx_unlock(&zv->zv_queue_mtx);
if (first)
wakeup_one(&zv->zv_queue);
}
static void
@@ -2449,6 +2461,7 @@ zvol_rename_minor(struct g_geom *gp, const char *newname)
g_wither_provider(pp, ENXIO);
pp = g_new_providerf(gp, "%s/%s", ZVOL_DRIVER, newname);
pp->flags |= G_PF_DIRECT_RECEIVE | G_PF_DIRECT_SEND;
pp->sectorsize = DEV_BSIZE;
pp->mediasize = zv->zv_volsize;
pp->private = zv;


@@ -189,6 +189,7 @@ struct md_s {
LIST_ENTRY(md_s) list;
struct bio_queue_head bio_queue;
struct mtx queue_mtx;
struct mtx stat_mtx;
struct cdev *dev;
enum md_types type;
off_t mediasize;
@@ -415,8 +416,11 @@ g_md_start(struct bio *bp)
struct md_s *sc;
sc = bp->bio_to->geom->softc;
if ((bp->bio_cmd == BIO_READ) || (bp->bio_cmd == BIO_WRITE))
if ((bp->bio_cmd == BIO_READ) || (bp->bio_cmd == BIO_WRITE)) {
mtx_lock(&sc->stat_mtx);
devstat_start_transaction_bio(sc->devstat, bp);
mtx_unlock(&sc->stat_mtx);
}
mtx_lock(&sc->queue_mtx);
bioq_disksort(&sc->bio_queue, bp);
mtx_unlock(&sc->queue_mtx);
@@ -987,6 +991,7 @@ mdnew(int unit, int *errp, enum md_types type)
sc->type = type;
bioq_init(&sc->bio_queue);
mtx_init(&sc->queue_mtx, "md bio queue", NULL, MTX_DEF);
mtx_init(&sc->stat_mtx, "md stat", NULL, MTX_DEF);
sc->unit = unit;
sprintf(sc->name, "md%d", unit);
LIST_INSERT_HEAD(&md_softc_list, sc, list);
@@ -994,6 +999,7 @@ mdnew(int unit, int *errp, enum md_types type)
if (error == 0)
return (sc);
LIST_REMOVE(sc, list);
mtx_destroy(&sc->stat_mtx);
mtx_destroy(&sc->queue_mtx);
free_unr(md_uh, sc->unit);
free(sc, M_MD);
@@ -1011,6 +1017,7 @@ mdinit(struct md_s *sc)
gp = g_new_geomf(&g_md_class, "md%d", sc->unit);
gp->softc = sc;
pp = g_new_providerf(gp, "md%d", sc->unit);
pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;
pp->mediasize = sc->mediasize;
pp->sectorsize = sc->sectorsize;
switch (sc->type) {
@@ -1206,6 +1213,7 @@ mddestroy(struct md_s *sc, struct thread *td)
while (!(sc->flags & MD_EXITING))
msleep(sc->procp, &sc->queue_mtx, PRIBIO, "mddestroy", hz / 10);
mtx_unlock(&sc->queue_mtx);
mtx_destroy(&sc->stat_mtx);
mtx_destroy(&sc->queue_mtx);
if (sc->vnode != NULL) {
vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY);


@@ -238,6 +238,27 @@ g_concat_kernel_dump(struct bio *bp)
disk->d_consumer->provider->name);
}
static void
g_concat_done(struct bio *bp)
{
struct g_concat_softc *sc;
struct bio *pbp;
pbp = bp->bio_parent;
sc = pbp->bio_to->geom->softc;
mtx_lock(&sc->sc_lock);
if (pbp->bio_error == 0)
pbp->bio_error = bp->bio_error;
pbp->bio_completed += bp->bio_completed;
pbp->bio_inbed++;
if (pbp->bio_children == pbp->bio_inbed) {
mtx_unlock(&sc->sc_lock);
g_io_deliver(pbp, pbp->bio_error);
} else
mtx_unlock(&sc->sc_lock);
g_destroy_bio(bp);
}
static void
g_concat_flush(struct g_concat_softc *sc, struct bio *bp)
{
@@ -250,23 +271,19 @@ g_concat_flush(struct g_concat_softc *sc, struct bio *bp)
for (no = 0; no < sc->sc_ndisks; no++) {
cbp = g_clone_bio(bp);
if (cbp == NULL) {
for (cbp = bioq_first(&queue); cbp != NULL;
cbp = bioq_first(&queue)) {
bioq_remove(&queue, cbp);
while ((cbp = bioq_takefirst(&queue)) != NULL)
g_destroy_bio(cbp);
}
if (bp->bio_error == 0)
bp->bio_error = ENOMEM;
g_io_deliver(bp, bp->bio_error);
return;
}
bioq_insert_tail(&queue, cbp);
cbp->bio_done = g_std_done;
cbp->bio_done = g_concat_done;
cbp->bio_caller1 = sc->sc_disks[no].d_consumer;
cbp->bio_to = sc->sc_disks[no].d_consumer->provider;
}
for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
bioq_remove(&queue, cbp);
while ((cbp = bioq_takefirst(&queue)) != NULL) {
G_CONCAT_LOGREQ(cbp, "Sending request.");
cp = cbp->bio_caller1;
cbp->bio_caller1 = NULL;
@@ -320,7 +337,10 @@ g_concat_start(struct bio *bp)
offset = bp->bio_offset;
length = bp->bio_length;
addr = bp->bio_data;
if ((bp->bio_flags & BIO_UNMAPPED) != 0)
addr = NULL;
else
addr = bp->bio_data;
end = offset + length;
bioq_init(&queue);
@@ -338,11 +358,8 @@ g_concat_start(struct bio *bp)
cbp = g_clone_bio(bp);
if (cbp == NULL) {
for (cbp = bioq_first(&queue); cbp != NULL;
cbp = bioq_first(&queue)) {
bioq_remove(&queue, cbp);
while ((cbp = bioq_takefirst(&queue)) != NULL)
g_destroy_bio(cbp);
}
if (bp->bio_error == 0)
bp->bio_error = ENOMEM;
g_io_deliver(bp, bp->bio_error);
@@ -352,11 +369,21 @@ g_concat_start(struct bio *bp)
/*
* Fill in the component buf structure.
*/
cbp->bio_done = g_std_done;
if (len == bp->bio_length)
cbp->bio_done = g_std_done;
else
cbp->bio_done = g_concat_done;
cbp->bio_offset = off;
cbp->bio_data = addr;
addr += len;
cbp->bio_length = len;
if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
cbp->bio_ma_offset += (uintptr_t)addr;
cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE;
cbp->bio_ma_offset %= PAGE_SIZE;
cbp->bio_ma_n = round_page(cbp->bio_ma_offset +
cbp->bio_length) / PAGE_SIZE;
} else
cbp->bio_data = addr;
addr += len;
cbp->bio_to = disk->d_consumer->provider;
cbp->bio_caller1 = disk;
@@ -366,8 +393,7 @@ g_concat_start(struct bio *bp)
KASSERT(length == 0,
("Length is still greater than 0 (class=%s, name=%s).",
bp->bio_to->geom->class->name, bp->bio_to->geom->name));
for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
bioq_remove(&queue, cbp);
while ((cbp = bioq_takefirst(&queue)) != NULL) {
G_CONCAT_LOGREQ(cbp, "Sending request.");
disk = cbp->bio_caller1;
cbp->bio_caller1 = NULL;
@@ -379,7 +405,7 @@ static void
g_concat_check_and_run(struct g_concat_softc *sc)
{
struct g_concat_disk *disk;
struct g_provider *pp;
struct g_provider *dp, *pp;
u_int no, sectorsize = 0;
off_t start;
@@ -388,20 +414,27 @@ g_concat_check_and_run(struct g_concat_softc *sc)
return;
pp = g_new_providerf(sc->sc_geom, "concat/%s", sc->sc_name);
pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE |
G_PF_ACCEPT_UNMAPPED;
start = 0;
for (no = 0; no < sc->sc_ndisks; no++) {
disk = &sc->sc_disks[no];
dp = disk->d_consumer->provider;
disk->d_start = start;
disk->d_end = disk->d_start +
disk->d_consumer->provider->mediasize;
disk->d_end = disk->d_start + dp->mediasize;
if (sc->sc_type == G_CONCAT_TYPE_AUTOMATIC)
disk->d_end -= disk->d_consumer->provider->sectorsize;
disk->d_end -= dp->sectorsize;
start = disk->d_end;
if (no == 0)
sectorsize = disk->d_consumer->provider->sectorsize;
else {
sectorsize = lcm(sectorsize,
disk->d_consumer->provider->sectorsize);
sectorsize = dp->sectorsize;
else
sectorsize = lcm(sectorsize, dp->sectorsize);
/* A provider underneath us doesn't support unmapped */
if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) {
G_CONCAT_DEBUG(1, "Cancelling unmapped "
"because of %s.", dp->name);
pp->flags &= ~G_PF_ACCEPT_UNMAPPED;
}
}
pp->sectorsize = sectorsize;
@@ -468,6 +501,7 @@ g_concat_add_disk(struct g_concat_softc *sc, struct g_provider *pp, u_int no)
fcp = LIST_FIRST(&gp->consumer);
cp = g_new_consumer(gp);
cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
error = g_attach(cp, pp);
if (error != 0) {
g_destroy_consumer(cp);
@@ -557,6 +591,7 @@ g_concat_create(struct g_class *mp, const struct g_concat_metadata *md,
for (no = 0; no < sc->sc_ndisks; no++)
sc->sc_disks[no].d_consumer = NULL;
sc->sc_type = type;
mtx_init(&sc->sc_lock, "gconcat lock", NULL, MTX_DEF);
gp->softc = sc;
sc->sc_geom = gp;
@@ -605,6 +640,7 @@ g_concat_destroy(struct g_concat_softc *sc, boolean_t force)
KASSERT(sc->sc_provider == NULL, ("Provider still exists? (device=%s)",
gp->name));
free(sc->sc_disks, M_CONCAT);
mtx_destroy(&sc->sc_lock);
free(sc, M_CONCAT);
G_CONCAT_DEBUG(0, "Device %s destroyed.", gp->name);


@@ -83,6 +83,7 @@ struct g_concat_softc {
struct g_concat_disk *sc_disks;
uint16_t sc_ndisks;
struct mtx sc_lock;
};
#define sc_name sc_geom->name
#endif /* _KERNEL */


@@ -91,6 +91,7 @@ static struct mtx g_gate_units_lock;
static int
g_gate_destroy(struct g_gate_softc *sc, boolean_t force)
{
struct bio_queue_head queue;
struct g_provider *pp;
struct g_consumer *cp;
struct g_geom *gp;
@@ -113,21 +114,22 @@ g_gate_destroy(struct g_gate_softc *sc, boolean_t force)
pp->flags |= G_PF_WITHER;
g_orphan_provider(pp, ENXIO);
callout_drain(&sc->sc_callout);
bioq_init(&queue);
mtx_lock(&sc->sc_queue_mtx);
while ((bp = bioq_first(&sc->sc_inqueue)) != NULL) {
bioq_remove(&sc->sc_inqueue, bp);
while ((bp = bioq_takefirst(&sc->sc_inqueue)) != NULL) {
sc->sc_queue_count--;
G_GATE_LOGREQ(1, bp, "Request canceled.");
g_io_deliver(bp, ENXIO);
bioq_insert_tail(&queue, bp);
}
while ((bp = bioq_first(&sc->sc_outqueue)) != NULL) {
bioq_remove(&sc->sc_outqueue, bp);
while ((bp = bioq_takefirst(&sc->sc_outqueue)) != NULL) {
sc->sc_queue_count--;
G_GATE_LOGREQ(1, bp, "Request canceled.");
g_io_deliver(bp, ENXIO);
bioq_insert_tail(&queue, bp);
}
mtx_unlock(&sc->sc_queue_mtx);
g_topology_unlock();
while ((bp = bioq_takefirst(&queue)) != NULL) {
G_GATE_LOGREQ(1, bp, "Request canceled.");
g_io_deliver(bp, ENXIO);
}
mtx_lock(&g_gate_units_lock);
/* One reference is ours. */
sc->sc_ref--;
@@ -334,6 +336,7 @@ g_gate_getunit(int unit, int *errorp)
static void
g_gate_guard(void *arg)
{
struct bio_queue_head queue;
struct g_gate_softc *sc;
struct bintime curtime;
struct bio *bp, *bp2;
@@ -341,24 +344,27 @@ g_gate_guard(void *arg)
sc = arg;
binuptime(&curtime);
g_gate_hold(sc->sc_unit, NULL);
bioq_init(&queue);
mtx_lock(&sc->sc_queue_mtx);
TAILQ_FOREACH_SAFE(bp, &sc->sc_inqueue.queue, bio_queue, bp2) {
if (curtime.sec - bp->bio_t0.sec < 5)
continue;
bioq_remove(&sc->sc_inqueue, bp);
sc->sc_queue_count--;
G_GATE_LOGREQ(1, bp, "Request timeout.");
g_io_deliver(bp, EIO);
bioq_insert_tail(&queue, bp);
}
TAILQ_FOREACH_SAFE(bp, &sc->sc_outqueue.queue, bio_queue, bp2) {
if (curtime.sec - bp->bio_t0.sec < 5)
continue;
bioq_remove(&sc->sc_outqueue, bp);
sc->sc_queue_count--;
bioq_insert_tail(&queue, bp);
}
mtx_unlock(&sc->sc_queue_mtx);
while ((bp = bioq_takefirst(&queue)) != NULL) {
G_GATE_LOGREQ(1, bp, "Request timeout.");
g_io_deliver(bp, EIO);
}
mtx_unlock(&sc->sc_queue_mtx);
if ((sc->sc_flags & G_GATE_FLAG_DESTROY) == 0) {
callout_reset(&sc->sc_callout, sc->sc_timeout * hz,
g_gate_guard, sc);
@@ -542,6 +548,7 @@ g_gate_create(struct g_gate_ctl_create *ggio)
if (ropp != NULL) {
cp = g_new_consumer(gp);
cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
error = g_attach(cp, ropp);
if (error != 0) {
G_GATE_DEBUG(1, "Unable to attach to %s.", ropp->name);
@@ -560,6 +567,7 @@ g_gate_create(struct g_gate_ctl_create *ggio)
ggio->gctl_unit = sc->sc_unit;
pp = g_new_providerf(gp, "%s", name);
pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;
pp->mediasize = ggio->gctl_mediasize;
pp->sectorsize = ggio->gctl_sectorsize;
sc->sc_provider = pp;
@@ -636,6 +644,7 @@ g_gate_modify(struct g_gate_softc *sc, struct g_gate_ctl_modify *ggio)
return (EINVAL);
}
cp = g_new_consumer(sc->sc_provider->geom);
cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
error = g_attach(cp, pp);
if (error != 0) {
G_GATE_DEBUG(1, "Unable to attach to %s.",


@@ -177,6 +177,8 @@ struct g_consumer {
int flags;
#define G_CF_SPOILED 0x1
#define G_CF_ORPHAN 0x4
#define G_CF_DIRECT_SEND 0x10
#define G_CF_DIRECT_RECEIVE 0x20
struct devstat *stat;
u_int nstart, nend;
@@ -206,6 +208,8 @@ struct g_provider {
#define G_PF_WITHER 0x2
#define G_PF_ORPHAN 0x4
#define G_PF_ACCEPT_UNMAPPED 0x8
#define G_PF_DIRECT_SEND 0x10
#define G_PF_DIRECT_RECEIVE 0x20
/* Two fields for the implementing class to use */
void *private;
@@ -393,6 +397,8 @@ g_free(void *ptr)
}; \
DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_FIRST);
int g_is_geom_thread(struct thread *td);
#endif /* _KERNEL */
/* geom_ctl.c */


@@ -222,6 +222,7 @@ g_dev_taste(struct g_class *mp, struct g_provider *pp, int insist __unused)
mtx_init(&sc->sc_mtx, "g_dev", NULL, MTX_DEF);
cp = g_new_consumer(gp);
cp->private = sc;
cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
error = g_attach(cp, pp);
KASSERT(error == 0,
("g_dev_taste(%s) failed to g_attach, err=%d", pp->name, error));


@@ -66,6 +66,7 @@ struct g_disk_softc {
struct sysctl_oid *sysctl_tree;
char led[64];
uint32_t state;
struct mtx start_mtx;
};
static g_access_t g_disk_access;
@@ -255,6 +256,25 @@ g_disk_done(struct bio *bp)
g_destroy_bio(bp);
}
static void
g_disk_done_single(struct bio *bp)
{
struct bintime now;
struct g_disk_softc *sc;
bp->bio_completed = bp->bio_length - bp->bio_resid;
bp->bio_done = (void *)bp->bio_to;
bp->bio_to = LIST_FIRST(&bp->bio_disk->d_geom->provider);
if ((bp->bio_cmd & (BIO_READ|BIO_WRITE|BIO_DELETE)) != 0) {
binuptime(&now);
sc = bp->bio_to->private;
mtx_lock(&sc->done_mtx);
devstat_end_transaction_bio_bt(sc->dp->d_devstat, bp, &now);
mtx_unlock(&sc->done_mtx);
}
g_io_deliver(bp, bp->bio_error);
}
static int
g_disk_ioctl(struct g_provider *pp, u_long cmd, void * data, int fflag, struct thread *td)
{
@@ -280,7 +300,7 @@ g_disk_start(struct bio *bp)
struct disk *dp;
struct g_disk_softc *sc;
int error;
off_t off;
off_t d_maxsize, off;
sc = bp->bio_to->private;
if (sc == NULL || (dp = sc->dp) == NULL || dp->d_destroyed) {
@@ -297,6 +317,22 @@ g_disk_start(struct bio *bp)
/* fall-through */
case BIO_READ:
case BIO_WRITE:
d_maxsize = (bp->bio_cmd == BIO_DELETE) ?
dp->d_delmaxsize : dp->d_maxsize;
if (bp->bio_length <= d_maxsize) {
bp->bio_disk = dp;
bp->bio_to = (void *)bp->bio_done;
bp->bio_done = g_disk_done_single;
bp->bio_pblkno = bp->bio_offset / dp->d_sectorsize;
bp->bio_bcount = bp->bio_length;
mtx_lock(&sc->start_mtx);
devstat_start_transaction_bio(dp->d_devstat, bp);
mtx_unlock(&sc->start_mtx);
g_disk_lock_giant(dp);
dp->d_strategy(bp);
g_disk_unlock_giant(dp);
break;
}
off = 0;
bp3 = NULL;
bp2 = g_clone_bio(bp);
@@ -305,10 +341,6 @@ g_disk_start(struct bio *bp)
break;
}
do {
off_t d_maxsize;
d_maxsize = (bp->bio_cmd == BIO_DELETE) ?
dp->d_delmaxsize : dp->d_maxsize;
bp2->bio_offset += off;
bp2->bio_length -= off;
if ((bp->bio_flags & BIO_UNMAPPED) == 0) {
@@ -349,7 +381,9 @@ g_disk_start(struct bio *bp)
bp2->bio_pblkno = bp2->bio_offset / dp->d_sectorsize;
bp2->bio_bcount = bp2->bio_length;
bp2->bio_disk = dp;
mtx_lock(&sc->start_mtx);
devstat_start_transaction_bio(dp->d_devstat, bp2);
mtx_unlock(&sc->start_mtx);
g_disk_lock_giant(dp);
dp->d_strategy(bp2);
g_disk_unlock_giant(dp);
@@ -405,15 +439,11 @@ g_disk_start(struct bio *bp)
error = EOPNOTSUPP;
break;
}
bp2 = g_clone_bio(bp);
if (bp2 == NULL) {
g_io_deliver(bp, ENOMEM);
return;
}
bp2->bio_done = g_disk_done;
bp2->bio_disk = dp;
bp->bio_disk = dp;
bp->bio_to = (void *)bp->bio_done;
bp->bio_done = g_disk_done_single;
g_disk_lock_giant(dp);
dp->d_strategy(bp2);
dp->d_strategy(bp);
g_disk_unlock_giant(dp);
break;
default:
@@ -518,17 +548,24 @@ g_disk_create(void *arg, int flag)
g_topology_assert();
dp = arg;
sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
mtx_init(&sc->start_mtx, "g_disk_start", NULL, MTX_DEF);
mtx_init(&sc->done_mtx, "g_disk_done", NULL, MTX_DEF);
sc->dp = dp;
gp = g_new_geomf(&g_disk_class, "%s%d", dp->d_name, dp->d_unit);
gp->softc = sc;
pp = g_new_providerf(gp, "%s", gp->name);
devstat_remove_entry(pp->stat);
pp->stat = NULL;
dp->d_devstat->id = pp;
pp->mediasize = dp->d_mediasize;
pp->sectorsize = dp->d_sectorsize;
pp->stripeoffset = dp->d_stripeoffset;
pp->stripesize = dp->d_stripesize;
if ((dp->d_flags & DISKFLAG_UNMAPPED_BIO) != 0)
pp->flags |= G_PF_ACCEPT_UNMAPPED;
if ((dp->d_flags & DISKFLAG_DIRECT_COMPLETION) != 0)
pp->flags |= G_PF_DIRECT_SEND;
pp->flags |= G_PF_DIRECT_RECEIVE;
if (bootverbose)
printf("GEOM: new disk %s\n", gp->name);
sysctl_ctx_init(&sc->sysctl_ctx);
@@ -577,6 +614,7 @@ g_disk_providergone(struct g_provider *pp)
pp->private = NULL;
pp->geom->softc = NULL;
mtx_destroy(&sc->done_mtx);
mtx_destroy(&sc->start_mtx);
g_free(sc);
}


@@ -107,6 +107,7 @@ struct disk {
#define DISKFLAG_CANDELETE 0x4
#define DISKFLAG_CANFLUSHCACHE 0x8
#define DISKFLAG_UNMAPPED_BIO 0x10
#define DISKFLAG_DIRECT_COMPLETION 0x20
struct disk *disk_alloc(void);
void disk_create(struct disk *disk, int version);


@@ -39,6 +39,9 @@ LIST_HEAD(class_list_head, g_class);
TAILQ_HEAD(g_tailq_head, g_geom);
extern int g_collectstats;
#define G_STATS_PROVIDERS 1 /* Collect I/O stats for providers */
#define G_STATS_CONSUMERS 2 /* Collect I/O stats for consumers */
extern int g_debugflags;
/*
* 1 G_T_TOPOLOGY


@@ -65,6 +65,8 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_extern.h>
#include <vm/vm_map.h>
static int g_io_transient_map_bio(struct bio *bp);
static struct g_bioq g_bio_run_down;
static struct g_bioq g_bio_run_up;
static struct g_bioq g_bio_run_task;
@@ -310,6 +312,8 @@ g_io_check(struct bio *bp)
{
struct g_consumer *cp;
struct g_provider *pp;
off_t excess;
int error;
cp = bp->bio_from;
pp = bp->bio_to;
@@ -354,11 +358,44 @@ g_io_check(struct bio *bp)
return (EIO);
if (bp->bio_offset > pp->mediasize)
return (EIO);
/* Truncate requests to the end of providers media. */
excess = bp->bio_offset + bp->bio_length;
if (excess > bp->bio_to->mediasize) {
KASSERT((bp->bio_flags & BIO_UNMAPPED) == 0 ||
round_page(bp->bio_ma_offset +
bp->bio_length) / PAGE_SIZE == bp->bio_ma_n,
("excess bio %p too short", bp));
excess -= bp->bio_to->mediasize;
bp->bio_length -= excess;
if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
bp->bio_ma_n = round_page(bp->bio_ma_offset +
bp->bio_length) / PAGE_SIZE;
}
if (excess > 0)
CTR3(KTR_GEOM, "g_down truncated bio "
"%p provider %s by %d", bp,
bp->bio_to->name, excess);
}
/* Deliver zero length transfers right here. */
if (bp->bio_length == 0) {
CTR2(KTR_GEOM, "g_down terminated 0-length "
"bp %p provider %s", bp, bp->bio_to->name);
return (0);
}
if ((bp->bio_flags & BIO_UNMAPPED) != 0 &&
(bp->bio_to->flags & G_PF_ACCEPT_UNMAPPED) == 0 &&
(bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) {
if ((error = g_io_transient_map_bio(bp)) >= 0)
return (error);
}
break;
default:
break;
}
return (0);
return (EJUSTRETURN);
}
/*
@@ -422,7 +459,8 @@ void
g_io_request(struct bio *bp, struct g_consumer *cp)
{
struct g_provider *pp;
int first;
struct mtx *mtxp;
int direct, error, first;
KASSERT(cp != NULL, ("NULL cp in g_io_request"));
KASSERT(bp != NULL, ("NULL bp in g_io_request"));
@@ -472,40 +510,71 @@ g_io_request(struct bio *bp, struct g_consumer *cp)
KASSERT(!(bp->bio_flags & BIO_ONQUEUE),
("Bio already on queue bp=%p", bp));
bp->bio_flags |= BIO_ONQUEUE;
if (g_collectstats)
if ((g_collectstats & G_STATS_CONSUMERS) != 0 ||
((g_collectstats & G_STATS_PROVIDERS) != 0 && pp->stat != NULL))
binuptime(&bp->bio_t0);
else
getbinuptime(&bp->bio_t0);
#ifdef GET_STACK_USAGE
direct = (cp->flags & G_CF_DIRECT_SEND) &&
(pp->flags & G_PF_DIRECT_RECEIVE) &&
!g_is_geom_thread(curthread) &&
(((pp->flags & G_PF_ACCEPT_UNMAPPED) == 0 &&
(bp->bio_flags & BIO_UNMAPPED) != 0) || THREAD_CAN_SLEEP());
if (direct) {
/* Block direct execution if less then half of stack left. */
size_t st, su;
GET_STACK_USAGE(st, su);
if (su * 2 > st)
direct = 0;
}
#else
direct = 0;
#endif
if (!TAILQ_EMPTY(&g_classifier_tailq) && !bp->bio_classifier1) {
g_bioq_lock(&g_bio_run_down);
g_run_classifiers(bp);
g_bioq_unlock(&g_bio_run_down);
}
/*
* The statistics collection is lockless, as such, but we
* can not update one instance of the statistics from more
* than one thread at a time, so grab the lock first.
*
* We also use the lock to protect the list of classifiers.
*/
g_bioq_lock(&g_bio_run_down);
if (!TAILQ_EMPTY(&g_classifier_tailq) && !bp->bio_classifier1)
g_run_classifiers(bp);
if (g_collectstats & 1)
mtxp = mtx_pool_find(mtxpool_sleep, pp);
mtx_lock(mtxp);
if (g_collectstats & G_STATS_PROVIDERS)
devstat_start_transaction(pp->stat, &bp->bio_t0);
if (g_collectstats & 2)
if (g_collectstats & G_STATS_CONSUMERS)
devstat_start_transaction(cp->stat, &bp->bio_t0);
pp->nstart++;
cp->nstart++;
first = TAILQ_EMPTY(&g_bio_run_down.bio_queue);
TAILQ_INSERT_TAIL(&g_bio_run_down.bio_queue, bp, bio_queue);
g_bio_run_down.bio_queue_length++;
g_bioq_unlock(&g_bio_run_down);
mtx_unlock(mtxp);
/* Pass it on down. */
if (first)
wakeup(&g_wait_down);
if (direct) {
error = g_io_check(bp);
if (error >= 0) {
CTR3(KTR_GEOM, "g_io_request g_io_check on bp %p "
"provider %s returned %d", bp, bp->bio_to->name,
error);
g_io_deliver(bp, error);
return;
}
bp->bio_to->geom->start(bp);
} else {
g_bioq_lock(&g_bio_run_down);
first = TAILQ_EMPTY(&g_bio_run_down.bio_queue);
TAILQ_INSERT_TAIL(&g_bio_run_down.bio_queue, bp, bio_queue);
bp->bio_flags |= BIO_ONQUEUE;
g_bio_run_down.bio_queue_length++;
g_bioq_unlock(&g_bio_run_down);
/* Pass it on down. */
if (first)
wakeup(&g_wait_down);
}
}
void
@@ -514,7 +583,8 @@ g_io_deliver(struct bio *bp, int error)
struct bintime now;
struct g_consumer *cp;
struct g_provider *pp;
int first;
struct mtx *mtxp;
int direct, first;
KASSERT(bp != NULL, ("NULL bp in g_io_deliver"));
pp = bp->bio_to;
@@ -560,33 +630,55 @@ g_io_deliver(struct bio *bp, int error)
bp->bio_bcount = bp->bio_length;
bp->bio_resid = bp->bio_bcount - bp->bio_completed;
#ifdef GET_STACK_USAGE
direct = (pp->flags & G_PF_DIRECT_SEND) &&
(cp->flags & G_CF_DIRECT_RECEIVE) &&
!g_is_geom_thread(curthread);
if (direct) {
/* Block direct execution if less then half of stack left. */
size_t st, su;
GET_STACK_USAGE(st, su);
if (su * 2 > st)
direct = 0;
}
#else
direct = 0;
#endif
/*
* The statistics collection is lockless, as such, but we
* can not update one instance of the statistics from more
* than one thread at a time, so grab the lock first.
*/
if (g_collectstats)
if ((g_collectstats & G_STATS_CONSUMERS) != 0 ||
((g_collectstats & G_STATS_PROVIDERS) != 0 && pp->stat != NULL))
binuptime(&now);
g_bioq_lock(&g_bio_run_up);
if (g_collectstats & 1)
mtxp = mtx_pool_find(mtxpool_sleep, cp);
mtx_lock(mtxp);
if (g_collectstats & G_STATS_PROVIDERS)
devstat_end_transaction_bio_bt(pp->stat, bp, &now);
if (g_collectstats & 2)
if (g_collectstats & G_STATS_CONSUMERS)
devstat_end_transaction_bio_bt(cp->stat, bp, &now);
cp->nend++;
pp->nend++;
mtx_unlock(mtxp);
if (error != ENOMEM) {
bp->bio_error = error;
first = TAILQ_EMPTY(&g_bio_run_up.bio_queue);
TAILQ_INSERT_TAIL(&g_bio_run_up.bio_queue, bp, bio_queue);
bp->bio_flags |= BIO_ONQUEUE;
g_bio_run_up.bio_queue_length++;
g_bioq_unlock(&g_bio_run_up);
if (first)
wakeup(&g_wait_up);
if (direct) {
biodone(bp);
} else {
g_bioq_lock(&g_bio_run_up);
first = TAILQ_EMPTY(&g_bio_run_up.bio_queue);
TAILQ_INSERT_TAIL(&g_bio_run_up.bio_queue, bp, bio_queue);
bp->bio_flags |= BIO_ONQUEUE;
g_bio_run_up.bio_queue_length++;
g_bioq_unlock(&g_bio_run_up);
if (first)
wakeup(&g_wait_up);
}
return;
}
g_bioq_unlock(&g_bio_run_up);
if (bootverbose)
printf("ENOMEM %p on %p(%s)\n", bp, pp, pp->name);
@@ -642,11 +734,10 @@ g_io_transient_map_bio(struct bio *bp)
if (vmem_alloc(transient_arena, size, M_BESTFIT | M_NOWAIT, &addr)) {
if (transient_map_retries != 0 &&
retried >= transient_map_retries) {
g_io_deliver(bp, EDEADLK/* XXXKIB */);
CTR2(KTR_GEOM, "g_down cannot map bp %p provider %s",
bp, bp->bio_to->name);
atomic_add_int(&transient_map_hard_failures, 1);
return (1);
return (EDEADLK/* XXXKIB */);
} else {
/*
* Naive attempt to quisce the I/O to get more
@@ -666,14 +757,13 @@ g_io_transient_map_bio(struct bio *bp)
bp->bio_data = (caddr_t)addr + bp->bio_ma_offset;
bp->bio_flags |= BIO_TRANSIENT_MAPPING;
bp->bio_flags &= ~BIO_UNMAPPED;
return (0);
return (EJUSTRETURN);
}
void
g_io_schedule_down(struct thread *tp __unused)
{
struct bio *bp;
off_t excess;
int error;
for(;;) {
@@ -692,59 +782,15 @@ g_io_schedule_down(struct thread *tp __unused)
pause("g_down", hz/10);
pace--;
}
CTR2(KTR_GEOM, "g_down processing bp %p provider %s", bp,
bp->bio_to->name);
error = g_io_check(bp);
if (error) {
if (error >= 0) {
CTR3(KTR_GEOM, "g_down g_io_check on bp %p provider "
"%s returned %d", bp, bp->bio_to->name, error);
g_io_deliver(bp, error);
continue;
}
CTR2(KTR_GEOM, "g_down processing bp %p provider %s", bp,
bp->bio_to->name);
switch (bp->bio_cmd) {
case BIO_READ:
case BIO_WRITE:
case BIO_DELETE:
/* Truncate requests to the end of providers media. */
/*
* XXX: What if we truncate because of offset being
* bad, not length?
*/
excess = bp->bio_offset + bp->bio_length;
if (excess > bp->bio_to->mediasize) {
KASSERT((bp->bio_flags & BIO_UNMAPPED) == 0 ||
round_page(bp->bio_ma_offset +
bp->bio_length) / PAGE_SIZE == bp->bio_ma_n,
("excess bio %p too short", bp));
excess -= bp->bio_to->mediasize;
bp->bio_length -= excess;
if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
bp->bio_ma_n = round_page(
bp->bio_ma_offset +
bp->bio_length) / PAGE_SIZE;
}
if (excess > 0)
CTR3(KTR_GEOM, "g_down truncated bio "
"%p provider %s by %d", bp,
bp->bio_to->name, excess);
}
/* Deliver zero length transfers right here. */
if (bp->bio_length == 0) {
g_io_deliver(bp, 0);
CTR2(KTR_GEOM, "g_down terminated 0-length "
"bp %p provider %s", bp, bp->bio_to->name);
continue;
}
break;
default:
break;
}
if ((bp->bio_flags & BIO_UNMAPPED) != 0 &&
(bp->bio_to->flags & G_PF_ACCEPT_UNMAPPED) == 0 &&
(bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) {
if (g_io_transient_map_bio(bp))
continue;
}
THREAD_NO_SLEEPING();
CTR4(KTR_GEOM, "g_down starting bp %p provider %s off %ld "
"len %ld", bp, bp->bio_to->name, bp->bio_offset,


@@ -124,6 +124,13 @@ g_event_procbody(void *arg)
/* NOTREACHED */
}
int
g_is_geom_thread(struct thread *td)
{
return (td == g_up_td || td == g_down_td || td == g_event_td);
}
static void
geom_shutdown(void *foo __unused)
{


@@ -396,8 +396,10 @@ g_slice_config(struct g_geom *gp, u_int idx, int how, off_t offset, off_t length
pp->stripeoffset = pp2->stripeoffset + offset;
if (pp->stripesize > 0)
pp->stripeoffset %= pp->stripesize;
if (gsp->nhotspot == 0)
if (gsp->nhotspot == 0) {
pp->flags |= pp2->flags & G_PF_ACCEPT_UNMAPPED;
pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;
}
if (0 && bootverbose)
printf("GEOM: Configure %s, start %jd length %jd end %jd\n",
pp->name, (intmax_t)offset, (intmax_t)length,
@@ -430,16 +432,20 @@ g_slice_conf_hot(struct g_geom *gp, u_int idx, off_t offset, off_t length, int r
{
struct g_slicer *gsp;
struct g_slice_hot *gsl, *gsl2;
struct g_consumer *cp;
struct g_provider *pp;
g_trace(G_T_TOPOLOGY, "g_slice_conf_hot(%s, idx: %d, off: %jd, len: %jd)",
gp->name, idx, (intmax_t)offset, (intmax_t)length);
g_topology_assert();
gsp = gp->softc;
/* Deny unmapped I/O if hotspots are used. */
/* Deny unmapped I/O and direct dispatch if hotspots are used. */
if (gsp->nhotspot == 0) {
LIST_FOREACH(pp, &gp->provider, provider)
pp->flags &= ~G_PF_ACCEPT_UNMAPPED;
pp->flags &= ~(G_PF_ACCEPT_UNMAPPED |
G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE);
LIST_FOREACH(cp, &gp->consumer, consumer)
cp->flags &= ~(G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE);
}
gsl = gsp->hotspot;
if(idx >= gsp->nhotspot) {
@@ -511,6 +517,7 @@ g_slice_new(struct g_class *mp, u_int slices, struct g_provider *pp, struct g_co
if (gp->class->destroy_geom == NULL)
gp->class->destroy_geom = g_slice_destroy_geom;
cp = g_new_consumer(gp);
cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
error = g_attach(cp, pp);
if (error == 0)
error = g_access(cp, 1, 0, 0);


@@ -102,14 +102,10 @@ g_vfs_done(struct bio *bip)
/*
* Collect statistics on synchronous and asynchronous read
* and write counts for disks that have associated filesystems.
* Since this run by the g_up thread it is single threaded and
* we do not need to use atomic increments on the counters.
*/
bp = bip->bio_caller2;
vp = bp->b_vp;
if (vp == NULL) {
mp = NULL;
} else {
if (vp != NULL) {
/*
* If not a disk vnode, use its associated mount point
* otherwise use the mountpoint associated with the disk.
@@ -122,20 +118,20 @@ g_vfs_done(struct bio *bip)
mp = vp->v_mount;
else
mp = cdevp->si_mountpt;
VI_UNLOCK(vp);
}
if (mp != NULL) {
if (bp->b_iocmd == BIO_WRITE) {
if (LK_HOLDER(bp->b_lock.lk_lock) == LK_KERNPROC)
mp->mnt_stat.f_asyncwrites++;
else
mp->mnt_stat.f_syncwrites++;
} else {
if (LK_HOLDER(bp->b_lock.lk_lock) == LK_KERNPROC)
mp->mnt_stat.f_asyncreads++;
else
mp->mnt_stat.f_syncreads++;
if (mp != NULL) {
if (bp->b_iocmd == BIO_READ) {
if (LK_HOLDER(bp->b_lock.lk_lock) == LK_KERNPROC)
mp->mnt_stat.f_asyncreads++;
else
mp->mnt_stat.f_syncreads++;
} else if (bp->b_iocmd == BIO_WRITE) {
if (LK_HOLDER(bp->b_lock.lk_lock) == LK_KERNPROC)
mp->mnt_stat.f_asyncwrites++;
else
mp->mnt_stat.f_syncwrites++;
}
}
VI_UNLOCK(vp);
}
cp = bip->bio_from;
@@ -260,6 +256,7 @@ g_vfs_open(struct vnode *vp, struct g_consumer **cpp, const char *fsname, int wr
vnode_create_vobject(vp, pp->mediasize, curthread);
*cpp = cp;
cp->private = vp;
cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
bo->bo_ops = g_vfs_bufops;
bo->bo_private = cp;
bo->bo_bsize = pp->sectorsize;


@@ -394,6 +394,7 @@ g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
g_topology_lock();
cp = g_new_consumer(disk->d_softc->sc_geom);
cp->flags |= G_CF_DIRECT_RECEIVE;
error = g_attach(cp, pp);
if (error != 0) {
g_destroy_consumer(cp);
@@ -554,6 +555,7 @@ g_mirror_destroy_device(struct g_mirror_softc *sc)
g_topology_unlock();
mtx_destroy(&sc->sc_queue_mtx);
mtx_destroy(&sc->sc_events_mtx);
mtx_destroy(&sc->sc_done_mtx);
sx_xunlock(&sc->sc_lock);
sx_destroy(&sc->sc_lock);
}
@@ -851,6 +853,27 @@ g_mirror_unidle(struct g_mirror_softc *sc)
}
}
static void
g_mirror_flush_done(struct bio *bp)
{
struct g_mirror_softc *sc;
struct bio *pbp;
pbp = bp->bio_parent;
sc = pbp->bio_to->geom->softc;
mtx_lock(&sc->sc_done_mtx);
if (pbp->bio_error == 0)
pbp->bio_error = bp->bio_error;
pbp->bio_completed += bp->bio_completed;
pbp->bio_inbed++;
if (pbp->bio_children == pbp->bio_inbed) {
mtx_unlock(&sc->sc_done_mtx);
g_io_deliver(pbp, pbp->bio_error);
} else
mtx_unlock(&sc->sc_done_mtx);
g_destroy_bio(bp);
}
static void
g_mirror_done(struct bio *bp)
{
@@ -1037,23 +1060,19 @@ g_mirror_flush(struct g_mirror_softc *sc, struct bio *bp)
continue;
cbp = g_clone_bio(bp);
if (cbp == NULL) {
for (cbp = bioq_first(&queue); cbp != NULL;
cbp = bioq_first(&queue)) {
bioq_remove(&queue, cbp);
while ((cbp = bioq_takefirst(&queue)) != NULL)
g_destroy_bio(cbp);
}
if (bp->bio_error == 0)
bp->bio_error = ENOMEM;
g_io_deliver(bp, bp->bio_error);
return;
}
bioq_insert_tail(&queue, cbp);
cbp->bio_done = g_std_done;
cbp->bio_done = g_mirror_flush_done;
cbp->bio_caller1 = disk;
cbp->bio_to = disk->d_consumer->provider;
}
for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
bioq_remove(&queue, cbp);
while ((cbp = bioq_takefirst(&queue)) != NULL) {
G_MIRROR_LOGREQ(3, cbp, "Sending request.");
disk = cbp->bio_caller1;
cbp->bio_caller1 = NULL;
@@ -1538,11 +1557,8 @@ g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
continue;
cbp = g_clone_bio(bp);
if (cbp == NULL) {
for (cbp = bioq_first(&queue); cbp != NULL;
cbp = bioq_first(&queue)) {
while ((cbp = bioq_takefirst(&queue)) != NULL)
bioq_remove(&queue, cbp);
g_destroy_bio(cbp);
}
if (bp->bio_error == 0)
bp->bio_error = ENOMEM;
g_io_deliver(bp, bp->bio_error);
@@ -1561,8 +1577,7 @@ g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
offset += cbp->bio_length;
data += cbp->bio_length;
}
for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
bioq_remove(&queue, cbp);
while ((cbp = bioq_takefirst(&queue)) != NULL) {
G_MIRROR_LOGREQ(3, cbp, "Sending request.");
disk = cbp->bio_caller1;
cbp->bio_caller1 = NULL;
@@ -1643,11 +1658,8 @@ g_mirror_register_request(struct bio *bp)
continue;
cbp = g_clone_bio(bp);
if (cbp == NULL) {
for (cbp = bioq_first(&queue); cbp != NULL;
cbp = bioq_first(&queue)) {
bioq_remove(&queue, cbp);
while ((cbp = bioq_takefirst(&queue)) != NULL)
g_destroy_bio(cbp);
}
if (bp->bio_error == 0)
bp->bio_error = ENOMEM;
g_io_deliver(bp, bp->bio_error);
@@ -1662,9 +1674,7 @@ g_mirror_register_request(struct bio *bp)
("Consumer %s not opened (r%dw%de%d).",
cp->provider->name, cp->acr, cp->acw, cp->ace));
}
for (cbp = bioq_first(&queue); cbp != NULL;
cbp = bioq_first(&queue)) {
bioq_remove(&queue, cbp);
while ((cbp = bioq_takefirst(&queue)) != NULL) {
G_MIRROR_LOGREQ(3, cbp, "Sending request.");
cp = cbp->bio_caller1;
cbp->bio_caller1 = NULL;
@@ -1920,6 +1930,7 @@ g_mirror_sync_start(struct g_mirror_disk *disk)
sx_xunlock(&sc->sc_lock);
g_topology_lock();
cp = g_new_consumer(sc->sc_sync.ds_geom);
cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
error = g_attach(cp, sc->sc_provider);
KASSERT(error == 0,
("Cannot attach to %s (error=%d).", sc->sc_name, error));
@@ -2034,6 +2045,7 @@ g_mirror_launch_provider(struct g_mirror_softc *sc)
g_topology_lock();
pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
pp->flags |= G_PF_DIRECT_RECEIVE;
pp->mediasize = sc->sc_mediasize;
pp->sectorsize = sc->sc_sectorsize;
pp->stripesize = 0;
@@ -2082,10 +2094,8 @@ g_mirror_destroy_provider(struct g_mirror_softc *sc)
g_topology_lock();
g_error_provider(sc->sc_provider, ENXIO);
mtx_lock(&sc->sc_queue_mtx);
while ((bp = bioq_first(&sc->sc_queue)) != NULL) {
bioq_remove(&sc->sc_queue, bp);
while ((bp = bioq_takefirst(&sc->sc_queue)) != NULL)
g_io_deliver(bp, ENXIO);
}
mtx_unlock(&sc->sc_queue_mtx);
G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name,
sc->sc_provider->name);
@@ -2896,6 +2906,7 @@ g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md)
TAILQ_INIT(&sc->sc_events);
mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
callout_init(&sc->sc_callout, CALLOUT_MPSAFE);
mtx_init(&sc->sc_done_mtx, "gmirror:done", NULL, MTX_DEF);
sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
gp->softc = sc;
sc->sc_geom = gp;
@@ -2914,6 +2925,7 @@ g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md)
G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
sc->sc_name);
g_destroy_geom(sc->sc_sync.ds_geom);
mtx_destroy(&sc->sc_done_mtx);
mtx_destroy(&sc->sc_events_mtx);
mtx_destroy(&sc->sc_queue_mtx);
sx_destroy(&sc->sc_lock);


@@ -212,6 +212,8 @@ struct g_mirror_softc {
struct callout sc_callout;
struct root_hold_token *sc_rootmount;
struct mtx sc_done_mtx;
};
#define sc_name sc_geom->name


@@ -442,6 +442,7 @@ g_multipath_create(struct g_class *mp, struct g_multipath_metadata *md)
gp->dumpconf = g_multipath_dumpconf;
pp = g_new_providerf(gp, "multipath/%s", md->md_name);
pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;
if (md->md_size != 0) {
pp->mediasize = md->md_size -
((md->md_uuid[0] != 0) ? md->md_sectorsize : 0);
@@ -479,6 +480,7 @@ g_multipath_add_disk(struct g_geom *gp, struct g_provider *pp)
}
nxtcp = LIST_FIRST(&gp->consumer);
cp = g_new_consumer(gp);
cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
cp->private = NULL;
cp->index = MP_NEW;
error = g_attach(cp, pp);


@@ -107,6 +107,7 @@ g_nop_start(struct bio *bp)
gp = bp->bio_to->geom;
sc = gp->softc;
G_NOP_LOGREQ(bp, "Request received.");
mtx_lock(&sc->sc_lock);
switch (bp->bio_cmd) {
case BIO_READ:
sc->sc_reads++;
@@ -119,6 +120,7 @@ g_nop_start(struct bio *bp)
failprob = sc->sc_wfailprob;
break;
}
mtx_unlock(&sc->sc_lock);
if (failprob > 0) {
u_int rval;
@@ -224,6 +226,7 @@ g_nop_create(struct gctl_req *req, struct g_class *mp, struct g_provider *pp,
sc->sc_writes = 0;
sc->sc_readbytes = 0;
sc->sc_wrotebytes = 0;
mtx_init(&sc->sc_lock, "gnop lock", NULL, MTX_DEF);
gp->softc = sc;
gp->start = g_nop_start;
gp->orphan = g_nop_orphan;
@@ -232,10 +235,12 @@ g_nop_create(struct gctl_req *req, struct g_class *mp, struct g_provider *pp,
gp->dumpconf = g_nop_dumpconf;
newpp = g_new_providerf(gp, "%s", gp->name);
newpp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;
newpp->mediasize = size;
newpp->sectorsize = secsize;
cp = g_new_consumer(gp);
cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
error = g_attach(cp, pp);
if (error != 0) {
gctl_error(req, "Cannot attach to provider %s.", pp->name);
@@ -251,6 +256,7 @@ g_nop_create(struct gctl_req *req, struct g_class *mp, struct g_provider *pp,
g_detach(cp);
g_destroy_consumer(cp);
g_destroy_provider(newpp);
mtx_destroy(&sc->sc_lock);
g_free(gp->softc);
g_destroy_geom(gp);
return (error);
@@ -259,10 +265,12 @@ g_nop_create(struct gctl_req *req, struct g_class *mp, struct g_provider *pp,
static int
g_nop_destroy(struct g_geom *gp, boolean_t force)
{
struct g_nop_softc *sc;
struct g_provider *pp;
g_topology_assert();
if (gp->softc == NULL)
sc = gp->softc;
if (sc == NULL)
return (ENXIO);
pp = LIST_FIRST(&gp->provider);
if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
@@ -277,8 +285,9 @@ g_nop_destroy(struct g_geom *gp, boolean_t force)
} else {
G_NOP_DEBUG(0, "Device %s removed.", gp->name);
}
g_free(gp->softc);
gp->softc = NULL;
mtx_destroy(&sc->sc_lock);
g_free(sc);
g_wither_geom(gp, ENXIO);
return (0);


@@ -65,6 +65,7 @@ struct g_nop_softc {
uintmax_t sc_writes;
uintmax_t sc_readbytes;
uintmax_t sc_wrotebytes;
struct mtx sc_lock;
};
#endif /* _KERNEL */


@@ -418,6 +418,7 @@ g_part_new_provider(struct g_geom *gp, struct g_part_table *table,
sbuf_finish(sb);
entry->gpe_pp = g_new_providerf(gp, "%s", sbuf_data(sb));
sbuf_delete(sb);
entry->gpe_pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;
entry->gpe_pp->private = entry; /* Close the circle. */
}
entry->gpe_pp->index = entry->gpe_index - 1; /* index is 1-based. */
@@ -930,6 +931,7 @@ g_part_ctl_create(struct gctl_req *req, struct g_part_parms *gpp)
LIST_INIT(&table->gpt_entry);
if (null == NULL) {
cp = g_new_consumer(gp);
cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
error = g_attach(cp, pp);
if (error == 0)
error = g_access(cp, 1, 1, 1);
@@ -1886,6 +1888,7 @@ g_part_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
*/
gp = g_new_geomf(mp, "%s", pp->name);
cp = g_new_consumer(gp);
cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
error = g_attach(cp, pp);
if (error == 0)
error = g_access(cp, 1, 0, 0);


@@ -792,6 +792,7 @@ g_raid_open_consumer(struct g_raid_softc *sc, const char *name)
if (pp == NULL)
return (NULL);
cp = g_new_consumer(sc->sc_geom);
cp->flags |= G_CF_DIRECT_RECEIVE;
if (g_attach(cp, pp) != 0) {
g_destroy_consumer(cp);
return (NULL);
@@ -1670,6 +1671,7 @@ g_raid_launch_provider(struct g_raid_volume *vol)
}
pp = g_new_providerf(sc->sc_geom, "%s", name);
pp->flags |= G_PF_DIRECT_RECEIVE;
if (vol->v_tr->tro_class->trc_accept_unmapped) {
pp->flags |= G_PF_ACCEPT_UNMAPPED;
for (i = 0; i < vol->v_disks_count; i++) {
@@ -2255,6 +2257,7 @@ g_raid_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
*/
gp->orphan = g_raid_taste_orphan;
cp = g_new_consumer(gp);
cp->flags |= G_CF_DIRECT_RECEIVE;
g_attach(cp, pp);
geom = NULL;


@@ -2143,6 +2143,7 @@ g_raid_md_taste_ddf(struct g_raid_md_object *md, struct g_class *mp,
}
rcp = g_new_consumer(geom);
rcp->flags |= G_CF_DIRECT_RECEIVE;
g_attach(rcp, pp);
if (g_access(rcp, 1, 1, 1) != 0)
; //goto fail1;


@@ -1477,6 +1477,7 @@ g_raid_md_taste_intel(struct g_raid_md_object *md, struct g_class *mp,
}
rcp = g_new_consumer(geom);
rcp->flags |= G_CF_DIRECT_RECEIVE;
g_attach(rcp, pp);
if (g_access(rcp, 1, 1, 1) != 0)
; //goto fail1;


@@ -923,6 +923,7 @@ g_raid_md_taste_jmicron(struct g_raid_md_object *md, struct g_class *mp,
}
rcp = g_new_consumer(geom);
rcp->flags |= G_CF_DIRECT_RECEIVE;
g_attach(rcp, pp);
if (g_access(rcp, 1, 1, 1) != 0)
; //goto fail1;


@@ -919,6 +919,7 @@ g_raid_md_taste_nvidia(struct g_raid_md_object *md, struct g_class *mp,
}
rcp = g_new_consumer(geom);
rcp->flags |= G_CF_DIRECT_RECEIVE;
g_attach(rcp, pp);
if (g_access(rcp, 1, 1, 1) != 0)
; //goto fail1;


@@ -1176,6 +1176,7 @@ g_raid_md_taste_promise(struct g_raid_md_object *md, struct g_class *mp,
}
rcp = g_new_consumer(geom);
rcp->flags |= G_CF_DIRECT_RECEIVE;
g_attach(rcp, pp);
if (g_access(rcp, 1, 1, 1) != 0)
; //goto fail1;


@@ -1012,6 +1012,7 @@ g_raid_md_taste_sii(struct g_raid_md_object *md, struct g_class *mp,
}
rcp = g_new_consumer(geom);
rcp->flags |= G_CF_DIRECT_RECEIVE;
g_attach(rcp, pp);
if (g_access(rcp, 1, 1, 1) != 0)
; //goto fail1;


@@ -284,22 +284,25 @@ g_stripe_done(struct bio *bp)
pbp = bp->bio_parent;
sc = pbp->bio_to->geom->softc;
if (pbp->bio_error == 0)
pbp->bio_error = bp->bio_error;
pbp->bio_completed += bp->bio_completed;
if (bp->bio_cmd == BIO_READ && bp->bio_caller1 != NULL) {
g_stripe_copy(sc, bp->bio_data, bp->bio_caller1, bp->bio_offset,
bp->bio_length, 1);
bp->bio_data = bp->bio_caller1;
bp->bio_caller1 = NULL;
}
g_destroy_bio(bp);
mtx_lock(&sc->sc_lock);
if (pbp->bio_error == 0)
pbp->bio_error = bp->bio_error;
pbp->bio_completed += bp->bio_completed;
pbp->bio_inbed++;
if (pbp->bio_children == pbp->bio_inbed) {
mtx_unlock(&sc->sc_lock);
if (pbp->bio_driver1 != NULL)
uma_zfree(g_stripe_zone, pbp->bio_driver1);
g_io_deliver(pbp, pbp->bio_error);
}
} else
mtx_unlock(&sc->sc_lock);
g_destroy_bio(bp);
}
static int
@@ -442,7 +445,6 @@ g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length)
sc = bp->bio_to->geom->softc;
addr = bp->bio_data;
stripesize = sc->sc_stripesize;
cbp = g_clone_bio(bp);
@@ -454,10 +456,18 @@ g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length)
/*
* Fill in the component buf structure.
*/
cbp->bio_done = g_std_done;
if (bp->bio_length == length)
cbp->bio_done = g_std_done; /* Optimized lockless case. */
else
cbp->bio_done = g_stripe_done;
cbp->bio_offset = offset;
cbp->bio_data = addr;
cbp->bio_length = length;
if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
bp->bio_ma_n = round_page(bp->bio_ma_offset +
bp->bio_length) / PAGE_SIZE;
addr = NULL;
} else
addr = bp->bio_data;
cbp->bio_caller2 = sc->sc_disks[no];
/* offset -= offset % stripesize; */
@@ -479,14 +489,21 @@ g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length)
/*
* Fill in the component buf structure.
*/
cbp->bio_done = g_std_done;
cbp->bio_done = g_stripe_done;
cbp->bio_offset = offset;
cbp->bio_data = addr;
/*
* MIN() is in case when
* (bp->bio_length % sc->sc_stripesize) != 0.
*/
cbp->bio_length = MIN(stripesize, length);
if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
cbp->bio_ma_offset += (uintptr_t)addr;
cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE;
cbp->bio_ma_offset %= PAGE_SIZE;
cbp->bio_ma_n = round_page(cbp->bio_ma_offset +
cbp->bio_length) / PAGE_SIZE;
} else
cbp->bio_data = addr;
cbp->bio_caller2 = sc->sc_disks[no];
}
@@ -536,15 +553,15 @@ g_stripe_flush(struct g_stripe_softc *sc, struct bio *bp)
return;
}
bioq_insert_tail(&queue, cbp);
cbp->bio_done = g_std_done;
cbp->bio_caller1 = sc->sc_disks[no];
cbp->bio_done = g_stripe_done;
cbp->bio_caller2 = sc->sc_disks[no];
cbp->bio_to = sc->sc_disks[no]->provider;
}
for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
bioq_remove(&queue, cbp);
G_STRIPE_LOGREQ(cbp, "Sending request.");
cp = cbp->bio_caller1;
cbp->bio_caller1 = NULL;
cp = cbp->bio_caller2;
cbp->bio_caller2 = NULL;
g_io_request(cbp, cp);
}
}
@@ -613,9 +630,12 @@ g_stripe_start(struct bio *bp)
* 3. Request size is bigger than stripesize * ndisks. If it isn't,
* there will be no need to send more than one I/O request to
* a provider, so there is nothing to optmize.
* and
* 4. Request is not unmapped.
*/
if (g_stripe_fast && bp->bio_length <= MAXPHYS &&
bp->bio_length >= stripesize * sc->sc_ndisks) {
bp->bio_length >= stripesize * sc->sc_ndisks &&
(bp->bio_flags & BIO_UNMAPPED) == 0) {
fast = 1;
}
error = 0;
@@ -642,6 +662,7 @@ g_stripe_start(struct bio *bp)
static void
g_stripe_check_and_run(struct g_stripe_softc *sc)
{
struct g_provider *dp;
off_t mediasize, ms;
u_int no, sectorsize = 0;
@@ -651,6 +672,9 @@ g_stripe_check_and_run(struct g_stripe_softc *sc)
sc->sc_provider = g_new_providerf(sc->sc_geom, "stripe/%s",
sc->sc_name);
sc->sc_provider->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;
if (g_stripe_fast == 0)
sc->sc_provider->flags |= G_PF_ACCEPT_UNMAPPED;
/*
* Find the smallest disk.
*/
@@ -660,14 +684,21 @@ g_stripe_check_and_run(struct g_stripe_softc *sc)
mediasize -= mediasize % sc->sc_stripesize;
sectorsize = sc->sc_disks[0]->provider->sectorsize;
for (no = 1; no < sc->sc_ndisks; no++) {
ms = sc->sc_disks[no]->provider->mediasize;
dp = sc->sc_disks[no]->provider;
ms = dp->mediasize;
if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC)
ms -= sc->sc_disks[no]->provider->sectorsize;
ms -= dp->sectorsize;
ms -= ms % sc->sc_stripesize;
if (ms < mediasize)
mediasize = ms;
sectorsize = lcm(sectorsize,
sc->sc_disks[no]->provider->sectorsize);
sectorsize = lcm(sectorsize, dp->sectorsize);
/* A provider underneath us doesn't support unmapped */
if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) {
G_STRIPE_DEBUG(1, "Cancelling unmapped "
"because of %s.", dp->name);
sc->sc_provider->flags &= ~G_PF_ACCEPT_UNMAPPED;
}
}
sc->sc_provider->sectorsize = sectorsize;
sc->sc_provider->mediasize = mediasize * sc->sc_ndisks;
@@ -729,6 +760,7 @@ g_stripe_add_disk(struct g_stripe_softc *sc, struct g_provider *pp, u_int no)
fcp = LIST_FIRST(&gp->consumer);
cp = g_new_consumer(gp);
cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
cp->private = NULL;
cp->index = no;
error = g_attach(cp, pp);
@@ -830,6 +862,7 @@ g_stripe_create(struct g_class *mp, const struct g_stripe_metadata *md,
for (no = 0; no < sc->sc_ndisks; no++)
sc->sc_disks[no] = NULL;
sc->sc_type = type;
mtx_init(&sc->sc_lock, "gstripe lock", NULL, MTX_DEF);
gp->softc = sc;
sc->sc_geom = gp;
@@ -878,6 +911,7 @@ g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force)
KASSERT(sc->sc_provider == NULL, ("Provider still exists? (device=%s)",
gp->name));
free(sc->sc_disks, M_STRIPE);
mtx_destroy(&sc->sc_lock);
free(sc, M_STRIPE);
G_STRIPE_DEBUG(0, "Device %s destroyed.", gp->name);
g_wither_geom(gp, ENXIO);


@@ -76,6 +76,7 @@ struct g_stripe_softc {
uint16_t sc_ndisks;
uint32_t sc_stripesize;
uint32_t sc_stripebits;
struct mtx sc_lock;
};
#define sc_name sc_geom->name
#endif /* _KERNEL */


@@ -106,6 +106,7 @@ g_zero_init(struct g_class *mp)
gp->start = g_zero_start;
gp->access = g_std_access;
gpp = pp = g_new_providerf(gp, "%s", gp->name);
pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;
if (!g_zero_clear)
pp->flags |= G_PF_ACCEPT_UNMAPPED;
pp->mediasize = 1152921504606846976LLU;


@@ -131,6 +131,7 @@ devstat_new_entry(const void *dev_name,
ds = devstat_alloc();
mtx_lock(&devstat_mutex);
if (unit_number == -1) {
ds->unit_number = unit_number;
ds->id = dev_name;
binuptime(&ds->creation_time);
devstat_generation++;
@@ -242,7 +243,7 @@ devstat_remove_entry(struct devstat *ds)
/* Remove this entry from the devstat queue */
atomic_add_acq_int(&ds->sequence1, 1);
if (ds->id == NULL) {
if (ds->unit_number != -1) {
devstat_num_devs--;
STAILQ_REMOVE(devstat_head, ds, devstat, dev_links);
}


@@ -793,6 +793,8 @@ extern pid_t pid_max;
#define THREAD_SLEEPING_OK() ((curthread)->td_no_sleeping--)
#define THREAD_CAN_SLEEP() ((curthread)->td_no_sleeping == 0)
#define PIDHASH(pid) (&pidhashtbl[(pid) & pidhash])
extern LIST_HEAD(pidhashhead, proc) *pidhashtbl;
extern u_long pidhash;