getblk: reduce time under bufobj lock

Use the new pctrie combined insert/lookup facility to reduce work and
time under the bufobj interlock when associating a buf with a vnode.

We now do one lookup in the dirty tree and one combined lookup/insert in
the clean tree instead of one lookup in dirty, two in clean, and then an
insert in clean.  We also avoid touching the possibly unrelated buf at
the tail of the queue.
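
To make the saving concrete, here is a minimal userland sketch of the combined operation's contract, under simplified stand-ins of my own: a sorted array plays the pctrie, and tbl_insert_lookup_le() plays the role of BUF_PCTRIE_INSERT_LOOKUP_LE() (neither is the kernel implementation). One descent either inserts the key and reports the greatest key at or below it (the predecessor the caller needs for the ordered tail-queue insert), or stops at an existing entry with EEXIST. The old code needed one lookup to detect the duplicate and a second lookup_le to find the position.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define	NKEYS	64

struct tbl {
	uint64_t keys[NKEYS];
	int	 cnt;
};

/*
 * One pass: insert 'key' in sorted position and report the greatest key
 * <= 'key' through *le (UINT64_MAX if there is none), or return EEXIST
 * if the key is already present, or ENOMEM if the table is full.
 */
static int
tbl_insert_lookup_le(struct tbl *t, uint64_t key, uint64_t *le)
{
	int i;

	for (i = 0; i < t->cnt && t->keys[i] < key; i++)
		;
	if (i < t->cnt && t->keys[i] == key) {
		*le = key;		/* lookup_le of an existing key */
		return (EEXIST);
	}
	*le = (i > 0) ? t->keys[i - 1] : UINT64_MAX;	/* no predecessor */
	if (t->cnt == NKEYS)
		return (ENOMEM);	/* cf. "error on allocation failure" */
	memmove(&t->keys[i + 1], &t->keys[i],
	    (size_t)(t->cnt - i) * sizeof(t->keys[0]));
	t->keys[i] = key;
	t->cnt++;
	return (0);
}

int
main(void)
{
	struct tbl clean = { .cnt = 0 };
	uint64_t trial[] = { 10, 30, 20, 20 };
	uint64_t le;
	size_t i;
	int error;

	for (i = 0; i < sizeof(trial) / sizeof(trial[0]); i++) {
		error = tbl_insert_lookup_le(&clean, trial[i], &le);
		printf("insert %2ju: error=%d le=%jd\n", (uintmax_t)trial[i],
		    error, le == UINT64_MAX ? (intmax_t)-1 : (intmax_t)le);
	}
	return (0);
}

The last insert reports EEXIST in the same pass that would otherwise have placed the key, which is exactly what lets bgetvp() below do duplicate detection and insertion under one lock hold.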

Also correct an issue where the actual order of the tail queue depended
on the insertion order: the tail-insert fast path compared lblknos as
signed values while the pctrie lookup path treated them as unsigned
keys, so buffers with negative lblknos could land in either order.
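
For reference, b_lblkno is a daddr_t, a signed 64-bit type, and negative logical block numbers are legitimate (FFS uses them for indirect blocks in the buffer cache). A signed comparison and a comparison of the same bits as unsigned keys disagree about where a negative lblkno sorts, which is the root of the order dependence. A standalone demonstration, where the daddr64 typedef is a stand-in of mine for the kernel's daddr_t:

#include <stdint.h>
#include <stdio.h>

typedef int64_t daddr64;	/* stand-in for the kernel's signed daddr_t */

int
main(void)
{
	daddr64 neg = -1;	/* e.g. an indirect-block lblkno */
	daddr64 pos = 1;

	/* Signed order, as in the old tail-insert fast path: -1 sorts low. */
	printf("signed:   %s\n", neg < pos ? "-1 before 1" : "1 before -1");

	/* Unsigned key order, as in the pctrie: (uint64_t)-1 sorts high. */
	printf("unsigned: %s\n",
	    (uint64_t)neg < (uint64_t)pos ? "-1 before 1" : "1 before -1");
	return (0);
}

The commit resolves the disagreement by comparing lblknos as uint64_t everywhere, matching the pctrie's key order; see the (uint64_t) casts in the buf_vlist_find_or_add() assertions below.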

Reviewed by:	kib (previous version), dougm, markj
Sponsored by:	Dell EMC Isilon
Differential Revision:	https://reviews.freebsd.org/D45395
Ryan Libby, 2024-06-05 20:21:22 -07:00
commit 780666c09b (parent bbf81f4629)
3 changed files with 100 additions and 51 deletions

diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c

@@ -4231,36 +4231,30 @@ getblkx(struct vnode *vp, daddr_t blkno, daddr_t dblkno, int size, int slpflag,
 		}
 
 		/*
-		 * This code is used to make sure that a buffer is not
-		 * created while the getnewbuf routine is blocked.
-		 * This can be a problem whether the vnode is locked or not.
-		 * If the buffer is created out from under us, we have to
-		 * throw away the one we just created.
-		 *
-		 * Note: this must occur before we associate the buffer
-		 * with the vp especially considering limitations in
-		 * the splay tree implementation when dealing with duplicate
-		 * lblkno's.
+		 * Insert the buffer into the hash, so that it can
+		 * be found by incore.
+		 *
+		 * We don't hold the bufobj interlock while allocating the new
+		 * buffer.  Consequently, we can race on buffer creation.  This
+		 * can be a problem whether the vnode is locked or not.  If the
+		 * buffer is created out from under us, we have to throw away
+		 * the one we just created.
 		 */
-		BO_LOCK(bo);
-		if (gbincore(bo, blkno)) {
-			BO_UNLOCK(bo);
+		bp->b_lblkno = blkno;
+		bp->b_blkno = d_blkno;
+		bp->b_offset = offset;
+		error = bgetvp(vp, bp);
+		if (error != 0) {
+			KASSERT(error == EEXIST,
+			    ("getblk: unexpected error %d from bgetvp",
+			    error));
 			bp->b_flags |= B_INVAL;
 			bufspace_release(bufdomain(bp), maxsize);
 			brelse(bp);
 			goto loop;
 		}
 
-		/*
-		 * Insert the buffer into the hash, so that it can
-		 * be found by incore.
-		 */
-		bp->b_lblkno = blkno;
-		bp->b_blkno = d_blkno;
-		bp->b_offset = offset;
-		bgetvp(vp, bp);
-		BO_UNLOCK(bo);
-
 		/*
 		 * set B_VMIO bit.  allocbuf() the buffer bigger.  Since the
 		 * buffer size starts out as 0, B_CACHE will be set by
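
As a side note on the hunk above: getblkx() now performs the duplicate check and the association as one step through bgetvp()'s new return value, rather than a separate gbincore() lookup under BO_LOCK. A condensed userland sketch of that allocate-then-try-to-associate retry shape, with hypothetical helpers of my own (try_associate standing in for bgetvp(), malloc for getnewbuf()) rather than kernel API:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct buf { long lblkno; };

/* Stand-in for bgetvp(): pretend we lose the creation race exactly once. */
static int
try_associate(struct buf *bp)
{
	static int raced = 1;

	(void)bp;
	return (raced-- > 0 ? EEXIST : 0);
}

int
main(void)
{
	struct buf *bp;

loop:
	bp = malloc(sizeof(*bp));	/* getnewbuf(): no bufobj lock held */
	if (bp == NULL)
		return (1);
	bp->lblkno = 42;
	if (try_associate(bp) != 0) {	/* lost the race: EEXIST */
		free(bp);		/* brelse() the loser */
		goto loop;		/* retry; the winner will be found */
	}
	printf("associated buf at lblkno %ld\n", bp->lblkno);
	free(bp);
	return (0);
}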

diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c

@@ -2697,12 +2697,12 @@ buf_vlist_remove(struct buf *bp)
 }
 
 /*
- * Add the buffer to the sorted clean or dirty block list.
- *
- * NOTE: xflags is passed as a constant, optimizing this inline function!
+ * Add the buffer to the sorted clean or dirty block list.  Return zero on
+ * success, EEXIST if a buffer with this identity already exists, or another
+ * error on allocation failure.
 */
-static void
-buf_vlist_add(struct buf *bp, struct bufobj *bo, b_xflags_t xflags)
+static inline int
+buf_vlist_find_or_add(struct buf *bp, struct bufobj *bo, b_xflags_t xflags)
 {
 	struct bufv *bv;
 	struct buf *n;
@@ -2713,30 +2713,69 @@ buf_vlist_add(struct buf *bp, struct bufobj *bo, b_xflags_t xflags)
 	    ("buf_vlist_add: bo %p does not allow bufs", bo));
 	KASSERT((xflags & BX_VNDIRTY) == 0 || (bo->bo_flag & BO_DEAD) == 0,
 	    ("dead bo %p", bo));
-	KASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0,
-	    ("buf_vlist_add: Buf %p has existing xflags %d", bp, bp->b_xflags));
-	bp->b_xflags |= xflags;
+	KASSERT((bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) == xflags,
+	    ("buf_vlist_add: b_xflags %#x not set on bp %p", xflags, bp));
+
 	if (xflags & BX_VNDIRTY)
 		bv = &bo->bo_dirty;
 	else
 		bv = &bo->bo_clean;
 
-	/*
-	 * Keep the list ordered.  Optimize empty list insertion.  Assume
-	 * we tend to grow at the tail so lookup_le should usually be cheaper
-	 * than _ge.
-	 */
-	if (bv->bv_cnt == 0 ||
-	    bp->b_lblkno > TAILQ_LAST(&bv->bv_hd, buflists)->b_lblkno)
-		TAILQ_INSERT_TAIL(&bv->bv_hd, bp, b_bobufs);
-	else if ((n = BUF_PCTRIE_LOOKUP_LE(&bv->bv_root, bp->b_lblkno)) == NULL)
+	error = BUF_PCTRIE_INSERT_LOOKUP_LE(&bv->bv_root, bp, &n);
+	if (n == NULL) {
+		KASSERT(error != EEXIST,
+		    ("buf_vlist_add: EEXIST but no existing buf found: bp %p",
+		    bp));
+	} else {
+		KASSERT((uint64_t)n->b_lblkno <= (uint64_t)bp->b_lblkno,
+		    ("buf_vlist_add: out of order insert/lookup: bp %p n %p",
+		    bp, n));
+		KASSERT((n->b_lblkno == bp->b_lblkno) == (error == EEXIST),
+		    ("buf_vlist_add: inconsistent result for existing buf: "
+		    "error %d bp %p n %p", error, bp, n));
+	}
+	if (error != 0)
+		return (error);
+
+	/* Keep the list ordered. */
+	if (n == NULL) {
+		KASSERT(TAILQ_EMPTY(&bv->bv_hd) ||
+		    (uint64_t)bp->b_lblkno <
+		    (uint64_t)TAILQ_FIRST(&bv->bv_hd)->b_lblkno,
+		    ("buf_vlist_add: queue order: "
+		    "%p should be before first %p",
+		    bp, TAILQ_FIRST(&bv->bv_hd)));
 		TAILQ_INSERT_HEAD(&bv->bv_hd, bp, b_bobufs);
-	else
+	} else {
+		KASSERT(TAILQ_NEXT(n, b_bobufs) == NULL ||
+		    (uint64_t)bp->b_lblkno <
+		    (uint64_t)TAILQ_NEXT(n, b_bobufs)->b_lblkno,
+		    ("buf_vlist_add: queue order: "
+		    "%p should be before next %p",
+		    bp, TAILQ_NEXT(n, b_bobufs)));
 		TAILQ_INSERT_AFTER(&bv->bv_hd, n, bp, b_bobufs);
-	error = BUF_PCTRIE_INSERT(&bv->bv_root, bp);
-	if (error)
-		panic("buf_vlist_add: Preallocated nodes insufficient.");
+	}
+
 	bv->bv_cnt++;
+	return (0);
+}
+
+/*
+ * Add the buffer to the sorted clean or dirty block list.
+ *
+ * NOTE: xflags is passed as a constant, optimizing this inline function!
+ */
+static void
+buf_vlist_add(struct buf *bp, struct bufobj *bo, b_xflags_t xflags)
+{
+	int error;
+
+	KASSERT((bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) == 0,
+	    ("buf_vlist_add: Buf %p has existing xflags %d", bp, bp->b_xflags));
+	bp->b_xflags |= xflags;
+	error = buf_vlist_find_or_add(bp, bo, xflags);
+	if (error)
+		panic("buf_vlist_add: error=%d", error);
 }
 
 /*
@@ -2775,26 +2814,42 @@ gbincore_unlocked(struct bufobj *bo, daddr_t lblkno)
 /*
  * Associate a buffer with a vnode.
  */
-void
+int
 bgetvp(struct vnode *vp, struct buf *bp)
 {
 	struct bufobj *bo;
+	int error;
 
 	bo = &vp->v_bufobj;
-	ASSERT_BO_WLOCKED(bo);
+	ASSERT_BO_UNLOCKED(bo);
 	VNASSERT(bp->b_vp == NULL, bp->b_vp, ("bgetvp: not free"));
 
 	CTR3(KTR_BUF, "bgetvp(%p) vp %p flags %X", bp, vp, bp->b_flags);
 	VNASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0, vp,
 	    ("bgetvp: bp already attached! %p", bp));
-	vhold(vp);
+
+	/*
+	 * Add the buf to the vnode's clean list unless we lost a race and find
+	 * an existing buf in either dirty or clean.
+	 */
 	bp->b_vp = vp;
 	bp->b_bufobj = bo;
-	/*
-	 * Insert onto list for new vnode.
-	 */
-	buf_vlist_add(bp, bo, BX_VNCLEAN);
+	bp->b_xflags |= BX_VNCLEAN;
+	error = EEXIST;
+	BO_LOCK(bo);
+	if (BUF_PCTRIE_LOOKUP(&bo->bo_dirty.bv_root, bp->b_lblkno) == NULL)
+		error = buf_vlist_find_or_add(bp, bo, BX_VNCLEAN);
+	BO_UNLOCK(bo);
+	if (__predict_true(error == 0)) {
+		vhold(vp);
+		return (0);
+	}
+	if (error != EEXIST)
+		panic("bgetvp: buf_vlist_add error: %d", error);
+	bp->b_vp = NULL;
+	bp->b_bufobj = NULL;
+	bp->b_xflags &= ~BX_VNCLEAN;
+	return (error);
 }
 
 /*

diff --git a/sys/sys/buf.h b/sys/sys/buf.h
--- a/sys/sys/buf.h
+++ b/sys/sys/buf.h

@@ -603,7 +603,7 @@ void	vfs_unbusy_pages(struct buf *);
 int	vmapbuf(struct buf *, void *, size_t, int);
 void	vunmapbuf(struct buf *);
 void	brelvp(struct buf *);
-void	bgetvp(struct vnode *, struct buf *);
+int	bgetvp(struct vnode *, struct buf *) __result_use_check;
 void	pbgetbo(struct bufobj *bo, struct buf *bp);
 void	pbgetvp(struct vnode *, struct buf *);
 void	pbrelbo(struct buf *);