mirror of
https://github.com/freebsd/freebsd-src
synced 2024-10-15 12:54:27 +00:00
Make it possible to rebuild degraded RAID5 plexes. Note that it is
currently not possible to do this while the volume is mounted.

MFC in: 1 week
This commit is contained in:
parent
af9cb375e8
commit
c3aadfb9d6
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/head/; revision=135966
|
@ -43,6 +43,8 @@ __FBSDID("$FreeBSD$");
|
|||
int gv_init_plex(struct gv_plex *);
|
||||
int gv_init_sd(struct gv_sd *);
|
||||
void gv_init_td(void *);
|
||||
void gv_rebuild_plex(struct gv_plex *);
|
||||
void gv_rebuild_td(void *);
|
||||
void gv_start_plex(struct gv_plex *);
|
||||
void gv_start_vol(struct gv_volume *);
|
||||
void gv_sync(struct gv_volume *);
|
||||
|
@ -117,8 +119,12 @@ gv_start_plex(struct gv_plex *p)
|
|||
v = p->vol_sc;
|
||||
if ((v != NULL) && (v->plexcount > 1))
|
||||
gv_sync(v);
|
||||
else if (p->org == GV_PLEX_RAID5)
|
||||
gv_init_plex(p);
|
||||
else if (p->org == GV_PLEX_RAID5) {
|
||||
if (p->state == GV_PLEX_DEGRADED)
|
||||
gv_rebuild_plex(p);
|
||||
else
|
||||
gv_init_plex(p);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
@ -142,7 +148,9 @@ gv_start_vol(struct gv_volume *v)
|
|||
case GV_PLEX_DOWN:
|
||||
gv_init_plex(p);
|
||||
break;
|
||||
case GV_PLEX_DEGRADED: /* XXX not yet */
|
||||
case GV_PLEX_DEGRADED:
|
||||
gv_rebuild_plex(p);
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
|
@ -191,6 +199,22 @@ gv_sync(struct gv_volume *v)
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
gv_rebuild_plex(struct gv_plex *p)
|
||||
{
|
||||
struct gv_sync_args *sync;
|
||||
|
||||
if ((p->flags & GV_PLEX_SYNCING) || gv_is_open(p->geom))
|
||||
return;
|
||||
|
||||
sync = g_malloc(sizeof(*sync), M_WAITOK | M_ZERO);
|
||||
sync->to = p;
|
||||
sync->syncsize = GV_DFLT_SYNCSIZE;
|
||||
|
||||
kthread_create(gv_rebuild_td, sync, NULL, 0, 0, "gv_rebuild %s",
|
||||
p->name);
|
||||
}
|
||||
|
||||
int
|
||||
gv_init_plex(struct gv_plex *p)
|
||||
{
|
||||
|
@ -225,6 +249,94 @@ gv_init_sd(struct gv_sd *s)
|
|||
return (0);
|
||||
}
|
||||
|
||||
/* This thread is responsible for rebuilding a degraded RAID5 plex. */
|
||||
void
|
||||
gv_rebuild_td(void *arg)
|
||||
{
|
||||
struct bio *bp;
|
||||
struct gv_plex *p;
|
||||
struct g_consumer *cp;
|
||||
struct gv_sync_args *sync;
|
||||
u_char *buf;
|
||||
off_t i;
|
||||
int error;
|
||||
|
||||
buf = NULL;
|
||||
bp = NULL;
|
||||
|
||||
sync = arg;
|
||||
p = sync->to;
|
||||
p->synced = 0;
|
||||
p->flags |= GV_PLEX_SYNCING;
|
||||
cp = p->consumer;
|
||||
|
||||
g_topology_lock();
|
||||
error = g_access(cp, 1, 1, 0);
|
||||
if (error) {
|
||||
g_topology_unlock();
|
||||
printf("GEOM_VINUM: rebuild of %s failed to access consumer: "
|
||||
"%d\n", p->name, error);
|
||||
kthread_exit(error);
|
||||
}
|
||||
g_topology_unlock();
|
||||
|
||||
buf = g_malloc(sync->syncsize, M_WAITOK);
|
||||
|
||||
printf("GEOM_VINUM: rebuild of %s started\n", p->name);
|
||||
i = 0;
|
||||
for (i = 0; i < p->size; i += (p->stripesize * (p->sdcount - 1))) {
|
||||
/*
|
||||
if (i + sync->syncsize > p->size)
|
||||
sync->syncsize = p->size - i;
|
||||
*/
|
||||
bp = g_new_bio();
|
||||
if (bp == NULL) {
|
||||
printf("GEOM_VINUM: rebuild of %s failed creating bio: "
|
||||
"out of memory\n", p->name);
|
||||
break;
|
||||
}
|
||||
bp->bio_cmd = BIO_WRITE;
|
||||
bp->bio_done = NULL;
|
||||
bp->bio_data = buf;
|
||||
bp->bio_cflags |= GV_BIO_REBUILD;
|
||||
bp->bio_offset = i;
|
||||
bp->bio_length = p->stripesize;
|
||||
|
||||
/* Schedule it down ... */
|
||||
g_io_request(bp, cp);
|
||||
|
||||
/* ... and wait for the result. */
|
||||
error = biowait(bp, "gwrite");
|
||||
if (error) {
|
||||
printf("GEOM_VINUM: rebuild of %s failed at offset %jd "
|
||||
"errno: %d\n", p->name, i, error);
|
||||
break;
|
||||
}
|
||||
g_destroy_bio(bp);
|
||||
bp = NULL;
|
||||
}
|
||||
|
||||
if (bp != NULL)
|
||||
g_destroy_bio(bp);
|
||||
if (buf != NULL)
|
||||
g_free(buf);
|
||||
|
||||
g_topology_lock();
|
||||
g_access(cp, -1, -1, 0);
|
||||
gv_save_config_all(p->vinumconf);
|
||||
g_topology_unlock();
|
||||
|
||||
p->flags &= ~GV_PLEX_SYNCING;
|
||||
p->synced = 0;
|
||||
|
||||
/* Successful initialization. */
|
||||
if (!error)
|
||||
printf("GEOM_VINUM: rebuild of %s finished\n", p->name);
|
||||
|
||||
g_free(sync);
|
||||
kthread_exit(error);
|
||||
}
|
||||
|
||||
void
|
||||
gv_sync_td(void *arg)
|
||||
{
|
||||
|
|
|
@ -365,9 +365,15 @@ gv_lsi(struct gv_sd *s, struct sbuf *sb, int flags)
|
|||
(intmax_t)s->size, (intmax_t)s->size / MEGABYTE);
|
||||
sbuf_printf(sb, "\t\tState: %s\n", gv_sdstate(s->state));
|
||||
|
||||
if (s->state == GV_SD_INITIALIZING) {
|
||||
sbuf_printf(sb, "\t\tInitialized: %16jd bytes "
|
||||
"(%d%%)\n", (intmax_t)s->initialized,
|
||||
if (s->state == GV_SD_INITIALIZING ||
|
||||
s->state == GV_SD_REVIVING) {
|
||||
if (s->state == GV_SD_INITIALIZING)
|
||||
sbuf_printf(sb, "\t\tInitialized: ");
|
||||
else
|
||||
sbuf_printf(sb, "\t\tRevived: ");
|
||||
|
||||
sbuf_printf(sb, "%16jd bytes (%d%%)\n",
|
||||
(intmax_t)s->initialized,
|
||||
(int)((s->initialized * 100) / s->size));
|
||||
}
|
||||
|
||||
|
@ -377,20 +383,20 @@ gv_lsi(struct gv_sd *s, struct sbuf *sb, int flags)
|
|||
gv_roughlength(s->plex_offset, 1));
|
||||
}
|
||||
|
||||
if (s->state == GV_SD_REVIVING) {
|
||||
/* XXX */
|
||||
}
|
||||
|
||||
sbuf_printf(sb, "\t\tDrive %s (%s) at offset %jd (%s)\n",
|
||||
s->drive,
|
||||
s->drive_sc == NULL ? "*missing*" : s->drive_sc->name,
|
||||
(intmax_t)s->drive_offset,
|
||||
gv_roughlength(s->drive_offset, 1));
|
||||
} else {
|
||||
/* XXX reviving and initializing... */
|
||||
sbuf_printf(sb, "S %-21s State: ", s->name);
|
||||
if (s->state == GV_SD_INITIALIZING) {
|
||||
sbuf_printf(sb, "I %d%%\t",
|
||||
if (s->state == GV_SD_INITIALIZING ||
|
||||
s->state == GV_SD_REVIVING) {
|
||||
if (s->state == GV_SD_INITIALIZING)
|
||||
sbuf_printf(sb, "I ");
|
||||
else
|
||||
sbuf_printf(sb, "R ");
|
||||
sbuf_printf(sb, "%d%%\t",
|
||||
(int)((s->initialized * 100) / s->size));
|
||||
} else {
|
||||
sbuf_printf(sb, "%s\t", gv_sdstate(s->state));
|
||||
|
|
|
@ -295,7 +295,9 @@ gv_plex_worker(void *arg)
|
|||
/* A completed request. */
|
||||
if (bp->bio_cflags & GV_BIO_DONE) {
|
||||
g_free(bq);
|
||||
if (bp->bio_cflags & GV_BIO_SYNCREQ) {
|
||||
|
||||
if (bp->bio_cflags & GV_BIO_SYNCREQ ||
|
||||
bp->bio_cflags & GV_BIO_REBUILD) {
|
||||
s = bp->bio_to->private;
|
||||
if (bp->bio_error == 0)
|
||||
s->initialized += bp->bio_length;
|
||||
|
@ -306,8 +308,11 @@ gv_plex_worker(void *arg)
|
|||
g_topology_unlock();
|
||||
s->initialized = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (bp->bio_cflags & GV_BIO_SYNCREQ)
|
||||
g_std_done(bp);
|
||||
} else
|
||||
else
|
||||
gv_plex_completed_request(p, bp);
|
||||
/*
|
||||
* A sub-request that was hold back because it interfered with
|
||||
|
@ -457,7 +462,12 @@ gv_plex_normal_request(struct gv_plex *p, struct bio *bp)
|
|||
wp->bio = bp;
|
||||
TAILQ_INIT(&wp->bits);
|
||||
|
||||
err = gv_build_raid5_req(p, wp, bp, addr, boff, bcount);
|
||||
if (bp->bio_cflags & GV_BIO_REBUILD)
|
||||
err = gv_rebuild_raid5(p, wp, bp, addr,
|
||||
boff, bcount);
|
||||
else
|
||||
err = gv_build_raid5_req(p, wp, bp, addr,
|
||||
boff, bcount);
|
||||
|
||||
/*
|
||||
* Building the sub-request failed, we probably need to
|
||||
|
|
|
@ -77,6 +77,117 @@ gv_stripe_active(struct gv_plex *p, struct bio *bp)
|
|||
return (overlap);
|
||||
}
|
||||
|
||||
int
|
||||
gv_rebuild_raid5(struct gv_plex *p, struct gv_raid5_packet *wp, struct bio *bp,
|
||||
caddr_t addr, off_t boff, off_t bcount)
|
||||
{
|
||||
struct gv_sd *broken, *s;
|
||||
struct gv_bioq *bq;
|
||||
struct bio *cbp, *pbp;
|
||||
off_t len_left, real_len, real_off, stripeend, stripeoff, stripestart;
|
||||
|
||||
if (p == NULL || LIST_EMPTY(&p->subdisks))
|
||||
return (ENXIO);
|
||||
|
||||
/* Offset of the start address from the start of the stripe. */
|
||||
stripeoff = boff % (p->stripesize * (p->sdcount - 1));
|
||||
KASSERT(stripeoff >= 0, ("gv_build_raid5_request: stripeoff < 0"));
|
||||
|
||||
/* The offset of the stripe on this subdisk. */
|
||||
stripestart = (boff - stripeoff) / (p->sdcount - 1);
|
||||
KASSERT(stripestart >= 0, ("gv_build_raid5_request: stripestart < 0"));
|
||||
|
||||
stripeoff %= p->stripesize;
|
||||
|
||||
/* The offset of the request on this subdisk. */
|
||||
real_off = stripestart + stripeoff;
|
||||
|
||||
stripeend = stripestart + p->stripesize;
|
||||
len_left = stripeend - real_off;
|
||||
KASSERT(len_left >= 0, ("gv_build_raid5_request: len_left < 0"));
|
||||
|
||||
/* Find the right subdisk. */
|
||||
broken = NULL;
|
||||
LIST_FOREACH(s, &p->subdisks, in_plex) {
|
||||
if (s->state != GV_SD_UP)
|
||||
broken = s;
|
||||
}
|
||||
|
||||
/* Parity stripe not found. */
|
||||
if (broken == NULL)
|
||||
return (ENXIO);
|
||||
|
||||
switch (broken->state) {
|
||||
case GV_SD_UP:
|
||||
return (EINVAL);
|
||||
|
||||
case GV_SD_STALE:
|
||||
if (!(bp->bio_cflags & GV_BIO_REBUILD))
|
||||
return (ENXIO);
|
||||
|
||||
printf("GEOM_VINUM: sd %s is reviving\n", broken->name);
|
||||
gv_set_sd_state(broken, GV_SD_REVIVING, GV_SETSTATE_FORCE);
|
||||
break;
|
||||
|
||||
case GV_SD_REVIVING:
|
||||
break;
|
||||
|
||||
default:
|
||||
/* All other subdisk states mean it's not accessible. */
|
||||
return (ENXIO);
|
||||
}
|
||||
|
||||
real_len = (bcount <= len_left) ? bcount : len_left;
|
||||
wp->length = real_len;
|
||||
wp->data = addr;
|
||||
wp->lockbase = real_off;
|
||||
|
||||
KASSERT(wp->length >= 0, ("gv_build_raid5_request: wp->length < 0"));
|
||||
|
||||
/* Read all subdisks. */
|
||||
LIST_FOREACH(s, &p->subdisks, in_plex) {
|
||||
/* Skip the broken subdisk. */
|
||||
if (s == broken)
|
||||
continue;
|
||||
|
||||
cbp = g_clone_bio(bp);
|
||||
if (cbp == NULL)
|
||||
return (ENOMEM);
|
||||
cbp->bio_cmd = BIO_READ;
|
||||
cbp->bio_data = g_malloc(real_len, M_WAITOK);
|
||||
cbp->bio_cflags |= GV_BIO_MALLOC;
|
||||
cbp->bio_offset = real_off;
|
||||
cbp->bio_length = real_len;
|
||||
cbp->bio_done = gv_plex_done;
|
||||
cbp->bio_caller2 = s->consumer;
|
||||
cbp->bio_driver1 = wp;
|
||||
|
||||
GV_ENQUEUE(bp, cbp, pbp);
|
||||
|
||||
bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
|
||||
bq->bp = cbp;
|
||||
TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
|
||||
}
|
||||
|
||||
/* Write the parity data. */
|
||||
cbp = g_clone_bio(bp);
|
||||
if (cbp == NULL)
|
||||
return (ENOMEM);
|
||||
cbp->bio_data = g_malloc(real_len, M_WAITOK | M_ZERO);
|
||||
cbp->bio_cflags |= GV_BIO_MALLOC;
|
||||
cbp->bio_offset = real_off;
|
||||
cbp->bio_length = real_len;
|
||||
cbp->bio_done = gv_plex_done;
|
||||
cbp->bio_caller2 = broken->consumer;
|
||||
cbp->bio_driver1 = wp;
|
||||
cbp->bio_cflags |= GV_BIO_REBUILD;
|
||||
wp->parity = cbp;
|
||||
|
||||
p->synced = boff;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/* Build a request group to perform (part of) a RAID5 request. */
|
||||
int
|
||||
gv_build_raid5_req(struct gv_plex *p, struct gv_raid5_packet *wp,
|
||||
|
@ -166,6 +277,9 @@ gv_build_raid5_req(struct gv_plex *p, struct gv_raid5_packet *wp,
|
|||
|
||||
KASSERT(wp->length >= 0, ("gv_build_raid5_request: wp->length < 0"));
|
||||
|
||||
if ((p->flags & GV_PLEX_SYNCING) && (boff + real_len < p->synced))
|
||||
type = REQ_TYPE_NORMAL;
|
||||
|
||||
switch (bp->bio_cmd) {
|
||||
case BIO_READ:
|
||||
/*
|
||||
|
|
|
@ -67,6 +67,8 @@ struct gv_raid5_packet {
|
|||
int gv_stripe_active(struct gv_plex *, struct bio *);
|
||||
int gv_build_raid5_req(struct gv_plex *, struct gv_raid5_packet *,
|
||||
struct bio *, caddr_t, off_t, off_t);
|
||||
int gv_rebuild_raid5(struct gv_plex *, struct gv_raid5_packet *,
|
||||
struct bio *, caddr_t, off_t, off_t);
|
||||
void gv_raid5_worker(void *);
|
||||
void gv_plex_done(struct bio *);
|
||||
|
||||
|
|
|
@ -113,6 +113,7 @@
|
|||
#define GV_BIO_ONHOLD 0x04
|
||||
#define GV_BIO_SYNCREQ 0x08
|
||||
#define GV_BIO_SUCCEED 0x10
|
||||
#define GV_BIO_REBUILD 0x20
|
||||
|
||||
/*
|
||||
* hostname is 256 bytes long, but we don't need to shlep multiple copies in
|
||||
|
|
Loading…
Reference in a new issue