From 5823cef910de97735977f58ef0c983c2b94a2594 Mon Sep 17 00:00:00 2001 From: Greg Lehey Date: Tue, 29 Feb 2000 06:16:06 +0000 Subject: [PATCH] set_drive_state: update subdisk states more correctly. set_sd_state: update the state of a subdisk in a multi-plex volume more correctly. update_plex_state: Bring the plex up correctly when the last subdisk comes up. checksdstate: Update comments. vpstate: Don't return an "up" state on a degraded, unattached plex. start_object: Return a sensible error message when trying to revive a subdisk whose drive is down. Previously it returned EBUSY. Approved-by: jkh --- sys/dev/vinum/vinumstate.c | 167 ++++++++++++++++++++----------------- 1 file changed, 91 insertions(+), 76 deletions(-) diff --git a/sys/dev/vinum/vinumstate.c b/sys/dev/vinum/vinumstate.c index c9aeb703f91c..3ed347fb458c 100644 --- a/sys/dev/vinum/vinumstate.c +++ b/sys/dev/vinum/vinumstate.c @@ -68,19 +68,19 @@ set_drive_state(int driveno, enum drivestate newstate, enum setstateflags flags) "vinum: drive %s is %s\n", drive->label.name, drive_state(drive->state)); - if ((drive->state == drive_up) - && (drive->vp == NULL)) /* should be open, but we're not */ - init_drive(drive, 1); /* which changes the state again */ - if (newstate < drive_up) /* drive going down, */ - queue_daemon_request(daemonrq_closedrive, /* get the daemon to close it */ - (union daemoninfo) drive); - if (newstate != oldstate) { /* state has changed */ + if (drive->state != oldstate) { /* state has changed */ for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) { /* find this drive's subdisks */ if ((SD[sdno].state >= sd_referenced) && (SD[sdno].driveno == driveno)) /* belongs to this drive */ update_sd_state(sdno); /* update the state */ } } + if (newstate == drive_up) { /* want to bring it up */ + if ((drive->flags & VF_OPEN) == 0) /* should be open, but we're not */ + init_drive(drive, 1); /* which changes the state again */ + } else /* taking it down or worse */ + queue_daemon_request(daemonrq_closedrive, /* get the daemon to close it */ + (union daemoninfo) drive); if ((flags & setstate_configuring) == 0) /* configuring? */ save_config(); /* no: save the updated configuration now */ return 1; @@ -145,6 +145,11 @@ set_sd_state(int sdno, enum sdstate newstate, enum setstateflags flags) if (flags & setstate_force) /* forcing it, */ break; /* just do it, and damn the consequences */ switch (sd->state) { + /* + * Perform the necessary tests. To allow + * the state transition, just break out of + * the switch. + */ case sd_crashed: case sd_reborn: case sd_down: /* been down, no data lost */ @@ -160,7 +165,7 @@ set_sd_state(int sdno, enum sdstate newstate, enum setstateflags flags) if (oldstate != sd_reborn) { sd->state = sd_reborn; /* here it is again */ log(LOG_INFO, - "vinum: subdisk %s is %s, not %s\n", + "vinum: %s is %s, not %s\n", sd->name, sd_state(sd->state), sd_state(newstate)); @@ -177,21 +182,22 @@ set_sd_state(int sdno, enum sdstate newstate, enum setstateflags flags) case sd_empty: case sd_initialized: /* - * If we're associated with a plex which - * is down, or which is the only one in - * the volume, and we're not a RAID-5 - * plex, we can come up without being - * inconsistent. Internally, we use the - * force flag to bring up a RAID-5 plex - * after initialization. + * If we're not part of a plex, or the + * plex is not part of a volume with other + * plexes which are up, we can come up + * without being inconsistent. + * + * If we're part of a parity plex, we'll + * come up if the caller uses force. This + * is the way we bring them up after + * initialization. */ - if ((sd->plexno >= 0) - && ((PLEX[sd->plexno].organization != plex_raid5) - || (flags & setstate_force)) - && ((PLEX[sd->plexno].state < plex_firstup) - || (PLEX[sd->plexno].volno < 0) - || (VOL[PLEX[sd->plexno].volno].plexes == 1))) + if ((sd->plexno < 0) + || ((vpstate(&PLEX[sd->plexno]) & volplex_otherup) == 0) + || (isparity((&PLEX[sd->plexno])) + && (flags & setstate_force))) break; + /* Otherwise it's just out of date */ /* FALLTHROUGH */ @@ -204,16 +210,16 @@ set_sd_state(int sdno, enum sdstate newstate, enum setstateflags flags) * * 2. If the subdisk is part of a * one-plex volume or an unattached - * plex, and it's not RAID-5, we - * *can't revive*. The subdisk - * doesn't change its state. + * plex, and it's not RAID-4 or + * RAID-5, we *can't revive*. The + * subdisk doesn't change its state. * * 3. If the subdisk is part of a * one-plex volume or an unattached - * plex, and it's RAID-5, but more - * than one subdisk is down, we *still - * can't revive*. The subdisk doesn't - * change its state. + * plex, and it's RAID-4 or RAID-5, + * but more than one subdisk is down, + * we *still can't revive*. The + * subdisk doesn't change its state. * * 4. If the subdisk is part of a * multi-plex volume, we'll change to @@ -236,11 +242,12 @@ set_sd_state(int sdno, enum sdstate newstate, enum setstateflags flags) * * 1: we don't have a volume * 2: we're the only plex in the volume - * 3: we're a RAID-5 plex, and more than one subdisk is down. + * 3: we're a RAID-4 or RAID-5 plex, and + * more than one subdisk is down. */ if (((vol == NULL) || (vol->plexes == 1)) - && ((plex->organization != plex_raid5) + && ((!isparity(plex)) || (plex->sddowncount > 1))) { if (sd->state == sd_initializing) /* it's finished initializing */ sd->state = sd_initialized; @@ -493,9 +500,20 @@ update_plex_state(int plexno) if (statemap & sd_initstate) /* something initializing? */ plex->state = plex_initializing; /* yup, that makes the plex the same */ - else if ((statemap == sd_emptystate) /* all subdisks empty */ - ||(statemap == sd_initializedstate)) { /* or all initialized */ - if ((vps & volplex_otherup) == 0) { /* no other plex is up */ + else if (statemap == sd_upstate) + /* + * All the subdisks are up. This also means that + * they are consistent, so we can just bring + * the plex up + */ + plex->state = plex_up; + else if (isparity(plex) /* RAID-4 or RAID-5 plex */ + &&(plex->sddowncount == 1)) /* and exactly one subdisk down */ + plex->state = plex_degraded; /* limping a bit */ + else if (((statemap & ~sd_downstate) == sd_emptystate) /* all subdisks empty */ + ||((statemap & ~sd_downstate) + == (statemap & ~sd_downstate & (sd_initializedstate | sd_upstate)))) { + if ((vps & volplex_otherup) == 0) { /* no other plex is up */ struct volume *vol = &VOL[plex->volno]; /* possible volume to which it points */ /* @@ -521,38 +539,22 @@ update_plex_state(int plexno) for (plexno = 0; plexno < vol->plexes; plexno++) forceup(VOL[plex->volno].plex[plexno]); } else if ((statemap == sd_initializedstate) /* if it's initialized (not empty) */ -||(plex->organization == plex_concat) /* and we're not RAID-5 */ +||(plex->organization == plex_concat) /* and we're not RAID-4 or RAID-5 */ ||(plex->organization == plex_striped)) forceup(plexno); /* we'll do it */ /* * This leaves a case where things don't get - * done: the plex is RAID-5, and the subdisks - * are all empty. They need to be initialized - * first. + * done: the plex is RAID-4 or RAID-5, and + * the subdisks are all empty. They need to + * be initialized first. */ - } else { /* another plex is up */ - int sdno; - - plex->state = plex_faulty; /* and bring it up */ - /* change the subdisks to up state */ - for (sdno = 0; sdno < plex->subdisks; sdno++) { - SD[plex->sdnos[sdno]].state = sd_stale; - log(LOG_INFO, /* tell them about it */ - "vinum: %s must be revived\n", - SD[plex->sdnos[sdno]].name); - } + } else { + if (statemap == sd_upstate) /* all subdisks up */ + plex->state = plex_up; /* we can come up too */ + else + plex->state = plex_faulty; } - } else if (statemap == sd_upstate) - /* - * All the subdisks are up. This also means that - * they are consistent, so we can just bring - * the plex up - */ - plex->state = plex_up; - else if ((plex->organization == plex_raid5) /* raid 5 plex */ - &&(plex->sddowncount == 1)) /* and exactly one subdisk down */ - plex->state = plex_degraded; /* limping a bit */ - else if ((statemap & (sd_upstate | sd_rebornstate)) == statemap) /* all up or reborn */ + } else if ((statemap & (sd_upstate | sd_rebornstate)) == statemap) /* all up or reborn */ plex->state = plex_flaky; else if (statemap & (sd_upstate | sd_rebornstate)) /* some up or reborn */ plex->state = plex_corrupt; /* corrupt */ @@ -626,21 +628,25 @@ checksdstate(struct sd *sd, struct request *rq, daddr_t diskaddr, daddr_t disken /* * Access to a reviving subdisk depends on the * organization of the plex: - - * - If it's concatenated, access the subdisk up to its current - * revive point. If we want to write to the subdisk overlapping the - * current revive block, set the conflict flag in the request, asking - * the caller to put the request on the wait list, which will be - * attended to by revive_block when it's done. - * - if it's striped, we can't do it (we could do some hairy - * calculations, but it's unlikely to work). - * - if it's RAID-5, we can do it as long as only one - * subdisk is down + * + * - If it's concatenated, access the subdisk + * up to its current revive point. If we + * want to write to the subdisk overlapping + * the current revive block, set the + * conflict flag in the request, asking the + * caller to put the request on the wait + * list, which will be attended to by + * revive_block when it's done. + * - if it's striped, we can't do it (we could + * do some hairy calculations, but it's + * unlikely to work). + * - if it's RAID-4 or RAID-5, we can do it as + * long as only one subdisk is down */ if (plex->state == plex_striped) /* plex is striped, */ return REQUEST_DOWN; - else if (plex->state == plex_raid5) { /* RAID5 plex */ + else if (isparity(plex)) { /* RAID-4 or RAID-5 plex */ if (plex->sddowncount > 1) /* with more than one sd down, */ return REQUEST_DOWN; else @@ -770,9 +776,12 @@ vpstate(struct plex *plex) enum volplexstate state = volplex_onlyusdown; /* state to return */ int plexno; - if (plex->volno < 0) /* not associated with a volume */ - return volplex_onlyusdown; /* assume the worst */ - + if (plex->volno < 0) { /* not associated with a volume */ + if (plex->state > plex_degraded) + return volplex_onlyus; /* just us */ + else + return volplex_onlyusdown; /* assume the worst */ + } vol = &VOL[plex->volno]; /* point to our volume */ for (plexno = 0; plexno < vol->plexes; plexno++) { if (&PLEX[vol->plex[plexno]] == plex) { /* us */ @@ -856,6 +865,11 @@ start_object(struct vinum_ioctl_msg *data) break; case sd_object: + if (DRIVE[SD[objindex].driveno].state != drive_up) { + ioctl_reply->error = EIO; + strcpy(ioctl_reply->msg, "Drive is down"); + return; + } if ((SD[objindex].state == sd_reviving) /* reviving, */ ||(SD[objindex].state == sd_stale)) { /* or stale, will revive */ SD[objindex].state = sd_reviving; /* make sure we're reviving */ @@ -877,7 +891,8 @@ start_object(struct vinum_ioctl_msg *data) ioctl_reply->error = EBUSY; else ioctl_reply->error = 0; - } + } else + ioctl_reply->error = status; break; case plex_object: @@ -950,8 +965,8 @@ stop_object(struct vinum_ioctl_msg *data) } /* - * VINUM_SETSTATE ioctl: set an object state - * msg is the message passed by the user + * VINUM_SETSTATE ioctl: set an object state. + * msg is the message passed by the user. */ void setstate(struct vinum_ioctl_msg *msg)