mirror of
https://github.com/freebsd/freebsd-src
synced 2024-10-19 06:44:31 +00:00
Add support for >2TB disks in GEOM RAID for Intel metadata format.
Reviewed by: mav Approved by: scottl MFC after: 1 week
This commit is contained in:
parent
6791e4f442
commit
c1ad3fcf6a
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/head/; revision=229886
|
@ -251,7 +251,7 @@ complete it there.
|
|||
Do not run GEOM RAID class on migrating volumes under pain of possible data
|
||||
corruption!
|
||||
.Sh 2TiB BARRIERS
|
||||
Intel and Promise metadata formats do not support disks above 2TiB.
|
||||
Promise metadata format does not support disks above 2TiB.
|
||||
NVIDIA metadata format does not support volumes above 2TiB.
|
||||
.Sh EXIT STATUS
|
||||
Exit status is 0 on success, and non-zero if the command fails.
|
||||
|
|
|
@ -64,7 +64,10 @@ struct intel_raid_map {
|
|||
uint8_t total_domains;
|
||||
uint8_t failed_disk_num;
|
||||
uint8_t ddf;
|
||||
uint32_t filler_2[7];
|
||||
uint32_t offset_hi;
|
||||
uint32_t disk_sectors_hi;
|
||||
uint32_t stripe_count_hi;
|
||||
uint32_t filler_2[4];
|
||||
uint32_t disk_idx[1]; /* total_disks entries. */
|
||||
#define INTEL_DI_IDX 0x00ffffff
|
||||
#define INTEL_DI_RBLD 0x01000000
|
||||
|
@ -111,7 +114,8 @@ struct intel_raid_vol {
|
|||
uint8_t fs_state;
|
||||
uint16_t verify_errors;
|
||||
uint16_t bad_blocks;
|
||||
uint32_t filler_1[4];
|
||||
uint32_t curr_migr_unit_hi;
|
||||
uint32_t filler_1[3];
|
||||
struct intel_raid_map map[1]; /* 2 entries if migr_state != 0. */
|
||||
} __packed;
|
||||
|
||||
|
@ -125,8 +129,9 @@ struct intel_raid_disk {
|
|||
#define INTEL_F_ASSIGNED 0x02
|
||||
#define INTEL_F_FAILED 0x04
|
||||
#define INTEL_F_ONLINE 0x08
|
||||
|
||||
uint32_t filler[5];
|
||||
uint32_t owner_cfg_num;
|
||||
uint32_t sectors_hi;
|
||||
uint32_t filler[3];
|
||||
} __packed;
|
||||
|
||||
struct intel_raid_conf {
|
||||
|
@ -254,6 +259,82 @@ intel_get_volume(struct intel_raid_conf *meta, int i)
|
|||
return (mvol);
|
||||
}
|
||||
|
||||
static off_t
|
||||
intel_get_map_offset(struct intel_raid_map *mmap)
|
||||
{
|
||||
off_t offset = (off_t)mmap->offset_hi << 32;
|
||||
|
||||
offset += mmap->offset;
|
||||
return (offset);
|
||||
}
|
||||
|
||||
static void
|
||||
intel_set_map_offset(struct intel_raid_map *mmap, off_t offset)
|
||||
{
|
||||
|
||||
mmap->offset = offset & 0xffffffff;
|
||||
mmap->offset_hi = offset >> 32;
|
||||
}
|
||||
|
||||
static off_t
|
||||
intel_get_map_disk_sectors(struct intel_raid_map *mmap)
|
||||
{
|
||||
off_t disk_sectors = (off_t)mmap->disk_sectors_hi << 32;
|
||||
|
||||
disk_sectors += mmap->disk_sectors;
|
||||
return (disk_sectors);
|
||||
}
|
||||
|
||||
static void
|
||||
intel_set_map_disk_sectors(struct intel_raid_map *mmap, off_t disk_sectors)
|
||||
{
|
||||
|
||||
mmap->disk_sectors = disk_sectors & 0xffffffff;
|
||||
mmap->disk_sectors_hi = disk_sectors >> 32;
|
||||
}
|
||||
|
||||
static void
|
||||
intel_set_map_stripe_count(struct intel_raid_map *mmap, off_t stripe_count)
|
||||
{
|
||||
|
||||
mmap->stripe_count = stripe_count & 0xffffffff;
|
||||
mmap->stripe_count_hi = stripe_count >> 32;
|
||||
}
|
||||
|
||||
static off_t
|
||||
intel_get_disk_sectors(struct intel_raid_disk *disk)
|
||||
{
|
||||
off_t sectors = (off_t)disk->sectors_hi << 32;
|
||||
|
||||
sectors += disk->sectors;
|
||||
return (sectors);
|
||||
}
|
||||
|
||||
static void
|
||||
intel_set_disk_sectors(struct intel_raid_disk *disk, off_t sectors)
|
||||
{
|
||||
|
||||
disk->sectors = sectors & 0xffffffff;
|
||||
disk->sectors_hi = sectors >> 32;
|
||||
}
|
||||
|
||||
static off_t
|
||||
intel_get_vol_curr_migr_unit(struct intel_raid_vol *vol)
|
||||
{
|
||||
off_t curr_migr_unit = (off_t)vol->curr_migr_unit_hi << 32;
|
||||
|
||||
curr_migr_unit += vol->curr_migr_unit;
|
||||
return (curr_migr_unit);
|
||||
}
|
||||
|
||||
static void
|
||||
intel_set_vol_curr_migr_unit(struct intel_raid_vol *vol, off_t curr_migr_unit)
|
||||
{
|
||||
|
||||
vol->curr_migr_unit = curr_migr_unit & 0xffffffff;
|
||||
vol->curr_migr_unit_hi = curr_migr_unit >> 32;
|
||||
}
|
||||
|
||||
static void
|
||||
g_raid_md_intel_print(struct intel_raid_conf *meta)
|
||||
{
|
||||
|
@ -274,10 +355,11 @@ g_raid_md_intel_print(struct intel_raid_conf *meta)
|
|||
printf("attributes 0x%08x\n", meta->attributes);
|
||||
printf("total_disks %u\n", meta->total_disks);
|
||||
printf("total_volumes %u\n", meta->total_volumes);
|
||||
printf("DISK# serial disk_sectors disk_id flags\n");
|
||||
printf("DISK# serial disk_sectors disk_sectors_hi disk_id flags\n");
|
||||
for (i = 0; i < meta->total_disks; i++ ) {
|
||||
printf(" %d <%.16s> %u 0x%08x 0x%08x\n", i,
|
||||
printf(" %d <%.16s> %u %u 0x%08x 0x%08x\n", i,
|
||||
meta->disk[i].serial, meta->disk[i].sectors,
|
||||
meta->disk[i].sectors_hi,
|
||||
meta->disk[i].id, meta->disk[i].flags);
|
||||
}
|
||||
for (i = 0; i < meta->total_volumes; i++) {
|
||||
|
@ -288,6 +370,7 @@ g_raid_md_intel_print(struct intel_raid_conf *meta)
|
|||
printf(" state %u\n", mvol->state);
|
||||
printf(" reserved %u\n", mvol->reserved);
|
||||
printf(" curr_migr_unit %u\n", mvol->curr_migr_unit);
|
||||
printf(" curr_migr_unit_hi %u\n", mvol->curr_migr_unit_hi);
|
||||
printf(" checkpoint_id %u\n", mvol->checkpoint_id);
|
||||
printf(" migr_state %u\n", mvol->migr_state);
|
||||
printf(" migr_type %u\n", mvol->migr_type);
|
||||
|
@ -297,8 +380,11 @@ g_raid_md_intel_print(struct intel_raid_conf *meta)
|
|||
printf(" *** Map %d ***\n", j);
|
||||
mmap = intel_get_map(mvol, j);
|
||||
printf(" offset %u\n", mmap->offset);
|
||||
printf(" offset_hi %u\n", mmap->offset_hi);
|
||||
printf(" disk_sectors %u\n", mmap->disk_sectors);
|
||||
printf(" disk_sectors_hi %u\n", mmap->disk_sectors_hi);
|
||||
printf(" stripe_count %u\n", mmap->stripe_count);
|
||||
printf(" stripe_count_hi %u\n", mmap->stripe_count_hi);
|
||||
printf(" strip_sectors %u\n", mmap->strip_sectors);
|
||||
printf(" status %u\n", mmap->status);
|
||||
printf(" type %u\n", mmap->type);
|
||||
|
@ -660,12 +746,15 @@ g_raid_md_intel_start_disk(struct g_raid_disk *disk)
|
|||
continue;
|
||||
/* Make sure this disk is big enough. */
|
||||
TAILQ_FOREACH(sd, &tmpdisk->d_subdisks, sd_next) {
|
||||
off_t disk_sectors =
|
||||
intel_get_disk_sectors(&pd->pd_disk_meta);
|
||||
|
||||
if (sd->sd_offset + sd->sd_size + 4096 >
|
||||
(off_t)pd->pd_disk_meta.sectors * 512) {
|
||||
disk_sectors * 512) {
|
||||
G_RAID_DEBUG1(1, sc,
|
||||
"Disk too small (%llu < %llu)",
|
||||
((unsigned long long)
|
||||
pd->pd_disk_meta.sectors) * 512,
|
||||
(unsigned long long)
|
||||
disk_sectors * 512,
|
||||
(unsigned long long)
|
||||
sd->sd_offset + sd->sd_size + 4096);
|
||||
break;
|
||||
|
@ -788,7 +877,7 @@ g_raid_md_intel_start_disk(struct g_raid_disk *disk)
|
|||
sd->sd_rebuild_pos = 0;
|
||||
} else {
|
||||
sd->sd_rebuild_pos =
|
||||
(off_t)mvol->curr_migr_unit *
|
||||
intel_get_vol_curr_migr_unit(mvol) *
|
||||
sd->sd_volume->v_strip_size *
|
||||
mmap0->total_domains;
|
||||
}
|
||||
|
@ -815,7 +904,7 @@ g_raid_md_intel_start_disk(struct g_raid_disk *disk)
|
|||
sd->sd_rebuild_pos = 0;
|
||||
} else {
|
||||
sd->sd_rebuild_pos =
|
||||
(off_t)mvol->curr_migr_unit *
|
||||
intel_get_vol_curr_migr_unit(mvol) *
|
||||
sd->sd_volume->v_strip_size *
|
||||
mmap0->total_domains;
|
||||
}
|
||||
|
@ -967,8 +1056,8 @@ g_raid_md_intel_start(struct g_raid_softc *sc)
|
|||
vol->v_sectorsize = 512; //ZZZ
|
||||
for (j = 0; j < vol->v_disks_count; j++) {
|
||||
sd = &vol->v_subdisks[j];
|
||||
sd->sd_offset = (off_t)mmap->offset * 512; //ZZZ
|
||||
sd->sd_size = (off_t)mmap->disk_sectors * 512; //ZZZ
|
||||
sd->sd_offset = intel_get_map_offset(mmap) * 512; //ZZZ
|
||||
sd->sd_size = intel_get_map_disk_sectors(mmap) * 512; //ZZZ
|
||||
}
|
||||
g_raid_start_volume(vol);
|
||||
}
|
||||
|
@ -1176,9 +1265,6 @@ g_raid_md_taste_intel(struct g_raid_md_object *md, struct g_class *mp,
|
|||
G_RAID_DEBUG(1,
|
||||
"Intel vendor mismatch 0x%04x != 0x8086",
|
||||
vendor);
|
||||
} else if (pp->mediasize / pp->sectorsize > UINT32_MAX) {
|
||||
G_RAID_DEBUG(1,
|
||||
"Intel disk '%s' is too big.", pp->name);
|
||||
} else {
|
||||
G_RAID_DEBUG(1,
|
||||
"No Intel metadata, forcing spare.");
|
||||
|
@ -1195,10 +1281,10 @@ g_raid_md_taste_intel(struct g_raid_md_object *md, struct g_class *mp,
|
|||
G_RAID_DEBUG(1, "Intel serial '%s' not found", serial);
|
||||
goto fail1;
|
||||
}
|
||||
if (meta->disk[disk_pos].sectors !=
|
||||
if (intel_get_disk_sectors(&meta->disk[disk_pos]) !=
|
||||
(pp->mediasize / pp->sectorsize)) {
|
||||
G_RAID_DEBUG(1, "Intel size mismatch %ju != %ju",
|
||||
(off_t)meta->disk[disk_pos].sectors,
|
||||
intel_get_disk_sectors(&meta->disk[disk_pos]),
|
||||
(off_t)(pp->mediasize / pp->sectorsize));
|
||||
goto fail1;
|
||||
}
|
||||
|
@ -1266,7 +1352,8 @@ g_raid_md_taste_intel(struct g_raid_md_object *md, struct g_class *mp,
|
|||
pd->pd_disk_pos = -1;
|
||||
if (spare == 2) {
|
||||
memcpy(&pd->pd_disk_meta.serial[0], serial, INTEL_SERIAL_LEN);
|
||||
pd->pd_disk_meta.sectors = pp->mediasize / pp->sectorsize;
|
||||
intel_set_disk_sectors(&pd->pd_disk_meta,
|
||||
pp->mediasize / pp->sectorsize);
|
||||
pd->pd_disk_meta.id = 0;
|
||||
pd->pd_disk_meta.flags = INTEL_F_SPARE;
|
||||
} else {
|
||||
|
@ -1372,7 +1459,7 @@ g_raid_md_ctl_intel(struct g_raid_md_object *md,
|
|||
const char *verb, *volname, *levelname, *diskname;
|
||||
char *tmp;
|
||||
int *nargs, *force;
|
||||
off_t off, size, sectorsize, strip;
|
||||
off_t off, size, sectorsize, strip, disk_sectors;
|
||||
intmax_t *sizearg, *striparg;
|
||||
int numdisks, i, len, level, qual, update;
|
||||
int error;
|
||||
|
@ -1452,13 +1539,6 @@ g_raid_md_ctl_intel(struct g_raid_md_object *md,
|
|||
cp->private = disk;
|
||||
g_topology_unlock();
|
||||
|
||||
if (pp->mediasize / pp->sectorsize > UINT32_MAX) {
|
||||
gctl_error(req,
|
||||
"Disk '%s' is too big.", diskname);
|
||||
error = -8;
|
||||
break;
|
||||
}
|
||||
|
||||
error = g_raid_md_get_label(cp,
|
||||
&pd->pd_disk_meta.serial[0], INTEL_SERIAL_LEN);
|
||||
if (error != 0) {
|
||||
|
@ -1479,7 +1559,8 @@ g_raid_md_ctl_intel(struct g_raid_md_object *md,
|
|||
"Dumping not supported by %s.",
|
||||
cp->provider->name);
|
||||
|
||||
pd->pd_disk_meta.sectors = pp->mediasize / pp->sectorsize;
|
||||
intel_set_disk_sectors(&pd->pd_disk_meta,
|
||||
pp->mediasize / pp->sectorsize);
|
||||
if (size > pp->mediasize)
|
||||
size = pp->mediasize;
|
||||
if (sectorsize < pp->sectorsize)
|
||||
|
@ -1544,10 +1625,6 @@ g_raid_md_ctl_intel(struct g_raid_md_object *md,
|
|||
gctl_error(req, "Size too small.");
|
||||
return (-13);
|
||||
}
|
||||
if (size > 0xffffffffllu * sectorsize) {
|
||||
gctl_error(req, "Size too big.");
|
||||
return (-14);
|
||||
}
|
||||
|
||||
/* We have all we need, create things: volume, ... */
|
||||
mdi->mdio_started = 1;
|
||||
|
@ -1655,8 +1732,11 @@ g_raid_md_ctl_intel(struct g_raid_md_object *md,
|
|||
disk = vol1->v_subdisks[i].sd_disk;
|
||||
pd = (struct g_raid_md_intel_perdisk *)
|
||||
disk->d_md_data;
|
||||
if ((off_t)pd->pd_disk_meta.sectors * 512 < size)
|
||||
size = (off_t)pd->pd_disk_meta.sectors * 512;
|
||||
disk_sectors =
|
||||
intel_get_disk_sectors(&pd->pd_disk_meta);
|
||||
|
||||
if (disk_sectors * 512 < size)
|
||||
size = disk_sectors * 512;
|
||||
if (disk->d_consumer != NULL &&
|
||||
disk->d_consumer->provider != NULL &&
|
||||
disk->d_consumer->provider->sectorsize >
|
||||
|
@ -1950,14 +2030,6 @@ g_raid_md_ctl_intel(struct g_raid_md_object *md,
|
|||
pp = cp->provider;
|
||||
g_topology_unlock();
|
||||
|
||||
if (pp->mediasize / pp->sectorsize > UINT32_MAX) {
|
||||
gctl_error(req,
|
||||
"Disk '%s' is too big.", diskname);
|
||||
g_raid_kill_consumer(sc, cp);
|
||||
error = -8;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Read disk serial. */
|
||||
error = g_raid_md_get_label(cp,
|
||||
&serial[0], INTEL_SERIAL_LEN);
|
||||
|
@ -1990,7 +2062,8 @@ g_raid_md_ctl_intel(struct g_raid_md_object *md,
|
|||
|
||||
memcpy(&pd->pd_disk_meta.serial[0], &serial[0],
|
||||
INTEL_SERIAL_LEN);
|
||||
pd->pd_disk_meta.sectors = pp->mediasize / pp->sectorsize;
|
||||
intel_set_disk_sectors(&pd->pd_disk_meta,
|
||||
pp->mediasize / pp->sectorsize);
|
||||
pd->pd_disk_meta.id = 0;
|
||||
pd->pd_disk_meta.flags = INTEL_F_SPARE;
|
||||
|
||||
|
@ -2165,8 +2238,8 @@ g_raid_md_write_intel(struct g_raid_md_object *md, struct g_raid_volume *tvol,
|
|||
mmap0 = intel_get_map(mvol, 0);
|
||||
|
||||
/* Write map / common part of two maps. */
|
||||
mmap0->offset = sd->sd_offset / sectorsize;
|
||||
mmap0->disk_sectors = sd->sd_size / sectorsize;
|
||||
intel_set_map_offset(mmap0, sd->sd_offset / sectorsize);
|
||||
intel_set_map_disk_sectors(mmap0, sd->sd_size / sectorsize);
|
||||
mmap0->strip_sectors = vol->v_strip_size / sectorsize;
|
||||
if (vol->v_state == G_RAID_VOLUME_S_BROKEN)
|
||||
mmap0->status = INTEL_S_FAILURE;
|
||||
|
@ -2188,15 +2261,15 @@ g_raid_md_write_intel(struct g_raid_md_object *md, struct g_raid_volume *tvol,
|
|||
mmap0->total_domains = 2;
|
||||
else
|
||||
mmap0->total_domains = 1;
|
||||
mmap0->stripe_count = sd->sd_size / vol->v_strip_size /
|
||||
mmap0->total_domains;
|
||||
intel_set_map_stripe_count(mmap0,
|
||||
sd->sd_size / vol->v_strip_size / mmap0->total_domains);
|
||||
mmap0->failed_disk_num = 0xff;
|
||||
mmap0->ddf = 1;
|
||||
|
||||
/* If there are two maps - copy common and update. */
|
||||
if (mvol->migr_state) {
|
||||
mvol->curr_migr_unit = pos /
|
||||
vol->v_strip_size / mmap0->total_domains;
|
||||
intel_set_vol_curr_migr_unit(mvol,
|
||||
pos / vol->v_strip_size / mmap0->total_domains);
|
||||
mmap1 = intel_get_map(mvol, 1);
|
||||
memcpy(mmap1, mmap0, sizeof(struct intel_raid_map));
|
||||
mmap0->status = INTEL_S_READY;
|
||||
|
|
Loading…
Reference in a new issue