loader: create single zfs nextboot implementation

The nextboot feature should be implemented in the libsa zfs code.
To get there, I have created a zfs_nextboot() implementation based on
two sources: our current simple text-string-based approach, plus the
structured boot label PAD structure from OpenZFS.

Secondly, all nvlist details are moved to a separate source file and
restructured a bit. This provides the base support needed to add the nvlist
add/update feature in follow-up updates.

And finally, the zfsboot/gptzfsboot disk access functions are swapped to use
libi386 and libsa.

Sponsored by:	Netflix, Klara Inc.
Differential Revision:	https://reviews.freebsd.org/D25324
This commit is contained in:
Toomas Soome 2020-06-20 06:23:31 +00:00
parent 830efe5539
commit 3830659e99
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=362431
14 changed files with 1432 additions and 1181 deletions

View file

@ -52,6 +52,8 @@ CFLAGS.efi_console.c+= -I${SRCTOP}/sys/teken
CFLAGS.teken.c+= -I${SRCTOP}/sys/teken
.if ${MK_LOADER_ZFS} != "no"
CFLAGS+= -I${ZFSSRC}
CFLAGS+= -I${SYSDIR}/cddl/boot/zfs
CFLAGS+= -I${SYSDIR}/cddl/contrib/opensolaris/uts/common
CFLAGS+= -DEFI_ZFS_BOOT
.endif

View file

@ -260,6 +260,8 @@ probe_zfs_currdev(uint64_t guid)
{
char *devname;
struct zfs_devdesc currdev;
char *buf = NULL;
bool rv;
currdev.dd.d_dev = &zfs_dev;
currdev.dd.d_unit = 0;
@ -269,7 +271,18 @@ probe_zfs_currdev(uint64_t guid)
devname = efi_fmtdev(&currdev);
init_zfs_bootenv(devname);
return (sanity_check_currdev());
rv = sanity_check_currdev();
if (rv) {
buf = malloc(VDEV_PAD_SIZE);
if (buf != NULL) {
if (zfs_nextboot(&currdev, buf, VDEV_PAD_SIZE) == 0) {
printf("zfs nextboot: %s\n", buf);
set_currdev(buf);
}
free(buf);
}
}
return (rv);
}
#endif

View file

@ -4,7 +4,7 @@
.PATH: ${BOOTSRC}/i386/boot2 ${BOOTSRC}/i386/gptboot \
${BOOTSRC}/i386/zfsboot ${BOOTSRC}/i386/common \
${SASRC}
${BOOTSRC}/common
FILES= gptzfsboot
MAN= gptzfsboot.8
@ -19,12 +19,16 @@ ORG2= 0x0
CFLAGS+=-DBOOTPROG=\"gptzfsboot\" \
-O1 \
-DGPT -DZFS -DBOOT2 \
-DBOOT2 \
-DLOADER_GPT_SUPPORT \
-DLOADER_MBR_SUPPORT \
-DLOADER_ZFS_SUPPORT \
-DSIOPRT=${BOOT_COMCONSOLE_PORT} \
-DSIOFMT=${B2SIOFMT} \
-DSIOSPD=${BOOT_COMCONSOLE_SPEED} \
-I${LDRSRC} \
-I${BOOTSRC}/i386/common \
-I${BOOTSRC}/i386/libi386 \
-I${ZFSSRC} \
-I${SYSDIR}/crypto/skein \
-I${SYSDIR}/cddl/boot/zfs \
@ -60,15 +64,18 @@ gptldr.bin: gptldr.out
gptldr.out: gptldr.o
${LD} ${LD_FLAGS} -e start --defsym ORG=${ORG1} -T ${LDSCRIPT} -o ${.TARGET} gptldr.o
CLEANFILES+= gptzfsboot.bin gptzfsboot.out zfsboot.o sio.o cons.o \
drv.o gpt.o ${OPENCRYPTO_XTS}
OBJS= zfsboot.o sio.o cons.o bcache.o devopen.o disk.o part.o zfs_cmd.o
CLEANFILES+= gptzfsboot.bin gptzfsboot.out ${OBJS} ${OPENCRYPTO_XTS}
# i386 standalone support library
LIBI386= ${BOOTOBJ}/i386/libi386/libi386.a
gptzfsboot.bin: gptzfsboot.out
${OBJCOPY} -S -O binary gptzfsboot.out ${.TARGET}
gptzfsboot.out: ${BTXCRT} zfsboot.o sio.o gpt.o drv.o cons.o \
gptzfsboot.out: ${BTXCRT} ${OBJS} \
${OPENCRYPTO_XTS}
${LD} ${LD_FLAGS} --defsym ORG=${ORG2} -T ${LDSCRIPT} -o ${.TARGET} ${.ALLSRC} ${LIBSA32}
${LD} ${LD_FLAGS} --defsym ORG=${ORG2} -T ${LDSCRIPT} -o ${.TARGET} ${.ALLSRC} ${LIBI386} ${LIBSA32}
zfsboot.o: ${ZFSSRC}/zfsimpl.c

View file

@ -37,6 +37,8 @@ CFLAGS+= -Dalloca=__builtin_alloca
CFLAGS+= -I${BOOTSRC}/ficl -I${BOOTSRC}/ficl/i386 \
-I${LDRSRC} -I${BOOTSRC}/i386/common \
-I${SYSDIR}/cddl/boot/zfs \
-I${SYSDIR}/cddl/contrib/opensolaris/uts/common \
-I${SYSDIR}/contrib/dev/acpica/include
# Handle FreeBSD specific %b and %D printf format specifiers

View file

@ -2,7 +2,7 @@
.include <bsd.init.mk>
.PATH: ${BOOTSRC}/i386/boot2 ${BOOTSRC}/i386/common ${SASRC}
.PATH: ${BOOTSRC}/i386/boot2 ${BOOTSRC}/i386/common ${BOOTSRC}/common
FILES= zfsboot
MAN= zfsboot.8
@ -17,13 +17,17 @@ ORG2= 0x2000
CFLAGS+=-DBOOTPROG=\"zfsboot\" \
-O1 \
-DZFS -DBOOT2 \
-DBOOT2 \
-DLOADER_GPT_SUPPORT \
-DLOADER_MBR_SUPPORT \
-DLOADER_ZFS_SUPPORT \
-DLOADER_UFS_SUPPORT \
-DSIOPRT=${BOOT_COMCONSOLE_PORT} \
-DSIOFMT=${B2SIOFMT} \
-DSIOSPD=${BOOT_COMCONSOLE_SPEED} \
-I${LDRSRC} \
-I${BOOTSRC}/i386/common \
-I${BOOTSRC}/i386 \
-I${BOOTSRC}/i386/libi386 \
-I${ZFSSRC} \
-I${SYSDIR}/crypto/skein \
-I${SYSDIR}/cddl/boot/zfs \
@ -34,6 +38,8 @@ CFLAGS+=-DBOOTPROG=\"zfsboot\" \
-Wmissing-declarations -Wmissing-prototypes -Wnested-externs \
-Wpointer-arith -Wshadow -Wstrict-prototypes -Wwrite-strings
CFLAGS.part.c+= -DHAVE_MEMCPY -I${SRCTOP}/sys/contrib/zlib
CFLAGS.gcc+= --param max-inline-insns-single=100
LD_FLAGS+=${LD_FLAGS_BIN}
@ -51,14 +57,18 @@ zfsboot1: zfsldr.out
zfsldr.out: zfsldr.o
${LD} ${LD_FLAGS} -e start --defsym ORG=${ORG1} -T ${LDSCRIPT} -o ${.TARGET} zfsldr.o
OBJS= zfsboot.o sio.o cons.o bcache.o devopen.o disk.o part.o zfs_cmd.o
CLEANFILES+= zfsboot2 zfsboot.ld zfsboot.ldr zfsboot.bin zfsboot.out \
zfsboot.o zfsboot.s zfsboot.s.tmp sio.o cons.o drv.o
${OBJS}
# We currently allow 256k bytes for zfsboot - in practice it could be
# any size up to 3.5Mb but keeping it fixed size simplifies zfsldr.
#
BOOT2SIZE= 262144
# i386 standalone support library
LIBI386= ${BOOTOBJ}/i386/libi386/libi386.a
zfsboot2: zfsboot.ld
@set -- `ls -l ${.ALLSRC}`; x=$$((${BOOT2SIZE}-$$5)); \
echo "$$x bytes available"; test $$x -ge 0
@ -74,8 +84,8 @@ zfsboot.ldr:
zfsboot.bin: zfsboot.out
${OBJCOPY} -S -O binary zfsboot.out ${.TARGET}
zfsboot.out: ${BTXCRT} zfsboot.o sio.o drv.o cons.o
${LD} ${LD_FLAGS} --defsym ORG=${ORG2} -T ${LDSCRIPT} -o ${.TARGET} ${.ALLSRC} ${LIBSA32}
zfsboot.out: ${BTXCRT} ${OBJS}
${LD} ${LD_FLAGS} --defsym ORG=${ORG2} -T ${LDSCRIPT} -o ${.TARGET} ${.ALLSRC} ${LIBI386} ${LIBSA32}
SRCS= zfsboot.c

File diff suppressed because it is too large Load diff

View file

@ -1,7 +1,7 @@
# $FreeBSD$
.PATH: ${ZFSSRC}
SRCS+= zfs.c skein.c skein_block.c list.c
SRCS+= zfs.c nvlist.c skein.c skein_block.c list.c
# Do not unroll skein loops, reduce code size
CFLAGS+= -DSKEIN_LOOP=111
.PATH: ${SYSDIR}/crypto/skein

View file

@ -26,6 +26,12 @@
* $FreeBSD$
*/
#include <zfsimpl.h>
#ifdef LOADER_GELI_SUPPORT
#include <crypto/intake.h>
#endif
#ifndef _BOOT_LIBZFS_H_
#define _BOOT_LIBZFS_H_
@ -40,13 +46,80 @@ struct zfs_devdesc {
uint64_t root_guid;
};
#ifdef LOADER_GELI_SUPPORT
#include <crypto/intake.h>
#endif
/* nvp implementation version */
#define NV_VERSION 0
/* nvlist persistent unique name flags, stored in nvl_nvflags */
#define NV_UNIQUE_NAME 0x1
#define NV_UNIQUE_NAME_TYPE 0x2
#define NV_ALIGN4(x) (((x) + 3) & ~3)
/*
* nvlist header.
* nvlist has 4 bytes header followed by version and flags, then nvpairs
* and the list is terminated by double zero.
*/
/* Four single-byte fields that lead an nvlist. */
typedef struct {
char nvh_encoding;	/* nvlist_create() stores NV_ENCODE_XDR here */
char nvh_endian;	/* nvlist_create() stores 1 on little-endian hosts, else 0 */
char nvh_reserved1;
char nvh_reserved2;
} nvs_header_t;
/*
 * In-memory handle for an nvlist byte stream.
 */
typedef struct {
nvs_header_t nv_header;
size_t nv_asize;	/* bytes allocated for nv_data; 0 when nv_data is borrowed */
size_t nv_size;	/* bytes of nv_data currently in use */
uint8_t *nv_data;	/* start of the stream (laid out as nvs_data_t) */
uint8_t *nv_idx;	/* cursor used while decoding the stream */
} nvlist_t;
/*
* nvpair header.
* nvpair has encoded and decoded size
* name string (size and data)
* data type and number of elements
* data
*/
/* Both sizes being zero marks the end of the pair list. */
typedef struct {
unsigned encoded_size;	/* byte distance from this header to the next one */
unsigned decoded_size;	/* only tested for non-zero by the loader code */
} nvp_header_t;
/*
* nvlist stream head.
*/
typedef struct {
unsigned nvl_version;	/* nvlist_create() stores NV_VERSION */
unsigned nvl_nvflag;	/* NV_UNIQUE_NAME* flags */
nvp_header_t nvl_pair;	/* header of the first pair in the list */
} nvs_data_t;
/* Counted string as stored in the stream; the bytes are not NUL-terminated. */
typedef struct {
unsigned nv_size;	/* string length in bytes, without padding */
uint8_t nv_data[]; /* NV_ALIGN4(string) */
} nv_string_t;
/*
 * Value part of a pair. It follows the pair name, at the next 4-byte
 * aligned address after the name bytes.
 */
typedef struct {
unsigned nv_type; /* data_type_t */
unsigned nv_nelem; /* number of elements */
uint8_t nv_data[]; /* data stream */
} nv_pair_data_t;
nvlist_t *nvlist_create(int);
void nvlist_destroy(nvlist_t *);
nvlist_t *nvlist_import(const uint8_t *, char, char);
int nvlist_remove(nvlist_t *, const char *, data_type_t);
void nvlist_print(nvlist_t *, unsigned int);
int nvlist_find(const nvlist_t *, const char *, data_type_t,
int *, void *, int *);
int nvlist_next(nvlist_t *);
int zfs_parsedev(struct zfs_devdesc *dev, const char *devspec,
const char **path);
char *zfs_fmtdev(void *vdev);
int zfs_nextboot(void *vdev, char *buf, size_t size);
int zfs_probe_dev(const char *devname, uint64_t *pool_guid);
int zfs_list(const char *name);
uint64_t ldi_get_size(void *);

601
stand/libsa/zfs/nvlist.c Normal file
View file

@ -0,0 +1,601 @@
/*-
* Copyright 2020 Toomas Soome <tsoome@me.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <stand.h>
#include <sys/endian.h>
#include <zfsimpl.h>
#include "libzfs.h"
/*
 * Integer decoding operations table.
 * xdr_getint reads one integer from the second argument, stores it through
 * the third argument and returns the number of stream bytes consumed.
 */
typedef struct xdr {
int (*xdr_getint)(const struct xdr *, const void *, int *);
} xdr_t;
static int xdr_int(const xdr_t *, const void *, int *);
static int mem_int(const xdr_t *, const void *, int *);
static void nvlist_decode_nvlist(const xdr_t *, nvlist_t *);
static int nvlist_size(const xdr_t *, const uint8_t *);
/*
 * Transform data from network to host byte order: integers are read
 * with xdr_int().
 */
xdr_t ntoh = {
.xdr_getint = xdr_int
};
/*
 * Transform data from host to host: integers are copied as-is with
 * mem_int().
 */
xdr_t native = {
.xdr_getint = mem_int
};
/*
 * Transform data from host to network.
 * NOTE(review): this table uses the same xdr_int() reader as ntoh; only a
 * "get" operation exists in xdr_t so far.
 */
xdr_t hton = {
.xdr_getint = xdr_int
};
/*
 * Read one integer through the xdr_getint callback and store it, narrowed
 * to short, in *ip.  Returns the callback's return value.
 */
static int
xdr_short(const xdr_t *xdr, const uint8_t *buf, short *ip)
{
	int value;
	const int consumed = xdr->xdr_getint(xdr, buf, &value);

	*ip = value;
	return (consumed);
}
/*
 * Read one integer through the xdr_getint callback and store it, narrowed
 * to unsigned short, in *ip.  Returns the callback's return value.
 */
static int
xdr_u_short(const xdr_t *xdr, const uint8_t *buf, unsigned short *ip)
{
	unsigned value;
	const int consumed = xdr->xdr_getint(xdr, buf, (int *)&value);

	*ip = value;
	return (consumed);
}
/*
 * Read a big-endian 32-bit value from buf into *ip.
 * Returns the number of bytes consumed.
 */
static int
xdr_int(const xdr_t *xdr __unused, const void *buf, int *ip)
{
	const uint32_t wire = be32dec(buf);

	*ip = wire;
	return (sizeof(*ip));
}
/*
 * Read a big-endian 32-bit value from buf into *ip.
 * Returns the number of bytes consumed.
 */
static int
xdr_u_int(const xdr_t *xdr __unused, const void *buf, unsigned *ip)
{
	const uint32_t wire = be32dec(buf);

	*ip = wire;
	return (sizeof(*ip));
}
/*
 * Decode the length word of a counted string into s->nv_size.
 * Returns the size of the length word plus the string bytes, rounded up
 * to a multiple of 4.
 */
static int
xdr_string(const xdr_t *xdr, const void *buf, nv_string_t *s)
{
	unsigned total;

	total = xdr->xdr_getint(xdr, buf, (int *)&s->nv_size);
	total += s->nv_size;
	return (NV_ALIGN4(total));
}
/*
 * Decode a 64-bit signed value stored as two consecutive integers, high
 * word first, and store it in *lp.  Returns the number of bytes consumed.
 *
 * Both words are read before *lp is written, because callers decode in
 * place (buf and lp may refer to the same memory).
 */
static int
xdr_int64(const xdr_t *xdr, const uint8_t *buf, int64_t *lp)
{
	int hi, lo, rv;

	rv = xdr->xdr_getint(xdr, buf, &hi);
	rv += xdr->xdr_getint(xdr, buf + rv, &lo);
	/*
	 * Combine as unsigned: left-shifting a negative signed value is
	 * undefined behavior.
	 */
	*lp = (int64_t)(((uint64_t)(uint32_t)hi << 32) | (uint32_t)lo);
	return (rv);
}
/*
 * Decode a 64-bit unsigned value stored as two consecutive integers, high
 * word first, and store it in *lp.  Returns the number of bytes consumed.
 *
 * Both words are read before *lp is written, because callers decode in
 * place (buf and lp may refer to the same memory).
 */
static int
xdr_uint64(const xdr_t *xdr, const uint8_t *buf, uint64_t *lp)
{
	unsigned hi, lo;
	int rv;

	rv = xdr->xdr_getint(xdr, buf, (int *)&hi);
	rv += xdr->xdr_getint(xdr, buf + rv, (int *)&lo);
	/* Shift in the unsigned domain; a signed shift could overflow. */
	*lp = ((uint64_t)hi << 32) | lo;
	return (rv);
}
/*
 * Read one integer through the xdr_getint callback and store it, narrowed
 * to char, in *cp.  Returns the callback's return value.
 */
static int
xdr_char(const xdr_t *xdr, const uint8_t *buf, char *cp)
{
	int value;
	const int consumed = xdr->xdr_getint(xdr, buf, &value);

	*cp = value;
	return (consumed);
}
/*
 * Native reader: copy an int from buf to *i without any byte swapping.
 * Returns the number of bytes consumed.
 */
static int
mem_int(const xdr_t *xdr, const void *buf, int *i)
{
	const int *src = (int *)buf;

	*i = *src;
	return (sizeof(*i));
}
/*
 * Release an nvlist handle.  The data buffer is released only when the
 * handle owns it (nv_asize is non-zero).  NULL is accepted.
 */
void
nvlist_destroy(nvlist_t *nvl)
{
	if (nvl == NULL)
		return;

	/* Free data if it was allocated by us. */
	if (nvl->nv_asize != 0)
		free(nvl->nv_data);
	free(nvl);
}
/*
 * Return a newly allocated, NUL-terminated copy of a counted string, or
 * NULL when the allocation fails.  The caller frees the result.
 */
char *
nvstring_get(nv_string_t *nvs)
{
	const unsigned len = nvs->nv_size;
	char *copy;

	copy = malloc(len + 1);
	if (copy == NULL)
		return (NULL);

	bcopy(nvs->nv_data, copy, len);
	copy[len] = '\0';
	return (copy);
}
/*
 * Allocate an empty nvlist.
 * The data area is one zero-filled nvs_data_t: version and flags followed
 * by an all-zero pair header, which is the list terminator (2x zeros,
 * 8 bytes).  Returns NULL when memory is not available.
 */
nvlist_t *
nvlist_create(int flag)
{
	nvlist_t *list;
	nvs_data_t *stream;

	list = calloc(1, sizeof(*list));
	if (list == NULL)
		return (NULL);

	list->nv_header.nvh_encoding = NV_ENCODE_XDR;
	list->nv_header.nvh_endian = _BYTE_ORDER == _LITTLE_ENDIAN;
	list->nv_size = sizeof(*stream);
	list->nv_asize = list->nv_size;

	stream = calloc(1, list->nv_asize);
	if (stream == NULL) {
		free(list);
		return (NULL);
	}
	stream->nvl_version = NV_VERSION;
	stream->nvl_nvflag = flag;

	/* data in nvlist is byte stream */
	list->nv_data = (uint8_t *)stream;
	return (list);
}
/*
 * Decode, in place, the body of the nvpair located at nvl->nv_idx: the
 * name, the type, the element count and then the value according to the
 * type.  nvl->nv_idx is advanced by the sizes the helpers return.
 *
 * Every helper is called with the same address as source and destination,
 * so the decoded value overwrites the encoded one.
 *
 * The nvph argument is not referenced in the body.
 *
 * NOTE(review): types without a case below (for example the non-nvlist
 * array types) are neither decoded nor skipped; nv_idx then stays at the
 * start of the value.
 */
static void
nvlist_nvp_decode(const xdr_t *xdr, nvlist_t *nvl, nvp_header_t *nvph)
{
nv_string_t *nv_string;
nv_pair_data_t *nvp_data;
nvlist_t nvlist;
/* Pair name: decode the length word, skip the padded name bytes. */
nv_string = (nv_string_t *)nvl->nv_idx;
nvl->nv_idx += xdr_string(xdr, &nv_string->nv_size, nv_string);
/* Type and element count follow the name. */
nvp_data = (nv_pair_data_t *)nvl->nv_idx;
nvl->nv_idx += xdr_u_int(xdr, &nvp_data->nv_type, &nvp_data->nv_type);
nvl->nv_idx += xdr_u_int(xdr, &nvp_data->nv_nelem, &nvp_data->nv_nelem);
switch (nvp_data->nv_type) {
case DATA_TYPE_NVLIST:
case DATA_TYPE_NVLIST_ARRAY:
/*
 * Embedded list(s): decode each through a temporary handle that
 * points into this stream, then continue after the decoded list.
 */
bzero(&nvlist, sizeof (nvlist));
nvlist.nv_data = &nvp_data->nv_data[0];
nvlist.nv_idx = nvlist.nv_data;
for (int i = 0; i < nvp_data->nv_nelem; i++) {
nvlist.nv_asize =
nvlist_size(xdr, nvlist.nv_data);
nvlist_decode_nvlist(xdr, &nvlist);
nvl->nv_idx = nvlist.nv_idx;
nvlist.nv_data = nvlist.nv_idx;
}
break;
case DATA_TYPE_BOOLEAN:
/* BOOLEAN does not take value space */
break;
case DATA_TYPE_BYTE:
case DATA_TYPE_INT8:
case DATA_TYPE_UINT8:
/* Advances by the getint return value, although one char is stored. */
nvl->nv_idx += xdr_char(xdr, &nvp_data->nv_data[0],
(char *)&nvp_data->nv_data[0]);
break;
case DATA_TYPE_INT16:
nvl->nv_idx += xdr_short(xdr, &nvp_data->nv_data[0],
(short *)&nvp_data->nv_data[0]);
break;
case DATA_TYPE_UINT16:
nvl->nv_idx += xdr_u_short(xdr, &nvp_data->nv_data[0],
(unsigned short *)&nvp_data->nv_data[0]);
break;
case DATA_TYPE_BOOLEAN_VALUE:
case DATA_TYPE_INT32:
nvl->nv_idx += xdr_int(xdr, &nvp_data->nv_data[0],
(int *)&nvp_data->nv_data[0]);
break;
case DATA_TYPE_UINT32:
nvl->nv_idx += xdr_u_int(xdr, &nvp_data->nv_data[0],
(unsigned *)&nvp_data->nv_data[0]);
break;
case DATA_TYPE_INT64:
nvl->nv_idx += xdr_int64(xdr, &nvp_data->nv_data[0],
(int64_t *)&nvp_data->nv_data[0]);
break;
case DATA_TYPE_UINT64:
nvl->nv_idx += xdr_uint64(xdr, &nvp_data->nv_data[0],
(uint64_t *)&nvp_data->nv_data[0]);
break;
case DATA_TYPE_STRING:
/* String value: same counted-string layout as the pair name. */
nv_string = (nv_string_t *)&nvp_data->nv_data[0];
nvl->nv_idx += xdr_string(xdr, &nvp_data->nv_data[0],
nv_string);
break;
}
}
/*
 * Decode, in place, the nvlist stream at nvl->nv_data: version, flags and
 * then every pair until a pair header with a zero encoded or decoded size
 * is found.  On return nvl->nv_idx points just past that terminating
 * header.
 */
static void
nvlist_decode_nvlist(const xdr_t *xdr, nvlist_t *nvl)
{
nvp_header_t *nvph;
nvs_data_t *nvs = (nvs_data_t *)nvl->nv_data;
nvl->nv_idx = nvl->nv_data;
/* Stream head: version and flags. */
nvl->nv_idx += xdr->xdr_getint(xdr, (const uint8_t *)&nvs->nvl_version,
&nvs->nvl_version);
nvl->nv_idx += xdr->xdr_getint(xdr, (const uint8_t *)&nvs->nvl_nvflag,
&nvs->nvl_nvflag);
/* Header of the first pair. */
nvph = &nvs->nvl_pair;
nvl->nv_idx += xdr->xdr_getint(xdr,
(const uint8_t *)&nvph->encoded_size, &nvph->encoded_size);
nvl->nv_idx += xdr->xdr_getint(xdr,
(const uint8_t *)&nvph->decoded_size, &nvph->decoded_size);
while (nvph->encoded_size && nvph->decoded_size) {
/* Decode the pair body, then the header found where decoding stopped. */
nvlist_nvp_decode(xdr, nvl, nvph);
nvph = (nvp_header_t *)(nvl->nv_idx);
nvl->nv_idx += xdr->xdr_getint(xdr, &nvph->encoded_size,
&nvph->encoded_size);
nvl->nv_idx += xdr->xdr_getint(xdr, &nvph->decoded_size,
&nvph->decoded_size);
}
}
/*
 * Compute the length in bytes of the nvlist stream starting at stream,
 * including the version/flags words and the terminating pair header.
 * Pair headers are read through xdr; pair bodies are skipped by using the
 * encoded size.
 */
static int
nvlist_size(const xdr_t *xdr, const uint8_t *stream)
{
	const uint8_t *pair, *end;
	unsigned encoded_size, decoded_size;

	/* Skip version and flags. */
	pair = stream + 2 * sizeof(unsigned);
	for (;;) {
		end = pair;
		end += xdr->xdr_getint(xdr, end, (int *)&encoded_size);
		end += xdr->xdr_getint(xdr, end, (int *)&decoded_size);
		if (encoded_size == 0 || decoded_size == 0)
			break;
		pair += encoded_size;
	}
	return (end - stream);
}
/*
* Import nvlist from byte stream.
* Determine the stream size and allocate private copy.
* Then translate the data.
*/
nvlist_t *
nvlist_import(const uint8_t *stream, char encoding, char endian)
{
nvlist_t *nvl;
if (encoding != NV_ENCODE_XDR)
return (NULL);
nvl = malloc(sizeof(*nvl));
if (nvl == NULL)
return (nvl);
nvl->nv_asize = nvl->nv_size = nvlist_size(&ntoh, stream);
nvl->nv_data = malloc(nvl->nv_asize);
if (nvl->nv_data == NULL) {
free(nvl);
return (NULL);
}
nvl->nv_idx = nvl->nv_data;
bcopy(stream, nvl->nv_data, nvl->nv_asize);
nvlist_decode_nvlist(&ntoh, nvl);
nvl->nv_idx = nvl->nv_data;
return (nvl);
}
/*
 * Remove the first pair with the given name and type from this nvlist.
 *
 * The name must match exactly: the stored name length has to equal
 * strlen(name).  Comparing only the stored length would also match any
 * longer name that merely starts with the stored one.
 *
 * Returns 0 on success, EINVAL on bad arguments and ENOENT when no such
 * pair exists.
 */
int
nvlist_remove(nvlist_t *nvl, const char *name, data_type_t type)
{
	uint8_t *head, *tail;
	nvs_data_t *data;
	nvp_header_t *nvp;
	nv_string_t *nvp_name;
	nv_pair_data_t *nvp_data;
	size_t size, namelen;

	if (nvl == NULL || nvl->nv_data == NULL || name == NULL)
		return (EINVAL);

	namelen = strlen(name);
	head = nvl->nv_data;
	data = (nvs_data_t *)head;
	nvp = &data->nvl_pair;	/* first pair in nvlist */
	head = (uint8_t *)nvp;

	while (nvp->encoded_size != 0 && nvp->decoded_size != 0) {
		nvp_name = (nv_string_t *)(head + sizeof(*nvp));

		nvp_data = (nv_pair_data_t *)
		    NV_ALIGN4((uintptr_t)&nvp_name->nv_data[0] +
		    nvp_name->nv_size);

		if (nvp_name->nv_size == namelen &&
		    memcmp(nvp_name->nv_data, name, namelen) == 0 &&
		    nvp_data->nv_type == type) {
			/*
			 * set tail to point to next nvpair and size
			 * is the length of the tail.
			 */
			tail = head + nvp->encoded_size;
			size = nvl->nv_data + nvl->nv_size - tail;

			/* adjust the size of the nvlist. */
			nvl->nv_size -= nvp->encoded_size;
			/* Regions overlap; bcopy() handles that. */
			bcopy(tail, head, size);
			return (0);
		}
		/* Not our pair, skip to next. */
		head = head + nvp->encoded_size;
		nvp = (nvp_header_t *)head;
	}
	return (ENOENT);
}
/*
 * Look up the first pair with the given name and type.
 *
 * The name must match exactly (stored length equal to strlen(name)).
 * On a match *elementsp, when not NULL, receives the element count and
 * valuep receives the value according to the type:
 *   DATA_TYPE_UINT64  - the value is copied into *(uint64_t *)valuep.
 *   DATA_TYPE_STRING  - *(const uint8_t **)valuep points at the string
 *                       bytes inside the nvlist (not NUL-terminated);
 *                       *sizep, when not NULL, receives the length.
 *   DATA_TYPE_NVLIST,
 *   DATA_TYPE_NVLIST_ARRAY - *(nvlist_t **)valuep is a newly allocated
 *                       handle whose data points into this nvlist; it is
 *                       released with nvlist_destroy().
 *
 * Returns 0 on success, EINVAL on bad arguments, ENOMEM when a handle
 * cannot be allocated, EIO when the pair is found but its type is not
 * handled here, and ENOENT when there is no such pair.
 */
int
nvlist_find(const nvlist_t *nvl, const char *name, data_type_t type,
    int *elementsp, void *valuep, int *sizep)
{
	nvs_data_t *data;
	nvp_header_t *nvp;
	nv_string_t *nvp_name;
	nv_pair_data_t *nvp_data;
	nvlist_t *nvlist;
	size_t namelen;

	if (nvl == NULL || nvl->nv_data == NULL || name == NULL)
		return (EINVAL);

	namelen = strlen(name);
	data = (nvs_data_t *)nvl->nv_data;
	nvp = &data->nvl_pair;	/* first pair in nvlist */

	while (nvp->encoded_size != 0 && nvp->decoded_size != 0) {
		nvp_name = (nv_string_t *)((uint8_t *)nvp + sizeof(*nvp));

		nvp_data = (nv_pair_data_t *)
		    NV_ALIGN4((uintptr_t)&nvp_name->nv_data[0] +
		    nvp_name->nv_size);

		if (nvp_name->nv_size == namelen &&
		    memcmp(nvp_name->nv_data, name, namelen) == 0 &&
		    nvp_data->nv_type == type) {
			if (elementsp != NULL)
				*elementsp = nvp_data->nv_nelem;
			switch (nvp_data->nv_type) {
			case DATA_TYPE_UINT64:
				/*
				 * The stream only guarantees 4-byte
				 * alignment, so copy instead of loading a
				 * uint64_t directly.
				 */
				bcopy(nvp_data->nv_data, valuep,
				    sizeof(uint64_t));
				return (0);
			case DATA_TYPE_STRING:
				nvp_name = (nv_string_t *)nvp_data->nv_data;
				if (sizep != NULL) {
					*sizep = nvp_name->nv_size;
				}
				*(const uint8_t **)valuep =
				    &nvp_name->nv_data[0];
				return (0);
			case DATA_TYPE_NVLIST:
			case DATA_TYPE_NVLIST_ARRAY:
				nvlist = malloc(sizeof(*nvlist));
				if (nvlist != NULL) {
					nvlist->nv_header = nvl->nv_header;
					/* asize 0: data is not owned. */
					nvlist->nv_asize = 0;
					nvlist->nv_size = 0;
					nvlist->nv_idx = NULL;
					nvlist->nv_data = &nvp_data->nv_data[0];
					*(nvlist_t **)valuep = nvlist;
					return (0);
				}
				return (ENOMEM);
			}
			return (EIO);
		}
		/* Not our pair, skip to next. */
		nvp = (nvp_header_t *)((uint8_t *)nvp + nvp->encoded_size);
	}
	return (ENOENT);
}
/*
 * Advance a borrowed nvlist handle (nv_asize == 0) to the next nvlist in
 * an nvlist array: walk to the terminating pair header of the current
 * list and point nv_data just past it.
 * Returns 0, or EINVAL for a NULL handle, a handle without data or a
 * handle that owns its data.
 */
int
nvlist_next(nvlist_t *nvl)
{
	uint8_t *cursor;
	nvp_header_t *hdr;

	if (nvl == NULL || nvl->nv_data == NULL || nvl->nv_asize != 0)
		return (EINVAL);

	/* first pair in nvlist */
	hdr = &((nvs_data_t *)nvl->nv_data)->nvl_pair;
	for (cursor = (uint8_t *)hdr;
	    hdr->encoded_size != 0 && hdr->decoded_size != 0;
	    hdr = (nvp_header_t *)cursor)
		cursor += hdr->encoded_size;

	nvl->nv_data = cursor + sizeof(*hdr);
	return (0);
}
/*
 * Print the pairs of a decoded nvlist, one per line, indented by indent
 * spaces.  Values are printed for UINT64 and STRING pairs; NVLIST and
 * NVLIST_ARRAY pairs are printed recursively with a deeper indent.
 * A type number outside the name table is shown as DATA_TYPE_UNKNOWN.
 * A NULL list, or a list without data, prints nothing.
 */
void
nvlist_print(nvlist_t *nvl, unsigned int indent)
{
	static const char *typenames[] = {
		"DATA_TYPE_UNKNOWN",
		"DATA_TYPE_BOOLEAN",
		"DATA_TYPE_BYTE",
		"DATA_TYPE_INT16",
		"DATA_TYPE_UINT16",
		"DATA_TYPE_INT32",
		"DATA_TYPE_UINT32",
		"DATA_TYPE_INT64",
		"DATA_TYPE_UINT64",
		"DATA_TYPE_STRING",
		"DATA_TYPE_BYTE_ARRAY",
		"DATA_TYPE_INT16_ARRAY",
		"DATA_TYPE_UINT16_ARRAY",
		"DATA_TYPE_INT32_ARRAY",
		"DATA_TYPE_UINT32_ARRAY",
		"DATA_TYPE_INT64_ARRAY",
		"DATA_TYPE_UINT64_ARRAY",
		"DATA_TYPE_STRING_ARRAY",
		"DATA_TYPE_HRTIME",
		"DATA_TYPE_NVLIST",
		"DATA_TYPE_NVLIST_ARRAY",
		"DATA_TYPE_BOOLEAN_VALUE",
		"DATA_TYPE_INT8",
		"DATA_TYPE_UINT8",
		"DATA_TYPE_BOOLEAN_ARRAY",
		"DATA_TYPE_INT8_ARRAY",
		"DATA_TYPE_UINT8_ARRAY"
	};
	const unsigned ntypes = sizeof(typenames) / sizeof(typenames[0]);
	nvs_data_t *data;
	nvp_header_t *nvp;
	nv_string_t *nvp_name, *nvp_str;
	nv_pair_data_t *nvp_data;
	nvlist_t nvlist;
	const char *tname;
	uint8_t *elem;
	unsigned i, j;

	if (nvl == NULL || nvl->nv_data == NULL)
		return;

	/* Temporary handle for embedded lists; only nv_data is used. */
	bzero(&nvlist, sizeof(nvlist));

	data = (nvs_data_t *)nvl->nv_data;
	nvp = &data->nvl_pair;	/* first pair in nvlist */
	while (nvp->encoded_size != 0 && nvp->decoded_size != 0) {
		nvp_name = (nv_string_t *)((uintptr_t)nvp + sizeof(*nvp));
		nvp_data = (nv_pair_data_t *)
		    NV_ALIGN4((uintptr_t)&nvp_name->nv_data[0] +
		    nvp_name->nv_size);

		/* Never index the name table with an unchecked type. */
		tname = nvp_data->nv_type < ntypes ?
		    typenames[nvp_data->nv_type] : typenames[0];

		for (i = 0; i < indent; i++)
			printf(" ");

		printf("%s [%d] %.*s", tname, (int)nvp_data->nv_nelem,
		    (int)nvp_name->nv_size, nvp_name->nv_data);

		switch (nvp_data->nv_type) {
		case DATA_TYPE_UINT64: {
			uint64_t val;

			/* The value may not be 8-byte aligned; copy it. */
			bcopy(nvp_data->nv_data, &val, sizeof(val));
			printf(" = 0x%jx\n", (uintmax_t)val);
			break;
		}

		case DATA_TYPE_STRING: {
			nvp_str = (nv_string_t *)&nvp_data->nv_data[0];
			printf(" = \"%.*s\"\n", (int)nvp_str->nv_size,
			    nvp_str->nv_data);
			break;
		}

		case DATA_TYPE_NVLIST:
			printf("\n");
			nvlist.nv_data = &nvp_data->nv_data[0];
			nvlist_print(&nvlist, indent + 2);
			break;

		case DATA_TYPE_NVLIST_ARRAY:
			nvlist.nv_data = &nvp_data->nv_data[0];
			for (j = 0; j < nvp_data->nv_nelem; j++) {
				elem = nvlist.nv_data;
				printf("[%d]\n", (int)j);
				nvlist_print(&nvlist, indent + 2);
				if (j != nvp_data->nv_nelem - 1) {
					for (i = 0; i < indent; i++)
						printf(" ");
					printf("%s %.*s", tname,
					    (int)nvp_name->nv_size,
					    nvp_name->nv_data);
				}
				/* Step over the element just printed. */
				nvlist.nv_data = elem +
				    nvlist_size(&native, elem);
			}
			break;

		default:
			printf("\n");
		}
		nvp = (nvp_header_t *)((uint8_t *)nvp + nvp->encoded_size);
	}
	printf("%*s\n", (int)(indent + 13), "End of nvlist");
}

View file

@ -482,6 +482,215 @@ vdev_read(vdev_t *vdev, void *priv, off_t offset, void *buf, size_t bytes)
return (ret);
}
/*
 * Read-modify-write helper for vdev_write(): read the sector at the
 * current file position into bounce, overlay len bytes from src at byte
 * offset skip, seek back one sector and write the sector out again.
 * Returns 0 on success or EIO when the read or the write is short.
 */
static int
vdev_write_rmw(int fd, char *bounce, unsigned secsz, size_t skip,
    const char *src, size_t len)
{
	if (read(fd, bounce, secsz) != (ssize_t)secsz)
		return (EIO);
	memcpy(bounce + skip, src, len);
	/*
	 * secsz is unsigned: negating it directly wraps to a large positive
	 * value, which would seek forward.  Convert to off_t first.
	 */
	(void) lseek(fd, -(off_t)secsz, SEEK_CUR);
	if (write(fd, bounce, secsz) != (ssize_t)secsz)
		return (EIO);
	return (0);
}

/*
 * Write bytes from buf to the device behind priv (a file descriptor
 * stored in a pointer) at byte offset offset.
 *
 * The request is split into up to three parts based on the sector size
 * reported by DIOCGSECTORSIZE: a partial first sector, a run of whole
 * sectors and a partial last sector.  Partial sectors are updated with a
 * read-modify-write cycle through a bounce buffer.
 *
 * Returns 0 on success; otherwise the ioctl result, EINVAL for a zero
 * sector size, ENOMEM, the errno of a failed seek, or EIO on short I/O.
 */
static int
vdev_write(vdev_t *vdev __unused, void *priv, off_t offset, void *buf,
    size_t bytes)
{
	int fd, ret;
	size_t head, tail, total_size, full_sec_size, chunk;
	unsigned secsz, do_tail_write;
	off_t start_sec;
	ssize_t res;
	char *outbuf, *bouncebuf;

	fd = (uintptr_t)priv;
	outbuf = (char *)buf;
	bouncebuf = NULL;

	ret = ioctl(fd, DIOCGSECTORSIZE, &secsz);
	if (ret != 0)
		return (ret);
	/* Guard the divisions below. */
	if (secsz == 0)
		return (EINVAL);

	start_sec = offset / secsz;
	head = offset % secsz;
	total_size = roundup2(head + bytes, secsz);
	tail = total_size - (head + bytes);
	do_tail_write = ((tail > 0) && (head + bytes > secsz));
	full_sec_size = total_size;
	if (head > 0)
		full_sec_size -= secsz;
	if (do_tail_write)
		full_sec_size -= secsz;

	/* Partial sector write requires a bounce buffer. */
	if ((head > 0) || do_tail_write || bytes < secsz) {
		bouncebuf = malloc(secsz);
		if (bouncebuf == NULL) {
			printf("vdev_write: out of memory\n");
			return (ENOMEM);
		}
	}

	if (lseek(fd, start_sec * secsz, SEEK_SET) == -1) {
		ret = errno;
		goto error;
	}

	/* Partial data for first sector */
	if (head > 0) {
		chunk = min(secsz - head, bytes);
		ret = vdev_write_rmw(fd, bouncebuf, secsz, head, outbuf,
		    chunk);
		if (ret != 0)
			goto error;
		outbuf += chunk;
	}

	/*
	 * Full data write to sectors.
	 * Note, there is still corner case where we write
	 * to sector boundary, but less than sector size, e.g. write 512B
	 * to 4k sector.
	 */
	if (full_sec_size > 0) {
		if (bytes < full_sec_size) {
			ret = vdev_write_rmw(fd, bouncebuf, secsz, 0, outbuf,
			    bytes);
			if (ret != 0)
				goto error;
		} else {
			res = write(fd, outbuf, full_sec_size);
			if (res < 0 || (size_t)res != full_sec_size) {
				ret = EIO;
				goto error;
			}
			outbuf += full_sec_size;
		}
	}

	/* Partial data write to last sector */
	if (do_tail_write) {
		ret = vdev_write_rmw(fd, bouncebuf, secsz, 0, outbuf,
		    secsz - tail);
		if (ret != 0)
			goto error;
	}

	ret = 0;
error:
	free(bouncebuf);
	return (ret);
}
/*
 * Clear the boot environment block (vl_be in the vdev label) on every
 * healthy leaf below vdev.
 *
 * Interior vdevs only recurse into their healthy children.  For a leaf, a
 * zero-filled block with a valid label checksum is built and written at
 * the vl_be offset.  Failures are reported with printf() and otherwise
 * ignored.
 */
static void
vdev_clear_pad2(vdev_t *vdev)
{
	vdev_t *kid;
	vdev_boot_envblock_t *be;
	off_t off = offsetof(vdev_label_t, vl_be);
	zio_checksum_info_t *ci;
	zio_cksum_t cksum;
	int rc;

	STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) {
		if (kid->v_state != VDEV_STATE_HEALTHY)
			continue;
		vdev_clear_pad2(kid);
	}

	/* Only leaf vdevs carry a label to clear. */
	if (!STAILQ_EMPTY(&vdev->v_children))
		return;

	be = calloc(1, sizeof (*be));
	if (be == NULL) {
		printf("failed to clear be area: out of memory\n");
		return;
	}

	/* Seed the checksum field with the verifier, then checksum the block. */
	ci = &zio_checksum_table[ZIO_CHECKSUM_LABEL];
	be->vbe_zbt.zec_magic = ZEC_MAGIC;
	zio_checksum_label_verifier(&be->vbe_zbt.zec_cksum, off);
	ci->ci_func[0](be, sizeof (*be), NULL, &cksum);
	be->vbe_zbt.zec_cksum = cksum;

	/*
	 * vdev_write() returns the error code; errno is not necessarily
	 * set, so report the returned value.
	 */
	rc = vdev_write(vdev, vdev->v_read_priv, off, be, VDEV_PAD_SIZE);
	if (rc != 0) {
		printf("failed to clear be area of primary vdev: %d\n",
		    rc);
	}
	free(be);
}
/*
 * Read the next boot command from pad2.
 * If any instance of pad2 is set to empty string, or the returned string
 * values are not the same, we consider next boot not to be set.
 *
 * For an interior vdev the healthy children are consulted recursively.
 * For a leaf the boot environment block is read from the label.
 *
 * Returns a malloc()ed string which the caller must free (an empty string
 * means "not set"), or NULL when nothing could be read.
 */
static char *
vdev_read_pad2(vdev_t *vdev)
{
	vdev_t *kid;
	char *tmp, *result = NULL;
	vdev_boot_envblock_t *be;
	off_t off = offsetof(vdev_label_t, vl_be);

	STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) {
		if (kid->v_state != VDEV_STATE_HEALTHY)
			continue;
		tmp = vdev_read_pad2(kid);
		if (tmp == NULL)
			continue;

		/* The next boot is not set, we are done. */
		if (*tmp == '\0') {
			free(result);
			return (tmp);
		}
		if (result == NULL) {
			result = tmp;
			continue;
		}
		/* Are the next boot strings different? */
		if (strcmp(result, tmp) != 0) {
			free(tmp);
			*result = '\0';
			break;
		}
		free(tmp);
	}
	if (result != NULL)
		return (result);

	be = malloc(sizeof (*be));
	if (be == NULL)
		return (NULL);

	if (vdev_read(vdev, vdev->v_read_priv, off, be, sizeof (*be))) {
		/* Do not leak the block on a failed read. */
		free(be);
		return (NULL);
	}

	switch (be->vbe_version) {
	case VB_RAW:
	case VB_NVLIST:
		result = strdup(be->vbe_bootenv);
		/* Without this break the copy above is leaked and replaced. */
		break;
	default:
		/* Backward compatibility with initial nextboot feature. */
		result = strdup((char *)be);
		break;
	}
	/* The result is a private copy; the block is no longer needed. */
	free(be);
	return (result);
}
static int
zfs_dev_init(void)
{
@ -558,7 +767,7 @@ zfs_probe_partition(void *arg, const char *partname,
strncpy(devname, ppa->devname, strlen(ppa->devname) - 1);
devname[strlen(ppa->devname) - 1] = '\0';
sprintf(devname, "%s%s:", devname, partname);
pa.fd = open(devname, O_RDONLY);
pa.fd = open(devname, O_RDWR);
if (pa.fd == -1)
return (0);
ret = zfs_probe(pa.fd, ppa->pool_guid);
@ -580,6 +789,57 @@ zfs_probe_partition(void *arg, const char *partname,
return (0);
}
int
zfs_nextboot(void *vdev, char *buf, size_t size)
{
struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
spa_t *spa;
vdev_t *vd;
char *result = NULL;
if (dev->dd.d_dev->dv_type != DEVT_ZFS)
return (1);
if (dev->pool_guid == 0)
spa = STAILQ_FIRST(&zfs_pools);
else
spa = spa_find_by_guid(dev->pool_guid);
if (spa == NULL) {
printf("ZFS: can't find pool by guid\n");
return (1);
}
STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children, v_childlink) {
char *tmp = vdev_read_pad2(vd);
/* Continue on error. */
if (tmp == NULL)
continue;
/* Nextboot is not set. */
if (*tmp == '\0') {
free(result);
free(tmp);
return (1);
}
if (result == NULL) {
result = tmp;
continue;
}
free(tmp);
}
if (result == NULL)
return (1);
STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children, v_childlink) {
vdev_clear_pad2(vd);
}
strlcpy(buf, result, size);
free(result);
return (0);
}
int
zfs_probe_dev(const char *devname, uint64_t *pool_guid)
{
@ -591,7 +851,7 @@ zfs_probe_dev(const char *devname, uint64_t *pool_guid)
if (pool_guid)
*pool_guid = 0;
pa.fd = open(devname, O_RDONLY);
pa.fd = open(devname, O_RDWR);
if (pa.fd == -1)
return (ENXIO);
/*

View file

@ -170,284 +170,48 @@ zfs_init(void)
}
static int
xdr_int(const unsigned char **xdr, int *ip)
nvlist_check_features_for_read(nvlist_t *nvl)
{
*ip = be32dec(*xdr);
(*xdr) += 4;
return (0);
}
static int
xdr_u_int(const unsigned char **xdr, u_int *ip)
{
*ip = be32dec(*xdr);
(*xdr) += 4;
return (0);
}
static int
xdr_uint64_t(const unsigned char **xdr, uint64_t *lp)
{
u_int hi, lo;
xdr_u_int(xdr, &hi);
xdr_u_int(xdr, &lo);
*lp = (((uint64_t)hi) << 32) | lo;
return (0);
}
static int
nvlist_find(const unsigned char *nvlist, const char *name, int type,
int *elementsp, void *valuep, int *sizep)
{
const unsigned char *p, *pair;
int junk;
int encoded_size, decoded_size;
p = nvlist;
xdr_int(&p, &junk);
xdr_int(&p, &junk);
pair = p;
xdr_int(&p, &encoded_size);
xdr_int(&p, &decoded_size);
while (encoded_size && decoded_size) {
int namelen, pairtype, elements;
const char *pairname;
xdr_int(&p, &namelen);
pairname = (const char *)p;
p += roundup(namelen, 4);
xdr_int(&p, &pairtype);
if (memcmp(name, pairname, namelen) == 0 && type == pairtype) {
xdr_int(&p, &elements);
if (elementsp)
*elementsp = elements;
if (type == DATA_TYPE_UINT64) {
xdr_uint64_t(&p, (uint64_t *)valuep);
return (0);
} else if (type == DATA_TYPE_STRING) {
int len;
xdr_int(&p, &len);
if (sizep != NULL)
*sizep = len;
(*(const char **)valuep) = (const char *)p;
return (0);
} else if (type == DATA_TYPE_NVLIST ||
type == DATA_TYPE_NVLIST_ARRAY) {
(*(const unsigned char **)valuep) =
(const unsigned char *)p;
return (0);
} else {
return (EIO);
}
} else {
/*
* Not the pair we are looking for, skip to the
* next one.
*/
p = pair + encoded_size;
}
pair = p;
xdr_int(&p, &encoded_size);
xdr_int(&p, &decoded_size);
}
return (EIO);
}
static int
nvlist_check_features_for_read(const unsigned char *nvlist)
{
const unsigned char *p, *pair;
int junk;
int encoded_size, decoded_size;
nvlist_t *features = NULL;
nvs_data_t *data;
nvp_header_t *nvp;
nv_string_t *nvp_name;
int rc;
rc = 0;
rc = nvlist_find(nvl, ZPOOL_CONFIG_FEATURES_FOR_READ,
DATA_TYPE_NVLIST, NULL, &features, NULL);
if (rc != 0)
return (rc);
p = nvlist;
xdr_int(&p, &junk);
xdr_int(&p, &junk);
data = (nvs_data_t *)features->nv_data;
nvp = &data->nvl_pair; /* first pair in nvlist */
pair = p;
xdr_int(&p, &encoded_size);
xdr_int(&p, &decoded_size);
while (encoded_size && decoded_size) {
int namelen, pairtype;
const char *pairname;
while (nvp->encoded_size != 0 && nvp->decoded_size != 0) {
int i, found;
nvp_name = (nv_string_t *)((uintptr_t)nvp + sizeof(*nvp));
found = 0;
xdr_int(&p, &namelen);
pairname = (const char *)p;
p += roundup(namelen, 4);
xdr_int(&p, &pairtype);
for (i = 0; features_for_read[i] != NULL; i++) {
if (memcmp(pairname, features_for_read[i],
namelen) == 0) {
if (memcmp(nvp_name->nv_data, features_for_read[i],
nvp_name->nv_size) == 0) {
found = 1;
break;
}
}
if (!found) {
printf("ZFS: unsupported feature: %s\n", pairname);
printf("ZFS: unsupported feature: %.*s\n",
nvp_name->nv_size, nvp_name->nv_data);
rc = EIO;
}
p = pair + encoded_size;
pair = p;
xdr_int(&p, &encoded_size);
xdr_int(&p, &decoded_size);
nvp = (nvp_header_t *)((uint8_t *)nvp + nvp->encoded_size);
}
nvlist_destroy(features);
return (rc);
}
/*
* Return the next nvlist in an nvlist array.
*/
static const unsigned char *
nvlist_next(const unsigned char *nvlist)
{
const unsigned char *p, *pair;
int junk;
int encoded_size, decoded_size;
p = nvlist;
xdr_int(&p, &junk);
xdr_int(&p, &junk);
pair = p;
xdr_int(&p, &encoded_size);
xdr_int(&p, &decoded_size);
while (encoded_size && decoded_size) {
p = pair + encoded_size;
pair = p;
xdr_int(&p, &encoded_size);
xdr_int(&p, &decoded_size);
}
return (p);
}
#ifdef TEST
/*
 * Debug helper: dump one XDR-encoded nvlist to stdout.
 *
 * Every pair is printed as "<type name> <pair name>", prefixed by one
 * space per 'indent' level.  UINT64 and STRING values are printed inline;
 * nested nvlists and nvlist arrays are printed recursively with
 * indent + 1; all other types print only the type and name.
 *
 * Returns the position just past the two size words of the terminating
 * (zero-sized) pair, which is what lets the NVLIST_ARRAY case step from
 * one array element to the next.
 */
static const unsigned char *
nvlist_print(const unsigned char *nvlist, unsigned int indent)
{
	static const char *typenames[] = {
		"DATA_TYPE_UNKNOWN",
		"DATA_TYPE_BOOLEAN",
		"DATA_TYPE_BYTE",
		"DATA_TYPE_INT16",
		"DATA_TYPE_UINT16",
		"DATA_TYPE_INT32",
		"DATA_TYPE_UINT32",
		"DATA_TYPE_INT64",
		"DATA_TYPE_UINT64",
		"DATA_TYPE_STRING",
		"DATA_TYPE_BYTE_ARRAY",
		"DATA_TYPE_INT16_ARRAY",
		"DATA_TYPE_UINT16_ARRAY",
		"DATA_TYPE_INT32_ARRAY",
		"DATA_TYPE_UINT32_ARRAY",
		"DATA_TYPE_INT64_ARRAY",
		"DATA_TYPE_UINT64_ARRAY",
		"DATA_TYPE_STRING_ARRAY",
		"DATA_TYPE_HRTIME",
		"DATA_TYPE_NVLIST",
		"DATA_TYPE_NVLIST_ARRAY",
		"DATA_TYPE_BOOLEAN_VALUE",
		"DATA_TYPE_INT8",
		"DATA_TYPE_UINT8",
		"DATA_TYPE_BOOLEAN_ARRAY",
		"DATA_TYPE_INT8_ARRAY",
		"DATA_TYPE_UINT8_ARRAY"
	};
	const size_t ntypes = sizeof (typenames) / sizeof (typenames[0]);
	unsigned int i;
	const unsigned char *p, *pair;
	int junk;
	int encoded_size, decoded_size;

	/* Skip the two leading ints of the list. */
	p = nvlist;
	xdr_int(&p, &junk);
	xdr_int(&p, &junk);

	pair = p;
	xdr_int(&p, &encoded_size);
	xdr_int(&p, &decoded_size);
	while (encoded_size && decoded_size) {
		int namelen, pairtype, elements, j;
		const char *pairname, *tname;

		xdr_int(&p, &namelen);
		pairname = (const char *)p;
		p += roundup(namelen, 4);
		xdr_int(&p, &pairtype);

		/*
		 * The type comes straight from the encoded data; never use
		 * it as a table index without range-checking it first.
		 */
		if (pairtype >= 0 && (size_t)pairtype < ntypes)
			tname = typenames[pairtype];
		else
			tname = "DATA_TYPE_INVALID";

		for (i = 0; i < indent; i++)
			printf(" ");
		printf("%s %.*s", tname, namelen, pairname);

		xdr_int(&p, &elements);
		switch (pairtype) {
		case DATA_TYPE_UINT64: {
			uint64_t val;

			xdr_uint64_t(&p, &val);
			printf(" = 0x%jx\n", (uintmax_t)val);
			break;
		}

		case DATA_TYPE_STRING: {
			int len;

			xdr_int(&p, &len);
			printf(" = \"%.*s\"\n", len, p);
			break;
		}

		case DATA_TYPE_NVLIST:
			printf("\n");
			nvlist_print(p, indent + 1);
			break;

		case DATA_TYPE_NVLIST_ARRAY:
			/*
			 * Signed counter: a negative (corrupt) element count
			 * skips the loop instead of wrapping to a huge
			 * unsigned bound.
			 */
			for (j = 0; j < elements; j++) {
				printf("[%d]\n", j);
				p = nvlist_print(p, indent + 1);
				/* Repeat the header before the next element. */
				if (j != elements - 1) {
					for (i = 0; i < indent; i++)
						printf(" ");
					printf("%s %.*s", tname,
					    namelen, pairname);
				}
			}
			break;

		default:
			printf("\n");
		}

		/* Advance by the pair's encoded size, not by what we parsed. */
		p = pair + encoded_size;

		pair = p;
		xdr_int(&p, &encoded_size);
		xdr_int(&p, &decoded_size);
	}

	return (p);
}
#endif
static int
vdev_read_phys(vdev_t *vdev, const blkptr_t *bp, void *buf,
off_t offset, size_t size)
@ -1082,7 +846,7 @@ vdev_create(uint64_t guid, vdev_read_t *_read)
}
static void
vdev_set_initial_state(vdev_t *vdev, const unsigned char *nvlist)
vdev_set_initial_state(vdev_t *vdev, const nvlist_t *nvlist)
{
uint64_t is_offline, is_faulted, is_degraded, is_removed, isnt_present;
uint64_t is_log;
@ -1117,7 +881,7 @@ vdev_set_initial_state(vdev_t *vdev, const unsigned char *nvlist)
}
static int
vdev_init(uint64_t guid, const unsigned char *nvlist, vdev_t **vdevp)
vdev_init(uint64_t guid, const nvlist_t *nvlist, vdev_t **vdevp)
{
uint64_t id, ashift, asize, nparity;
const char *path;
@ -1128,8 +892,8 @@ vdev_init(uint64_t guid, const unsigned char *nvlist, vdev_t **vdevp)
if (nvlist_find(nvlist, ZPOOL_CONFIG_ID, DATA_TYPE_UINT64, NULL, &id,
NULL) ||
nvlist_find(nvlist, ZPOOL_CONFIG_TYPE, DATA_TYPE_STRING,
NULL, &type, &len)) {
nvlist_find(nvlist, ZPOOL_CONFIG_TYPE, DATA_TYPE_STRING, NULL,
&type, &len)) {
return (ENOENT);
}
@ -1306,10 +1070,10 @@ vdev_insert(vdev_t *top_vdev, vdev_t *vdev)
}
static int
vdev_from_nvlist(spa_t *spa, uint64_t top_guid, const unsigned char *nvlist)
vdev_from_nvlist(spa_t *spa, uint64_t top_guid, const nvlist_t *nvlist)
{
vdev_t *top_vdev, *vdev;
const unsigned char *kids;
nvlist_t *kids = NULL;
int rc, nkids;
/* Get top vdev. */
@ -1332,8 +1096,10 @@ vdev_from_nvlist(spa_t *spa, uint64_t top_guid, const unsigned char *nvlist)
rc = nvlist_find(kids, ZPOOL_CONFIG_GUID,
DATA_TYPE_UINT64, NULL, &guid, NULL);
if (rc != 0)
if (rc != 0) {
nvlist_destroy(kids);
return (rc);
}
rc = vdev_init(guid, kids, &vdev);
if (rc != 0)
return (rc);
@ -1342,7 +1108,7 @@ vdev_from_nvlist(spa_t *spa, uint64_t top_guid, const unsigned char *nvlist)
vdev->v_top = top_vdev;
vdev_insert(top_vdev, vdev);
kids = nvlist_next(kids);
rc = nvlist_next(kids);
}
} else {
/*
@ -1351,15 +1117,17 @@ vdev_from_nvlist(spa_t *spa, uint64_t top_guid, const unsigned char *nvlist)
*/
rc = 0;
}
nvlist_destroy(kids);
return (rc);
}
static int
vdev_init_from_label(spa_t *spa, const unsigned char *nvlist)
vdev_init_from_label(spa_t *spa, const nvlist_t *nvlist)
{
uint64_t pool_guid, top_guid;
const unsigned char *vdevs;
nvlist_t *vdevs;
int rc;
if (nvlist_find(nvlist, ZPOOL_CONFIG_POOL_GUID, DATA_TYPE_UINT64,
NULL, &pool_guid, NULL) ||
@ -1371,7 +1139,9 @@ vdev_init_from_label(spa_t *spa, const unsigned char *nvlist)
return (ENOENT);
}
return (vdev_from_nvlist(spa, top_guid, vdevs));
rc = vdev_from_nvlist(spa, top_guid, vdevs);
nvlist_destroy(vdevs);
return (rc);
}
static void
@ -1420,10 +1190,10 @@ vdev_set_state(vdev_t *vdev)
}
static int
vdev_update_from_nvlist(uint64_t top_guid, const unsigned char *nvlist)
vdev_update_from_nvlist(uint64_t top_guid, const nvlist_t *nvlist)
{
vdev_t *vdev;
const unsigned char *kids;
nvlist_t *kids = NULL;
int rc, nkids;
/* Update top vdev. */
@ -1447,20 +1217,21 @@ vdev_update_from_nvlist(uint64_t top_guid, const unsigned char *nvlist)
if (vdev != NULL)
vdev_set_initial_state(vdev, kids);
kids = nvlist_next(kids);
rc = nvlist_next(kids);
}
} else {
rc = 0;
}
nvlist_destroy(kids);
return (rc);
}
static int
vdev_init_from_nvlist(spa_t *spa, const unsigned char *nvlist)
vdev_init_from_nvlist(spa_t *spa, const nvlist_t *nvlist)
{
uint64_t pool_guid, vdev_children;
const unsigned char *vdevs, *kids;
nvlist_t *vdevs = NULL, *kids = NULL;
int rc, nkids;
if (nvlist_find(nvlist, ZPOOL_CONFIG_POOL_GUID, DATA_TYPE_UINT64,
@ -1474,13 +1245,16 @@ vdev_init_from_nvlist(spa_t *spa, const unsigned char *nvlist)
}
/* Wrong guid?! */
if (spa->spa_guid != pool_guid)
if (spa->spa_guid != pool_guid) {
nvlist_destroy(vdevs);
return (EINVAL);
}
spa->spa_root_vdev->v_nchildren = vdev_children;
rc = nvlist_find(vdevs, ZPOOL_CONFIG_CHILDREN, DATA_TYPE_NVLIST_ARRAY,
&nkids, &kids, NULL);
nvlist_destroy(vdevs);
/*
* MOS config has at least one child for root vdev.
@ -1506,8 +1280,9 @@ vdev_init_from_nvlist(spa_t *spa, const unsigned char *nvlist)
rc = vdev_update_from_nvlist(guid, kids);
if (rc != 0)
break;
kids = nvlist_next(kids);
nvlist_next(kids);
}
nvlist_destroy(kids);
/*
* Re-evaluate top-level vdev state.
@ -1819,26 +1594,20 @@ vdev_label_read(vdev_t *vd, int l, void *buf, uint64_t offset,
return (vdev_read_phys(vd, &bp, buf, off, size));
}
static unsigned char *
static nvlist_t *
vdev_label_read_config(vdev_t *vd, uint64_t txg)
{
vdev_phys_t *label;
uint64_t best_txg = 0;
uint64_t label_txg = 0;
uint64_t asize;
unsigned char *nvl;
size_t nvl_size;
nvlist_t *nvl = NULL, *tmp;
int error;
label = malloc(sizeof (vdev_phys_t));
if (label == NULL)
return (NULL);
nvl_size = VDEV_PHYS_SIZE - sizeof (zio_eck_t) - 4;
nvl = malloc(nvl_size);
if (nvl == NULL)
goto done;
for (int l = 0; l < VDEV_LABELS; l++) {
const unsigned char *nvlist;
@ -1847,35 +1616,40 @@ vdev_label_read_config(vdev_t *vd, uint64_t txg)
sizeof (vdev_phys_t)))
continue;
if (label->vp_nvlist[0] != NV_ENCODE_XDR)
nvlist = (const unsigned char *) label->vp_nvlist;
tmp = nvlist_import(nvlist + 4, nvlist[0], nvlist[1]);
if (tmp == NULL)
continue;
nvlist = (const unsigned char *) label->vp_nvlist + 4;
error = nvlist_find(nvlist, ZPOOL_CONFIG_POOL_TXG,
error = nvlist_find(tmp, ZPOOL_CONFIG_POOL_TXG,
DATA_TYPE_UINT64, NULL, &label_txg, NULL);
if (error != 0 || label_txg == 0) {
memcpy(nvl, nvlist, nvl_size);
nvlist_destroy(nvl);
nvl = tmp;
goto done;
}
if (label_txg <= txg && label_txg > best_txg) {
best_txg = label_txg;
memcpy(nvl, nvlist, nvl_size);
nvlist_destroy(nvl);
nvl = tmp;
tmp = NULL;
/*
* Use asize from pool config. We need this
* because we can get bad value from BIOS.
*/
if (nvlist_find(nvlist, ZPOOL_CONFIG_ASIZE,
if (nvlist_find(nvl, ZPOOL_CONFIG_ASIZE,
DATA_TYPE_UINT64, NULL, &asize, NULL) == 0) {
vd->v_psize = asize +
VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE;
}
}
nvlist_destroy(tmp);
}
if (best_txg == 0) {
free(nvl);
nvlist_destroy(nvl);
nvl = NULL;
}
done:
@ -1914,12 +1688,11 @@ vdev_probe(vdev_phys_read_t *_read, void *read_priv, spa_t **spap)
vdev_t vtmp;
spa_t *spa;
vdev_t *vdev;
unsigned char *nvlist;
nvlist_t *nvl;
uint64_t val;
uint64_t guid, vdev_children;
uint64_t pool_txg, pool_guid;
const char *pool_name;
const unsigned char *features;
int rc, namelen;
/*
@ -1936,54 +1709,53 @@ vdev_probe(vdev_phys_read_t *_read, void *read_priv, spa_t **spap)
if (vtmp.v_psize < SPA_MINDEVSIZE)
return (EIO);
nvlist = vdev_label_read_config(&vtmp, UINT64_MAX);
if (nvlist == NULL)
nvl = vdev_label_read_config(&vtmp, UINT64_MAX);
if (nvl == NULL)
return (EIO);
if (nvlist_find(nvlist, ZPOOL_CONFIG_VERSION, DATA_TYPE_UINT64,
if (nvlist_find(nvl, ZPOOL_CONFIG_VERSION, DATA_TYPE_UINT64,
NULL, &val, NULL) != 0) {
free(nvlist);
nvlist_destroy(nvl);
return (EIO);
}
if (!SPA_VERSION_IS_SUPPORTED(val)) {
printf("ZFS: unsupported ZFS version %u (should be %u)\n",
(unsigned)val, (unsigned)SPA_VERSION);
free(nvlist);
nvlist_destroy(nvl);
return (EIO);
}
/* Check ZFS features for read */
if (nvlist_find(nvlist, ZPOOL_CONFIG_FEATURES_FOR_READ,
DATA_TYPE_NVLIST, NULL, &features, NULL) == 0 &&
nvlist_check_features_for_read(features) != 0) {
free(nvlist);
rc = nvlist_check_features_for_read(nvl);
if (rc != 0) {
nvlist_destroy(nvl);
return (EIO);
}
if (nvlist_find(nvlist, ZPOOL_CONFIG_POOL_STATE, DATA_TYPE_UINT64,
if (nvlist_find(nvl, ZPOOL_CONFIG_POOL_STATE, DATA_TYPE_UINT64,
NULL, &val, NULL) != 0) {
free(nvlist);
nvlist_destroy(nvl);
return (EIO);
}
if (val == POOL_STATE_DESTROYED) {
/* We boot from pools in any state except destroyed. */
free(nvlist);
nvlist_destroy(nvl);
return (EIO);
}
if (nvlist_find(nvlist, ZPOOL_CONFIG_POOL_TXG, DATA_TYPE_UINT64,
if (nvlist_find(nvl, ZPOOL_CONFIG_POOL_TXG, DATA_TYPE_UINT64,
NULL, &pool_txg, NULL) != 0 ||
nvlist_find(nvlist, ZPOOL_CONFIG_POOL_GUID, DATA_TYPE_UINT64,
nvlist_find(nvl, ZPOOL_CONFIG_POOL_GUID, DATA_TYPE_UINT64,
NULL, &pool_guid, NULL) != 0 ||
nvlist_find(nvlist, ZPOOL_CONFIG_POOL_NAME, DATA_TYPE_STRING,
nvlist_find(nvl, ZPOOL_CONFIG_POOL_NAME, DATA_TYPE_STRING,
NULL, &pool_name, &namelen) != 0) {
/*
* Cache and spare devices end up here - just ignore
* them.
*/
free(nvlist);
nvlist_destroy(nvl);
return (EIO);
}
@ -1994,11 +1766,11 @@ vdev_probe(vdev_phys_read_t *_read, void *read_priv, spa_t **spap)
if (spa == NULL) {
char *name;
nvlist_find(nvlist, ZPOOL_CONFIG_VDEV_CHILDREN,
nvlist_find(nvl, ZPOOL_CONFIG_VDEV_CHILDREN,
DATA_TYPE_UINT64, NULL, &vdev_children, NULL);
name = malloc(namelen + 1);
if (name == NULL) {
free(nvlist);
nvlist_destroy(nvl);
return (ENOMEM);
}
bcopy(pool_name, name, namelen);
@ -2006,7 +1778,7 @@ vdev_probe(vdev_phys_read_t *_read, void *read_priv, spa_t **spap)
spa = spa_create(pool_guid, name);
free(name);
if (spa == NULL) {
free(nvlist);
nvlist_destroy(nvl);
return (ENOMEM);
}
spa->spa_root_vdev->v_nchildren = vdev_children;
@ -2020,20 +1792,20 @@ vdev_probe(vdev_phys_read_t *_read, void *read_priv, spa_t **spap)
* be some kind of alias (overlapping slices, dangerously dedicated
* disks etc).
*/
if (nvlist_find(nvlist, ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64,
if (nvlist_find(nvl, ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64,
NULL, &guid, NULL) != 0) {
free(nvlist);
nvlist_destroy(nvl);
return (EIO);
}
vdev = vdev_find(guid);
/* Has this vdev already been inited? */
if (vdev && vdev->v_phys_read) {
free(nvlist);
nvlist_destroy(nvl);
return (EIO);
}
rc = vdev_init_from_label(spa, nvlist);
free(nvlist);
rc = vdev_init_from_label(spa, nvl);
nvlist_destroy(nvl);
if (rc != 0)
return (rc);
@ -2211,6 +1983,8 @@ zio_read(const spa_t *spa, const blkptr_t *bp, void *buf)
BP_GET_PSIZE(bp), buf, BP_GET_LSIZE(bp));
else if (size != BP_GET_PSIZE(bp))
bcopy(pbuf, buf, BP_GET_PSIZE(bp));
} else {
printf("zio_read error: %d\n", error);
}
if (buf != pbuf)
free(pbuf);
@ -3307,7 +3081,7 @@ check_mos_features(const spa_t *spa)
}
static int
load_nvlist(spa_t *spa, uint64_t obj, unsigned char **value)
load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value)
{
dnode_phys_t dir;
size_t size;
@ -3336,7 +3110,8 @@ load_nvlist(spa_t *spa, uint64_t obj, unsigned char **value)
nv = NULL;
return (rc);
}
*value = nv;
*value = nvlist_import(nv + 4, nv[0], nv[1]);
free(nv);
return (rc);
}
@ -3345,7 +3120,7 @@ zfs_spa_init(spa_t *spa)
{
dnode_phys_t dir;
uint64_t config_object;
unsigned char *nvlist;
nvlist_t *nvlist;
int rc;
if (zio_read(spa, &spa->spa_uberblock.ub_rootbp, &spa->spa_mos)) {
@ -3383,13 +3158,12 @@ zfs_spa_init(spa_t *spa)
rc = load_nvlist(spa, config_object, &nvlist);
if (rc != 0)
return (rc);
/*
* Update vdevs from MOS config. Note, we do skip encoding bytes
* here. See also vdev_label_read_config().
*/
rc = vdev_init_from_nvlist(spa, nvlist + 4);
free(nvlist);
rc = vdev_init_from_nvlist(spa, nvlist);
nvlist_destroy(nvlist);
return (rc);
}

View file

@ -136,6 +136,7 @@ CFLAGS+= -DLOADER_MBR_SUPPORT
CFLAGS+= -DLOADER_ZFS_SUPPORT
CFLAGS+= -I${ZFSSRC}
CFLAGS+= -I${SYSDIR}/cddl/boot/zfs
CFLAGS+= -I${SYSDIR}/cddl/contrib/opensolaris/uts/common
SRCS+= zfs_cmd.c
.endif

View file

@ -34,6 +34,7 @@ SRCS+= vers.c
CFLAGS+= -Wall
CFLAGS+= -I${BOOTSRC}/userboot
CFLAGS+= -I${SYSDIR}/cddl/contrib/opensolaris/uts/common
CWARNFLAGS.main.c += -Wno-implicit-function-declaration
LDFLAGS+= -nostdlib -Wl,-Bsymbolic

View file

@ -56,9 +56,16 @@
* Copyright 2013 by Saso Kiselkov. All rights reserved.
*/
/*
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2020 by Delphix. All rights reserved.
*/
#include <sys/queue.h>
#include <sys/list.h>
#include <bootstrap.h>
#ifndef _ZFSIMPL_H_
#define _ZFSIMPL_H_
#define MAXNAMELEN 256
#define _NOTE(s)
@ -493,7 +500,7 @@ typedef struct zio_gbh {
#define VDEV_RAIDZ_MAXPARITY 3
#define VDEV_PAD_SIZE (8 << 10)
/* 2 padding areas (vl_pad1 and vl_pad2) to skip */
/* 2 padding areas (vl_pad1 and vl_be) to skip */
/* Parenthesized so the macro expands safely inside larger expressions. */
#define	VDEV_SKIP_SIZE		(VDEV_PAD_SIZE * 2)
#define VDEV_PHYS_SIZE (112 << 10)
#define VDEV_UBERBLOCK_RING (128 << 10)
@ -519,9 +526,29 @@ typedef struct vdev_phys {
zio_eck_t vp_zbt;
} vdev_phys_t;
/*
 * Encodings of the bootenv file inside the envblock.
 */
typedef enum vbe_vers {
	VB_RAW = 0,	/* bootenv file stored as ASCII text */
	VB_NVLIST = 1	/* bootenv file converted to an nvlist, then packed */
} vbe_vers_t;
/*
 * Boot environment block.  The payload array is sized so that the version
 * word, the payload and the trailing zio_eck_t together fill exactly
 * VDEV_PAD_SIZE bytes; the CTASSERT below enforces that at compile time.
 */
typedef struct vdev_boot_envblock {
/* NOTE(review): presumably holds a vbe_vers_t value -- confirm with users. */
uint64_t vbe_version;
/* Payload: whatever is left of the pad area after version and trailer. */
char vbe_bootenv[VDEV_PAD_SIZE - sizeof (uint64_t) -
sizeof (zio_eck_t)];
/* Trailer of type zio_eck_t, placed last in the block. */
zio_eck_t vbe_zbt;
} vdev_boot_envblock_t;
/* Build fails if the struct is not exactly VDEV_PAD_SIZE bytes. */
CTASSERT(sizeof (vdev_boot_envblock_t) == VDEV_PAD_SIZE);
typedef struct vdev_label {
char vl_pad1[VDEV_PAD_SIZE]; /* 8K */
char vl_pad2[VDEV_PAD_SIZE]; /* 8K */
vdev_boot_envblock_t vl_be; /* 8K */
vdev_phys_t vl_vdev_phys; /* 112K */
char vl_uberblock[VDEV_UBERBLOCK_RING]; /* 128K */
} vdev_label_t; /* 256K total */
@ -1811,3 +1838,5 @@ typedef struct zio {
} zio_t;
static void decode_embedded_bp_compressed(const blkptr_t *, void *);
#endif /* _ZFSIMPL_H_ */